Source: https://github.com/markziemann/background/

Introduction

Here, we look at the sensitivity of pathway analysis of RNA-seq data.

suppressPackageStartupMessages({
  library("DESeq2")
  library("eulerr")
  library("HGNChelper")
  library("mitch")
  library("kableExtra")
  library("beeswarm")
  library("gplots")
  library("gridExtra")
  library("png")
  library("fgsea")
  library("parallel")
  library("RhpcBLASctl")
  library("edgeR")
  library("vioplot")
})

CORES=16

Load pathway data

Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.

gs <- gmt_import("c2.cp.reactome.v2023.2.Hs.symbols.gmt")

names(gs) <- gsub("REACTOME_","",names(gs))
names(gs) <- gsub("_"," ",names(gs))

Load RNA expression

From GEO https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE158420 GSE158420_counts.txt.gz

if (!file.exists("GSE158420_counts.txt.gz")) {
  download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158420&format=file&file=GSE158420%5Fcounts%2Etxt%2Egz",
    destfile="GSE158420_counts.txt.gz")
}

x <- read.table("GSE158420_counts.txt.gz",row.names=NULL)
dim(x)

## [1] 18747    75

x <- aggregate(. ~ row.names,x,sum)
dim(x)

## [1] 18745    75

#RUFKM
head(x)

##   row.names X10N X10T X11N X11T X12N X12T X14N X14T X15N X15T X16N X16T X17N
## 1     1-Dec    0   31    0    1    0    1    0    0    0    0    0    0    0
## 2     1-Mar  431  461  478   98  862  635  489  499  543  608  707  498  700
## 3     1-Sep  160  131  280  131  132  383  381  124  114  217  258  886  302
## 4    10-Mar  286  217  130   58    2   18   19   39  417   68   12   90    9
## 5    10-Sep 3541 4311 2174 3675 2856 1864 3306 1546 3507 2483 3568 1294 3658
## 6    11-Mar    0   40    6    1    0    0    0    0    0    1    0    0    0
##   X17T X18N X18T X19N X19T X20N X20T X21N X21T X22N X22T X23N X23T X24N X24T
## 1    1    0   79    0   18    0    1    0    1    0    0    0    1    0    2
## 2  411  607  446  608  463  361  488  615  350  673 1838  297  262  762  529
## 3  162  133  279  138   88  239  533  212   53  258  126  197  229  137  236
## 4  122  123  313   70  127 1481 1010   97    7   94   21  588  694   92   24
## 5  542 3337 1050 4527 1865 2656 1314 4449 1060 3196  400 3499 1709 3280 3528
## 6    2    1   38    0   18    0    0    1    3    0    3    1    2    0    5
##   X25N X25T X26N X26T X27N X27T X28N X28T X29N X29T  X2N  X2T X30N X30T X31N
## 1    1    0    0    3    0    2    0    2    0    0    0    0    0    0    0
## 2  534  860  451 1110  798  380  572  336  289  514  575  257  464  483  535
## 3  132   66   98   56  573  190  211   75   79  205  151  147  183  144  212
## 4   87   23  126  368  192  114  441    5 2242   19   23   80  616   24  589
## 5 3463 2832 2893 3445 2442 3703 3250 5465 2028 1971 3502 2354 2605  934 3291
## 6    4    3    0    3    0    4    0    0    2    1    0    0    1    3    1
##   X31T X32N X32T X33N X33T X34N X34T X35N X35T X36N X36T X37N X37T X38N X38T
## 1    2    0    0    0    0    1    0    0    1    0    1    0    0    0    4
## 2  478  311  280  630  437  713  334  643  789  286  339  488  320  427  507
## 3  144  302 1086  231  363  185  290  125  199   61  355   59  121   92  203
## 4   28   48   27    4   11  956   13  640   63  654   19 1234 2076  380  185
## 5 4226 3135 1416 3072 1702 3356 2515 3034 2532 3678  502 1512 1324 2684 2542
## 6    5    0    0    0    1    1    0    0    1    1    6    0    6    0    9
##   X39N X39T  X3N  X3T  X4N  X4T  X5N  X5T  X6N  X6T  X7N  X7T  X8N  X8T  X9N
## 1    0    1    1    3    0    2    0    8    2    0    1    1   17    2    0
## 2  695  458  628  287  305  255  801  364  626  320  638  412  264  436  504
## 3  329  174  120  114  119   59  144   44  110  184  181  327  101  217  191
## 4   43  135   11   63  323   33   78   63   28    6  323   16  185  168   76
## 5 1770 1446 2921 1688 2692 2447 3367 4396 3830 2841 2623 3671 1614 2102 3507
## 6    1    2    0   10    2    3    0    1    0    0    2    1    7    9    0
##    X9T
## 1    2
## 2  317
## 3   48
## 4    7
## 5 2482
## 6    5

There are some gene name errors, so I will use HGNChelper to repair them.

human=x$row.names
new.hgnc.table <- readRDS("new.hgnc.table.rds")
fix <- checkGeneSymbols(human,map=new.hgnc.table)

## Warning in checkGeneSymbols(human, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.

## Warning in checkGeneSymbols(human, map = new.hgnc.table): x contains
## non-approved gene symbols

x$gene <- fix$Suggested.Symbol
x$row.names=NULL
x <- aggregate(. ~ gene,x,sum)
dim(x)

## [1] 18704    75

row.names(x)=x$gene
x$gene=NULL
head(x)

##          X10N X10T  X11N X11T   X12N  X12T   X14N  X14T   X15N   X15T   X16N
## A1BG      120  217   148   39     73    87    146    95     88     69     72
## A1CF        8   29    22    2      0     1      4     0      2      3      1
## A2M     89330 6096 39181 3282 126977 89739 108730 40523 160543 179385 155740
## A2ML1      16 1307    50 2729     26    19     31   393     20     16     21
## A3GALT2     5   49    21    3      2     0      1     5      1      4      5
## A4GALT    304 1138   350 1439    219   149    178   809    446    150    300
##          X16T   X17N  X17T   X18N  X18T   X19N X19T   X20N  X20T   X21N  X21T
## A1BG      121     63    30    113   279     43  137    152   129     56    19
## A1CF        0      3     9      4   104      0   46      6     3      1     1
## A2M     39899 129176 16554 109697 15992 125658 3504 106485 59452 194434 10592
## A2ML1      17     15   114     13   591     10 5830     16    38     15    29
## A3GALT2     4      2     3     13    76      3   53     17     5     14     6
## A4GALT    249    203   286    296   787    209 1814    335   554    220   311
##           X22N X22T   X23N   X23T  X24N X24T   X25N X25T   X26N  X26T   X27N
## A1BG        92   12    106     85    68  101    107   15     50    46    110
## A1CF         1   10      2      5     3    1      3    7      2     5      1
## A2M     127707 7544 129002 107685 84494 8974 125875 3304 133114 23805 124283
## A2ML1       13  146     13     24    16  298     96 2730      7   257      9
## A3GALT2      1    6      4      1     8   11     17    0      4    13      9
## A4GALT     248  115    291    567   177  728    248  536    284   445    235
##          X27T   X28N  X28T  X29N  X29T    X2N   X2T   X30N  X30T   X31N X31T
## A1BG      155    143    64   204   125     82   117     97   127    115   39
## A1CF        1      2     4     3     4      0     0      2     6      1    1
## A2M     16776 163693 15056 56772 32450 110818 57261 242936 22835 157038 5604
## A2ML1    3891     74    67    19    50      8    11      8    28     31   93
## A3GALT2    16      6     1    11     6      6     6     19     1     10    8
## A4GALT   1059    586  3534   808   461    280  1505    723   236    442  481
##           X32N  X32T  X33N  X33T   X34N  X34T   X35N   X35T   X36N X36T  X37N
## A1BG        82   105   116   104    119    63    101    111     58   90   107
## A1CF         0     0     3     2      3     1      7      4      1    3     4
## A2M     158632 11656 64662 92680 122607 84313 190327 145504 148968 7674 38901
## A2ML1        6   130    42     9     17    31     22     33      9   33    24
## A3GALT2     14     3     6    10     10    12      8     14      1   47     5
## A4GALT     299    94   292   564    345   694    452    521    454  322   656
##          X37T  X38N   X38T   X39N  X39T   X3N  X3T    X4N  X4T   X5N  X5T   X6N
## A1BG       57    57    133    105   125    67   94     99  291    87   59    68
## A1CF        9     1     15      2     5     2   18      1    6     3   10     0
## A2M     65047 82116 123542 127464 91470 99241 9283 117914 5648 89583 2462 97346
## A2ML1      58    12     61     13    26    13 3833     57  499    37 1108     5
## A3GALT2     1    14     30      2    10    12   21     18   11     8    3     2
## A4GALT    949   588    934    209   473   359 1134    257  711   235  627   126
##          X6T   X7N   X7T   X8N   X8T    X9N  X9T
## A1BG      27   108    86   194    84     77    8
## A1CF       0     2     6    35     9      1    0
## A2M     9188 98328 29242 15384 26414 121946 4447
## A2ML1    632     8   329   158  1072     13 2879
## A3GALT2    1    16     9    57     9     11    3
## A4GALT  1176   225  2035   498   491    160 1470

MDS plot

patient <- head(as.character(unlist(lapply(1:ncol(x),function(i) {c(i,i)}))),ncol(x))
tumor <- rep(c(0,1),ncol(x)/2)
ss <- data.frame(patient,tumor)
rownames(ss) <- colnames(x)

mds <- cmdscale(dist(t(scale(x))))

cols <- gsub("1","pink",gsub("0","lightblue",tumor))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2", pch=16,col=cols,cex=2)

text(mds, labels=colnames(x),cex=0.7)

Stability - coefficient of variation

colcols <- as.character(as.numeric(grepl("T",colnames(x))))
colcols <- gsub("1","red",gsub("0","blue",colcols))

gspear <- cor(x,method="spearman")

heatmap.2( gspear, scale="none", ColSideColors=colcols ,
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Spearman - genes")
mtext("Blue=normal,Red=tumour")

xn <- x[,grep("N",colnames(x))]
nspear <- cor(xn,method="spearman")
summary(as.vector(nspear))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8960  0.9619  0.9722  0.9675  0.9797  1.0000

xt <- x[,grep("T",colnames(x))]
tspear <- cor(xt,method="spearman")
summary(as.vector(tspear))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7485  0.8733  0.9028  0.8997  0.9291  1.0000

# rank
xr <- apply(x,2,rank)

res <- apply(xr,2, function(z) {
  lapply(gs,function(set) {
    median(z[which(names(z) %in% set)])
  } )
})

res <- do.call(cbind,res)
dim(res)

## [1] 1692   74

res2 <- apply(res,2,as.numeric)
rownames(res2) <- rownames(res)
res <- res2
res_spear <- cor(res,method="spearman")

heatmap.2( res_spear, scale="none", ColSideColors=colcols ,
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Spearman - pathways")
mtext("Blue=normal,Red=tumour")

resn <- res[,grep("N",colnames(res))]
resnspear <- cor(resn,method="spearman")
summary(as.vector(resnspear))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9255  0.9584  0.9706  0.9683  0.9787  1.0000

rest <- res[,grep("T",colnames(x))]
restspear <- cor(rest,method="spearman")
summary(as.vector(restspear))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6643  0.8576  0.9068  0.8931  0.9352  1.0000

xl <- list("gene N"=as.vector(nspear), "gene T"=as.vector(tspear),
  "set N"=as.vector(resnspear),"set T"=as.vector(restspear))

vioplot(xl,main="Within group sample correlation",
  col=c("red","red","blue","blue"))
mtext("Spearman rho")

abline(h=median(as.vector(nspear)),lty=2,col="red")
abline(h=median(as.vector(tspear)),lty=2,col="red")
abline(h=median(as.vector(resnspear)),lty=2,col="blue")
abline(h=median(as.vector(restspear)),lty=2,col="blue")

par(mfrow=c(2,2))

gmds <- cmdscale(dist(t(xr)),k=10)/nrow(xr)
barplot(colSums(abs(gmds)),names.arg=1:10,ylab="Gene PCA magnitude")
SUM=round(sum(colSums(abs(gmds))),digits=1)
mtext(paste("Sum =",SUM))

plot(gmds,col=colcols,xlab="MDS1",ylab="MDS2",main="Gene rank MDS")
mtext("Blue=Normal,Red=Tumour")

gsmds <- cmdscale(dist(t(res)),k=10)/nrow(res)
barplot(colSums(abs(gsmds)),names.arg=1:10,ylab="Gene set PCA magnitude")
SUM=round(sum(colSums(abs(gsmds))),digits=1)
mtext(paste("Sum =",SUM))

plot(gsmds,col=colcols,xlab="MDS1",ylab="MDS2",main="Gene set rank MDS")
mtext("Blue=Normal,Red=Tumour")

par(mfrow=c(2,2))
gmdsn <- cmdscale(dist(t(xn)),k=10)/nrow(xn)
barplot(colSums(abs(gmdsn)),names.arg=1:10,main="Gene PCA magnitude - Normal")
SUM=round(sum(colSums(abs(gmdsn))),digits=1)
mtext(paste("Sum =",SUM))

gmdst <- cmdscale(dist(t(xt)),k=10)/nrow(xt)
barplot(colSums(abs(gmdst)),names.arg=1:10,main="Gene PCA magnitude - Tumor")
SUM=round(sum(colSums(abs(gmdst))),digits=1)
mtext(paste("Sum =",SUM))

gsmdsn <- cmdscale(dist(t(resn)),k=10)/nrow(resn)
barplot(colSums(abs(gsmdsn)),names.arg=1:10,main="Gene set PCA magnitude - Normal")
SUM=round(sum(colSums(abs(gsmdsn))),digits=1)
mtext(paste("Sum =",SUM))

gsmdst <- cmdscale(dist(t(rest)),k=10)/nrow(rest)
barplot(colSums(abs(gsmdst)),names.arg=1:10,main="Gene set PCA magnitude - Tumour")
SUM=round(sum(colSums(abs(gsmdst))),digits=1)
mtext(paste("Sum =",SUM))

par(mfrow=c(1,1))

Differential expression

Now do DESeq2

RhpcBLASctl::blas_set_num_threads(4) #set automatic cores to 1

dds <- DESeqDataSetFromMatrix(countData = x , colData=ss, design = ~ patient + tumor)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge_up <- rownames(subset(dge,log2FoldChange>0 & padj<0.05))
dge_dn <- rownames(subset(dge,log2FoldChange<0 & padj<0.05))
genes <- c(paste(dge_up,"up"),paste(dge_dn,"dn"))

sig <- subset(dge,padj<0.05)
sig1 <- subset(dge,padj<0.01)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste("mRNA", SIG , "DEGs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3,
  xlab="log2 base mean",
  ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 ,xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- rep(c(0,1),ncol(x)/2)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

FGSEA

dge$stat[which(is.na(dge$stat))] <- 1

stat <- dge$stat
names(stat) <- rownames(dge)

fres <- fgsea(pathways=gs,stats=stat,minSize=5, nproc=8)

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.57% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |===                                                                   |   5%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |=====                                                                 |   8%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |=======                                                               |  11%  |                                                                              |========                                                              |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |==========                                                            |  14%  |                                                                              |==========                                                            |  15%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  17%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |==============                                                        |  21%  |                                                                              |===============                                                       |  21%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  24%  |                                                                              |=================                                                     |  25%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |===================                                                   |  28%  |                                                                              |====================                                                  |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |=====================                                                 |  31%  |                                                                              |======================                                                |  31%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  34%  |                                                                              |========================                                              |  35%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |==========================                                            |  38%  |                                                                              |===========================                                           |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  40%  |                                                                              |============================                                          |  41%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |==============================                                        |  44%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |=================================                                     |  48%  |                                                                              |==================================                                    |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  56%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  61%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  62%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |==================================================                    |  71%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  77%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  79%  |                                                                              |========================================================              |  80%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |===========================================================           |  84%  |                                                                              |===========================================================           |  85%  |                                                                              |============================================================          |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |=============================================================         |  88%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  89%  |                                                                              |===============================================================       |  90%  |                                                                              |================================================================      |  91%  |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |====================================================================  |  98%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-50. You can
## set the `eps` argument to zero for better estimation.

fsig <- subset(fres,padj<0.05)
nrow(fsig)

## [1] 451

fres <- fres[order(-abs(fres$ES)),]

head(subset(fres,padj<0.05 & ES>0),20) %>%
  kbl(caption="mRNA upregulated pathways") %>%
  kable_styling("hover",full_width=FALSE)

mRNA upregulated pathways
pathway	pval	padj	log2err	ES	NES	size	leadingEdge
PHOSPHORYLATION OF EMI1	0.0000000	0.0000006	0.7195128	0.9745672	2.108535	6	CDC20, C….
TFAP2A ACTS AS A TRANSCRIPTIONAL REPRESSOR DURING RETINOIC ACID INDUCED CELL DIFFERENTIATION	0.0001119	0.0008191	0.5384341	0.9292476	1.877955	5	MYBL2, T….
G2 M DNA REPLICATION CHECKPOINT	0.0006800	0.0039083	0.4772708	0.8971791	1.813146	5	CCNB2, C….
UNWINDING OF DNA	0.0000000	0.0000001	0.7477397	0.8956446	2.424575	12	CDC45, G….
CONDENSATION OF PROMETAPHASE CHROMOSOMES	0.0000012	0.0000125	0.6435518	0.8637411	2.287884	11	NCAPH, C….
P75NTR NEGATIVELY REGULATES CELL CYCLE VIA SC1	0.0003487	0.0021588	0.4984931	0.8621706	1.865358	6	HDAC2, N….
TFAP2 AP 2 FAMILY REGULATES TRANSCRIPTION OF CELL CYCLE FACTORS	0.0022692	0.0114412	0.4317077	0.8599702	1.737949	5	TFAP2A, ….
VASOPRESSIN LIKE RECEPTORS	0.0013252	0.0069738	0.4550599	0.8321301	1.800364	6	OXT, AVP….
REGULATION OF GENE EXPRESSION IN ENDOCRINE COMMITTED NEUROG3 PROGENITOR CELLS	0.0060696	0.0260715	0.4070179	0.8211669	1.659530	5	NKX2-2, ….
POLO LIKE KINASE MEDIATED EVENTS	0.0000003	0.0000039	0.6749629	0.8057940	2.373274	16	MYBL2, C….
DNA REPLICATION INITIATION	0.0008595	0.0047588	0.4772708	0.8028437	1.909841	8	POLE2, P….
PROTON COUPLED MONOCARBOXYLATE TRANSPORT	0.0034679	0.0164042	0.4317077	0.8003236	1.731548	6	SLC16A1,….
E2F ENABLED INHIBITION OF PRE REPLICATION COMPLEX FORMATION	0.0004181	0.0025325	0.4984931	0.7923224	1.939597	9	ORC6, CC….
REPLACEMENT OF PROTAMINES BY NUCLEOSOMES IN THE MALE PRONUCLEUS	0.0000000	0.0000000	1.0276699	0.7849609	2.894875	42	H2BC17, ….
CDC6 ASSOCIATION WITH THE ORC ORIGIN COMPLEX	0.0021224	0.0107977	0.4317077	0.7759047	1.845757	8	CDC6, OR….
DNA STRAND ELONGATION	0.0000000	0.0000000	0.8513391	0.7716566	2.710256	32	CDC45, G….
ASSEMBLY OF THE ORC COMPLEX AT THE ORIGIN OF REPLICATION	0.0000000	0.0000000	1.2378967	0.7630369	3.127401	68	H3C13, O….
DEPOSITION OF NEW CENPA CONTAINING NUCLEOSOMES AT THE CENTROMERE	0.0000000	0.0000000	1.2295041	0.7612403	3.120038	68	HJURP, H….
ACTIVATION OF ATR IN RESPONSE TO REPLICATION STRESS	0.0000000	0.0000000	0.8870750	0.7591156	2.744684	37	CDC6, CD….
DNA METHYLATION	0.0000000	0.0000000	1.1690700	0.7587791	3.040084	64	H3C13, H….

head(subset(fres,padj<0.05 & ES<0),20) %>%
  kbl(caption="mRNA downregulated pathways") %>%
  kable_styling("hover",full_width=FALSE)

mRNA downregulated pathways
pathway	pval	padj	log2err	ES	NES	size	leadingEdge
DISEASES ASSOCIATED WITH SURFACTANT METABOLISM	0.0000007	0.0000077	0.6594444	-0.8906262	-2.404983	10	SFTPC, S….
DEFECTIVE CSF2RB CAUSES SMDP5	0.0000181	0.0001555	0.5756103	-0.8905215	-2.226438	8	SFTPC, S….
NOSTRIN MEDIATED ENOS TRAFFICKING	0.0006862	0.0039172	0.4772708	-0.8868488	-1.914604	5	CAV1, NO….
ERYTHROCYTES TAKE UP OXYGEN AND RELEASE CARBON DIOXIDE	0.0000754	0.0005743	0.5384341	-0.8262034	-2.178705	9	CA4, HBA….
SYNTHESIS OF LIPOXINS LX	0.0016936	0.0087757	0.4550599	-0.8189498	-1.871383	6	ALOX5AP,….
BIOSYNTHESIS OF EPA DERIVED SPMS	0.0048752	0.0216620	0.4070179	-0.7827040	-1.788557	6	LTA4H, A….
RSK ACTIVATION	0.0059801	0.0257556	0.4070179	-0.7739407	-1.845961	7	RPS6KA1,….
MUCOPOLYSACCHARIDOSES	0.0002282	0.0015496	0.5188481	-0.7728430	-2.144261	11	HYAL1, I….
PD 1 SIGNALING	0.0000001	0.0000007	0.7195128	-0.7712443	-2.552518	21	HLA-DRB5….
ERYTHROCYTES TAKE UP CARBON DIOXIDE AND RELEASE OXYGEN	0.0000719	0.0005507	0.5384341	-0.7568012	-2.216958	13	CA4, HBA….
ACYL CHAIN REMODELING OF DAG AND TAG	0.0108588	0.0417492	0.3807304	-0.7478565	-1.783746	7	PNPLA2, MGLL
LOSS OF FUNCTION OF SMAD2 3 IN CANCER	0.0115219	0.0439974	0.3807304	-0.7430239	-1.772220	7	TGFBR2, ….
NEGATIVE FEEDBACK REGULATION OF MAPK PATHWAY	0.0125982	0.0473556	0.3807304	-0.7391251	-1.688975	6	MAPK3, B….
SIGNALING BY TGF BETA RECEPTOR COMPLEX IN CANCER	0.0091783	0.0368005	0.3807304	-0.7295896	-1.824084	8	TGFBR2, ….
REGULATION OF CYTOSKELETAL REMODELING AND CELL SPREADING BY IPP COMPLEX COMPONENTS	0.0095151	0.0376197	0.3807304	-0.7277154	-1.819398	8	ARHGEF6,….
MAPK1 ERK2 ACTIVATION	0.0042291	0.0193528	0.4070179	-0.7165852	-1.889641	9	IL6R, IL….
NR1H2 NR1H3 REGULATE GENE EXPRESSION TO CONTROL BILE ACID HOMEOSTASIS	0.0042291	0.0193528	0.4070179	-0.7164348	-1.889244	9	NR1H2, N….
REGULATION OF FOXO TRANSCRIPTIONAL ACTIVITY BY ACETYLATION	0.0068155	0.0285505	0.4070179	-0.6975844	-1.883707	10	TXNIP, F….
AKT PHOSPHORYLATES TARGETS IN THE NUCLEUS	0.0086650	0.0349926	0.3807304	-0.6898235	-1.819070	9	FOXO4, N….
FCGR ACTIVATION	0.0028031	0.0136429	0.4317077	-0.6818157	-1.955432	12	FGR, FCG….

fsets <- paste(fsig$pathway,as.character(fsig$ES > 0))
str(fsets)

##  chr [1:451] "ABC TRANSPORTER DISORDERS TRUE" ...

fsig1<- subset(fres,padj<0.01)
fsets1 <- paste(fsig1$pathway,as.character(fsig1$ES > 0))

NRES=nrow(fres)
NSIG=nrow(fsig)
NUP=nrow(subset(fsig,ES>0))
NDN=nrow(subset(fsig,ES<0))
HEADER=paste(NRES,"pathways,",NSIG,"with FDR<0.05,",NUP,"up and",NDN,"down")
plot(fres$ES,-log10(fres$pval),pch=19,cex=0.6,col="gray",
  xlab="ES",ylab="-log10 p-value",main="FGSEA")
points(fsig$ES,-log10(fsig$pval),cex=0.6,col="red",pch=19)
mtext(HEADER)

FORA

sigup <- rownames(subset(dge,padj<0.05 & log2FoldChange>0))
sigdn <- rownames(subset(dge,padj<0.05 & log2FoldChange<0))
bg <- rownames(dge)

oresup <- as.data.frame(fora(pathways=gs,genes=sigup,universe=bg,minSize=5))
oresup$es <- (oresup$overlap / length(sigup) ) / (oresup$size / length(bg))
head(oresup,10)

##                           pathway         pval         padj overlap size
## 1               METABOLISM OF RNA 5.768129e-54 9.713530e-51     481  709
## 2                      CELL CYCLE 1.556582e-40 1.310642e-37     436  675
## 3              CELL CYCLE MITOTIC 3.816447e-35 2.142299e-32     358  547
## 4          CELL CYCLE CHECKPOINTS 1.188430e-31 5.003290e-29     208  282
## 5                 DNA REPLICATION 5.502834e-27 1.850513e-24     146  187
## 6                     TRANSLATION 6.593276e-27 1.850513e-24     203  287
## 7                         M PHASE 8.492242e-26 2.042991e-23     263  403
## 8                G2 M CHECKPOINTS 1.294813e-25 2.725582e-23     131  165
## 9  DNA REPLICATION PRE INITIATION 1.503811e-24 2.813797e-22     126  159
## 10                RRNA PROCESSING 1.113509e-21 1.875150e-19     144  197
##    overlapGenes       es
## 1  A1CF, AD.... 1.711284
## 2  AHCTF1, .... 1.629319
## 3  AHCTF1, .... 1.650893
## 4  AHCTF1, .... 1.860534
## 5  ANAPC1, .... 1.969403
## 6  AARS1, A.... 1.784175
## 7  AHCTF1, .... 1.646168
## 8  ATR, ATR.... 2.002676
## 9  ANAPC1, .... 1.998926
## 10 BMS1, BO.... 1.843825

oresdn <- as.data.frame(fora(pathways=gs,genes=sigdn,universe=bg,minSize=5))
oresdn$es <- (oresdn$overlap / length(sigdn) ) / (oresdn$size / length(bg))
head(oresup,10)

##                           pathway         pval         padj overlap size
## 1               METABOLISM OF RNA 5.768129e-54 9.713530e-51     481  709
## 2                      CELL CYCLE 1.556582e-40 1.310642e-37     436  675
## 3              CELL CYCLE MITOTIC 3.816447e-35 2.142299e-32     358  547
## 4          CELL CYCLE CHECKPOINTS 1.188430e-31 5.003290e-29     208  282
## 5                 DNA REPLICATION 5.502834e-27 1.850513e-24     146  187
## 6                     TRANSLATION 6.593276e-27 1.850513e-24     203  287
## 7                         M PHASE 8.492242e-26 2.042991e-23     263  403
## 8                G2 M CHECKPOINTS 1.294813e-25 2.725582e-23     131  165
## 9  DNA REPLICATION PRE INITIATION 1.503811e-24 2.813797e-22     126  159
## 10                RRNA PROCESSING 1.113509e-21 1.875150e-19     144  197
##    overlapGenes       es
## 1  A1CF, AD.... 1.711284
## 2  AHCTF1, .... 1.629319
## 3  AHCTF1, .... 1.650893
## 4  AHCTF1, .... 1.860534
## 5  ANAPC1, .... 1.969403
## 6  AARS1, A.... 1.784175
## 7  AHCTF1, .... 1.646168
## 8  ATR, ATR.... 2.002676
## 9  ANAPC1, .... 1.998926
## 10 BMS1, BO.... 1.843825

oresup <- subset(oresup,padj<0.05)
oresup <- paste(oresup$pathway,"TRUE")

oresdn <- subset(oresdn,padj<0.05)
oresdn <- paste(oresdn$pathway,"FALSE")

osets <- c(oresup,oresdn)
str(osets)

##  chr [1:375] "METABOLISM OF RNA TRUE" "CELL CYCLE TRUE" ...

oresup <- as.data.frame(fora(pathways=gs,genes=sigup,universe=bg,minSize=5))
oresup$es <- (oresup$overlap / length(sigup) ) / (oresup$size / length(bg))

oresdn <- as.data.frame(fora(pathways=gs,genes=sigdn,universe=bg,minSize=5))
oresdn$es <- (oresdn$overlap / length(sigdn) ) / (oresdn$size / length(bg))

oresup <- subset(oresup,padj<0.01)
oresup <- paste(oresup$pathway,"TRUE")

oresdn <- subset(oresdn,padj<0.01)
oresdn <- paste(oresdn$pathway,"FALSE")

osets1 <- c(oresup,oresdn)
str(osets1)

##  chr [1:287] "METABOLISM OF RNA TRUE" "CELL CYCLE TRUE" ...

Assess the effect of gene number on pathway results.

sizes <- c(100,200,500,1000,2000,3000,4000,5000,6000,7000,8000)

ores <- lapply(sizes, function(size) {
  sigup <- head(rownames(subset(dge,padj<0.05 & log2FoldChange>0)),size)
  sigdn <- head(rownames(subset(dge,padj<0.05 & log2FoldChange<0)),size)
  bg <- rownames(dge)

  oresup <- as.data.frame(fora(pathways=gs,genes=sigup,universe=bg,minSize=5))
  oresup$es <- (oresup$overlap / length(sigup) ) / (oresup$size / length(bg))
  nup <- nrow(subset(oresup,padj<0.05))

  oresdn <- as.data.frame(fora(pathways=gs,genes=sigdn,universe=bg,minSize=5))
  oresdn$es <- (oresdn$overlap / length(sigdn) ) / (oresdn$size / length(bg))
  ndn <- nrow(subset(oresdn,padj<0.05))

  return(c(nup,ndn))
})

ores <- do.call(rbind,ores)
rownames(ores) <- sizes
colnames(ores) <- c("up","dn")
ores <- as.data.frame(ores)
ores$tot <- ores$up + ores$dn
tot <- ores$tot
names(tot) <- sizes
par(mar=c(mar = c(5.1, 5.1, 4.1, 2.1)))
barplot(tot,horiz=TRUE,las=1,xlab="no. pathways FDR<0.05",ylab="no. genes selected")

Full counts

Downsample DESeq2

FDR 5%

SAMPLESIZES=c(2,3,5,10,15,20,25,30)
#SAMPLESIZES=c(3,6,10,22)
dres <- lapply(SAMPLESIZES,function(n) {
  SEEDS <- seq(100,5000,100) # 50 repetitions
  dsres <- mclapply(SEEDS, function(SEED) {
    set.seed(SEED)
    mysample <- sample(1:37,n,replace=FALSE)*2
    mysample <- c(mysample,(mysample-1))
    mysample <- mysample[order(mysample)]
    ss2 <- ss[mysample,]
    x2 <- x[,mysample]
    x2 <- x2[rowMeans(x2)>=10,]

    dds <- DESeqDataSetFromMatrix(countData = x2 , colData=ss2, design = ~ patient + tumor)
    res <- DESeq(dds)
    z <- results(res)
    vsd <- vst(dds, blind=FALSE)
    zz <-cbind(as.data.frame(z),assay(vsd))
    dge <- as.data.frame(zz[order(zz$pvalue),])

    dge_up <- rownames(subset(dge,log2FoldChange>0 & padj<0.05))
    dge_dn <- rownames(subset(dge,log2FoldChange<0 & padj<0.05))
    genes2 <- c(paste(dge_up,"up"),paste(dge_dn,"dn"))

    TOT=length(genes2)
    TP=length(which(genes2 %in% genes))
    FP=length(which(!genes2 %in% genes))
    FN=length(which(!genes %in% genes2))
    RES=c("TOT"=TOT,"TP"=TP,"FP"=FP,"FN"=FN)
    RES
  },mc.cores=16)
  do.call(rbind,dsres)
})

ssres <- dres
par(mfrow=c(3,1))

dots <- lapply(ssres,function(x) { x[,"TP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    87.0   537.5  1312.0  1387.3  1895.0  3622.0 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      36    1198    1831    2071    2884    4540 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     622    2064    3620    3455    4678    6113 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4087    5680    6583    6439    7250    8084 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6774    7564    8078    8027    8437    9254 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7854    8880    9178    9156    9449    9987 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9396    9725    9888    9904   10047   10548 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10086   10344   10524   10493   10628   10898

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. consistent",xlab="sample size",cex=0,main="DESeq2 5% FDR",ylim=c(0,nrow(sig))) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)
abline(h=nrow(sig))

dots <- lapply(ssres,function(x) { x[,"FP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   22.25   64.50   72.32  108.00  271.00 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   39.00   53.00   87.52  125.50  357.00 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     9.0    49.0    96.0   116.3   147.5   500.0 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    59.0   142.5   194.5   201.9   256.2   471.0 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    90.0   159.2   218.5   231.2   305.8   502.0 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   105.0   201.2   231.5   257.6   311.5   675.0 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   124.0   196.2   244.5   261.8   329.2   460.0 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   118.0   222.0   245.0   270.5   299.2   609.0

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. inconsistent",xlab="sample size",cex=0) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)

dots <- lapply(ssres,function(x) { x[,"TP"] })
TPdots <- do.call(rbind,dots)
dots <- lapply(ssres,function(x) { x[,"FP"] })
FPdots <- do.call(rbind,dots)
ratio <- t(FPdots / (TPdots + FPdots))
ratio <- lapply(1:ncol(ratio),function(i) {ratio[,i]})
lapply(ratio,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01250 0.03029 0.04288 0.05598 0.06400 0.20000 
## 
## [[2]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.009335 0.023119 0.035135 0.043507 0.052306 0.199739 
## 
## [[3]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.007779 0.019386 0.028803 0.031149 0.039594 0.076748 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01190 0.02288 0.02780 0.02964 0.03493 0.06199 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01184 0.02024 0.02613 0.02763 0.03479 0.05731 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01099 0.02097 0.02473 0.02719 0.03251 0.07138 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01266 0.01952 0.02410 0.02563 0.03185 0.04561 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01127 0.02041 0.02302 0.02507 0.02730 0.05605

boxplot(ratio,names=SAMPLESIZES,col="white",ylab="proportion inconsistent",xlab="sample size",cex=0,ylim=c(0,0.2)) ; grid()
beeswarm(ratio,add=TRUE,pch=1,cex=0.6)

FDR 1%

SAMPLESIZES=c(2,3,5,10,15,20,25,30)
#SAMPLESIZES=c(3,6,10,22)
dres1 <- lapply(SAMPLESIZES,function(n) {
  SEEDS <- seq(100,5000,100) # 50 repetitions
  dsres <- mclapply(SEEDS, function(SEED) {
    set.seed(SEED)
    mysample <- sample(1:37,n,replace=FALSE)*2
    mysample <- c(mysample,(mysample-1))
    mysample <- mysample[order(mysample)]
    ss2 <- ss[mysample,]
    x2 <- x[,mysample]
    x2 <- x2[rowMeans(x2)>=10,]

    dds <- DESeqDataSetFromMatrix(countData = x2 , colData=ss2, design = ~ patient + tumor)
    res <- DESeq(dds)
    z <- results(res)
    vsd <- vst(dds, blind=FALSE)
    zz <-cbind(as.data.frame(z),assay(vsd))
    dge <- as.data.frame(zz[order(zz$pvalue),])

    dge_up <- rownames(subset(dge,log2FoldChange>0 & padj<0.01))
    dge_dn <- rownames(subset(dge,log2FoldChange<0 & padj<0.01))
    genes2 <- c(paste(dge_up,"up"),paste(dge_dn,"dn"))

    TOT=length(genes2)
    TP=length(which(genes2 %in% genes))
    FP=length(which(!genes2 %in% genes))
    FN=length(which(!genes %in% genes2))
    RES=c("TOT"=TOT,"TP"=TP,"FP"=FP,"FN"=FN)
    RES
  },mc.cores=16)
  do.call(rbind,dsres)
})

ssres <- dres1
par(mfrow=c(3,1))

dots <- lapply(ssres,function(x) { x[,"TP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    45.0   289.8   697.5   868.4  1237.0  2512.0 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    21.0   601.8  1082.5  1274.2  1841.2  3193.0 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     248    1097    2292    2207    3095    4669 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2315    3836    4754    4636    5548    6439 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4680    5690    6262    6219    6717    7821 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5822    7268    7576    7554    7998    8683 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7911    8195    8448    8452    8615    9369 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8668    8986    9168    9171    9339    9742

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. consistent",xlab="sample size",cex=0,main="DESeq2 1% FDR",ylim=c(0,nrow(sig))) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)
abline(h=nrow(sig))

dots <- lapply(ssres,function(x) { x[,"FP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    7.25   27.00   32.74   48.00  146.00 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   13.00   18.50   32.68   47.75  171.00 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   10.50   30.00   38.44   47.00  227.00 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.00   35.00   45.50   52.38   62.75  185.00 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00   29.25   43.00   51.78   69.75  149.00 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.00   31.25   45.50   52.88   67.25  246.00 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   25.00   33.00   42.50   58.75  144.00 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   14.00   22.00   29.50   34.75  146.00

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. inconsistent",xlab="sample size",cex=0) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)

dots <- lapply(ssres,function(x) { x[,"TP"] })
TPdots <- do.call(rbind,dots)
dots <- lapply(ssres,function(x) { x[,"FP"] })
FPdots <- do.call(rbind,dots)
ratio <- t(FPdots / (TPdots + FPdots))
ratio <- lapply(1:ncol(ratio),function(i) {ratio[,i]})
lapply(ratio,summary)

## [[1]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.004082 0.018791 0.032998 0.041797 0.049795 0.190332 
## 
## [[2]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.004069 0.012121 0.018211 0.030602 0.035111 0.169065 
## 
## [[3]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001520 0.008255 0.013881 0.017660 0.022391 0.104444 
## 
## [[4]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.003044 0.006853 0.009970 0.010671 0.013320 0.031946 
## 
## [[5]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001353 0.004804 0.007270 0.008039 0.010079 0.021607 
## 
## [[6]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001683 0.004294 0.005588 0.006864 0.008114 0.031648 
## 
## [[7]]
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0003791 0.0031027 0.0038630 0.0049556 0.0067558 0.0174567 
## 
## [[8]]
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0002256 0.0015784 0.0023921 0.0031829 0.0037272 0.0156568

boxplot(ratio,names=SAMPLESIZES,col="white",ylab="proportion inconsistent",xlab="sample size",cex=0,ylim=c(0,0.2)) ; grid()
beeswarm(ratio,add=TRUE,pch=1,cex=0.6)

Downsample FGSEA

This will run the FGSEA analysis at group sizes of 2, 3, 6, 10, 16, 22 and 30, 50 times each, with pseudorandom seeds from 100 to 5000 jumping up by 100. FDR cutoff of 0.05.

RhpcBLASctl::blas_set_num_threads(1) #set automatic cores to 1

SAMPLESIZES=c(2,3,5,10,15,20,25,30)
#SAMPLESIZES=c(3,6,10,22)
fgres <- lapply(SAMPLESIZES,function(n) {
  SEEDS <- seq(100,5000,100) # 50 repetitions
  dsres <- mclapply(SEEDS, function(SEED) {
    set.seed(SEED)
    mysample <- sample(1:37,n,replace=FALSE)*2
    mysample <- c(mysample,(mysample-1))
    mysample <- mysample[order(mysample)]
    ss2 <- ss[mysample,]
    x2 <- x[,mysample]
    x2 <- x2[rowMeans(x2)>=10,]

    dds <- DESeqDataSetFromMatrix(countData = x2 , colData=ss2, design = ~ patient + tumor)
    res <- DESeq(dds)
    z <- results(res)
    vsd <- vst(dds, blind=FALSE)
    zz <-cbind(as.data.frame(z),assay(vsd))
    dge <- as.data.frame(zz[order(zz$pvalue),])

    dge$stat[which(is.na(dge$stat))] <- 1
    stat <- dge$stat
    names(stat) <- rownames(dge)
    fres <- fgsea(pathways=gs,stats=stat,minSize=5,nproc=3)
    fres <- fres[order(-abs(fres$ES)),]
    fsig <- subset(fres,padj<0.05)
    fsets2 <- paste(fsig$pathway,as.character(fsig$ES > 0))

    TOT=length(fsets2)
    TP=length(which(fsets2 %in% fsets))
    FP=length(which(!fsets2 %in% fsets))
    FN=length(which(!fsets %in% fsets2))
    RES=c("TOT"=TOT,"TP"=TP,"FP"=FP,"FN"=FN)
    RES
  },mc.cores=16)
  do.call(rbind,dsres)
})

ssres <- fgres
par(mfrow=c(3,1))

dots <- lapply(ssres,function(x) { x[,"TP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    31.0   214.0   278.0   258.3   306.8   358.0 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   129.0   255.8   306.0   288.0   332.5   360.0 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   206.0   307.2   343.5   325.4   353.8   386.0 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   248.0   334.2   361.5   354.8   378.5   399.0 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   271.0   357.2   375.0   370.2   387.0   421.0 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   329.0   372.2   387.5   383.3   400.5   417.0 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   352.0   384.2   393.0   393.6   406.0   419.0 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   373.0   397.5   406.0   404.5   411.5   427.0

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. consistent",xlab="sample size",cex=0,main="fgsea 5% FDR",ylim=c(0,nrow(fsig))) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)
abline(h=nrow(fsig))

dots <- lapply(ssres,function(x) { x[,"FP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   24.00   46.25   68.50   74.68   90.50  247.00 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   24.00   53.25   74.50   77.54   94.75  198.00 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.00   51.25   70.00   77.64   95.00  197.00 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   40.00   59.50   64.58   87.75  130.00 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   36.25   49.00   52.72   64.75  114.00 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   29.00   41.00   42.88   52.50  106.00 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.00   27.25   35.00   36.36   43.00   73.00 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.00   23.25   30.00   31.58   38.75   60.00

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. inconsistent",xlab="sample size",cex=0) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)

dots <- lapply(ssres,function(x) { x[,"TP"] })
TPdots <- do.call(rbind,dots)
dots <- lapply(ssres,function(x) { x[,"FP"] })
FPdots <- do.call(rbind,dots)
ratio <- t(FPdots / (TPdots + FPdots))
ratio <- lapply(1:ncol(ratio),function(i) {ratio[,i]})
lapply(ratio,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1049  0.1493  0.2149  0.2214  0.2620  0.5358 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.08224 0.15126 0.18958 0.20542 0.23371 0.45278 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.07874 0.14346 0.17052 0.18808 0.21641 0.46244 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0609  0.1009  0.1413  0.1493  0.1950  0.2562 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.05723 0.09460 0.11561 0.12171 0.14759 0.22800 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04638 0.06938 0.09486 0.09803 0.11370 0.20543 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.03297 0.06284 0.08117 0.08308 0.09924 0.15974 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.03085 0.05471 0.06785 0.07162 0.08562 0.12876

boxplot(ratio,names=SAMPLESIZES,col="white",ylab="proportion inconsistent",xlab="sample size",cex=0,ylim=c(0,0.5)) ; grid()
beeswarm(ratio,add=TRUE,pch=1,cex=0.6)

FDR threshold of 0.01

RhpcBLASctl::blas_set_num_threads(1) #set automatic cores to 1

SAMPLESIZES=c(2,3,5,10,15,20,25,30)
#SAMPLESIZES=c(3,6,10,22)
fgres1 <- lapply(SAMPLESIZES,function(n) {
  SEEDS <- seq(100,5000,100) # 50 repetitions
  dsres <- mclapply(SEEDS, function(SEED) {
    set.seed(SEED)
    mysample <- sample(1:37,n,replace=FALSE)*2
    mysample <- c(mysample,(mysample-1))
    mysample <- mysample[order(mysample)]
    ss2 <- ss[mysample,]
    x2 <- x[,mysample]
    x2 <- x2[rowMeans(x2)>=10,]

    dds <- DESeqDataSetFromMatrix(countData = x2 , colData=ss2, design = ~ patient + tumor)
    res <- DESeq(dds)
    z <- results(res)
    vsd <- vst(dds, blind=FALSE)
    zz <-cbind(as.data.frame(z),assay(vsd))
    dge <- as.data.frame(zz[order(zz$pvalue),])

    dge$stat[which(is.na(dge$stat))] <- 1
    stat <- dge$stat
    names(stat) <- rownames(dge)
    fres <- fgsea(pathways=gs,stats=stat,minSize=5,nproc=3)
    fres <- fres[order(-abs(fres$ES)),]
    fsig <- subset(fres,padj<0.01)
    fsets2 <- paste(fsig$pathway,as.character(fsig$ES > 0))

    TOT=length(fsets2)
    TP=length(which(fsets2 %in% fsets))
    FP=length(which(!fsets2 %in% fsets))
    FN=length(which(!fsets %in% fsets2))
    RES=c("TOT"=TOT,"TP"=TP,"FP"=FP,"FN"=FN)
    RES
  },mc.cores=16)
  do.call(rbind,dsres)
})

ssres <- fgres1
par(mfrow=c(3,1))

dots <- lapply(ssres,function(x) { x[,"TP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    16.0   168.2   211.0   205.5   251.5   304.0 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    88.0   199.0   242.0   234.5   283.2   324.0 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   148.0   241.8   273.5   266.5   300.2   347.0 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   178.0   275.5   306.0   295.7   328.8   353.0 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   205.0   284.0   304.0   300.5   324.8   356.0 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   232.0   299.0   313.0   311.0   334.5   361.0 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   263.0   304.2   321.5   316.8   336.5   354.0 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   293.0   313.8   327.0   324.9   337.0   352.0

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. consistent",xlab="sample size",cex=0,main="fgsea 1% FDR",ylim=c(0,nrow(fsig))) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)
abline(h=nrow(fsig))

dots <- lapply(ssres,function(x) { x[,"FP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     7.0    14.0    26.5    29.8    35.5   127.0 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   18.00   26.50   30.64   38.75  109.00 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   13.00   19.50   27.40   37.75   98.00 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00    7.25   17.00   18.98   29.00   50.00 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    6.00   11.00   11.96   16.50   44.00 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.25    5.00    8.08    9.75   39.00 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.00    3.00    5.04    6.75   24.00 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.00    2.00    3.62    5.00   23.00

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. inconsistent",xlab="sample size",cex=0) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)

dots <- lapply(ssres,function(x) { x[,"TP"] })
TPdots <- do.call(rbind,dots)
dots <- lapply(ssres,function(x) { x[,"FP"] })
FPdots <- do.call(rbind,dots)
ratio <- t(FPdots / (TPdots + FPdots))
ratio <- lapply(1:ncol(ratio),function(i) {ratio[,i]})
lapply(ratio,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04147 0.07619 0.10987 0.12486 0.15178 0.39446 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01274 0.07442 0.09690 0.11015 0.13586 0.29707 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.02643 0.04497 0.07151 0.08953 0.10706 0.28295 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01271 0.02680 0.05236 0.05716 0.08557 0.13123 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.02012 0.03533 0.03681 0.04843 0.11429 
## 
## [[6]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.008483 0.016649 0.023638 0.029932 0.102632 
## 
## [[7]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.005903 0.009646 0.014835 0.018944 0.066482 
## 
## [[8]]
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.003015 0.006341 0.010434 0.014632 0.061828

boxplot(ratio,names=SAMPLESIZES,col="white",ylab="proportion inconsistent",xlab="sample size",cex=0,ylim=c(0,0.5)) ; grid()
beeswarm(ratio,add=TRUE,pch=1,cex=0.6)

FORA Downsample

FDR threshold of 0.05.

SAMPLESIZES=c(2,3,5,10,15,20,25,30)
fores <- lapply(SAMPLESIZES,function(n) {
  SEEDS <- seq(100,5000,100) # 50 repetitions
  dsres <- mclapply(SEEDS, function(SEED) {
    set.seed(SEED)
    mysample <- sample(1:37,n,replace=FALSE)*2
    mysample <- c(mysample,(mysample-1))
    mysample <- mysample[order(mysample)]
    ss2 <- ss[mysample,]
    x2 <- x[,mysample]
    x2 <- x2[rowMeans(x2)>=10,]

    dds <- DESeqDataSetFromMatrix(countData = x2 , colData=ss2, design = ~ patient + tumor)
    res <- DESeq(dds)
    z <- results(res)
    vsd <- vst(dds, blind=FALSE)
    zz <-cbind(as.data.frame(z),assay(vsd))
    dge <- as.data.frame(zz[order(zz$pvalue),])

    sigup <- rownames(subset(dge,padj<0.05 & log2FoldChange>0))
    if (length(sigup) < 250) { sigup <- head(rownames(subset(dge, log2FoldChange>0)),250) }
    sigdn <- rownames(subset(dge,padj<0.05 & log2FoldChange<0))
    if (length(sigdn) < 250) { sigdn <- head(rownames(subset(dge, log2FoldChange<0)),250) }
    bg <- rownames(dge)

    oresup <- as.data.frame(fora(pathways=gs,genes=sigup,universe=bg,minSize=5))
    oresup <- subset(oresup,padj<0.05)
    oresup <- paste(oresup$pathway,"TRUE")

    oresdn <- as.data.frame(fora(pathways=gs,genes=sigdn,universe=bg,minSize=5))
    oresdn <- subset(oresdn,padj<0.05)
    oresdn <- paste(oresdn$pathway,"FALSE")

    osets2 <- c(oresup,oresdn)

    TOT=length(osets2)
    TP=length(which(osets2 %in% osets))
    FP=length(which(!osets2 %in% osets))
    FN=length(which(!osets %in% osets2))
    RES=c("TOT"=TOT,"TP"=TP,"FP"=FP,"FN"=FN)
    RES
  },mc.cores=16)
  do.call(rbind,dsres)
})

ssres <- fores

par(mfrow=c(3,1))

dots <- lapply(ssres,function(x) { x[,"TP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   80.75  122.00  105.56  142.25  242.00 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     3.0   102.8   139.5   125.2   155.8   270.0 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     3.0   129.5   155.5   156.8   188.5   274.0 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   147.0   211.0   264.0   251.7   291.0   314.0 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   162.0   249.8   287.0   274.2   304.8   336.0 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   236.0   285.0   300.5   297.4   314.8   330.0 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   275.0   299.0   304.5   308.7   322.2   342.0 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   300.0   311.0   317.0   317.7   323.0   344.0

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. consistent",xlab="sample size",cex=0,main="fora 5% FDR",ylim=c(0,length(osets))) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)
abline(h=length(osets))

dots <- lapply(ssres,function(x) { x[,"FP"] })
lapply(dots,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       5      28      45      41      53      78 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   30.25   44.50   45.12   58.75  115.00 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   31.00   40.50   46.20   57.75  128.00 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.00   38.25   50.00   51.50   66.75   97.00 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.00   29.00   40.50   43.34   54.00   84.00 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.00   27.25   38.00   38.92   47.00   79.00 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   23.50   29.50   32.96   37.75   75.00 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   22.00   26.00   29.32   36.00   71.00

boxplot(dots,names=SAMPLESIZES,col="white",ylab="no. inconsistent",xlab="sample size",cex=0) ; grid()
beeswarm(dots,add=TRUE,pch=1,cex=0.6)

dots <- lapply(ssres,function(x) { x[,"TP"] })
TPdots <- do.call(rbind,dots)
dots <- lapply(ssres,function(x) { x[,"FP"] })
FPdots <- do.call(rbind,dots)
ratio <- t(FPdots / (TPdots + FPdots))
ratio <- lapply(1:ncol(ratio),function(i) {ratio[,i]})
lapply(ratio,summary)

## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1278  0.2446  0.2790  0.3328  0.3230  0.8667 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1528  0.2143  0.2556  0.2971  0.3158  0.8000 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1200  0.1893  0.2110  0.2331  0.2418  0.5000 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.09053 0.14317 0.16345 0.16776 0.19836 0.28340 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.06742 0.10865 0.13464 0.13281 0.15445 0.22105 
## 
## [[6]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04281 0.08769 0.10953 0.11309 0.13847 0.19603 
## 
## [[7]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.05678 0.07221 0.08749 0.09505 0.11061 0.18519 
## 
## [[8]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04678 0.06498 0.07625 0.08360 0.10175 0.17574

boxplot(ratio,names=SAMPLESIZES,col="white",ylab="proportion inconsistent",xlab="sample size",cex=0,ylim=c(0,0.5)) ; grid()
beeswarm(ratio,add=TRUE,pch=1,cex=0.6)