Source: https://github.com/markziemann/background

Introduction

This guide is a Rmarkdown script that conducts differential expression and enrichment analysis, which are very popular workflows for transcriptome data.

The goal of this work is to understand the behaviour of ClusterProfiler’s p-value adjustment method as we have observed some weird behaviour. In particular we see that clusterprofiler’s criteria for inclusion/exclusion of gene sets is irregular. For example clusterprofiler seems to include gene sets as detected if there is just 1 member detected in the foreground list. If a gene set has 10 in the background and 0 in the foreground it looks like it gets excluded. This is a bit strange and contrary to best practice, where the detection threshold should be set by the overlap size in the background list, rather than the foreground list.

Theoretically, this behaviour could have a negative impact on performance, as FDR is applied to the wrong pathways.

suppressPackageStartupMessages({
  library("getDEE2")
  library("DESeq2")
  library("kableExtra")
  library("clusterProfiler")
  library("fgsea")
  library("eulerr")
  library("gplots")
  library("parallel")
})

For this guide I will be using bulk RNA-seq data from several previous studies.

Load data

Gene sets were obtained from MSigDB.

myfiles <- list.files("../dataprep",pattern="*Rds",full.names=TRUE)
d <- lapply(myfiles,readRDS)
names(d) <- basename(myfiles)

mygmts <- list.files("../gmt",pattern="*gmt",full.names=TRUE)
gs <- lapply(mygmts,gmtPathways)
names(gs) <- basename(mygmts)

names(gs)
## [1] "c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt"
## [2] "c2.cp.reactome.v2023.2.Hs.symbols.gmt"    
## [3] "c2.cp.wikipathways.v2023.2.Hs.symbols.gmt"
## [4] "c3.mir.mirdb.v2023.2.Hs.symbols.gmt"      
## [5] "c3.tft.gtrd.v2023.2.Hs.symbols.gmt"       
## [6] "c5.go.v2023.2.Hs.symbols.gmt"             
## [7] "c5.hpo.v2023.2.Hs.symbols.gmt"            
## [8] "c8.all.v2023.2.Hs.symbols.gmt"            
## [9] "h.all.v2023.2.Hs.symbols.gmt"
names(gs) <- c("KEGG", "Reactome", "Wikipathways", "miR targets", "TFT GTRD",
  "GO", "HPO", "Cellmarkers", "Hallmark" )

Basic geneset analysis

# number of sets
lapply(gs,length)
## $KEGG
## [1] 619
## 
## $Reactome
## [1] 1692
## 
## $Wikipathways
## [1] 791
## 
## $`miR targets`
## [1] 2377
## 
## $`TFT GTRD`
## [1] 505
## 
## $GO
## [1] 10461
## 
## $HPO
## [1] 5547
## 
## $Cellmarkers
## [1] 830
## 
## $Hallmark
## [1] 50
# set size summary
lapply(gs,function(x) {
  y <- unlist(lapply(x, length ))
  summary(y)
} )
## $KEGG
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00    8.00   13.00   15.59   19.00   92.00 
## 
## $Reactome
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   11.00   23.00   56.28   57.00 1463.00 
## 
## $Wikipathways
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   14.00   28.00   42.92   53.00  438.00 
## 
## $`miR targets`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     5.0    59.0   103.0   156.5   186.0  1071.0 
## 
## $`TFT GTRD`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     5.0    55.0   287.0   509.3   880.0  1999.0 
## 
## $GO
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00    9.00   18.00   79.83   55.00 1994.00 
## 
## $HPO
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   10.00   23.00   91.32   74.00 1735.00 
## 
## $Cellmarkers
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     5.0    52.0   115.0   180.5   223.2  1770.0 
## 
## $Hallmark
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    32.0    98.0   180.0   146.4   200.0   200.0
# gene coverage
lapply(gs,function(x) {
  y <- unique(unlist(x))
  length(y)
} )
## $KEGG
## [1] 2727
## 
## $Reactome
## [1] 11155
## 
## $Wikipathways
## [1] 8313
## 
## $`miR targets`
## [1] 16655
## 
## $`TFT GTRD`
## [1] 26807
## 
## $GO
## [1] 19428
## 
## $HPO
## [1] 4997
## 
## $Cellmarkers
## [1] 20418
## 
## $Hallmark
## [1] 4384
# total number of annotations
lapply(gs, function(x) {
  sum(unlist(lapply(x,length)))
} )
## $KEGG
## [1] 9651
## 
## $Reactome
## [1] 95226
## 
## $Wikipathways
## [1] 33947
## 
## $`miR targets`
## [1] 372061
## 
## $`TFT GTRD`
## [1] 257181
## 
## $GO
## [1] 835146
## 
## $HPO
## [1] 506552
## 
## $Cellmarkers
## [1] 149827
## 
## $Hallmark
## [1] 7322

Enrichment with Clusterprofiler

Here is a function that conducts over-representation analysis with ClusterProfiler. Genes that are not annotated as having any gene set category are discarded.

# df = deseq output
# df must have "ensID geneSymbol" as rownames

# gs = gene sets
# gs format is list of symbols
orabad <- function( df, gs, ngenes=2000 ) {

  def <- df

  gs1 <- stack(gs)
  colnames(gs1) <- c("gene","term")
  gs1 <- gs1[,c(2,1)]

  defup <- head(rownames(subset(def,log2FoldChange>0)),ngenes)
  defup <- defup[lapply(strsplit(defup," "),length)==2]
  defup <- unique(sapply(strsplit(defup," "),"[[",2))

  defdn <- head(rownames(subset(def,log2FoldChange<0)),ngenes)
  defdn <- defdn[lapply(strsplit(defdn," "),length)==2]
  defdn <- unique(sapply(strsplit(defdn," "),"[[",2))

  bg <- rownames(def)
  bg <- bg[lapply(strsplit(bg," "),length)==2]
  bg <- unique(sapply(strsplit(bg," "),"[[",2))

  ora_up <- as.data.frame(enricher(gene = defup ,
    universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
    pAdjustMethod="fdr",  pvalueCutoff = 1, qvalueCutoff = 1  ))

  ora_up$geneID <- NULL
  ora_ups <- rownames(ora_up)

  gr <- as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",2))

  br <- as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",2))

  ora_up$es <- gr/br
  ora_up <- ora_up[order(-ora_up$es),]
  ora_up$Description=NULL

  ora_dn <- as.data.frame(enricher(gene = defdn ,
    universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
    pAdjustMethod="fdr",  pvalueCutoff = 1, qvalueCutoff = 1  ))

  ora_dn$geneID <- NULL
  ora_dns <- rownames(ora_dn)

  gr <- as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",2))

  br <- as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",2))

  ora_dn$es <- gr/br
  ora_dn <- ora_dn[order(-ora_dn$es),]
  ora_dn$Description=NULL

  result <- list("orabad_up"=ora_up,"orabad_dn"=ora_dn)
  return(result)
}

This function fixes the problem with not inlcuding sets for FDR correction.

oragood <- function( df, gs, ngenes=2000 ) {

  def <- df

  gs1 <- stack(gs)
  colnames(gs1) <- c("gene","term")
  gs1 <- gs1[,c(2,1)]

  defup <- head(rownames(subset(def,log2FoldChange>0)),ngenes)
  defup <- defup[lapply(strsplit(defup," "),length)==2]
  defup <- unique(sapply(strsplit(defup," "),"[[",2))

  defdn <- head(rownames(subset(def,log2FoldChange<0)),ngenes)
  defdn <- defdn[lapply(strsplit(defdn," "),length)==2]
  defdn <- unique(sapply(strsplit(defdn," "),"[[",2))

  bg <- rownames(def)
  bg <- bg[lapply(strsplit(bg," "),length)==2]
  bg <- unique(sapply(strsplit(bg," "),"[[",2))

  gs1 <- gs1[which(gs1$gene %in% bg),]
  terms <- names(which(table(gs1$term)>5))
  gs1 <- gs1[gs1$term %in% terms,]

  ora_up <- as.data.frame(enricher(gene = defup ,
    universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
    pAdjustMethod="fdr",  pvalueCutoff = 1, qvalueCutoff = 1  ))

  ora_up$geneID <- NULL
  ora_ups <- rownames(ora_up)

  gr <- as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",2))

  br <- as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",2))

  ora_up$es <- gr/br
  ora_up <- ora_up[order(-ora_up$es),]
  ora_up$Description=NULL

  nsets <- length(which(table(gs1$term)>5))
  nres <- nrow(ora_up)
  diff <- nsets - nres
  pvals <- c(ora_up$pvalue,rep(1,diff))
  ora_up$p.adjust <- p.adjust(pvals,method="fdr")[1:nrow(ora_up)]

  ora_dn <- as.data.frame(enricher(gene = defdn ,
    universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
    pAdjustMethod="fdr",  pvalueCutoff = 1, qvalueCutoff = 1  ))

  ora_dn$geneID <- NULL
  ora_dns <- rownames(ora_dn)

  gr <- as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",2))

  br <- as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",1)) /
    as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",2))

  ora_dn$es <- gr/br
  ora_dn <- ora_dn[order(-ora_dn$es),]
  ora_dn$Description=NULL

  nsets <- length(which(table(gs1$term)>5))
  nres <- nrow(ora_dn)
  diff <- nsets - nres
  pvals <- c(ora_dn$pvalue,rep(1,diff))
  ora_dn$p.adjust <- p.adjust(pvals,method="fdr")[1:nrow(ora_dn)]

  result <- list("oragood_up"=ora_up,"oragood_dn"=ora_dn)
  return(result)
}

Compare the results of bad and good analysis

# bad=ORA analysis, a list with results of up and down regulated pathways
# good=same as bad, except fixed the background problem
compare_ora_plots <- function(bad,good,libname=NULL,setsize=NULL){
  badup <- bad$orabad_up
  baddn <- bad$orabad_dn
  goodup <- good$oragood_up
  gooddn <- good$oragood_dn
  mup <- merge(badup,goodup,by=0)

  MAX=max(-log10(c(mup$p.adjust.x,mup$p.adjust.y)))
  UPHEADER=paste(libname,"up",setsize,"genes")
  DNHEADER=paste(libname,"dn",setsize,"genes")

  par(mfrow=c(1,2))

  plot(-log10(mup$p.adjust.x), -log10(mup$p.adjust.y),
    xlim=c(0,MAX), ylim=c(0,MAX),
    xlab="original -log10 FDR", ylab="corrected -log10 FDR", main=UPHEADER)
  grid()
  abline(a = 0, b = 1,lwd=2,lty=2)

  mdn <- merge(baddn,gooddn,by=0)

  MAX=max(-log10(c(mdn$p.adjust.x,mdn$p.adjust.y)))

  plot(-log10(mdn$p.adjust.x), -log10(mdn$p.adjust.y),
    xlim=c(0,MAX), ylim=c(0,MAX),
    xlab="original -log10 FDR", ylab="corrected -log10 FDR", main=DNHEADER)
  grid()
  abline(a = 0, b = 1,lwd=2,lty=2)
}

compare_ora <- function(bad,good){
  badup <- bad$orabad_up
  baddn <- bad$orabad_dn
  goodup <- good$oragood_up
  gooddn <- good$oragood_dn
  bup <- rownames(subset(badup,p.adjust<0.05))
  gup <- rownames(subset(goodup,p.adjust<0.05))
  bdn <- rownames(subset(baddn,p.adjust<0.05))
  gdn <- rownames(subset(gooddn,p.adjust<0.05))
  sig_bad <- unique(c(bup,bdn))
  sig_good <- unique(c(gup,gdn))
  nsig_bad <- length(sig_bad)
  nsig_good <- length(sig_good)
  jac <- length(intersect(sig_bad,sig_good)) / length(union(sig_bad,sig_good))
  result=c("nsig_bad"=nsig_bad,"nsig_good"=nsig_good,"Jaccard"=jac)
  return(result)
}

Run some analyses

params <- expand.grid(1:length(d),1:length(gs))
params2 <- expand.grid(names(d),names(gs))

colnames(params) <- c("d","gs")

ora_dat <- mclapply(1:nrow(params), function(i) {
  j=params[i,1]
  k=params[i,2]
  bad <- orabad(d[[j]],gs[[k]])
  good <- oragood(d[[j]],gs[[k]])
  res <- compare_ora(bad,good)
  return(res)
} , mc.cores=8)

ora_dat <- do.call(rbind,ora_dat)

res1 <- cbind(params2,ora_dat)

res1 %>%
  kbl(row.names = TRUE, caption="all results") %>%
  kable_paper("hover", full_width = F)
all results
Var1 Var2 nsig_bad nsig_good Jaccard
1 bulkrna1.Rds KEGG 3 3 1.0000000
2 bulkrna2.Rds KEGG 1 0 0.0000000
3 bulkrna3.Rds KEGG 1 0 0.0000000
4 bulkrna4.Rds KEGG 8 8 1.0000000
5 bulkrna5.Rds KEGG 0 0 NaN
6 bulkrna6.Rds KEGG 1 1 1.0000000
7 bulkrna7.Rds KEGG 0 0 NaN
8 bulkrna1.Rds Reactome 122 121 0.9918033
9 bulkrna2.Rds Reactome 96 96 1.0000000
10 bulkrna3.Rds Reactome 98 97 0.9897959
11 bulkrna4.Rds Reactome 108 105 0.9722222
12 bulkrna5.Rds Reactome 39 36 0.9230769
13 bulkrna6.Rds Reactome 74 72 0.9729730
14 bulkrna7.Rds Reactome 24 17 0.7083333
15 bulkrna1.Rds Wikipathways 18 18 1.0000000
16 bulkrna2.Rds Wikipathways 18 18 1.0000000
17 bulkrna3.Rds Wikipathways 3 3 1.0000000
18 bulkrna4.Rds Wikipathways 0 0 NaN
19 bulkrna5.Rds Wikipathways 0 0 NaN
20 bulkrna6.Rds Wikipathways 18 18 1.0000000
21 bulkrna7.Rds Wikipathways 6 6 1.0000000
22 bulkrna1.Rds miR targets 188 184 0.9787234
23 bulkrna2.Rds miR targets 37 37 1.0000000
24 bulkrna3.Rds miR targets 3 3 1.0000000
25 bulkrna4.Rds miR targets 2 2 1.0000000
26 bulkrna5.Rds miR targets 270 269 0.9962963
27 bulkrna6.Rds miR targets 690 688 0.9971014
28 bulkrna7.Rds miR targets 36 36 1.0000000
29 bulkrna1.Rds TFT GTRD 88 88 1.0000000
30 bulkrna2.Rds TFT GTRD 88 87 0.9886364
31 bulkrna3.Rds TFT GTRD 100 96 0.9600000
32 bulkrna4.Rds TFT GTRD 66 66 1.0000000
33 bulkrna5.Rds TFT GTRD 113 111 0.9823009
34 bulkrna6.Rds TFT GTRD 89 87 0.9775281
35 bulkrna7.Rds TFT GTRD 64 62 0.9687500
36 bulkrna1.Rds GO 289 289 1.0000000
37 bulkrna2.Rds GO 621 600 0.9661836
38 bulkrna3.Rds GO 269 262 0.9739777
39 bulkrna4.Rds GO 88 82 0.9318182
40 bulkrna5.Rds GO 214 205 0.9579439
41 bulkrna6.Rds GO 188 177 0.9414894
42 bulkrna7.Rds GO 323 313 0.9690402
43 bulkrna1.Rds HPO 9 9 1.0000000
44 bulkrna2.Rds HPO 2 2 1.0000000
45 bulkrna3.Rds HPO 1 1 1.0000000
46 bulkrna4.Rds HPO 0 0 NaN
47 bulkrna5.Rds HPO 0 0 NaN
48 bulkrna6.Rds HPO 0 0 NaN
49 bulkrna7.Rds HPO 0 0 NaN
50 bulkrna1.Rds Cellmarkers 82 81 0.9878049
51 bulkrna2.Rds Cellmarkers 255 255 1.0000000
52 bulkrna3.Rds Cellmarkers 91 91 1.0000000
53 bulkrna4.Rds Cellmarkers 31 31 1.0000000
54 bulkrna5.Rds Cellmarkers 43 43 1.0000000
55 bulkrna6.Rds Cellmarkers 104 104 1.0000000
56 bulkrna7.Rds Cellmarkers 240 242 0.9917355
57 bulkrna1.Rds Hallmark 3 3 1.0000000
58 bulkrna2.Rds Hallmark 13 13 1.0000000
59 bulkrna3.Rds Hallmark 3 3 1.0000000
60 bulkrna4.Rds Hallmark 2 2 1.0000000
61 bulkrna5.Rds Hallmark 0 0 NaN
62 bulkrna6.Rds Hallmark 12 12 1.0000000
63 bulkrna7.Rds Hallmark 3 3 1.0000000
summary(res1$nsig_bad)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.50   31.00   85.05   99.00  690.00
summary(res1$nsig_good)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.50   31.00   83.46   96.50  688.00
summary(res1$Jaccard)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.9749  1.0000  0.9468  1.0000  1.0000       9
# NUMBER OF SIGNIFICANT SETS
ns1 <- matrix( res1$nsig_bad , ncol=9 )
rownames(ns1) <- names(d)
colnames(ns1) <- names(gs)

ns1 %>%
  kbl(row.names = TRUE, caption="significant genesets in original analysis") %>%
  kable_paper("hover", full_width = F)
significant genesets in original analysis
KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers Hallmark
bulkrna1.Rds 3 122 18 188 88 289 9 82 3
bulkrna2.Rds 1 96 18 37 88 621 2 255 13
bulkrna3.Rds 1 98 3 3 100 269 1 91 3
bulkrna4.Rds 8 108 0 2 66 88 0 31 2
bulkrna5.Rds 0 39 0 270 113 214 0 43 0
bulkrna6.Rds 1 74 18 690 89 188 0 104 12
bulkrna7.Rds 0 24 6 36 64 323 0 240 3
ns1 <- apply(ns1,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ;  mean(x) } )

ns2 <- matrix( res1$nsig_good , ncol=9 )
rownames(ns2) <- names(d)
colnames(ns2) <- names(gs)

ns2 %>%
  kbl(row.names = TRUE, caption="significant genesets in corrected analysis") %>%
  kable_paper("hover", full_width = F)
significant genesets in corrected analysis
KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers Hallmark
bulkrna1.Rds 3 121 18 184 88 289 9 81 3
bulkrna2.Rds 0 96 18 37 87 600 2 255 13
bulkrna3.Rds 0 97 3 3 96 262 1 91 3
bulkrna4.Rds 8 105 0 2 66 82 0 31 2
bulkrna5.Rds 0 36 0 269 111 205 0 43 0
bulkrna6.Rds 1 72 18 688 87 177 0 104 12
bulkrna7.Rds 0 17 6 36 62 313 0 242 3
ns2 <- apply(ns2,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ;  mean(x) } )
ns2
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##     1.714286    77.714286     9.000000   174.142857    85.285714   275.428571 
##          HPO  Cellmarkers     Hallmark 
##     1.714286   121.000000     5.142857
df <- data.frame(ns1,ns2)
df <- df[order(df$ns1),]

par(mar=c(c(5.1, 7.1, 4.1, 2.1) ))
barplot(t(df),beside=TRUE,horiz=TRUE,las=1,xlim=c(0,330),
  xlab="no. significant sets", main="Effect of correcting the FDR bug",
  col=c("gray30","gray80"))
text(df$ns1+20,((1:nrow(df))*3)-1.5,labels=signif(df[,1],3))
text(df$ns2+20,((1:nrow(df))*3)-0.5,labels=signif(df[,2],3))
legend("bottomright", inset=.02, legend=c("corrected","original"),
  fill=c("gray80","gray30"), horiz=FALSE, cex=1)

# RATIO
rat <- matrix( (res1$nsig_good+0) / (res1$nsig_bad+0) , ncol=9 )
rownames(rat) <- names(d)
colnames(rat) <- names(gs)

signif(rat,3) %>%
  kbl(row.names = TRUE, caption="Ratio of significant genesets Corrected:Original") %>%
  kable_paper("hover", full_width = F)
Ratio of significant genesets Corrected:Original
KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers Hallmark
bulkrna1.Rds 1 0.992 1 0.979 1.000 1.000 1 0.988 1
bulkrna2.Rds 0 1.000 1 1.000 0.989 0.966 1 1.000 1
bulkrna3.Rds 0 0.990 1 1.000 0.960 0.974 1 1.000 1
bulkrna4.Rds 1 0.972 NaN 1.000 1.000 0.932 NaN 1.000 1
bulkrna5.Rds NaN 0.923 NaN 0.996 0.982 0.958 NaN 1.000 NaN
bulkrna6.Rds 1 0.973 1 0.997 0.978 0.941 NaN 1.000 1
bulkrna7.Rds NaN 0.708 1 1.000 0.969 0.969 NaN 1.010 1
rat2 <- apply(rat,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ;  mean(x) } )
rat2
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.6000000    0.9368864    1.0000000    0.9960173    0.9824593    0.9629219 
##          HPO  Cellmarkers     Hallmark 
##    1.0000000    0.9994483    1.0000000
# JACCARD
jac <- matrix( res1$Jaccard , ncol=9 )
rownames(jac) <- names(d)
colnames(jac) <- names(gs)

signif(jac,3) %>%
  kbl(row.names = TRUE, caption="Jaccard similarity Corrected:Original") %>%
  kable_paper("hover", full_width = F)
Jaccard similarity Corrected:Original
KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers Hallmark
bulkrna1.Rds 1 0.992 1 0.979 1.000 1.000 1 0.988 1
bulkrna2.Rds 0 1.000 1 1.000 0.989 0.966 1 1.000 1
bulkrna3.Rds 0 0.990 1 1.000 0.960 0.974 1 1.000 1
bulkrna4.Rds 1 0.972 NaN 1.000 1.000 0.932 NaN 1.000 1
bulkrna5.Rds NaN 0.923 NaN 0.996 0.982 0.958 NaN 1.000 NaN
bulkrna6.Rds 1 0.973 1 0.997 0.978 0.941 NaN 1.000 1
bulkrna7.Rds NaN 0.708 1 1.000 0.969 0.969 NaN 0.992 1
jac2 <- apply(jac,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ;  mean(x) } )
jac2
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.6000000    0.9368864    1.0000000    0.9960173    0.9824593    0.9629219 
##          HPO  Cellmarkers     Hallmark 
##    1.0000000    0.9970772    1.0000000
barplot(sort(jac2),horiz=TRUE,las=1,xlim=c(0,1),
  main="Jaccard similarity Corrected:Original",xlab="Jaccard index")

# overall
oa <- matrix(paste(res1$nsig_bad,res1$nsig_good,signif(res1$Jaccard,2)),ncol=9)
rownames(oa) <- names(d)
colnames(oa) <- names(gs)

oa %>%
  kbl(row.names = TRUE, caption="Original:Corrected:Jaccard") %>%
  kable_paper("hover", full_width = F)
Original:Corrected:Jaccard
KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers Hallmark
bulkrna1.Rds 3 3 1 122 121 0.99 18 18 1 188 184 0.98 88 88 1 289 289 1 9 9 1 82 81 0.99 3 3 1
bulkrna2.Rds 1 0 0 96 96 1 18 18 1 37 37 1 88 87 0.99 621 600 0.97 2 2 1 255 255 1 13 13 1
bulkrna3.Rds 1 0 0 98 97 0.99 3 3 1 3 3 1 100 96 0.96 269 262 0.97 1 1 1 91 91 1 3 3 1
bulkrna4.Rds 8 8 1 108 105 0.97 0 0 NaN 2 2 1 66 66 1 88 82 0.93 0 0 NaN 31 31 1 2 2 1
bulkrna5.Rds 0 0 NaN 39 36 0.92 0 0 NaN 270 269 1 113 111 0.98 214 205 0.96 0 0 NaN 43 43 1 0 0 NaN
bulkrna6.Rds 1 1 1 74 72 0.97 18 18 1 690 688 1 89 87 0.98 188 177 0.94 0 0 NaN 104 104 1 12 12 1
bulkrna7.Rds 0 0 NaN 24 17 0.71 6 6 1 36 36 1 64 62 0.97 323 313 0.97 0 0 NaN 240 242 0.99 3 3 1

Now run an exhaustive analysis, changing the gene list length from 125 to 2000.

ngenes <- c(125,250,500,1000,2000)
params <- expand.grid(1:length(d),1:length(gs),ngenes)
params2 <- expand.grid(names(d),names(gs),ngenes)

colnames(params) <- c("d","gs","ngenes")

ora_dat <- mclapply(1:nrow(params), function(i) {
  j=params[i,1]
  k=params[i,2]
  l=params[i,3]
  bad <- orabad(d[[j]],gs[[k]],l)
  good <- oragood(d[[j]],gs[[k]],l)
  res <- compare_ora(bad,good)
  return(res)
} , mc.cores=8)

ora_dat <- do.call(rbind,ora_dat)

res1 <- cbind(params2,ora_dat)

res1 %>%
  kbl(row.names = TRUE, caption="all results") %>%
  kable_paper("hover", full_width = F)
all results
Var1 Var2 Var3 nsig_bad nsig_good Jaccard
1 bulkrna1.Rds KEGG 125 0 0 NaN
2 bulkrna2.Rds KEGG 125 0 0 NaN
3 bulkrna3.Rds KEGG 125 0 0 NaN
4 bulkrna4.Rds KEGG 125 0 0 NaN
5 bulkrna5.Rds KEGG 125 0 0 NaN
6 bulkrna6.Rds KEGG 125 0 0 NaN
7 bulkrna7.Rds KEGG 125 0 0 NaN
8 bulkrna1.Rds Reactome 125 0 0 NaN
9 bulkrna2.Rds Reactome 125 0 0 NaN
10 bulkrna3.Rds Reactome 125 0 0 NaN
11 bulkrna4.Rds Reactome 125 0 0 NaN
12 bulkrna5.Rds Reactome 125 0 0 NaN
13 bulkrna6.Rds Reactome 125 0 0 NaN
14 bulkrna7.Rds Reactome 125 0 0 NaN
15 bulkrna1.Rds Wikipathways 125 0 0 NaN
16 bulkrna2.Rds Wikipathways 125 1 0 0.0000000
17 bulkrna3.Rds Wikipathways 125 0 0 NaN
18 bulkrna4.Rds Wikipathways 125 0 0 NaN
19 bulkrna5.Rds Wikipathways 125 0 0 NaN
20 bulkrna6.Rds Wikipathways 125 0 0 NaN
21 bulkrna7.Rds Wikipathways 125 0 0 NaN
22 bulkrna1.Rds miR targets 125 0 0 NaN
23 bulkrna2.Rds miR targets 125 0 0 NaN
24 bulkrna3.Rds miR targets 125 0 0 NaN
25 bulkrna4.Rds miR targets 125 0 0 NaN
26 bulkrna5.Rds miR targets 125 0 0 NaN
27 bulkrna6.Rds miR targets 125 0 0 NaN
28 bulkrna7.Rds miR targets 125 0 0 NaN
29 bulkrna1.Rds TFT GTRD 125 0 0 NaN
30 bulkrna2.Rds TFT GTRD 125 0 0 NaN
31 bulkrna3.Rds TFT GTRD 125 0 0 NaN
32 bulkrna4.Rds TFT GTRD 125 0 0 NaN
33 bulkrna5.Rds TFT GTRD 125 0 0 NaN
34 bulkrna6.Rds TFT GTRD 125 0 0 NaN
35 bulkrna7.Rds TFT GTRD 125 0 0 NaN
36 bulkrna1.Rds GO 125 9 6 0.6666667
37 bulkrna2.Rds GO 125 0 0 NaN
38 bulkrna3.Rds GO 125 0 0 NaN
39 bulkrna4.Rds GO 125 0 0 NaN
40 bulkrna5.Rds GO 125 13 0 0.0000000
41 bulkrna6.Rds GO 125 0 0 NaN
42 bulkrna7.Rds GO 125 1 0 0.0000000
43 bulkrna1.Rds HPO 125 2 0 0.0000000
44 bulkrna2.Rds HPO 125 0 0 NaN
45 bulkrna3.Rds HPO 125 0 0 NaN
46 bulkrna4.Rds HPO 125 1 0 0.0000000
47 bulkrna5.Rds HPO 125 0 0 NaN
48 bulkrna6.Rds HPO 125 0 0 NaN
49 bulkrna7.Rds HPO 125 1 1 1.0000000
50 bulkrna1.Rds Cellmarkers 125 0 0 NaN
51 bulkrna2.Rds Cellmarkers 125 2 0 0.0000000
52 bulkrna3.Rds Cellmarkers 125 0 0 NaN
53 bulkrna4.Rds Cellmarkers 125 2 0 0.0000000
54 bulkrna5.Rds Cellmarkers 125 0 0 NaN
55 bulkrna6.Rds Cellmarkers 125 0 0 NaN
56 bulkrna7.Rds Cellmarkers 125 0 0 NaN
57 bulkrna1.Rds Hallmark 125 0 0 NaN
58 bulkrna2.Rds Hallmark 125 0 0 NaN
59 bulkrna3.Rds Hallmark 125 0 0 NaN
60 bulkrna4.Rds Hallmark 125 0 0 NaN
61 bulkrna5.Rds Hallmark 125 1 0 0.0000000
62 bulkrna6.Rds Hallmark 125 0 0 NaN
63 bulkrna7.Rds Hallmark 125 0 0 NaN
64 bulkrna1.Rds KEGG 250 0 0 NaN
65 bulkrna2.Rds KEGG 250 2 1 0.5000000
66 bulkrna3.Rds KEGG 250 0 0 NaN
67 bulkrna4.Rds KEGG 250 0 0 NaN
68 bulkrna5.Rds KEGG 250 0 0 NaN
69 bulkrna6.Rds KEGG 250 0 0 NaN
70 bulkrna7.Rds KEGG 250 0 0 NaN
71 bulkrna1.Rds Reactome 250 0 0 NaN
72 bulkrna2.Rds Reactome 250 3 3 1.0000000
73 bulkrna3.Rds Reactome 250 0 0 NaN
74 bulkrna4.Rds Reactome 250 0 0 NaN
75 bulkrna5.Rds Reactome 250 0 0 NaN
76 bulkrna6.Rds Reactome 250 0 0 NaN
77 bulkrna7.Rds Reactome 250 2 0 0.0000000
78 bulkrna1.Rds Wikipathways 250 0 0 NaN
79 bulkrna2.Rds Wikipathways 250 0 0 NaN
80 bulkrna3.Rds Wikipathways 250 0 0 NaN
81 bulkrna4.Rds Wikipathways 250 0 0 NaN
82 bulkrna5.Rds Wikipathways 250 1 0 0.0000000
83 bulkrna6.Rds Wikipathways 250 0 0 NaN
84 bulkrna7.Rds Wikipathways 250 1 1 1.0000000
85 bulkrna1.Rds miR targets 250 0 0 NaN
86 bulkrna2.Rds miR targets 250 0 0 NaN
87 bulkrna3.Rds miR targets 250 0 0 NaN
88 bulkrna4.Rds miR targets 250 0 0 NaN
89 bulkrna5.Rds miR targets 250 1 1 1.0000000
90 bulkrna6.Rds miR targets 250 1 1 1.0000000
91 bulkrna7.Rds miR targets 250 0 0 NaN
92 bulkrna1.Rds TFT GTRD 250 0 0 NaN
93 bulkrna2.Rds TFT GTRD 250 7 5 0.7142857
94 bulkrna3.Rds TFT GTRD 250 1 1 1.0000000
95 bulkrna4.Rds TFT GTRD 250 1 1 1.0000000
96 bulkrna5.Rds TFT GTRD 250 1 1 1.0000000
97 bulkrna6.Rds TFT GTRD 250 2 2 1.0000000
98 bulkrna7.Rds TFT GTRD 250 1 0 0.0000000
99 bulkrna1.Rds GO 250 1 0 0.0000000
100 bulkrna2.Rds GO 250 16 12 0.7500000
101 bulkrna3.Rds GO 250 0 0 NaN
102 bulkrna4.Rds GO 250 0 0 NaN
103 bulkrna5.Rds GO 250 20 10 0.5000000
104 bulkrna6.Rds GO 250 0 0 NaN
105 bulkrna7.Rds GO 250 0 0 NaN
106 bulkrna1.Rds HPO 250 0 0 NaN
107 bulkrna2.Rds HPO 250 0 0 NaN
108 bulkrna3.Rds HPO 250 3 1 0.3333333
109 bulkrna4.Rds HPO 250 0 0 NaN
110 bulkrna5.Rds HPO 250 0 0 NaN
111 bulkrna6.Rds HPO 250 0 0 NaN
112 bulkrna7.Rds HPO 250 19 8 0.4210526
113 bulkrna1.Rds Cellmarkers 250 0 0 NaN
114 bulkrna2.Rds Cellmarkers 250 32 30 0.9375000
115 bulkrna3.Rds Cellmarkers 250 4 4 1.0000000
116 bulkrna4.Rds Cellmarkers 250 2 2 1.0000000
117 bulkrna5.Rds Cellmarkers 250 0 0 NaN
118 bulkrna6.Rds Cellmarkers 250 4 2 0.5000000
119 bulkrna7.Rds Cellmarkers 250 12 12 1.0000000
120 bulkrna1.Rds Hallmark 250 0 0 NaN
121 bulkrna2.Rds Hallmark 250 1 1 1.0000000
122 bulkrna3.Rds Hallmark 250 0 0 NaN
123 bulkrna4.Rds Hallmark 250 0 0 NaN
124 bulkrna5.Rds Hallmark 250 0 0 NaN
125 bulkrna6.Rds Hallmark 250 2 2 1.0000000
126 bulkrna7.Rds Hallmark 250 0 0 NaN
127 bulkrna1.Rds KEGG 500 0 0 NaN
128 bulkrna2.Rds KEGG 500 0 0 NaN
129 bulkrna3.Rds KEGG 500 0 0 NaN
130 bulkrna4.Rds KEGG 500 0 0 NaN
131 bulkrna5.Rds KEGG 500 0 0 NaN
132 bulkrna6.Rds KEGG 500 0 0 NaN
133 bulkrna7.Rds KEGG 500 0 0 NaN
134 bulkrna1.Rds Reactome 500 0 0 NaN
135 bulkrna2.Rds Reactome 500 3 2 0.6666667
136 bulkrna3.Rds Reactome 500 0 0 NaN
137 bulkrna4.Rds Reactome 500 0 0 NaN
138 bulkrna5.Rds Reactome 500 0 0 NaN
139 bulkrna6.Rds Reactome 500 0 0 NaN
140 bulkrna7.Rds Reactome 500 0 0 NaN
141 bulkrna1.Rds Wikipathways 500 0 0 NaN
142 bulkrna2.Rds Wikipathways 500 0 0 NaN
143 bulkrna3.Rds Wikipathways 500 0 0 NaN
144 bulkrna4.Rds Wikipathways 500 0 0 NaN
145 bulkrna5.Rds Wikipathways 500 0 0 NaN
146 bulkrna6.Rds Wikipathways 500 0 0 NaN
147 bulkrna7.Rds Wikipathways 500 0 0 NaN
148 bulkrna1.Rds miR targets 500 7 7 1.0000000
149 bulkrna2.Rds miR targets 500 0 0 NaN
150 bulkrna3.Rds miR targets 500 0 0 NaN
151 bulkrna4.Rds miR targets 500 0 0 NaN
152 bulkrna5.Rds miR targets 500 1 1 1.0000000
153 bulkrna6.Rds miR targets 500 115 108 0.9391304
154 bulkrna7.Rds miR targets 500 10 10 1.0000000
155 bulkrna1.Rds TFT GTRD 500 11 11 1.0000000
156 bulkrna2.Rds TFT GTRD 500 6 5 0.8333333
157 bulkrna3.Rds TFT GTRD 500 3 3 1.0000000
158 bulkrna4.Rds TFT GTRD 500 4 4 1.0000000
159 bulkrna5.Rds TFT GTRD 500 2 2 1.0000000
160 bulkrna6.Rds TFT GTRD 500 5 5 1.0000000
161 bulkrna7.Rds TFT GTRD 500 2 2 1.0000000
162 bulkrna1.Rds GO 500 19 9 0.4736842
163 bulkrna2.Rds GO 500 69 42 0.6086957
164 bulkrna3.Rds GO 500 6 0 0.0000000
165 bulkrna4.Rds GO 500 0 0 NaN
166 bulkrna5.Rds GO 500 39 26 0.6666667
167 bulkrna6.Rds GO 500 1 1 1.0000000
168 bulkrna7.Rds GO 500 1 1 1.0000000
169 bulkrna1.Rds HPO 500 0 0 NaN
170 bulkrna2.Rds HPO 500 0 0 NaN
171 bulkrna3.Rds HPO 500 0 0 NaN
172 bulkrna4.Rds HPO 500 0 0 NaN
173 bulkrna5.Rds HPO 500 0 0 NaN
174 bulkrna6.Rds HPO 500 0 0 NaN
175 bulkrna7.Rds HPO 500 0 0 NaN
176 bulkrna1.Rds Cellmarkers 500 2 2 1.0000000
177 bulkrna2.Rds Cellmarkers 500 45 45 1.0000000
178 bulkrna3.Rds Cellmarkers 500 6 6 1.0000000
179 bulkrna4.Rds Cellmarkers 500 1 1 1.0000000
180 bulkrna5.Rds Cellmarkers 500 0 0 NaN
181 bulkrna6.Rds Cellmarkers 500 4 3 0.7500000
182 bulkrna7.Rds Cellmarkers 500 32 31 0.9687500
183 bulkrna1.Rds Hallmark 500 0 0 NaN
184 bulkrna2.Rds Hallmark 500 0 0 NaN
185 bulkrna3.Rds Hallmark 500 0 0 NaN
186 bulkrna4.Rds Hallmark 500 0 0 NaN
187 bulkrna5.Rds Hallmark 500 0 0 NaN
188 bulkrna6.Rds Hallmark 500 0 0 NaN
189 bulkrna7.Rds Hallmark 500 0 0 NaN
190 bulkrna1.Rds KEGG 1000 0 0 NaN
191 bulkrna2.Rds KEGG 1000 0 0 NaN
192 bulkrna3.Rds KEGG 1000 0 0 NaN
193 bulkrna4.Rds KEGG 1000 0 0 NaN
194 bulkrna5.Rds KEGG 1000 0 0 NaN
195 bulkrna6.Rds KEGG 1000 0 0 NaN
196 bulkrna7.Rds KEGG 1000 0 0 NaN
197 bulkrna1.Rds Reactome 1000 49 36 0.7346939
198 bulkrna2.Rds Reactome 1000 65 56 0.8615385
199 bulkrna3.Rds Reactome 1000 103 91 0.8834951
200 bulkrna4.Rds Reactome 1000 0 0 NaN
201 bulkrna5.Rds Reactome 1000 0 0 NaN
202 bulkrna6.Rds Reactome 1000 81 77 0.9506173
203 bulkrna7.Rds Reactome 1000 34 26 0.7647059
204 bulkrna1.Rds Wikipathways 1000 1 1 1.0000000
205 bulkrna2.Rds Wikipathways 1000 2 2 1.0000000
206 bulkrna3.Rds Wikipathways 1000 2 2 1.0000000
207 bulkrna4.Rds Wikipathways 1000 0 0 NaN
208 bulkrna5.Rds Wikipathways 1000 1 1 1.0000000
209 bulkrna6.Rds Wikipathways 1000 2 2 1.0000000
210 bulkrna7.Rds Wikipathways 1000 1 1 1.0000000
211 bulkrna1.Rds miR targets 1000 12 12 1.0000000
212 bulkrna2.Rds miR targets 1000 3 3 1.0000000
213 bulkrna3.Rds miR targets 1000 3 3 1.0000000
214 bulkrna4.Rds miR targets 1000 0 0 NaN
215 bulkrna5.Rds miR targets 1000 87 86 0.9885057
216 bulkrna6.Rds miR targets 1000 307 305 0.9934853
217 bulkrna7.Rds miR targets 1000 2 2 1.0000000
218 bulkrna1.Rds TFT GTRD 1000 33 33 1.0000000
219 bulkrna2.Rds TFT GTRD 1000 40 40 1.0000000
220 bulkrna3.Rds TFT GTRD 1000 36 32 0.8888889
221 bulkrna4.Rds TFT GTRD 1000 19 19 1.0000000
222 bulkrna5.Rds TFT GTRD 1000 45 45 1.0000000
223 bulkrna6.Rds TFT GTRD 1000 28 27 0.9642857
224 bulkrna7.Rds TFT GTRD 1000 19 17 0.8947368
225 bulkrna1.Rds GO 1000 91 73 0.8021978
226 bulkrna2.Rds GO 1000 214 189 0.8831776
227 bulkrna3.Rds GO 1000 105 92 0.8761905
228 bulkrna4.Rds GO 1000 11 8 0.7272727
229 bulkrna5.Rds GO 1000 78 57 0.7307692
230 bulkrna6.Rds GO 1000 73 60 0.8219178
231 bulkrna7.Rds GO 1000 93 80 0.8602151
232 bulkrna1.Rds HPO 1000 3 3 1.0000000
233 bulkrna2.Rds HPO 1000 0 0 NaN
234 bulkrna3.Rds HPO 1000 1 1 1.0000000
235 bulkrna4.Rds HPO 1000 0 0 NaN
236 bulkrna5.Rds HPO 1000 0 0 NaN
237 bulkrna6.Rds HPO 1000 0 0 NaN
238 bulkrna7.Rds HPO 1000 3 3 1.0000000
239 bulkrna1.Rds Cellmarkers 1000 34 34 1.0000000
240 bulkrna2.Rds Cellmarkers 1000 117 112 0.9572650
241 bulkrna3.Rds Cellmarkers 1000 36 36 1.0000000
242 bulkrna4.Rds Cellmarkers 1000 7 7 1.0000000
243 bulkrna5.Rds Cellmarkers 1000 12 10 0.8333333
244 bulkrna6.Rds Cellmarkers 1000 43 42 0.9767442
245 bulkrna7.Rds Cellmarkers 1000 99 98 0.9898990
246 bulkrna1.Rds Hallmark 1000 2 2 1.0000000
247 bulkrna2.Rds Hallmark 1000 7 7 1.0000000
248 bulkrna3.Rds Hallmark 1000 3 3 1.0000000
249 bulkrna4.Rds Hallmark 1000 0 0 NaN
250 bulkrna5.Rds Hallmark 1000 0 0 NaN
251 bulkrna6.Rds Hallmark 1000 5 5 1.0000000
252 bulkrna7.Rds Hallmark 1000 0 0 NaN
253 bulkrna1.Rds KEGG 2000 3 3 1.0000000
254 bulkrna2.Rds KEGG 2000 1 0 0.0000000
255 bulkrna3.Rds KEGG 2000 1 0 0.0000000
256 bulkrna4.Rds KEGG 2000 8 8 1.0000000
257 bulkrna5.Rds KEGG 2000 0 0 NaN
258 bulkrna6.Rds KEGG 2000 1 1 1.0000000
259 bulkrna7.Rds KEGG 2000 0 0 NaN
260 bulkrna1.Rds Reactome 2000 122 121 0.9918033
261 bulkrna2.Rds Reactome 2000 96 96 1.0000000
262 bulkrna3.Rds Reactome 2000 98 97 0.9897959
263 bulkrna4.Rds Reactome 2000 108 105 0.9722222
264 bulkrna5.Rds Reactome 2000 39 36 0.9230769
265 bulkrna6.Rds Reactome 2000 74 72 0.9729730
266 bulkrna7.Rds Reactome 2000 24 17 0.7083333
267 bulkrna1.Rds Wikipathways 2000 18 18 1.0000000
268 bulkrna2.Rds Wikipathways 2000 18 18 1.0000000
269 bulkrna3.Rds Wikipathways 2000 3 3 1.0000000
270 bulkrna4.Rds Wikipathways 2000 0 0 NaN
271 bulkrna5.Rds Wikipathways 2000 0 0 NaN
272 bulkrna6.Rds Wikipathways 2000 18 18 1.0000000
273 bulkrna7.Rds Wikipathways 2000 6 6 1.0000000
274 bulkrna1.Rds miR targets 2000 188 184 0.9787234
275 bulkrna2.Rds miR targets 2000 37 37 1.0000000
276 bulkrna3.Rds miR targets 2000 3 3 1.0000000
277 bulkrna4.Rds miR targets 2000 2 2 1.0000000
278 bulkrna5.Rds miR targets 2000 270 269 0.9962963
279 bulkrna6.Rds miR targets 2000 690 688 0.9971014
280 bulkrna7.Rds miR targets 2000 36 36 1.0000000
281 bulkrna1.Rds TFT GTRD 2000 88 88 1.0000000
282 bulkrna2.Rds TFT GTRD 2000 88 87 0.9886364
283 bulkrna3.Rds TFT GTRD 2000 100 96 0.9600000
284 bulkrna4.Rds TFT GTRD 2000 66 66 1.0000000
285 bulkrna5.Rds TFT GTRD 2000 113 111 0.9823009
286 bulkrna6.Rds TFT GTRD 2000 89 87 0.9775281
287 bulkrna7.Rds TFT GTRD 2000 64 62 0.9687500
288 bulkrna1.Rds GO 2000 289 289 1.0000000
289 bulkrna2.Rds GO 2000 621 600 0.9661836
290 bulkrna3.Rds GO 2000 269 262 0.9739777
291 bulkrna4.Rds GO 2000 88 82 0.9318182
292 bulkrna5.Rds GO 2000 214 205 0.9579439
293 bulkrna6.Rds GO 2000 188 177 0.9414894
294 bulkrna7.Rds GO 2000 323 313 0.9690402
295 bulkrna1.Rds HPO 2000 9 9 1.0000000
296 bulkrna2.Rds HPO 2000 2 2 1.0000000
297 bulkrna3.Rds HPO 2000 1 1 1.0000000
298 bulkrna4.Rds HPO 2000 0 0 NaN
299 bulkrna5.Rds HPO 2000 0 0 NaN
300 bulkrna6.Rds HPO 2000 0 0 NaN
301 bulkrna7.Rds HPO 2000 0 0 NaN
302 bulkrna1.Rds Cellmarkers 2000 82 81 0.9878049
303 bulkrna2.Rds Cellmarkers 2000 255 255 1.0000000
304 bulkrna3.Rds Cellmarkers 2000 91 91 1.0000000
305 bulkrna4.Rds Cellmarkers 2000 31 31 1.0000000
306 bulkrna5.Rds Cellmarkers 2000 43 43 1.0000000
307 bulkrna6.Rds Cellmarkers 2000 104 104 1.0000000
308 bulkrna7.Rds Cellmarkers 2000 240 242 0.9917355
309 bulkrna1.Rds Hallmark 2000 3 3 1.0000000
310 bulkrna2.Rds Hallmark 2000 13 13 1.0000000
311 bulkrna3.Rds Hallmark 2000 3 3 1.0000000
312 bulkrna4.Rds Hallmark 2000 2 2 1.0000000
313 bulkrna5.Rds Hallmark 2000 0 0 NaN
314 bulkrna6.Rds Hallmark 2000 12 12 1.0000000
315 bulkrna7.Rds Hallmark 2000 3 3 1.0000000
summary(res1$nsig_bad)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     1.0    25.2    10.5   690.0
summary(res1$nsig_good)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   23.92    8.00  688.00
summary(res1$Jaccard)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.8779  1.0000  0.8465  1.0000  1.0000     157
lapply(ngenes, function(n) {
  res2 <- subset(res1,Var3==n) ; summary(res2$nsig_bad)
} )
## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.5238  0.0000 13.0000 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   2.222   1.000  32.000 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   6.254   3.000 115.000 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    3.00   31.94   41.50  307.00 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.50   31.00   85.05   99.00  690.00
lapply(ngenes, function(n) {
  res2 <- subset(res1,Var3==n) ; summary(res2$nsig_good)
} )
## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1111  0.0000  6.0000 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   1.603   1.000  30.000 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00    5.19    2.00  108.00 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    3.00   29.22   38.00  305.00 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.50   31.00   83.46   96.50  688.00
lapply(ngenes, function(n) {
  res2 <- subset(res1,Var3==n) ; summary(res2$Jaccard)
} )
## [[1]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  0.0000  0.1667  0.0000  1.0000      53 
## 
## [[2]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.5000  1.0000  0.7063  1.0000  1.0000      38 
## 
## [[3]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.8125  1.0000  0.8711  1.0000  1.0000      39 
## 
## [[4]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.7273  0.8835  1.0000  0.9419  1.0000  1.0000      18 
## 
## [[5]]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.9749  1.0000  0.9468  1.0000  1.0000       9
# NUMBER OF SIGNIFICANT SETS

ns1 <- lapply(ngenes ,function(n) {
  res2 <- subset(res1,Var3==n)
  ns <- matrix( res2$nsig_bad , ncol=9 )
  rownames(ns) <- names(d)
  colnames(ns) <- names(gs)
  return(ns)
} )

names(ns1) <- ngenes

ns1
## $`125`
##              KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds    0        0            0           0        0  9   2           0
## bulkrna2.Rds    0        0            1           0        0  0   0           2
## bulkrna3.Rds    0        0            0           0        0  0   0           0
## bulkrna4.Rds    0        0            0           0        0  0   1           2
## bulkrna5.Rds    0        0            0           0        0 13   0           0
## bulkrna6.Rds    0        0            0           0        0  0   0           0
## bulkrna7.Rds    0        0            0           0        0  1   1           0
##              Hallmark
## bulkrna1.Rds        0
## bulkrna2.Rds        0
## bulkrna3.Rds        0
## bulkrna4.Rds        0
## bulkrna5.Rds        1
## bulkrna6.Rds        0
## bulkrna7.Rds        0
## 
## $`250`
##              KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds    0        0            0           0        0  1   0           0
## bulkrna2.Rds    2        3            0           0        7 16   0          32
## bulkrna3.Rds    0        0            0           0        1  0   3           4
## bulkrna4.Rds    0        0            0           0        1  0   0           2
## bulkrna5.Rds    0        0            1           1        1 20   0           0
## bulkrna6.Rds    0        0            0           1        2  0   0           4
## bulkrna7.Rds    0        2            1           0        1  0  19          12
##              Hallmark
## bulkrna1.Rds        0
## bulkrna2.Rds        1
## bulkrna3.Rds        0
## bulkrna4.Rds        0
## bulkrna5.Rds        0
## bulkrna6.Rds        2
## bulkrna7.Rds        0
## 
## $`500`
##              KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds    0        0            0           7       11 19   0           2
## bulkrna2.Rds    0        3            0           0        6 69   0          45
## bulkrna3.Rds    0        0            0           0        3  6   0           6
## bulkrna4.Rds    0        0            0           0        4  0   0           1
## bulkrna5.Rds    0        0            0           1        2 39   0           0
## bulkrna6.Rds    0        0            0         115        5  1   0           4
## bulkrna7.Rds    0        0            0          10        2  1   0          32
##              Hallmark
## bulkrna1.Rds        0
## bulkrna2.Rds        0
## bulkrna3.Rds        0
## bulkrna4.Rds        0
## bulkrna5.Rds        0
## bulkrna6.Rds        0
## bulkrna7.Rds        0
## 
## $`1000`
##              KEGG Reactome Wikipathways miR targets TFT GTRD  GO HPO
## bulkrna1.Rds    0       49            1          12       33  91   3
## bulkrna2.Rds    0       65            2           3       40 214   0
## bulkrna3.Rds    0      103            2           3       36 105   1
## bulkrna4.Rds    0        0            0           0       19  11   0
## bulkrna5.Rds    0        0            1          87       45  78   0
## bulkrna6.Rds    0       81            2         307       28  73   0
## bulkrna7.Rds    0       34            1           2       19  93   3
##              Cellmarkers Hallmark
## bulkrna1.Rds          34        2
## bulkrna2.Rds         117        7
## bulkrna3.Rds          36        3
## bulkrna4.Rds           7        0
## bulkrna5.Rds          12        0
## bulkrna6.Rds          43        5
## bulkrna7.Rds          99        0
## 
## $`2000`
##              KEGG Reactome Wikipathways miR targets TFT GTRD  GO HPO
## bulkrna1.Rds    3      122           18         188       88 289   9
## bulkrna2.Rds    1       96           18          37       88 621   2
## bulkrna3.Rds    1       98            3           3      100 269   1
## bulkrna4.Rds    8      108            0           2       66  88   0
## bulkrna5.Rds    0       39            0         270      113 214   0
## bulkrna6.Rds    1       74           18         690       89 188   0
## bulkrna7.Rds    0       24            6          36       64 323   0
##              Cellmarkers Hallmark
## bulkrna1.Rds          82        3
## bulkrna2.Rds         255       13
## bulkrna3.Rds          91        3
## bulkrna4.Rds          31        2
## bulkrna5.Rds          43        0
## bulkrna6.Rds         104       12
## bulkrna7.Rds         240        3
ns1 <- lapply(ns1,function(y) {
  res <- apply(y,2,function(x) {
    x <- x[!is.na(x)]
    x <- x[is.finite(x)]
    mean(x)
  } )
  return(res)
})

ns1
## $`125`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.0000000    0.0000000    0.1428571    0.0000000    0.0000000    3.2857143 
##          HPO  Cellmarkers     Hallmark 
##    0.5714286    0.5714286    0.1428571 
## 
## $`250`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.2857143    0.7142857    0.2857143    0.2857143    1.8571429    5.2857143 
##          HPO  Cellmarkers     Hallmark 
##    3.1428571    7.7142857    0.4285714 
## 
## $`500`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.0000000    0.4285714    0.0000000   19.0000000    4.7142857   19.2857143 
##          HPO  Cellmarkers     Hallmark 
##    0.0000000   12.8571429    0.0000000 
## 
## $`1000`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##     0.000000    47.428571     1.285714    59.142857    31.428571    95.000000 
##          HPO  Cellmarkers     Hallmark 
##     1.000000    49.714286     2.428571 
## 
## $`2000`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##     2.000000    80.142857     9.000000   175.142857    86.857143   284.571429 
##          HPO  Cellmarkers     Hallmark 
##     1.714286   120.857143     5.142857
ns2 <- lapply(ngenes ,function(n) {
  res2 <- subset(res1,Var3==n)
  ns <- matrix( res2$nsig_good , ncol=9 )
  rownames(ns) <- names(d)
  colnames(ns) <- names(gs)
  return(ns)
} )

names(ns2) <- ngenes

ns2
## $`125`
##              KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds    0        0            0           0        0  6   0           0
## bulkrna2.Rds    0        0            0           0        0  0   0           0
## bulkrna3.Rds    0        0            0           0        0  0   0           0
## bulkrna4.Rds    0        0            0           0        0  0   0           0
## bulkrna5.Rds    0        0            0           0        0  0   0           0
## bulkrna6.Rds    0        0            0           0        0  0   0           0
## bulkrna7.Rds    0        0            0           0        0  0   1           0
##              Hallmark
## bulkrna1.Rds        0
## bulkrna2.Rds        0
## bulkrna3.Rds        0
## bulkrna4.Rds        0
## bulkrna5.Rds        0
## bulkrna6.Rds        0
## bulkrna7.Rds        0
## 
## $`250`
##              KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds    0        0            0           0        0  0   0           0
## bulkrna2.Rds    1        3            0           0        5 12   0          30
## bulkrna3.Rds    0        0            0           0        1  0   1           4
## bulkrna4.Rds    0        0            0           0        1  0   0           2
## bulkrna5.Rds    0        0            0           1        1 10   0           0
## bulkrna6.Rds    0        0            0           1        2  0   0           2
## bulkrna7.Rds    0        0            1           0        0  0   8          12
##              Hallmark
## bulkrna1.Rds        0
## bulkrna2.Rds        1
## bulkrna3.Rds        0
## bulkrna4.Rds        0
## bulkrna5.Rds        0
## bulkrna6.Rds        2
## bulkrna7.Rds        0
## 
## $`500`
##              KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds    0        0            0           7       11  9   0           2
## bulkrna2.Rds    0        2            0           0        5 42   0          45
## bulkrna3.Rds    0        0            0           0        3  0   0           6
## bulkrna4.Rds    0        0            0           0        4  0   0           1
## bulkrna5.Rds    0        0            0           1        2 26   0           0
## bulkrna6.Rds    0        0            0         108        5  1   0           3
## bulkrna7.Rds    0        0            0          10        2  1   0          31
##              Hallmark
## bulkrna1.Rds        0
## bulkrna2.Rds        0
## bulkrna3.Rds        0
## bulkrna4.Rds        0
## bulkrna5.Rds        0
## bulkrna6.Rds        0
## bulkrna7.Rds        0
## 
## $`1000`
##              KEGG Reactome Wikipathways miR targets TFT GTRD  GO HPO
## bulkrna1.Rds    0       36            1          12       33  73   3
## bulkrna2.Rds    0       56            2           3       40 189   0
## bulkrna3.Rds    0       91            2           3       32  92   1
## bulkrna4.Rds    0        0            0           0       19   8   0
## bulkrna5.Rds    0        0            1          86       45  57   0
## bulkrna6.Rds    0       77            2         305       27  60   0
## bulkrna7.Rds    0       26            1           2       17  80   3
##              Cellmarkers Hallmark
## bulkrna1.Rds          34        2
## bulkrna2.Rds         112        7
## bulkrna3.Rds          36        3
## bulkrna4.Rds           7        0
## bulkrna5.Rds          10        0
## bulkrna6.Rds          42        5
## bulkrna7.Rds          98        0
## 
## $`2000`
##              KEGG Reactome Wikipathways miR targets TFT GTRD  GO HPO
## bulkrna1.Rds    3      121           18         184       88 289   9
## bulkrna2.Rds    0       96           18          37       87 600   2
## bulkrna3.Rds    0       97            3           3       96 262   1
## bulkrna4.Rds    8      105            0           2       66  82   0
## bulkrna5.Rds    0       36            0         269      111 205   0
## bulkrna6.Rds    1       72           18         688       87 177   0
## bulkrna7.Rds    0       17            6          36       62 313   0
##              Cellmarkers Hallmark
## bulkrna1.Rds          81        3
## bulkrna2.Rds         255       13
## bulkrna3.Rds          91        3
## bulkrna4.Rds          31        2
## bulkrna5.Rds          43        0
## bulkrna6.Rds         104       12
## bulkrna7.Rds         242        3
ns2 <- lapply(ns2,function(y) {
  res <- apply(y,2,function(x) {
    x <- x[!is.na(x)]
    x <- x[is.finite(x)]
    mean(x)
  } )
  return(res)
})

ns2
## $`125`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.8571429 
##          HPO  Cellmarkers     Hallmark 
##    0.1428571    0.0000000    0.0000000 
## 
## $`250`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.1428571    0.4285714    0.1428571    0.2857143    1.4285714    3.1428571 
##          HPO  Cellmarkers     Hallmark 
##    1.2857143    7.1428571    0.4285714 
## 
## $`500`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.0000000    0.2857143    0.0000000   18.0000000    4.5714286   11.2857143 
##          HPO  Cellmarkers     Hallmark 
##    0.0000000   12.5714286    0.0000000 
## 
## $`1000`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##     0.000000    40.857143     1.285714    58.714286    30.428571    79.857143 
##          HPO  Cellmarkers     Hallmark 
##     1.000000    48.428571     2.428571 
## 
## $`2000`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##     1.714286    77.714286     9.000000   174.142857    85.285714   275.428571 
##          HPO  Cellmarkers     Hallmark 
##     1.714286   121.000000     5.142857
# JACCARD
jac <- lapply(ngenes ,function(n) {
  res2 <- subset(res1,Var3==n)
  ns <- matrix( res2$Jaccard , ncol=9 )
  rownames(ns) <- names(d)
  colnames(ns) <- names(gs)
  return(ns)
} )
names(jac) <- ngenes

jac
## $`125`
##              KEGG Reactome Wikipathways miR targets TFT GTRD        GO HPO
## bulkrna1.Rds  NaN      NaN          NaN         NaN      NaN 0.6666667   0
## bulkrna2.Rds  NaN      NaN            0         NaN      NaN       NaN NaN
## bulkrna3.Rds  NaN      NaN          NaN         NaN      NaN       NaN NaN
## bulkrna4.Rds  NaN      NaN          NaN         NaN      NaN       NaN   0
## bulkrna5.Rds  NaN      NaN          NaN         NaN      NaN 0.0000000 NaN
## bulkrna6.Rds  NaN      NaN          NaN         NaN      NaN       NaN NaN
## bulkrna7.Rds  NaN      NaN          NaN         NaN      NaN 0.0000000   1
##              Cellmarkers Hallmark
## bulkrna1.Rds         NaN      NaN
## bulkrna2.Rds           0      NaN
## bulkrna3.Rds         NaN      NaN
## bulkrna4.Rds           0      NaN
## bulkrna5.Rds         NaN        0
## bulkrna6.Rds         NaN      NaN
## bulkrna7.Rds         NaN      NaN
## 
## $`250`
##              KEGG Reactome Wikipathways miR targets  TFT GTRD   GO       HPO
## bulkrna1.Rds  NaN      NaN          NaN         NaN       NaN 0.00       NaN
## bulkrna2.Rds  0.5        1          NaN         NaN 0.7142857 0.75       NaN
## bulkrna3.Rds  NaN      NaN          NaN         NaN 1.0000000  NaN 0.3333333
## bulkrna4.Rds  NaN      NaN          NaN         NaN 1.0000000  NaN       NaN
## bulkrna5.Rds  NaN      NaN            0           1 1.0000000 0.50       NaN
## bulkrna6.Rds  NaN      NaN          NaN           1 1.0000000  NaN       NaN
## bulkrna7.Rds  NaN        0            1         NaN 0.0000000  NaN 0.4210526
##              Cellmarkers Hallmark
## bulkrna1.Rds         NaN      NaN
## bulkrna2.Rds      0.9375        1
## bulkrna3.Rds      1.0000      NaN
## bulkrna4.Rds      1.0000      NaN
## bulkrna5.Rds         NaN      NaN
## bulkrna6.Rds      0.5000        1
## bulkrna7.Rds      1.0000      NaN
## 
## $`500`
##              KEGG  Reactome Wikipathways miR targets  TFT GTRD        GO HPO
## bulkrna1.Rds  NaN       NaN          NaN   1.0000000 1.0000000 0.4736842 NaN
## bulkrna2.Rds  NaN 0.6666667          NaN         NaN 0.8333333 0.6086957 NaN
## bulkrna3.Rds  NaN       NaN          NaN         NaN 1.0000000 0.0000000 NaN
## bulkrna4.Rds  NaN       NaN          NaN         NaN 1.0000000       NaN NaN
## bulkrna5.Rds  NaN       NaN          NaN   1.0000000 1.0000000 0.6666667 NaN
## bulkrna6.Rds  NaN       NaN          NaN   0.9391304 1.0000000 1.0000000 NaN
## bulkrna7.Rds  NaN       NaN          NaN   1.0000000 1.0000000 1.0000000 NaN
##              Cellmarkers Hallmark
## bulkrna1.Rds     1.00000      NaN
## bulkrna2.Rds     1.00000      NaN
## bulkrna3.Rds     1.00000      NaN
## bulkrna4.Rds     1.00000      NaN
## bulkrna5.Rds         NaN      NaN
## bulkrna6.Rds     0.75000      NaN
## bulkrna7.Rds     0.96875      NaN
## 
## $`1000`
##              KEGG  Reactome Wikipathways miR targets  TFT GTRD        GO HPO
## bulkrna1.Rds  NaN 0.7346939            1   1.0000000 1.0000000 0.8021978   1
## bulkrna2.Rds  NaN 0.8615385            1   1.0000000 1.0000000 0.8831776 NaN
## bulkrna3.Rds  NaN 0.8834951            1   1.0000000 0.8888889 0.8761905   1
## bulkrna4.Rds  NaN       NaN          NaN         NaN 1.0000000 0.7272727 NaN
## bulkrna5.Rds  NaN       NaN            1   0.9885057 1.0000000 0.7307692 NaN
## bulkrna6.Rds  NaN 0.9506173            1   0.9934853 0.9642857 0.8219178 NaN
## bulkrna7.Rds  NaN 0.7647059            1   1.0000000 0.8947368 0.8602151   1
##              Cellmarkers Hallmark
## bulkrna1.Rds   1.0000000        1
## bulkrna2.Rds   0.9572650        1
## bulkrna3.Rds   1.0000000        1
## bulkrna4.Rds   1.0000000      NaN
## bulkrna5.Rds   0.8333333      NaN
## bulkrna6.Rds   0.9767442        1
## bulkrna7.Rds   0.9898990      NaN
## 
## $`2000`
##              KEGG  Reactome Wikipathways miR targets  TFT GTRD        GO HPO
## bulkrna1.Rds    1 0.9918033            1   0.9787234 1.0000000 1.0000000   1
## bulkrna2.Rds    0 1.0000000            1   1.0000000 0.9886364 0.9661836   1
## bulkrna3.Rds    0 0.9897959            1   1.0000000 0.9600000 0.9739777   1
## bulkrna4.Rds    1 0.9722222          NaN   1.0000000 1.0000000 0.9318182 NaN
## bulkrna5.Rds  NaN 0.9230769          NaN   0.9962963 0.9823009 0.9579439 NaN
## bulkrna6.Rds    1 0.9729730            1   0.9971014 0.9775281 0.9414894 NaN
## bulkrna7.Rds  NaN 0.7083333            1   1.0000000 0.9687500 0.9690402 NaN
##              Cellmarkers Hallmark
## bulkrna1.Rds   0.9878049        1
## bulkrna2.Rds   1.0000000        1
## bulkrna3.Rds   1.0000000        1
## bulkrna4.Rds   1.0000000        1
## bulkrna5.Rds   1.0000000      NaN
## bulkrna6.Rds   1.0000000        1
## bulkrna7.Rds   0.9917355        1
jac <- lapply(jac,function(y) {
  res <- apply(y,2,function(x) {
    x <- x[!is.na(x)]
    x <- x[is.finite(x)]
    mean(x)
  } )
  return(res)
})

jac
## $`125`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##          NaN          NaN    0.0000000          NaN          NaN    0.2222222 
##          HPO  Cellmarkers     Hallmark 
##    0.3333333    0.0000000    0.0000000 
## 
## $`250`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.5000000    0.5000000    0.5000000    1.0000000    0.7857143    0.4166667 
##          HPO  Cellmarkers     Hallmark 
##    0.3771930    0.8875000    1.0000000 
## 
## $`500`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##          NaN    0.6666667          NaN    0.9847826    0.9761905    0.6248411 
##          HPO  Cellmarkers     Hallmark 
##          NaN    0.9531250          NaN 
## 
## $`1000`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##          NaN    0.8390101    1.0000000    0.9969985    0.9639873    0.8145344 
##          HPO  Cellmarkers     Hallmark 
##    1.0000000    0.9653202    1.0000000 
## 
## $`2000`
##         KEGG     Reactome Wikipathways  miR targets     TFT GTRD           GO 
##    0.6000000    0.9368864    1.0000000    0.9960173    0.9824593    0.9629219 
##          HPO  Cellmarkers     Hallmark 
##    1.0000000    0.9970772    1.0000000
jac <- do.call(rbind,jac)

plot(jac[,"GO"],type="b", pch=19,cex=1.5,lwd=2,
  xaxt = "n",ylim=c(0,1),ylab="Jaccard index",xlab="no. genes selected")
axis(1, at = seq(1, 5, by = 1),
     labels = ngenes)
points(jac[,"Reactome"],type="b",col="red",pch=19,cex=1.5,lwd=2)
points(jac[,"Cellmarkers"],type="b",col="blue",pch=19,cex=1.5,lwd=2)
points(jac[,"TFT GTRD"],type="b",col="gray50",pch=19,cex=1.5,lwd=2)
mtext("impact of gene list length on FDR problem severity")

legend("bottomright", legend=c("GO","Reactome", "Cellmarkers", "TFT"),
       col=c("black", "red", "blue", "gray50"), lty=1, cex=1.2,lwd=3,pch=19)

Make charts showing changes in pvalues

For bulk RNA-seq data set #1, run enrichment with all the gene set libraries.

names(gs)
## [1] "KEGG"         "Reactome"     "Wikipathways" "miR targets"  "TFT GTRD"    
## [6] "GO"           "HPO"          "Cellmarkers"  "Hallmark"
lapply(1:length(gs), function(i) {
  bad <- orabad(d[[1]],gs[[i]],ngenes=125)
  good <- oragood(d[[1]],gs[[i]],ngenes=125)
  compare_ora_plots(bad,good,libname=names(gs)[i],setsize=125)
} )

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
  bad <- orabad(d[[1]],gs[[i]],ngenes=250)
  good <- oragood(d[[1]],gs[[i]],ngenes=250)
  compare_ora_plots(bad,good,libname=names(gs)[i],setsize=250)
} )

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
  bad <- orabad(d[[1]],gs[[i]],ngenes=500)
  good <- oragood(d[[1]],gs[[i]],ngenes=500)
  compare_ora_plots(bad,good,libname=names(gs)[i],setsize=500)
} )

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
  bad <- orabad(d[[1]],gs[[i]],ngenes=1000)
  good <- oragood(d[[1]],gs[[i]],ngenes=1000)
  compare_ora_plots(bad,good,libname=names(gs)[i],setsize=1000)
} )

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
  bad <- orabad(d[[1]],gs[[i]],ngenes=2000)
  good <- oragood(d[[1]],gs[[i]],ngenes=2000)
  compare_ora_plots(bad,good,libname=names(gs)[i],setsize=2000)
} )

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL

Session information

For reproducibility

sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] gplots_3.1.3.1              eulerr_7.0.2               
##  [3] fgsea_1.30.0                clusterProfiler_4.12.0     
##  [5] kableExtra_1.4.0            DESeq2_1.44.0              
##  [7] SummarizedExperiment_1.34.0 Biobase_2.64.0             
##  [9] MatrixGenerics_1.16.0       matrixStats_1.3.0          
## [11] GenomicRanges_1.56.0        GenomeInfoDb_1.40.0        
## [13] IRanges_2.38.0              S4Vectors_0.42.0           
## [15] BiocGenerics_0.50.0         getDEE2_1.14.0             
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.16.0       jsonlite_1.8.8         
##   [4] magrittr_2.0.3          farver_2.1.2            rmarkdown_2.27         
##   [7] fs_1.6.4                zlibbioc_1.50.0         vctrs_0.6.5            
##  [10] memoise_2.0.1.9000      ggtree_3.12.0           htmltools_0.5.8.1      
##  [13] S4Arrays_1.4.0          SparseArray_1.4.3       gridGraphics_0.5-1     
##  [16] sass_0.4.9              KernSmooth_2.23-24      bslib_0.7.0            
##  [19] plyr_1.8.9              cachem_1.1.0            igraph_2.0.3           
##  [22] lifecycle_1.0.4         pkgconfig_2.0.3         gson_0.1.0             
##  [25] Matrix_1.7-0            R6_2.5.1                fastmap_1.2.0          
##  [28] GenomeInfoDbData_1.2.12 digest_0.6.35           aplot_0.2.2            
##  [31] enrichplot_1.24.0       colorspace_2.1-0        patchwork_1.2.0        
##  [34] AnnotationDbi_1.66.0    RSQLite_2.3.7           fansi_1.0.6            
##  [37] httr_1.4.7              polyclip_1.10-6         abind_1.4-5            
##  [40] compiler_4.4.0          bit64_4.0.5             withr_3.0.0            
##  [43] BiocParallel_1.38.0     viridis_0.6.5           DBI_1.2.3              
##  [46] highr_0.11              ggforce_0.4.2           MASS_7.3-60.2          
##  [49] DelayedArray_0.30.1     HDO.db_0.99.1           caTools_1.18.2         
##  [52] gtools_3.9.5            tools_4.4.0             scatterpie_0.2.2       
##  [55] ape_5.8                 glue_1.7.0              nlme_3.1-164           
##  [58] GOSemSim_2.30.0         shadowtext_0.1.3        grid_4.4.0             
##  [61] reshape2_1.4.4          generics_0.1.3          gtable_0.3.5           
##  [64] tidyr_1.3.1             data.table_1.15.4       tidygraph_1.3.1        
##  [67] xml2_1.3.6              utf8_1.2.4              XVector_0.44.0         
##  [70] ggrepel_0.9.5           pillar_1.9.0            stringr_1.5.1          
##  [73] yulab.utils_0.1.4       splines_4.4.0           dplyr_1.1.4            
##  [76] tweenr_2.0.3            treeio_1.28.0           lattice_0.22-6         
##  [79] bit_4.0.5               tidyselect_1.2.1        GO.db_3.19.1           
##  [82] locfit_1.5-9.9          Biostrings_2.72.0       knitr_1.47             
##  [85] gridExtra_2.3           svglite_2.1.3           xfun_0.44              
##  [88] graphlayouts_1.1.1      stringi_1.8.4           UCSC.utils_1.0.0       
##  [91] lazyeval_0.2.2          ggfun_0.1.5             yaml_2.3.8             
##  [94] evaluate_0.23           codetools_0.2-20        ggraph_2.2.1           
##  [97] tibble_3.2.1            qvalue_2.36.0           ggplotify_0.1.2        
## [100] cli_3.6.2               systemfonts_1.1.0       munsell_0.5.1          
## [103] jquerylib_0.1.4         Rcpp_1.0.12             png_0.1-8              
## [106] ggplot2_3.5.1           blob_1.2.4              DOSE_3.30.1            
## [109] htm2txt_2.2.2           bitops_1.0-7            viridisLite_0.4.2      
## [112] tidytree_0.4.6          scales_1.3.0            purrr_1.0.2            
## [115] crayon_1.5.2            rlang_1.1.4             cowplot_1.1.3          
## [118] fastmatch_1.1-4         KEGGREST_1.44.0