Source: https://github.com/markziemann/background
This guide is a Rmarkdown script that conducts differential expression and enrichment analysis, which are very popular workflows for transcriptome data.
The goal of this work is to understand the behaviour of ClusterProfiler’s p-value adjustment method as we have observed some weird behaviour. In particular we see that clusterprofiler’s criteria for inclusion/exclusion of gene sets is irregular. For example clusterprofiler seems to include gene sets as detected if there is just 1 member detected in the foreground list. If a gene set has 10 in the background and 0 in the foreground it looks like it gets excluded. This is a bit strange and contrary to best practice, where the detection threshold should be set by the overlap size in the background list, rather than the foreground list.
Theoretically, this behaviour could have a negative impact on performance, as FDR is applied to the wrong pathways.
suppressPackageStartupMessages({
library("getDEE2")
library("DESeq2")
library("kableExtra")
library("clusterProfiler")
library("fgsea")
library("eulerr")
library("gplots")
library("parallel")
})
For this guide I will be using bulk RNA-seq data from several previous studies.
Gene sets were obtained from MSigDB.
myfiles <- list.files("../dataprep",pattern="*Rds",full.names=TRUE)
d <- lapply(myfiles,readRDS)
names(d) <- basename(myfiles)
mygmts <- list.files("../gmt",pattern="*gmt",full.names=TRUE)
gs <- lapply(mygmts,gmtPathways)
names(gs) <- basename(mygmts)
names(gs)
## [1] "c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt"
## [2] "c2.cp.reactome.v2023.2.Hs.symbols.gmt"
## [3] "c2.cp.wikipathways.v2023.2.Hs.symbols.gmt"
## [4] "c3.mir.mirdb.v2023.2.Hs.symbols.gmt"
## [5] "c3.tft.gtrd.v2023.2.Hs.symbols.gmt"
## [6] "c5.go.v2023.2.Hs.symbols.gmt"
## [7] "c5.hpo.v2023.2.Hs.symbols.gmt"
## [8] "c8.all.v2023.2.Hs.symbols.gmt"
## [9] "h.all.v2023.2.Hs.symbols.gmt"
names(gs) <- c("KEGG", "Reactome", "Wikipathways", "miR targets", "TFT GTRD",
"GO", "HPO", "Cellmarkers", "Hallmark" )
# number of sets
lapply(gs,length)
## $KEGG
## [1] 619
##
## $Reactome
## [1] 1692
##
## $Wikipathways
## [1] 791
##
## $`miR targets`
## [1] 2377
##
## $`TFT GTRD`
## [1] 505
##
## $GO
## [1] 10461
##
## $HPO
## [1] 5547
##
## $Cellmarkers
## [1] 830
##
## $Hallmark
## [1] 50
# set size summary
lapply(gs,function(x) {
y <- unlist(lapply(x, length ))
summary(y)
} )
## $KEGG
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 8.00 13.00 15.59 19.00 92.00
##
## $Reactome
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 11.00 23.00 56.28 57.00 1463.00
##
## $Wikipathways
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 14.00 28.00 42.92 53.00 438.00
##
## $`miR targets`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.0 59.0 103.0 156.5 186.0 1071.0
##
## $`TFT GTRD`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.0 55.0 287.0 509.3 880.0 1999.0
##
## $GO
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 9.00 18.00 79.83 55.00 1994.00
##
## $HPO
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 10.00 23.00 91.32 74.00 1735.00
##
## $Cellmarkers
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.0 52.0 115.0 180.5 223.2 1770.0
##
## $Hallmark
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32.0 98.0 180.0 146.4 200.0 200.0
# gene coverage
lapply(gs,function(x) {
y <- unique(unlist(x))
length(y)
} )
## $KEGG
## [1] 2727
##
## $Reactome
## [1] 11155
##
## $Wikipathways
## [1] 8313
##
## $`miR targets`
## [1] 16655
##
## $`TFT GTRD`
## [1] 26807
##
## $GO
## [1] 19428
##
## $HPO
## [1] 4997
##
## $Cellmarkers
## [1] 20418
##
## $Hallmark
## [1] 4384
# total number of annotations
lapply(gs, function(x) {
sum(unlist(lapply(x,length)))
} )
## $KEGG
## [1] 9651
##
## $Reactome
## [1] 95226
##
## $Wikipathways
## [1] 33947
##
## $`miR targets`
## [1] 372061
##
## $`TFT GTRD`
## [1] 257181
##
## $GO
## [1] 835146
##
## $HPO
## [1] 506552
##
## $Cellmarkers
## [1] 149827
##
## $Hallmark
## [1] 7322
Here is a function that conducts over-representation analysis with ClusterProfiler. Genes that are not annotated as having any gene set category are discarded.
# df = deseq output
# df must have "ensID geneSymbol" as rownames
# gs = gene sets
# gs format is list of symbols
orabad <- function( df, gs, ngenes=2000 ) {
def <- df
gs1 <- stack(gs)
colnames(gs1) <- c("gene","term")
gs1 <- gs1[,c(2,1)]
defup <- head(rownames(subset(def,log2FoldChange>0)),ngenes)
defup <- defup[lapply(strsplit(defup," "),length)==2]
defup <- unique(sapply(strsplit(defup," "),"[[",2))
defdn <- head(rownames(subset(def,log2FoldChange<0)),ngenes)
defdn <- defdn[lapply(strsplit(defdn," "),length)==2]
defdn <- unique(sapply(strsplit(defdn," "),"[[",2))
bg <- rownames(def)
bg <- bg[lapply(strsplit(bg," "),length)==2]
bg <- unique(sapply(strsplit(bg," "),"[[",2))
ora_up <- as.data.frame(enricher(gene = defup ,
universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
pAdjustMethod="fdr", pvalueCutoff = 1, qvalueCutoff = 1 ))
ora_up$geneID <- NULL
ora_ups <- rownames(ora_up)
gr <- as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",2))
br <- as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",2))
ora_up$es <- gr/br
ora_up <- ora_up[order(-ora_up$es),]
ora_up$Description=NULL
ora_dn <- as.data.frame(enricher(gene = defdn ,
universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
pAdjustMethod="fdr", pvalueCutoff = 1, qvalueCutoff = 1 ))
ora_dn$geneID <- NULL
ora_dns <- rownames(ora_dn)
gr <- as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",2))
br <- as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",2))
ora_dn$es <- gr/br
ora_dn <- ora_dn[order(-ora_dn$es),]
ora_dn$Description=NULL
result <- list("orabad_up"=ora_up,"orabad_dn"=ora_dn)
return(result)
}
This function fixes the problem with not inlcuding sets for FDR correction.
oragood <- function( df, gs, ngenes=2000 ) {
def <- df
gs1 <- stack(gs)
colnames(gs1) <- c("gene","term")
gs1 <- gs1[,c(2,1)]
defup <- head(rownames(subset(def,log2FoldChange>0)),ngenes)
defup <- defup[lapply(strsplit(defup," "),length)==2]
defup <- unique(sapply(strsplit(defup," "),"[[",2))
defdn <- head(rownames(subset(def,log2FoldChange<0)),ngenes)
defdn <- defdn[lapply(strsplit(defdn," "),length)==2]
defdn <- unique(sapply(strsplit(defdn," "),"[[",2))
bg <- rownames(def)
bg <- bg[lapply(strsplit(bg," "),length)==2]
bg <- unique(sapply(strsplit(bg," "),"[[",2))
gs1 <- gs1[which(gs1$gene %in% bg),]
terms <- names(which(table(gs1$term)>5))
gs1 <- gs1[gs1$term %in% terms,]
ora_up <- as.data.frame(enricher(gene = defup ,
universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
pAdjustMethod="fdr", pvalueCutoff = 1, qvalueCutoff = 1 ))
ora_up$geneID <- NULL
ora_ups <- rownames(ora_up)
gr <- as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_up$GeneRatio,"/"),"[[",2))
br <- as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_up$BgRatio,"/"),"[[",2))
ora_up$es <- gr/br
ora_up <- ora_up[order(-ora_up$es),]
ora_up$Description=NULL
nsets <- length(which(table(gs1$term)>5))
nres <- nrow(ora_up)
diff <- nsets - nres
pvals <- c(ora_up$pvalue,rep(1,diff))
ora_up$p.adjust <- p.adjust(pvals,method="fdr")[1:nrow(ora_up)]
ora_dn <- as.data.frame(enricher(gene = defdn ,
universe = bg, minGSSize = 5, maxGSSize = 500000, TERM2GENE = gs1,
pAdjustMethod="fdr", pvalueCutoff = 1, qvalueCutoff = 1 ))
ora_dn$geneID <- NULL
ora_dns <- rownames(ora_dn)
gr <- as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_dn$GeneRatio,"/"),"[[",2))
br <- as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",1)) /
as.numeric(sapply(strsplit(ora_dn$BgRatio,"/"),"[[",2))
ora_dn$es <- gr/br
ora_dn <- ora_dn[order(-ora_dn$es),]
ora_dn$Description=NULL
nsets <- length(which(table(gs1$term)>5))
nres <- nrow(ora_dn)
diff <- nsets - nres
pvals <- c(ora_dn$pvalue,rep(1,diff))
ora_dn$p.adjust <- p.adjust(pvals,method="fdr")[1:nrow(ora_dn)]
result <- list("oragood_up"=ora_up,"oragood_dn"=ora_dn)
return(result)
}
# bad=ORA analysis, a list with results of up and down regulated pathways
# good=same as bad, except fixed the background problem
compare_ora_plots <- function(bad,good,libname=NULL,setsize=NULL){
badup <- bad$orabad_up
baddn <- bad$orabad_dn
goodup <- good$oragood_up
gooddn <- good$oragood_dn
mup <- merge(badup,goodup,by=0)
MAX=max(-log10(c(mup$p.adjust.x,mup$p.adjust.y)))
UPHEADER=paste(libname,"up",setsize,"genes")
DNHEADER=paste(libname,"dn",setsize,"genes")
par(mfrow=c(1,2))
plot(-log10(mup$p.adjust.x), -log10(mup$p.adjust.y),
xlim=c(0,MAX), ylim=c(0,MAX),
xlab="original -log10 FDR", ylab="corrected -log10 FDR", main=UPHEADER)
grid()
abline(a = 0, b = 1,lwd=2,lty=2)
mdn <- merge(baddn,gooddn,by=0)
MAX=max(-log10(c(mdn$p.adjust.x,mdn$p.adjust.y)))
plot(-log10(mdn$p.adjust.x), -log10(mdn$p.adjust.y),
xlim=c(0,MAX), ylim=c(0,MAX),
xlab="original -log10 FDR", ylab="corrected -log10 FDR", main=DNHEADER)
grid()
abline(a = 0, b = 1,lwd=2,lty=2)
}
compare_ora <- function(bad,good){
badup <- bad$orabad_up
baddn <- bad$orabad_dn
goodup <- good$oragood_up
gooddn <- good$oragood_dn
bup <- rownames(subset(badup,p.adjust<0.05))
gup <- rownames(subset(goodup,p.adjust<0.05))
bdn <- rownames(subset(baddn,p.adjust<0.05))
gdn <- rownames(subset(gooddn,p.adjust<0.05))
sig_bad <- unique(c(bup,bdn))
sig_good <- unique(c(gup,gdn))
nsig_bad <- length(sig_bad)
nsig_good <- length(sig_good)
jac <- length(intersect(sig_bad,sig_good)) / length(union(sig_bad,sig_good))
result=c("nsig_bad"=nsig_bad,"nsig_good"=nsig_good,"Jaccard"=jac)
return(result)
}
params <- expand.grid(1:length(d),1:length(gs))
params2 <- expand.grid(names(d),names(gs))
colnames(params) <- c("d","gs")
ora_dat <- mclapply(1:nrow(params), function(i) {
j=params[i,1]
k=params[i,2]
bad <- orabad(d[[j]],gs[[k]])
good <- oragood(d[[j]],gs[[k]])
res <- compare_ora(bad,good)
return(res)
} , mc.cores=8)
ora_dat <- do.call(rbind,ora_dat)
res1 <- cbind(params2,ora_dat)
res1 %>%
kbl(row.names = TRUE, caption="all results") %>%
kable_paper("hover", full_width = F)
Var1 | Var2 | nsig_bad | nsig_good | Jaccard | |
---|---|---|---|---|---|
1 | bulkrna1.Rds | KEGG | 3 | 3 | 1.0000000 |
2 | bulkrna2.Rds | KEGG | 1 | 0 | 0.0000000 |
3 | bulkrna3.Rds | KEGG | 1 | 0 | 0.0000000 |
4 | bulkrna4.Rds | KEGG | 8 | 8 | 1.0000000 |
5 | bulkrna5.Rds | KEGG | 0 | 0 | NaN |
6 | bulkrna6.Rds | KEGG | 1 | 1 | 1.0000000 |
7 | bulkrna7.Rds | KEGG | 0 | 0 | NaN |
8 | bulkrna1.Rds | Reactome | 122 | 121 | 0.9918033 |
9 | bulkrna2.Rds | Reactome | 96 | 96 | 1.0000000 |
10 | bulkrna3.Rds | Reactome | 98 | 97 | 0.9897959 |
11 | bulkrna4.Rds | Reactome | 108 | 105 | 0.9722222 |
12 | bulkrna5.Rds | Reactome | 39 | 36 | 0.9230769 |
13 | bulkrna6.Rds | Reactome | 74 | 72 | 0.9729730 |
14 | bulkrna7.Rds | Reactome | 24 | 17 | 0.7083333 |
15 | bulkrna1.Rds | Wikipathways | 18 | 18 | 1.0000000 |
16 | bulkrna2.Rds | Wikipathways | 18 | 18 | 1.0000000 |
17 | bulkrna3.Rds | Wikipathways | 3 | 3 | 1.0000000 |
18 | bulkrna4.Rds | Wikipathways | 0 | 0 | NaN |
19 | bulkrna5.Rds | Wikipathways | 0 | 0 | NaN |
20 | bulkrna6.Rds | Wikipathways | 18 | 18 | 1.0000000 |
21 | bulkrna7.Rds | Wikipathways | 6 | 6 | 1.0000000 |
22 | bulkrna1.Rds | miR targets | 188 | 184 | 0.9787234 |
23 | bulkrna2.Rds | miR targets | 37 | 37 | 1.0000000 |
24 | bulkrna3.Rds | miR targets | 3 | 3 | 1.0000000 |
25 | bulkrna4.Rds | miR targets | 2 | 2 | 1.0000000 |
26 | bulkrna5.Rds | miR targets | 270 | 269 | 0.9962963 |
27 | bulkrna6.Rds | miR targets | 690 | 688 | 0.9971014 |
28 | bulkrna7.Rds | miR targets | 36 | 36 | 1.0000000 |
29 | bulkrna1.Rds | TFT GTRD | 88 | 88 | 1.0000000 |
30 | bulkrna2.Rds | TFT GTRD | 88 | 87 | 0.9886364 |
31 | bulkrna3.Rds | TFT GTRD | 100 | 96 | 0.9600000 |
32 | bulkrna4.Rds | TFT GTRD | 66 | 66 | 1.0000000 |
33 | bulkrna5.Rds | TFT GTRD | 113 | 111 | 0.9823009 |
34 | bulkrna6.Rds | TFT GTRD | 89 | 87 | 0.9775281 |
35 | bulkrna7.Rds | TFT GTRD | 64 | 62 | 0.9687500 |
36 | bulkrna1.Rds | GO | 289 | 289 | 1.0000000 |
37 | bulkrna2.Rds | GO | 621 | 600 | 0.9661836 |
38 | bulkrna3.Rds | GO | 269 | 262 | 0.9739777 |
39 | bulkrna4.Rds | GO | 88 | 82 | 0.9318182 |
40 | bulkrna5.Rds | GO | 214 | 205 | 0.9579439 |
41 | bulkrna6.Rds | GO | 188 | 177 | 0.9414894 |
42 | bulkrna7.Rds | GO | 323 | 313 | 0.9690402 |
43 | bulkrna1.Rds | HPO | 9 | 9 | 1.0000000 |
44 | bulkrna2.Rds | HPO | 2 | 2 | 1.0000000 |
45 | bulkrna3.Rds | HPO | 1 | 1 | 1.0000000 |
46 | bulkrna4.Rds | HPO | 0 | 0 | NaN |
47 | bulkrna5.Rds | HPO | 0 | 0 | NaN |
48 | bulkrna6.Rds | HPO | 0 | 0 | NaN |
49 | bulkrna7.Rds | HPO | 0 | 0 | NaN |
50 | bulkrna1.Rds | Cellmarkers | 82 | 81 | 0.9878049 |
51 | bulkrna2.Rds | Cellmarkers | 255 | 255 | 1.0000000 |
52 | bulkrna3.Rds | Cellmarkers | 91 | 91 | 1.0000000 |
53 | bulkrna4.Rds | Cellmarkers | 31 | 31 | 1.0000000 |
54 | bulkrna5.Rds | Cellmarkers | 43 | 43 | 1.0000000 |
55 | bulkrna6.Rds | Cellmarkers | 104 | 104 | 1.0000000 |
56 | bulkrna7.Rds | Cellmarkers | 240 | 242 | 0.9917355 |
57 | bulkrna1.Rds | Hallmark | 3 | 3 | 1.0000000 |
58 | bulkrna2.Rds | Hallmark | 13 | 13 | 1.0000000 |
59 | bulkrna3.Rds | Hallmark | 3 | 3 | 1.0000000 |
60 | bulkrna4.Rds | Hallmark | 2 | 2 | 1.0000000 |
61 | bulkrna5.Rds | Hallmark | 0 | 0 | NaN |
62 | bulkrna6.Rds | Hallmark | 12 | 12 | 1.0000000 |
63 | bulkrna7.Rds | Hallmark | 3 | 3 | 1.0000000 |
summary(res1$nsig_bad)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.50 31.00 85.05 99.00 690.00
summary(res1$nsig_good)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.50 31.00 83.46 96.50 688.00
summary(res1$Jaccard)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.9749 1.0000 0.9468 1.0000 1.0000 9
# NUMBER OF SIGNIFICANT SETS
ns1 <- matrix( res1$nsig_bad , ncol=9 )
rownames(ns1) <- names(d)
colnames(ns1) <- names(gs)
ns1 %>%
kbl(row.names = TRUE, caption="significant genesets in original analysis") %>%
kable_paper("hover", full_width = F)
KEGG | Reactome | Wikipathways | miR targets | TFT GTRD | GO | HPO | Cellmarkers | Hallmark | |
---|---|---|---|---|---|---|---|---|---|
bulkrna1.Rds | 3 | 122 | 18 | 188 | 88 | 289 | 9 | 82 | 3 |
bulkrna2.Rds | 1 | 96 | 18 | 37 | 88 | 621 | 2 | 255 | 13 |
bulkrna3.Rds | 1 | 98 | 3 | 3 | 100 | 269 | 1 | 91 | 3 |
bulkrna4.Rds | 8 | 108 | 0 | 2 | 66 | 88 | 0 | 31 | 2 |
bulkrna5.Rds | 0 | 39 | 0 | 270 | 113 | 214 | 0 | 43 | 0 |
bulkrna6.Rds | 1 | 74 | 18 | 690 | 89 | 188 | 0 | 104 | 12 |
bulkrna7.Rds | 0 | 24 | 6 | 36 | 64 | 323 | 0 | 240 | 3 |
ns1 <- apply(ns1,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ; mean(x) } )
ns2 <- matrix( res1$nsig_good , ncol=9 )
rownames(ns2) <- names(d)
colnames(ns2) <- names(gs)
ns2 %>%
kbl(row.names = TRUE, caption="significant genesets in corrected analysis") %>%
kable_paper("hover", full_width = F)
KEGG | Reactome | Wikipathways | miR targets | TFT GTRD | GO | HPO | Cellmarkers | Hallmark | |
---|---|---|---|---|---|---|---|---|---|
bulkrna1.Rds | 3 | 121 | 18 | 184 | 88 | 289 | 9 | 81 | 3 |
bulkrna2.Rds | 0 | 96 | 18 | 37 | 87 | 600 | 2 | 255 | 13 |
bulkrna3.Rds | 0 | 97 | 3 | 3 | 96 | 262 | 1 | 91 | 3 |
bulkrna4.Rds | 8 | 105 | 0 | 2 | 66 | 82 | 0 | 31 | 2 |
bulkrna5.Rds | 0 | 36 | 0 | 269 | 111 | 205 | 0 | 43 | 0 |
bulkrna6.Rds | 1 | 72 | 18 | 688 | 87 | 177 | 0 | 104 | 12 |
bulkrna7.Rds | 0 | 17 | 6 | 36 | 62 | 313 | 0 | 242 | 3 |
ns2 <- apply(ns2,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ; mean(x) } )
ns2
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 1.714286 77.714286 9.000000 174.142857 85.285714 275.428571
## HPO Cellmarkers Hallmark
## 1.714286 121.000000 5.142857
df <- data.frame(ns1,ns2)
df <- df[order(df$ns1),]
par(mar=c(c(5.1, 7.1, 4.1, 2.1) ))
barplot(t(df),beside=TRUE,horiz=TRUE,las=1,xlim=c(0,330),
xlab="no. significant sets", main="Effect of correcting the FDR bug",
col=c("gray30","gray80"))
text(df$ns1+20,((1:nrow(df))*3)-1.5,labels=signif(df[,1],3))
text(df$ns2+20,((1:nrow(df))*3)-0.5,labels=signif(df[,2],3))
legend("bottomright", inset=.02, legend=c("corrected","original"),
fill=c("gray80","gray30"), horiz=FALSE, cex=1)
# RATIO
rat <- matrix( (res1$nsig_good+0) / (res1$nsig_bad+0) , ncol=9 )
rownames(rat) <- names(d)
colnames(rat) <- names(gs)
signif(rat,3) %>%
kbl(row.names = TRUE, caption="Ratio of significant genesets Corrected:Original") %>%
kable_paper("hover", full_width = F)
KEGG | Reactome | Wikipathways | miR targets | TFT GTRD | GO | HPO | Cellmarkers | Hallmark | |
---|---|---|---|---|---|---|---|---|---|
bulkrna1.Rds | 1 | 0.992 | 1 | 0.979 | 1.000 | 1.000 | 1 | 0.988 | 1 |
bulkrna2.Rds | 0 | 1.000 | 1 | 1.000 | 0.989 | 0.966 | 1 | 1.000 | 1 |
bulkrna3.Rds | 0 | 0.990 | 1 | 1.000 | 0.960 | 0.974 | 1 | 1.000 | 1 |
bulkrna4.Rds | 1 | 0.972 | NaN | 1.000 | 1.000 | 0.932 | NaN | 1.000 | 1 |
bulkrna5.Rds | NaN | 0.923 | NaN | 0.996 | 0.982 | 0.958 | NaN | 1.000 | NaN |
bulkrna6.Rds | 1 | 0.973 | 1 | 0.997 | 0.978 | 0.941 | NaN | 1.000 | 1 |
bulkrna7.Rds | NaN | 0.708 | 1 | 1.000 | 0.969 | 0.969 | NaN | 1.010 | 1 |
rat2 <- apply(rat,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ; mean(x) } )
rat2
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.6000000 0.9368864 1.0000000 0.9960173 0.9824593 0.9629219
## HPO Cellmarkers Hallmark
## 1.0000000 0.9994483 1.0000000
# JACCARD
jac <- matrix( res1$Jaccard , ncol=9 )
rownames(jac) <- names(d)
colnames(jac) <- names(gs)
signif(jac,3) %>%
kbl(row.names = TRUE, caption="Jaccard similarity Corrected:Original") %>%
kable_paper("hover", full_width = F)
KEGG | Reactome | Wikipathways | miR targets | TFT GTRD | GO | HPO | Cellmarkers | Hallmark | |
---|---|---|---|---|---|---|---|---|---|
bulkrna1.Rds | 1 | 0.992 | 1 | 0.979 | 1.000 | 1.000 | 1 | 0.988 | 1 |
bulkrna2.Rds | 0 | 1.000 | 1 | 1.000 | 0.989 | 0.966 | 1 | 1.000 | 1 |
bulkrna3.Rds | 0 | 0.990 | 1 | 1.000 | 0.960 | 0.974 | 1 | 1.000 | 1 |
bulkrna4.Rds | 1 | 0.972 | NaN | 1.000 | 1.000 | 0.932 | NaN | 1.000 | 1 |
bulkrna5.Rds | NaN | 0.923 | NaN | 0.996 | 0.982 | 0.958 | NaN | 1.000 | NaN |
bulkrna6.Rds | 1 | 0.973 | 1 | 0.997 | 0.978 | 0.941 | NaN | 1.000 | 1 |
bulkrna7.Rds | NaN | 0.708 | 1 | 1.000 | 0.969 | 0.969 | NaN | 0.992 | 1 |
jac2 <- apply(jac,2,function(x) { x <- x[!is.na(x)] ; x <- x[is.finite(x)] ; mean(x) } )
jac2
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.6000000 0.9368864 1.0000000 0.9960173 0.9824593 0.9629219
## HPO Cellmarkers Hallmark
## 1.0000000 0.9970772 1.0000000
barplot(sort(jac2),horiz=TRUE,las=1,xlim=c(0,1),
main="Jaccard similarity Corrected:Original",xlab="Jaccard index")
# overall
oa <- matrix(paste(res1$nsig_bad,res1$nsig_good,signif(res1$Jaccard,2)),ncol=9)
rownames(oa) <- names(d)
colnames(oa) <- names(gs)
oa %>%
kbl(row.names = TRUE, caption="Original:Corrected:Jaccard") %>%
kable_paper("hover", full_width = F)
KEGG | Reactome | Wikipathways | miR targets | TFT GTRD | GO | HPO | Cellmarkers | Hallmark | |
---|---|---|---|---|---|---|---|---|---|
bulkrna1.Rds | 3 3 1 | 122 121 0.99 | 18 18 1 | 188 184 0.98 | 88 88 1 | 289 289 1 | 9 9 1 | 82 81 0.99 | 3 3 1 |
bulkrna2.Rds | 1 0 0 | 96 96 1 | 18 18 1 | 37 37 1 | 88 87 0.99 | 621 600 0.97 | 2 2 1 | 255 255 1 | 13 13 1 |
bulkrna3.Rds | 1 0 0 | 98 97 0.99 | 3 3 1 | 3 3 1 | 100 96 0.96 | 269 262 0.97 | 1 1 1 | 91 91 1 | 3 3 1 |
bulkrna4.Rds | 8 8 1 | 108 105 0.97 | 0 0 NaN | 2 2 1 | 66 66 1 | 88 82 0.93 | 0 0 NaN | 31 31 1 | 2 2 1 |
bulkrna5.Rds | 0 0 NaN | 39 36 0.92 | 0 0 NaN | 270 269 1 | 113 111 0.98 | 214 205 0.96 | 0 0 NaN | 43 43 1 | 0 0 NaN |
bulkrna6.Rds | 1 1 1 | 74 72 0.97 | 18 18 1 | 690 688 1 | 89 87 0.98 | 188 177 0.94 | 0 0 NaN | 104 104 1 | 12 12 1 |
bulkrna7.Rds | 0 0 NaN | 24 17 0.71 | 6 6 1 | 36 36 1 | 64 62 0.97 | 323 313 0.97 | 0 0 NaN | 240 242 0.99 | 3 3 1 |
Now run an exhaustive analysis, changing the gene list length from 125 to 2000.
ngenes <- c(125,250,500,1000,2000)
params <- expand.grid(1:length(d),1:length(gs),ngenes)
params2 <- expand.grid(names(d),names(gs),ngenes)
colnames(params) <- c("d","gs","ngenes")
ora_dat <- mclapply(1:nrow(params), function(i) {
j=params[i,1]
k=params[i,2]
l=params[i,3]
bad <- orabad(d[[j]],gs[[k]],l)
good <- oragood(d[[j]],gs[[k]],l)
res <- compare_ora(bad,good)
return(res)
} , mc.cores=8)
ora_dat <- do.call(rbind,ora_dat)
res1 <- cbind(params2,ora_dat)
res1 %>%
kbl(row.names = TRUE, caption="all results") %>%
kable_paper("hover", full_width = F)
Var1 | Var2 | Var3 | nsig_bad | nsig_good | Jaccard | |
---|---|---|---|---|---|---|
1 | bulkrna1.Rds | KEGG | 125 | 0 | 0 | NaN |
2 | bulkrna2.Rds | KEGG | 125 | 0 | 0 | NaN |
3 | bulkrna3.Rds | KEGG | 125 | 0 | 0 | NaN |
4 | bulkrna4.Rds | KEGG | 125 | 0 | 0 | NaN |
5 | bulkrna5.Rds | KEGG | 125 | 0 | 0 | NaN |
6 | bulkrna6.Rds | KEGG | 125 | 0 | 0 | NaN |
7 | bulkrna7.Rds | KEGG | 125 | 0 | 0 | NaN |
8 | bulkrna1.Rds | Reactome | 125 | 0 | 0 | NaN |
9 | bulkrna2.Rds | Reactome | 125 | 0 | 0 | NaN |
10 | bulkrna3.Rds | Reactome | 125 | 0 | 0 | NaN |
11 | bulkrna4.Rds | Reactome | 125 | 0 | 0 | NaN |
12 | bulkrna5.Rds | Reactome | 125 | 0 | 0 | NaN |
13 | bulkrna6.Rds | Reactome | 125 | 0 | 0 | NaN |
14 | bulkrna7.Rds | Reactome | 125 | 0 | 0 | NaN |
15 | bulkrna1.Rds | Wikipathways | 125 | 0 | 0 | NaN |
16 | bulkrna2.Rds | Wikipathways | 125 | 1 | 0 | 0.0000000 |
17 | bulkrna3.Rds | Wikipathways | 125 | 0 | 0 | NaN |
18 | bulkrna4.Rds | Wikipathways | 125 | 0 | 0 | NaN |
19 | bulkrna5.Rds | Wikipathways | 125 | 0 | 0 | NaN |
20 | bulkrna6.Rds | Wikipathways | 125 | 0 | 0 | NaN |
21 | bulkrna7.Rds | Wikipathways | 125 | 0 | 0 | NaN |
22 | bulkrna1.Rds | miR targets | 125 | 0 | 0 | NaN |
23 | bulkrna2.Rds | miR targets | 125 | 0 | 0 | NaN |
24 | bulkrna3.Rds | miR targets | 125 | 0 | 0 | NaN |
25 | bulkrna4.Rds | miR targets | 125 | 0 | 0 | NaN |
26 | bulkrna5.Rds | miR targets | 125 | 0 | 0 | NaN |
27 | bulkrna6.Rds | miR targets | 125 | 0 | 0 | NaN |
28 | bulkrna7.Rds | miR targets | 125 | 0 | 0 | NaN |
29 | bulkrna1.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
30 | bulkrna2.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
31 | bulkrna3.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
32 | bulkrna4.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
33 | bulkrna5.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
34 | bulkrna6.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
35 | bulkrna7.Rds | TFT GTRD | 125 | 0 | 0 | NaN |
36 | bulkrna1.Rds | GO | 125 | 9 | 6 | 0.6666667 |
37 | bulkrna2.Rds | GO | 125 | 0 | 0 | NaN |
38 | bulkrna3.Rds | GO | 125 | 0 | 0 | NaN |
39 | bulkrna4.Rds | GO | 125 | 0 | 0 | NaN |
40 | bulkrna5.Rds | GO | 125 | 13 | 0 | 0.0000000 |
41 | bulkrna6.Rds | GO | 125 | 0 | 0 | NaN |
42 | bulkrna7.Rds | GO | 125 | 1 | 0 | 0.0000000 |
43 | bulkrna1.Rds | HPO | 125 | 2 | 0 | 0.0000000 |
44 | bulkrna2.Rds | HPO | 125 | 0 | 0 | NaN |
45 | bulkrna3.Rds | HPO | 125 | 0 | 0 | NaN |
46 | bulkrna4.Rds | HPO | 125 | 1 | 0 | 0.0000000 |
47 | bulkrna5.Rds | HPO | 125 | 0 | 0 | NaN |
48 | bulkrna6.Rds | HPO | 125 | 0 | 0 | NaN |
49 | bulkrna7.Rds | HPO | 125 | 1 | 1 | 1.0000000 |
50 | bulkrna1.Rds | Cellmarkers | 125 | 0 | 0 | NaN |
51 | bulkrna2.Rds | Cellmarkers | 125 | 2 | 0 | 0.0000000 |
52 | bulkrna3.Rds | Cellmarkers | 125 | 0 | 0 | NaN |
53 | bulkrna4.Rds | Cellmarkers | 125 | 2 | 0 | 0.0000000 |
54 | bulkrna5.Rds | Cellmarkers | 125 | 0 | 0 | NaN |
55 | bulkrna6.Rds | Cellmarkers | 125 | 0 | 0 | NaN |
56 | bulkrna7.Rds | Cellmarkers | 125 | 0 | 0 | NaN |
57 | bulkrna1.Rds | Hallmark | 125 | 0 | 0 | NaN |
58 | bulkrna2.Rds | Hallmark | 125 | 0 | 0 | NaN |
59 | bulkrna3.Rds | Hallmark | 125 | 0 | 0 | NaN |
60 | bulkrna4.Rds | Hallmark | 125 | 0 | 0 | NaN |
61 | bulkrna5.Rds | Hallmark | 125 | 1 | 0 | 0.0000000 |
62 | bulkrna6.Rds | Hallmark | 125 | 0 | 0 | NaN |
63 | bulkrna7.Rds | Hallmark | 125 | 0 | 0 | NaN |
64 | bulkrna1.Rds | KEGG | 250 | 0 | 0 | NaN |
65 | bulkrna2.Rds | KEGG | 250 | 2 | 1 | 0.5000000 |
66 | bulkrna3.Rds | KEGG | 250 | 0 | 0 | NaN |
67 | bulkrna4.Rds | KEGG | 250 | 0 | 0 | NaN |
68 | bulkrna5.Rds | KEGG | 250 | 0 | 0 | NaN |
69 | bulkrna6.Rds | KEGG | 250 | 0 | 0 | NaN |
70 | bulkrna7.Rds | KEGG | 250 | 0 | 0 | NaN |
71 | bulkrna1.Rds | Reactome | 250 | 0 | 0 | NaN |
72 | bulkrna2.Rds | Reactome | 250 | 3 | 3 | 1.0000000 |
73 | bulkrna3.Rds | Reactome | 250 | 0 | 0 | NaN |
74 | bulkrna4.Rds | Reactome | 250 | 0 | 0 | NaN |
75 | bulkrna5.Rds | Reactome | 250 | 0 | 0 | NaN |
76 | bulkrna6.Rds | Reactome | 250 | 0 | 0 | NaN |
77 | bulkrna7.Rds | Reactome | 250 | 2 | 0 | 0.0000000 |
78 | bulkrna1.Rds | Wikipathways | 250 | 0 | 0 | NaN |
79 | bulkrna2.Rds | Wikipathways | 250 | 0 | 0 | NaN |
80 | bulkrna3.Rds | Wikipathways | 250 | 0 | 0 | NaN |
81 | bulkrna4.Rds | Wikipathways | 250 | 0 | 0 | NaN |
82 | bulkrna5.Rds | Wikipathways | 250 | 1 | 0 | 0.0000000 |
83 | bulkrna6.Rds | Wikipathways | 250 | 0 | 0 | NaN |
84 | bulkrna7.Rds | Wikipathways | 250 | 1 | 1 | 1.0000000 |
85 | bulkrna1.Rds | miR targets | 250 | 0 | 0 | NaN |
86 | bulkrna2.Rds | miR targets | 250 | 0 | 0 | NaN |
87 | bulkrna3.Rds | miR targets | 250 | 0 | 0 | NaN |
88 | bulkrna4.Rds | miR targets | 250 | 0 | 0 | NaN |
89 | bulkrna5.Rds | miR targets | 250 | 1 | 1 | 1.0000000 |
90 | bulkrna6.Rds | miR targets | 250 | 1 | 1 | 1.0000000 |
91 | bulkrna7.Rds | miR targets | 250 | 0 | 0 | NaN |
92 | bulkrna1.Rds | TFT GTRD | 250 | 0 | 0 | NaN |
93 | bulkrna2.Rds | TFT GTRD | 250 | 7 | 5 | 0.7142857 |
94 | bulkrna3.Rds | TFT GTRD | 250 | 1 | 1 | 1.0000000 |
95 | bulkrna4.Rds | TFT GTRD | 250 | 1 | 1 | 1.0000000 |
96 | bulkrna5.Rds | TFT GTRD | 250 | 1 | 1 | 1.0000000 |
97 | bulkrna6.Rds | TFT GTRD | 250 | 2 | 2 | 1.0000000 |
98 | bulkrna7.Rds | TFT GTRD | 250 | 1 | 0 | 0.0000000 |
99 | bulkrna1.Rds | GO | 250 | 1 | 0 | 0.0000000 |
100 | bulkrna2.Rds | GO | 250 | 16 | 12 | 0.7500000 |
101 | bulkrna3.Rds | GO | 250 | 0 | 0 | NaN |
102 | bulkrna4.Rds | GO | 250 | 0 | 0 | NaN |
103 | bulkrna5.Rds | GO | 250 | 20 | 10 | 0.5000000 |
104 | bulkrna6.Rds | GO | 250 | 0 | 0 | NaN |
105 | bulkrna7.Rds | GO | 250 | 0 | 0 | NaN |
106 | bulkrna1.Rds | HPO | 250 | 0 | 0 | NaN |
107 | bulkrna2.Rds | HPO | 250 | 0 | 0 | NaN |
108 | bulkrna3.Rds | HPO | 250 | 3 | 1 | 0.3333333 |
109 | bulkrna4.Rds | HPO | 250 | 0 | 0 | NaN |
110 | bulkrna5.Rds | HPO | 250 | 0 | 0 | NaN |
111 | bulkrna6.Rds | HPO | 250 | 0 | 0 | NaN |
112 | bulkrna7.Rds | HPO | 250 | 19 | 8 | 0.4210526 |
113 | bulkrna1.Rds | Cellmarkers | 250 | 0 | 0 | NaN |
114 | bulkrna2.Rds | Cellmarkers | 250 | 32 | 30 | 0.9375000 |
115 | bulkrna3.Rds | Cellmarkers | 250 | 4 | 4 | 1.0000000 |
116 | bulkrna4.Rds | Cellmarkers | 250 | 2 | 2 | 1.0000000 |
117 | bulkrna5.Rds | Cellmarkers | 250 | 0 | 0 | NaN |
118 | bulkrna6.Rds | Cellmarkers | 250 | 4 | 2 | 0.5000000 |
119 | bulkrna7.Rds | Cellmarkers | 250 | 12 | 12 | 1.0000000 |
120 | bulkrna1.Rds | Hallmark | 250 | 0 | 0 | NaN |
121 | bulkrna2.Rds | Hallmark | 250 | 1 | 1 | 1.0000000 |
122 | bulkrna3.Rds | Hallmark | 250 | 0 | 0 | NaN |
123 | bulkrna4.Rds | Hallmark | 250 | 0 | 0 | NaN |
124 | bulkrna5.Rds | Hallmark | 250 | 0 | 0 | NaN |
125 | bulkrna6.Rds | Hallmark | 250 | 2 | 2 | 1.0000000 |
126 | bulkrna7.Rds | Hallmark | 250 | 0 | 0 | NaN |
127 | bulkrna1.Rds | KEGG | 500 | 0 | 0 | NaN |
128 | bulkrna2.Rds | KEGG | 500 | 0 | 0 | NaN |
129 | bulkrna3.Rds | KEGG | 500 | 0 | 0 | NaN |
130 | bulkrna4.Rds | KEGG | 500 | 0 | 0 | NaN |
131 | bulkrna5.Rds | KEGG | 500 | 0 | 0 | NaN |
132 | bulkrna6.Rds | KEGG | 500 | 0 | 0 | NaN |
133 | bulkrna7.Rds | KEGG | 500 | 0 | 0 | NaN |
134 | bulkrna1.Rds | Reactome | 500 | 0 | 0 | NaN |
135 | bulkrna2.Rds | Reactome | 500 | 3 | 2 | 0.6666667 |
136 | bulkrna3.Rds | Reactome | 500 | 0 | 0 | NaN |
137 | bulkrna4.Rds | Reactome | 500 | 0 | 0 | NaN |
138 | bulkrna5.Rds | Reactome | 500 | 0 | 0 | NaN |
139 | bulkrna6.Rds | Reactome | 500 | 0 | 0 | NaN |
140 | bulkrna7.Rds | Reactome | 500 | 0 | 0 | NaN |
141 | bulkrna1.Rds | Wikipathways | 500 | 0 | 0 | NaN |
142 | bulkrna2.Rds | Wikipathways | 500 | 0 | 0 | NaN |
143 | bulkrna3.Rds | Wikipathways | 500 | 0 | 0 | NaN |
144 | bulkrna4.Rds | Wikipathways | 500 | 0 | 0 | NaN |
145 | bulkrna5.Rds | Wikipathways | 500 | 0 | 0 | NaN |
146 | bulkrna6.Rds | Wikipathways | 500 | 0 | 0 | NaN |
147 | bulkrna7.Rds | Wikipathways | 500 | 0 | 0 | NaN |
148 | bulkrna1.Rds | miR targets | 500 | 7 | 7 | 1.0000000 |
149 | bulkrna2.Rds | miR targets | 500 | 0 | 0 | NaN |
150 | bulkrna3.Rds | miR targets | 500 | 0 | 0 | NaN |
151 | bulkrna4.Rds | miR targets | 500 | 0 | 0 | NaN |
152 | bulkrna5.Rds | miR targets | 500 | 1 | 1 | 1.0000000 |
153 | bulkrna6.Rds | miR targets | 500 | 115 | 108 | 0.9391304 |
154 | bulkrna7.Rds | miR targets | 500 | 10 | 10 | 1.0000000 |
155 | bulkrna1.Rds | TFT GTRD | 500 | 11 | 11 | 1.0000000 |
156 | bulkrna2.Rds | TFT GTRD | 500 | 6 | 5 | 0.8333333 |
157 | bulkrna3.Rds | TFT GTRD | 500 | 3 | 3 | 1.0000000 |
158 | bulkrna4.Rds | TFT GTRD | 500 | 4 | 4 | 1.0000000 |
159 | bulkrna5.Rds | TFT GTRD | 500 | 2 | 2 | 1.0000000 |
160 | bulkrna6.Rds | TFT GTRD | 500 | 5 | 5 | 1.0000000 |
161 | bulkrna7.Rds | TFT GTRD | 500 | 2 | 2 | 1.0000000 |
162 | bulkrna1.Rds | GO | 500 | 19 | 9 | 0.4736842 |
163 | bulkrna2.Rds | GO | 500 | 69 | 42 | 0.6086957 |
164 | bulkrna3.Rds | GO | 500 | 6 | 0 | 0.0000000 |
165 | bulkrna4.Rds | GO | 500 | 0 | 0 | NaN |
166 | bulkrna5.Rds | GO | 500 | 39 | 26 | 0.6666667 |
167 | bulkrna6.Rds | GO | 500 | 1 | 1 | 1.0000000 |
168 | bulkrna7.Rds | GO | 500 | 1 | 1 | 1.0000000 |
169 | bulkrna1.Rds | HPO | 500 | 0 | 0 | NaN |
170 | bulkrna2.Rds | HPO | 500 | 0 | 0 | NaN |
171 | bulkrna3.Rds | HPO | 500 | 0 | 0 | NaN |
172 | bulkrna4.Rds | HPO | 500 | 0 | 0 | NaN |
173 | bulkrna5.Rds | HPO | 500 | 0 | 0 | NaN |
174 | bulkrna6.Rds | HPO | 500 | 0 | 0 | NaN |
175 | bulkrna7.Rds | HPO | 500 | 0 | 0 | NaN |
176 | bulkrna1.Rds | Cellmarkers | 500 | 2 | 2 | 1.0000000 |
177 | bulkrna2.Rds | Cellmarkers | 500 | 45 | 45 | 1.0000000 |
178 | bulkrna3.Rds | Cellmarkers | 500 | 6 | 6 | 1.0000000 |
179 | bulkrna4.Rds | Cellmarkers | 500 | 1 | 1 | 1.0000000 |
180 | bulkrna5.Rds | Cellmarkers | 500 | 0 | 0 | NaN |
181 | bulkrna6.Rds | Cellmarkers | 500 | 4 | 3 | 0.7500000 |
182 | bulkrna7.Rds | Cellmarkers | 500 | 32 | 31 | 0.9687500 |
183 | bulkrna1.Rds | Hallmark | 500 | 0 | 0 | NaN |
184 | bulkrna2.Rds | Hallmark | 500 | 0 | 0 | NaN |
185 | bulkrna3.Rds | Hallmark | 500 | 0 | 0 | NaN |
186 | bulkrna4.Rds | Hallmark | 500 | 0 | 0 | NaN |
187 | bulkrna5.Rds | Hallmark | 500 | 0 | 0 | NaN |
188 | bulkrna6.Rds | Hallmark | 500 | 0 | 0 | NaN |
189 | bulkrna7.Rds | Hallmark | 500 | 0 | 0 | NaN |
190 | bulkrna1.Rds | KEGG | 1000 | 0 | 0 | NaN |
191 | bulkrna2.Rds | KEGG | 1000 | 0 | 0 | NaN |
192 | bulkrna3.Rds | KEGG | 1000 | 0 | 0 | NaN |
193 | bulkrna4.Rds | KEGG | 1000 | 0 | 0 | NaN |
194 | bulkrna5.Rds | KEGG | 1000 | 0 | 0 | NaN |
195 | bulkrna6.Rds | KEGG | 1000 | 0 | 0 | NaN |
196 | bulkrna7.Rds | KEGG | 1000 | 0 | 0 | NaN |
197 | bulkrna1.Rds | Reactome | 1000 | 49 | 36 | 0.7346939 |
198 | bulkrna2.Rds | Reactome | 1000 | 65 | 56 | 0.8615385 |
199 | bulkrna3.Rds | Reactome | 1000 | 103 | 91 | 0.8834951 |
200 | bulkrna4.Rds | Reactome | 1000 | 0 | 0 | NaN |
201 | bulkrna5.Rds | Reactome | 1000 | 0 | 0 | NaN |
202 | bulkrna6.Rds | Reactome | 1000 | 81 | 77 | 0.9506173 |
203 | bulkrna7.Rds | Reactome | 1000 | 34 | 26 | 0.7647059 |
204 | bulkrna1.Rds | Wikipathways | 1000 | 1 | 1 | 1.0000000 |
205 | bulkrna2.Rds | Wikipathways | 1000 | 2 | 2 | 1.0000000 |
206 | bulkrna3.Rds | Wikipathways | 1000 | 2 | 2 | 1.0000000 |
207 | bulkrna4.Rds | Wikipathways | 1000 | 0 | 0 | NaN |
208 | bulkrna5.Rds | Wikipathways | 1000 | 1 | 1 | 1.0000000 |
209 | bulkrna6.Rds | Wikipathways | 1000 | 2 | 2 | 1.0000000 |
210 | bulkrna7.Rds | Wikipathways | 1000 | 1 | 1 | 1.0000000 |
211 | bulkrna1.Rds | miR targets | 1000 | 12 | 12 | 1.0000000 |
212 | bulkrna2.Rds | miR targets | 1000 | 3 | 3 | 1.0000000 |
213 | bulkrna3.Rds | miR targets | 1000 | 3 | 3 | 1.0000000 |
214 | bulkrna4.Rds | miR targets | 1000 | 0 | 0 | NaN |
215 | bulkrna5.Rds | miR targets | 1000 | 87 | 86 | 0.9885057 |
216 | bulkrna6.Rds | miR targets | 1000 | 307 | 305 | 0.9934853 |
217 | bulkrna7.Rds | miR targets | 1000 | 2 | 2 | 1.0000000 |
218 | bulkrna1.Rds | TFT GTRD | 1000 | 33 | 33 | 1.0000000 |
219 | bulkrna2.Rds | TFT GTRD | 1000 | 40 | 40 | 1.0000000 |
220 | bulkrna3.Rds | TFT GTRD | 1000 | 36 | 32 | 0.8888889 |
221 | bulkrna4.Rds | TFT GTRD | 1000 | 19 | 19 | 1.0000000 |
222 | bulkrna5.Rds | TFT GTRD | 1000 | 45 | 45 | 1.0000000 |
223 | bulkrna6.Rds | TFT GTRD | 1000 | 28 | 27 | 0.9642857 |
224 | bulkrna7.Rds | TFT GTRD | 1000 | 19 | 17 | 0.8947368 |
225 | bulkrna1.Rds | GO | 1000 | 91 | 73 | 0.8021978 |
226 | bulkrna2.Rds | GO | 1000 | 214 | 189 | 0.8831776 |
227 | bulkrna3.Rds | GO | 1000 | 105 | 92 | 0.8761905 |
228 | bulkrna4.Rds | GO | 1000 | 11 | 8 | 0.7272727 |
229 | bulkrna5.Rds | GO | 1000 | 78 | 57 | 0.7307692 |
230 | bulkrna6.Rds | GO | 1000 | 73 | 60 | 0.8219178 |
231 | bulkrna7.Rds | GO | 1000 | 93 | 80 | 0.8602151 |
232 | bulkrna1.Rds | HPO | 1000 | 3 | 3 | 1.0000000 |
233 | bulkrna2.Rds | HPO | 1000 | 0 | 0 | NaN |
234 | bulkrna3.Rds | HPO | 1000 | 1 | 1 | 1.0000000 |
235 | bulkrna4.Rds | HPO | 1000 | 0 | 0 | NaN |
236 | bulkrna5.Rds | HPO | 1000 | 0 | 0 | NaN |
237 | bulkrna6.Rds | HPO | 1000 | 0 | 0 | NaN |
238 | bulkrna7.Rds | HPO | 1000 | 3 | 3 | 1.0000000 |
239 | bulkrna1.Rds | Cellmarkers | 1000 | 34 | 34 | 1.0000000 |
240 | bulkrna2.Rds | Cellmarkers | 1000 | 117 | 112 | 0.9572650 |
241 | bulkrna3.Rds | Cellmarkers | 1000 | 36 | 36 | 1.0000000 |
242 | bulkrna4.Rds | Cellmarkers | 1000 | 7 | 7 | 1.0000000 |
243 | bulkrna5.Rds | Cellmarkers | 1000 | 12 | 10 | 0.8333333 |
244 | bulkrna6.Rds | Cellmarkers | 1000 | 43 | 42 | 0.9767442 |
245 | bulkrna7.Rds | Cellmarkers | 1000 | 99 | 98 | 0.9898990 |
246 | bulkrna1.Rds | Hallmark | 1000 | 2 | 2 | 1.0000000 |
247 | bulkrna2.Rds | Hallmark | 1000 | 7 | 7 | 1.0000000 |
248 | bulkrna3.Rds | Hallmark | 1000 | 3 | 3 | 1.0000000 |
249 | bulkrna4.Rds | Hallmark | 1000 | 0 | 0 | NaN |
250 | bulkrna5.Rds | Hallmark | 1000 | 0 | 0 | NaN |
251 | bulkrna6.Rds | Hallmark | 1000 | 5 | 5 | 1.0000000 |
252 | bulkrna7.Rds | Hallmark | 1000 | 0 | 0 | NaN |
253 | bulkrna1.Rds | KEGG | 2000 | 3 | 3 | 1.0000000 |
254 | bulkrna2.Rds | KEGG | 2000 | 1 | 0 | 0.0000000 |
255 | bulkrna3.Rds | KEGG | 2000 | 1 | 0 | 0.0000000 |
256 | bulkrna4.Rds | KEGG | 2000 | 8 | 8 | 1.0000000 |
257 | bulkrna5.Rds | KEGG | 2000 | 0 | 0 | NaN |
258 | bulkrna6.Rds | KEGG | 2000 | 1 | 1 | 1.0000000 |
259 | bulkrna7.Rds | KEGG | 2000 | 0 | 0 | NaN |
260 | bulkrna1.Rds | Reactome | 2000 | 122 | 121 | 0.9918033 |
261 | bulkrna2.Rds | Reactome | 2000 | 96 | 96 | 1.0000000 |
262 | bulkrna3.Rds | Reactome | 2000 | 98 | 97 | 0.9897959 |
263 | bulkrna4.Rds | Reactome | 2000 | 108 | 105 | 0.9722222 |
264 | bulkrna5.Rds | Reactome | 2000 | 39 | 36 | 0.9230769 |
265 | bulkrna6.Rds | Reactome | 2000 | 74 | 72 | 0.9729730 |
266 | bulkrna7.Rds | Reactome | 2000 | 24 | 17 | 0.7083333 |
267 | bulkrna1.Rds | Wikipathways | 2000 | 18 | 18 | 1.0000000 |
268 | bulkrna2.Rds | Wikipathways | 2000 | 18 | 18 | 1.0000000 |
269 | bulkrna3.Rds | Wikipathways | 2000 | 3 | 3 | 1.0000000 |
270 | bulkrna4.Rds | Wikipathways | 2000 | 0 | 0 | NaN |
271 | bulkrna5.Rds | Wikipathways | 2000 | 0 | 0 | NaN |
272 | bulkrna6.Rds | Wikipathways | 2000 | 18 | 18 | 1.0000000 |
273 | bulkrna7.Rds | Wikipathways | 2000 | 6 | 6 | 1.0000000 |
274 | bulkrna1.Rds | miR targets | 2000 | 188 | 184 | 0.9787234 |
275 | bulkrna2.Rds | miR targets | 2000 | 37 | 37 | 1.0000000 |
276 | bulkrna3.Rds | miR targets | 2000 | 3 | 3 | 1.0000000 |
277 | bulkrna4.Rds | miR targets | 2000 | 2 | 2 | 1.0000000 |
278 | bulkrna5.Rds | miR targets | 2000 | 270 | 269 | 0.9962963 |
279 | bulkrna6.Rds | miR targets | 2000 | 690 | 688 | 0.9971014 |
280 | bulkrna7.Rds | miR targets | 2000 | 36 | 36 | 1.0000000 |
281 | bulkrna1.Rds | TFT GTRD | 2000 | 88 | 88 | 1.0000000 |
282 | bulkrna2.Rds | TFT GTRD | 2000 | 88 | 87 | 0.9886364 |
283 | bulkrna3.Rds | TFT GTRD | 2000 | 100 | 96 | 0.9600000 |
284 | bulkrna4.Rds | TFT GTRD | 2000 | 66 | 66 | 1.0000000 |
285 | bulkrna5.Rds | TFT GTRD | 2000 | 113 | 111 | 0.9823009 |
286 | bulkrna6.Rds | TFT GTRD | 2000 | 89 | 87 | 0.9775281 |
287 | bulkrna7.Rds | TFT GTRD | 2000 | 64 | 62 | 0.9687500 |
288 | bulkrna1.Rds | GO | 2000 | 289 | 289 | 1.0000000 |
289 | bulkrna2.Rds | GO | 2000 | 621 | 600 | 0.9661836 |
290 | bulkrna3.Rds | GO | 2000 | 269 | 262 | 0.9739777 |
291 | bulkrna4.Rds | GO | 2000 | 88 | 82 | 0.9318182 |
292 | bulkrna5.Rds | GO | 2000 | 214 | 205 | 0.9579439 |
293 | bulkrna6.Rds | GO | 2000 | 188 | 177 | 0.9414894 |
294 | bulkrna7.Rds | GO | 2000 | 323 | 313 | 0.9690402 |
295 | bulkrna1.Rds | HPO | 2000 | 9 | 9 | 1.0000000 |
296 | bulkrna2.Rds | HPO | 2000 | 2 | 2 | 1.0000000 |
297 | bulkrna3.Rds | HPO | 2000 | 1 | 1 | 1.0000000 |
298 | bulkrna4.Rds | HPO | 2000 | 0 | 0 | NaN |
299 | bulkrna5.Rds | HPO | 2000 | 0 | 0 | NaN |
300 | bulkrna6.Rds | HPO | 2000 | 0 | 0 | NaN |
301 | bulkrna7.Rds | HPO | 2000 | 0 | 0 | NaN |
302 | bulkrna1.Rds | Cellmarkers | 2000 | 82 | 81 | 0.9878049 |
303 | bulkrna2.Rds | Cellmarkers | 2000 | 255 | 255 | 1.0000000 |
304 | bulkrna3.Rds | Cellmarkers | 2000 | 91 | 91 | 1.0000000 |
305 | bulkrna4.Rds | Cellmarkers | 2000 | 31 | 31 | 1.0000000 |
306 | bulkrna5.Rds | Cellmarkers | 2000 | 43 | 43 | 1.0000000 |
307 | bulkrna6.Rds | Cellmarkers | 2000 | 104 | 104 | 1.0000000 |
308 | bulkrna7.Rds | Cellmarkers | 2000 | 240 | 242 | 0.9917355 |
309 | bulkrna1.Rds | Hallmark | 2000 | 3 | 3 | 1.0000000 |
310 | bulkrna2.Rds | Hallmark | 2000 | 13 | 13 | 1.0000000 |
311 | bulkrna3.Rds | Hallmark | 2000 | 3 | 3 | 1.0000000 |
312 | bulkrna4.Rds | Hallmark | 2000 | 2 | 2 | 1.0000000 |
313 | bulkrna5.Rds | Hallmark | 2000 | 0 | 0 | NaN |
314 | bulkrna6.Rds | Hallmark | 2000 | 12 | 12 | 1.0000000 |
315 | bulkrna7.Rds | Hallmark | 2000 | 3 | 3 | 1.0000000 |
summary(res1$nsig_bad)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 1.0 25.2 10.5 690.0
summary(res1$nsig_good)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 23.92 8.00 688.00
summary(res1$Jaccard)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.8779 1.0000 0.8465 1.0000 1.0000 157
lapply(ngenes, function(n) {
res2 <- subset(res1,Var3==n) ; summary(res2$nsig_bad)
} )
## [[1]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.5238 0.0000 13.0000
##
## [[2]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 2.222 1.000 32.000
##
## [[3]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 6.254 3.000 115.000
##
## [[4]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 3.00 31.94 41.50 307.00
##
## [[5]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.50 31.00 85.05 99.00 690.00
lapply(ngenes, function(n) {
res2 <- subset(res1,Var3==n) ; summary(res2$nsig_good)
} )
## [[1]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1111 0.0000 6.0000
##
## [[2]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 1.603 1.000 30.000
##
## [[3]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 5.19 2.00 108.00
##
## [[4]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 3.00 29.22 38.00 305.00
##
## [[5]]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.50 31.00 83.46 96.50 688.00
lapply(ngenes, function(n) {
res2 <- subset(res1,Var3==n) ; summary(res2$Jaccard)
} )
## [[1]]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.1667 0.0000 1.0000 53
##
## [[2]]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.5000 1.0000 0.7063 1.0000 1.0000 38
##
## [[3]]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.8125 1.0000 0.8711 1.0000 1.0000 39
##
## [[4]]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.7273 0.8835 1.0000 0.9419 1.0000 1.0000 18
##
## [[5]]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.9749 1.0000 0.9468 1.0000 1.0000 9
# NUMBER OF SIGNIFICANT SETS
ns1 <- lapply(ngenes ,function(n) {
res2 <- subset(res1,Var3==n)
ns <- matrix( res2$nsig_bad , ncol=9 )
rownames(ns) <- names(d)
colnames(ns) <- names(gs)
return(ns)
} )
names(ns1) <- ngenes
ns1
## $`125`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds 0 0 0 0 0 9 2 0
## bulkrna2.Rds 0 0 1 0 0 0 0 2
## bulkrna3.Rds 0 0 0 0 0 0 0 0
## bulkrna4.Rds 0 0 0 0 0 0 1 2
## bulkrna5.Rds 0 0 0 0 0 13 0 0
## bulkrna6.Rds 0 0 0 0 0 0 0 0
## bulkrna7.Rds 0 0 0 0 0 1 1 0
## Hallmark
## bulkrna1.Rds 0
## bulkrna2.Rds 0
## bulkrna3.Rds 0
## bulkrna4.Rds 0
## bulkrna5.Rds 1
## bulkrna6.Rds 0
## bulkrna7.Rds 0
##
## $`250`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds 0 0 0 0 0 1 0 0
## bulkrna2.Rds 2 3 0 0 7 16 0 32
## bulkrna3.Rds 0 0 0 0 1 0 3 4
## bulkrna4.Rds 0 0 0 0 1 0 0 2
## bulkrna5.Rds 0 0 1 1 1 20 0 0
## bulkrna6.Rds 0 0 0 1 2 0 0 4
## bulkrna7.Rds 0 2 1 0 1 0 19 12
## Hallmark
## bulkrna1.Rds 0
## bulkrna2.Rds 1
## bulkrna3.Rds 0
## bulkrna4.Rds 0
## bulkrna5.Rds 0
## bulkrna6.Rds 2
## bulkrna7.Rds 0
##
## $`500`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds 0 0 0 7 11 19 0 2
## bulkrna2.Rds 0 3 0 0 6 69 0 45
## bulkrna3.Rds 0 0 0 0 3 6 0 6
## bulkrna4.Rds 0 0 0 0 4 0 0 1
## bulkrna5.Rds 0 0 0 1 2 39 0 0
## bulkrna6.Rds 0 0 0 115 5 1 0 4
## bulkrna7.Rds 0 0 0 10 2 1 0 32
## Hallmark
## bulkrna1.Rds 0
## bulkrna2.Rds 0
## bulkrna3.Rds 0
## bulkrna4.Rds 0
## bulkrna5.Rds 0
## bulkrna6.Rds 0
## bulkrna7.Rds 0
##
## $`1000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds 0 49 1 12 33 91 3
## bulkrna2.Rds 0 65 2 3 40 214 0
## bulkrna3.Rds 0 103 2 3 36 105 1
## bulkrna4.Rds 0 0 0 0 19 11 0
## bulkrna5.Rds 0 0 1 87 45 78 0
## bulkrna6.Rds 0 81 2 307 28 73 0
## bulkrna7.Rds 0 34 1 2 19 93 3
## Cellmarkers Hallmark
## bulkrna1.Rds 34 2
## bulkrna2.Rds 117 7
## bulkrna3.Rds 36 3
## bulkrna4.Rds 7 0
## bulkrna5.Rds 12 0
## bulkrna6.Rds 43 5
## bulkrna7.Rds 99 0
##
## $`2000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds 3 122 18 188 88 289 9
## bulkrna2.Rds 1 96 18 37 88 621 2
## bulkrna3.Rds 1 98 3 3 100 269 1
## bulkrna4.Rds 8 108 0 2 66 88 0
## bulkrna5.Rds 0 39 0 270 113 214 0
## bulkrna6.Rds 1 74 18 690 89 188 0
## bulkrna7.Rds 0 24 6 36 64 323 0
## Cellmarkers Hallmark
## bulkrna1.Rds 82 3
## bulkrna2.Rds 255 13
## bulkrna3.Rds 91 3
## bulkrna4.Rds 31 2
## bulkrna5.Rds 43 0
## bulkrna6.Rds 104 12
## bulkrna7.Rds 240 3
ns1 <- lapply(ns1,function(y) {
res <- apply(y,2,function(x) {
x <- x[!is.na(x)]
x <- x[is.finite(x)]
mean(x)
} )
return(res)
})
ns1
## $`125`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.0000000 0.0000000 0.1428571 0.0000000 0.0000000 3.2857143
## HPO Cellmarkers Hallmark
## 0.5714286 0.5714286 0.1428571
##
## $`250`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.2857143 0.7142857 0.2857143 0.2857143 1.8571429 5.2857143
## HPO Cellmarkers Hallmark
## 3.1428571 7.7142857 0.4285714
##
## $`500`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.0000000 0.4285714 0.0000000 19.0000000 4.7142857 19.2857143
## HPO Cellmarkers Hallmark
## 0.0000000 12.8571429 0.0000000
##
## $`1000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.000000 47.428571 1.285714 59.142857 31.428571 95.000000
## HPO Cellmarkers Hallmark
## 1.000000 49.714286 2.428571
##
## $`2000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 2.000000 80.142857 9.000000 175.142857 86.857143 284.571429
## HPO Cellmarkers Hallmark
## 1.714286 120.857143 5.142857
ns2 <- lapply(ngenes ,function(n) {
res2 <- subset(res1,Var3==n)
ns <- matrix( res2$nsig_good , ncol=9 )
rownames(ns) <- names(d)
colnames(ns) <- names(gs)
return(ns)
} )
names(ns2) <- ngenes
ns2
## $`125`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds 0 0 0 0 0 6 0 0
## bulkrna2.Rds 0 0 0 0 0 0 0 0
## bulkrna3.Rds 0 0 0 0 0 0 0 0
## bulkrna4.Rds 0 0 0 0 0 0 0 0
## bulkrna5.Rds 0 0 0 0 0 0 0 0
## bulkrna6.Rds 0 0 0 0 0 0 0 0
## bulkrna7.Rds 0 0 0 0 0 0 1 0
## Hallmark
## bulkrna1.Rds 0
## bulkrna2.Rds 0
## bulkrna3.Rds 0
## bulkrna4.Rds 0
## bulkrna5.Rds 0
## bulkrna6.Rds 0
## bulkrna7.Rds 0
##
## $`250`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds 0 0 0 0 0 0 0 0
## bulkrna2.Rds 1 3 0 0 5 12 0 30
## bulkrna3.Rds 0 0 0 0 1 0 1 4
## bulkrna4.Rds 0 0 0 0 1 0 0 2
## bulkrna5.Rds 0 0 0 1 1 10 0 0
## bulkrna6.Rds 0 0 0 1 2 0 0 2
## bulkrna7.Rds 0 0 1 0 0 0 8 12
## Hallmark
## bulkrna1.Rds 0
## bulkrna2.Rds 1
## bulkrna3.Rds 0
## bulkrna4.Rds 0
## bulkrna5.Rds 0
## bulkrna6.Rds 2
## bulkrna7.Rds 0
##
## $`500`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO Cellmarkers
## bulkrna1.Rds 0 0 0 7 11 9 0 2
## bulkrna2.Rds 0 2 0 0 5 42 0 45
## bulkrna3.Rds 0 0 0 0 3 0 0 6
## bulkrna4.Rds 0 0 0 0 4 0 0 1
## bulkrna5.Rds 0 0 0 1 2 26 0 0
## bulkrna6.Rds 0 0 0 108 5 1 0 3
## bulkrna7.Rds 0 0 0 10 2 1 0 31
## Hallmark
## bulkrna1.Rds 0
## bulkrna2.Rds 0
## bulkrna3.Rds 0
## bulkrna4.Rds 0
## bulkrna5.Rds 0
## bulkrna6.Rds 0
## bulkrna7.Rds 0
##
## $`1000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds 0 36 1 12 33 73 3
## bulkrna2.Rds 0 56 2 3 40 189 0
## bulkrna3.Rds 0 91 2 3 32 92 1
## bulkrna4.Rds 0 0 0 0 19 8 0
## bulkrna5.Rds 0 0 1 86 45 57 0
## bulkrna6.Rds 0 77 2 305 27 60 0
## bulkrna7.Rds 0 26 1 2 17 80 3
## Cellmarkers Hallmark
## bulkrna1.Rds 34 2
## bulkrna2.Rds 112 7
## bulkrna3.Rds 36 3
## bulkrna4.Rds 7 0
## bulkrna5.Rds 10 0
## bulkrna6.Rds 42 5
## bulkrna7.Rds 98 0
##
## $`2000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds 3 121 18 184 88 289 9
## bulkrna2.Rds 0 96 18 37 87 600 2
## bulkrna3.Rds 0 97 3 3 96 262 1
## bulkrna4.Rds 8 105 0 2 66 82 0
## bulkrna5.Rds 0 36 0 269 111 205 0
## bulkrna6.Rds 1 72 18 688 87 177 0
## bulkrna7.Rds 0 17 6 36 62 313 0
## Cellmarkers Hallmark
## bulkrna1.Rds 81 3
## bulkrna2.Rds 255 13
## bulkrna3.Rds 91 3
## bulkrna4.Rds 31 2
## bulkrna5.Rds 43 0
## bulkrna6.Rds 104 12
## bulkrna7.Rds 242 3
ns2 <- lapply(ns2,function(y) {
res <- apply(y,2,function(x) {
x <- x[!is.na(x)]
x <- x[is.finite(x)]
mean(x)
} )
return(res)
})
ns2
## $`125`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.8571429
## HPO Cellmarkers Hallmark
## 0.1428571 0.0000000 0.0000000
##
## $`250`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.1428571 0.4285714 0.1428571 0.2857143 1.4285714 3.1428571
## HPO Cellmarkers Hallmark
## 1.2857143 7.1428571 0.4285714
##
## $`500`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.0000000 0.2857143 0.0000000 18.0000000 4.5714286 11.2857143
## HPO Cellmarkers Hallmark
## 0.0000000 12.5714286 0.0000000
##
## $`1000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.000000 40.857143 1.285714 58.714286 30.428571 79.857143
## HPO Cellmarkers Hallmark
## 1.000000 48.428571 2.428571
##
## $`2000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 1.714286 77.714286 9.000000 174.142857 85.285714 275.428571
## HPO Cellmarkers Hallmark
## 1.714286 121.000000 5.142857
# JACCARD
jac <- lapply(ngenes ,function(n) {
res2 <- subset(res1,Var3==n)
ns <- matrix( res2$Jaccard , ncol=9 )
rownames(ns) <- names(d)
colnames(ns) <- names(gs)
return(ns)
} )
names(jac) <- ngenes
jac
## $`125`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds NaN NaN NaN NaN NaN 0.6666667 0
## bulkrna2.Rds NaN NaN 0 NaN NaN NaN NaN
## bulkrna3.Rds NaN NaN NaN NaN NaN NaN NaN
## bulkrna4.Rds NaN NaN NaN NaN NaN NaN 0
## bulkrna5.Rds NaN NaN NaN NaN NaN 0.0000000 NaN
## bulkrna6.Rds NaN NaN NaN NaN NaN NaN NaN
## bulkrna7.Rds NaN NaN NaN NaN NaN 0.0000000 1
## Cellmarkers Hallmark
## bulkrna1.Rds NaN NaN
## bulkrna2.Rds 0 NaN
## bulkrna3.Rds NaN NaN
## bulkrna4.Rds 0 NaN
## bulkrna5.Rds NaN 0
## bulkrna6.Rds NaN NaN
## bulkrna7.Rds NaN NaN
##
## $`250`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds NaN NaN NaN NaN NaN 0.00 NaN
## bulkrna2.Rds 0.5 1 NaN NaN 0.7142857 0.75 NaN
## bulkrna3.Rds NaN NaN NaN NaN 1.0000000 NaN 0.3333333
## bulkrna4.Rds NaN NaN NaN NaN 1.0000000 NaN NaN
## bulkrna5.Rds NaN NaN 0 1 1.0000000 0.50 NaN
## bulkrna6.Rds NaN NaN NaN 1 1.0000000 NaN NaN
## bulkrna7.Rds NaN 0 1 NaN 0.0000000 NaN 0.4210526
## Cellmarkers Hallmark
## bulkrna1.Rds NaN NaN
## bulkrna2.Rds 0.9375 1
## bulkrna3.Rds 1.0000 NaN
## bulkrna4.Rds 1.0000 NaN
## bulkrna5.Rds NaN NaN
## bulkrna6.Rds 0.5000 1
## bulkrna7.Rds 1.0000 NaN
##
## $`500`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds NaN NaN NaN 1.0000000 1.0000000 0.4736842 NaN
## bulkrna2.Rds NaN 0.6666667 NaN NaN 0.8333333 0.6086957 NaN
## bulkrna3.Rds NaN NaN NaN NaN 1.0000000 0.0000000 NaN
## bulkrna4.Rds NaN NaN NaN NaN 1.0000000 NaN NaN
## bulkrna5.Rds NaN NaN NaN 1.0000000 1.0000000 0.6666667 NaN
## bulkrna6.Rds NaN NaN NaN 0.9391304 1.0000000 1.0000000 NaN
## bulkrna7.Rds NaN NaN NaN 1.0000000 1.0000000 1.0000000 NaN
## Cellmarkers Hallmark
## bulkrna1.Rds 1.00000 NaN
## bulkrna2.Rds 1.00000 NaN
## bulkrna3.Rds 1.00000 NaN
## bulkrna4.Rds 1.00000 NaN
## bulkrna5.Rds NaN NaN
## bulkrna6.Rds 0.75000 NaN
## bulkrna7.Rds 0.96875 NaN
##
## $`1000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds NaN 0.7346939 1 1.0000000 1.0000000 0.8021978 1
## bulkrna2.Rds NaN 0.8615385 1 1.0000000 1.0000000 0.8831776 NaN
## bulkrna3.Rds NaN 0.8834951 1 1.0000000 0.8888889 0.8761905 1
## bulkrna4.Rds NaN NaN NaN NaN 1.0000000 0.7272727 NaN
## bulkrna5.Rds NaN NaN 1 0.9885057 1.0000000 0.7307692 NaN
## bulkrna6.Rds NaN 0.9506173 1 0.9934853 0.9642857 0.8219178 NaN
## bulkrna7.Rds NaN 0.7647059 1 1.0000000 0.8947368 0.8602151 1
## Cellmarkers Hallmark
## bulkrna1.Rds 1.0000000 1
## bulkrna2.Rds 0.9572650 1
## bulkrna3.Rds 1.0000000 1
## bulkrna4.Rds 1.0000000 NaN
## bulkrna5.Rds 0.8333333 NaN
## bulkrna6.Rds 0.9767442 1
## bulkrna7.Rds 0.9898990 NaN
##
## $`2000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO HPO
## bulkrna1.Rds 1 0.9918033 1 0.9787234 1.0000000 1.0000000 1
## bulkrna2.Rds 0 1.0000000 1 1.0000000 0.9886364 0.9661836 1
## bulkrna3.Rds 0 0.9897959 1 1.0000000 0.9600000 0.9739777 1
## bulkrna4.Rds 1 0.9722222 NaN 1.0000000 1.0000000 0.9318182 NaN
## bulkrna5.Rds NaN 0.9230769 NaN 0.9962963 0.9823009 0.9579439 NaN
## bulkrna6.Rds 1 0.9729730 1 0.9971014 0.9775281 0.9414894 NaN
## bulkrna7.Rds NaN 0.7083333 1 1.0000000 0.9687500 0.9690402 NaN
## Cellmarkers Hallmark
## bulkrna1.Rds 0.9878049 1
## bulkrna2.Rds 1.0000000 1
## bulkrna3.Rds 1.0000000 1
## bulkrna4.Rds 1.0000000 1
## bulkrna5.Rds 1.0000000 NaN
## bulkrna6.Rds 1.0000000 1
## bulkrna7.Rds 0.9917355 1
jac <- lapply(jac,function(y) {
res <- apply(y,2,function(x) {
x <- x[!is.na(x)]
x <- x[is.finite(x)]
mean(x)
} )
return(res)
})
jac
## $`125`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## NaN NaN 0.0000000 NaN NaN 0.2222222
## HPO Cellmarkers Hallmark
## 0.3333333 0.0000000 0.0000000
##
## $`250`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.5000000 0.5000000 0.5000000 1.0000000 0.7857143 0.4166667
## HPO Cellmarkers Hallmark
## 0.3771930 0.8875000 1.0000000
##
## $`500`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## NaN 0.6666667 NaN 0.9847826 0.9761905 0.6248411
## HPO Cellmarkers Hallmark
## NaN 0.9531250 NaN
##
## $`1000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## NaN 0.8390101 1.0000000 0.9969985 0.9639873 0.8145344
## HPO Cellmarkers Hallmark
## 1.0000000 0.9653202 1.0000000
##
## $`2000`
## KEGG Reactome Wikipathways miR targets TFT GTRD GO
## 0.6000000 0.9368864 1.0000000 0.9960173 0.9824593 0.9629219
## HPO Cellmarkers Hallmark
## 1.0000000 0.9970772 1.0000000
jac <- do.call(rbind,jac)
plot(jac[,"GO"],type="b", pch=19,cex=1.5,lwd=2,
xaxt = "n",ylim=c(0,1),ylab="Jaccard index",xlab="no. genes selected")
axis(1, at = seq(1, 5, by = 1),
labels = ngenes)
points(jac[,"Reactome"],type="b",col="red",pch=19,cex=1.5,lwd=2)
points(jac[,"Cellmarkers"],type="b",col="blue",pch=19,cex=1.5,lwd=2)
points(jac[,"TFT GTRD"],type="b",col="gray50",pch=19,cex=1.5,lwd=2)
mtext("impact of gene list length on FDR problem severity")
legend("bottomright", legend=c("GO","Reactome", "Cellmarkers", "TFT"),
col=c("black", "red", "blue", "gray50"), lty=1, cex=1.2,lwd=3,pch=19)
For bulk RNA-seq data set #1, run enrichment with all the gene set libraries.
names(gs)
## [1] "KEGG" "Reactome" "Wikipathways" "miR targets" "TFT GTRD"
## [6] "GO" "HPO" "Cellmarkers" "Hallmark"
lapply(1:length(gs), function(i) {
bad <- orabad(d[[1]],gs[[i]],ngenes=125)
good <- oragood(d[[1]],gs[[i]],ngenes=125)
compare_ora_plots(bad,good,libname=names(gs)[i],setsize=125)
} )
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
bad <- orabad(d[[1]],gs[[i]],ngenes=250)
good <- oragood(d[[1]],gs[[i]],ngenes=250)
compare_ora_plots(bad,good,libname=names(gs)[i],setsize=250)
} )
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
bad <- orabad(d[[1]],gs[[i]],ngenes=500)
good <- oragood(d[[1]],gs[[i]],ngenes=500)
compare_ora_plots(bad,good,libname=names(gs)[i],setsize=500)
} )
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
bad <- orabad(d[[1]],gs[[i]],ngenes=1000)
good <- oragood(d[[1]],gs[[i]],ngenes=1000)
compare_ora_plots(bad,good,libname=names(gs)[i],setsize=1000)
} )
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
lapply(1:length(gs), function(i) {
bad <- orabad(d[[1]],gs[[i]],ngenes=2000)
good <- oragood(d[[1]],gs[[i]],ngenes=2000)
compare_ora_plots(bad,good,libname=names(gs)[i],setsize=2000)
} )
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
For reproducibility
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gplots_3.1.3.1 eulerr_7.0.2
## [3] fgsea_1.30.0 clusterProfiler_4.12.0
## [5] kableExtra_1.4.0 DESeq2_1.44.0
## [7] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [9] MatrixGenerics_1.16.0 matrixStats_1.3.0
## [11] GenomicRanges_1.56.0 GenomeInfoDb_1.40.0
## [13] IRanges_2.38.0 S4Vectors_0.42.0
## [15] BiocGenerics_0.50.0 getDEE2_1.14.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8
## [4] magrittr_2.0.3 farver_2.1.2 rmarkdown_2.27
## [7] fs_1.6.4 zlibbioc_1.50.0 vctrs_0.6.5
## [10] memoise_2.0.1.9000 ggtree_3.12.0 htmltools_0.5.8.1
## [13] S4Arrays_1.4.0 SparseArray_1.4.3 gridGraphics_0.5-1
## [16] sass_0.4.9 KernSmooth_2.23-24 bslib_0.7.0
## [19] plyr_1.8.9 cachem_1.1.0 igraph_2.0.3
## [22] lifecycle_1.0.4 pkgconfig_2.0.3 gson_0.1.0
## [25] Matrix_1.7-0 R6_2.5.1 fastmap_1.2.0
## [28] GenomeInfoDbData_1.2.12 digest_0.6.35 aplot_0.2.2
## [31] enrichplot_1.24.0 colorspace_2.1-0 patchwork_1.2.0
## [34] AnnotationDbi_1.66.0 RSQLite_2.3.7 fansi_1.0.6
## [37] httr_1.4.7 polyclip_1.10-6 abind_1.4-5
## [40] compiler_4.4.0 bit64_4.0.5 withr_3.0.0
## [43] BiocParallel_1.38.0 viridis_0.6.5 DBI_1.2.3
## [46] highr_0.11 ggforce_0.4.2 MASS_7.3-60.2
## [49] DelayedArray_0.30.1 HDO.db_0.99.1 caTools_1.18.2
## [52] gtools_3.9.5 tools_4.4.0 scatterpie_0.2.2
## [55] ape_5.8 glue_1.7.0 nlme_3.1-164
## [58] GOSemSim_2.30.0 shadowtext_0.1.3 grid_4.4.0
## [61] reshape2_1.4.4 generics_0.1.3 gtable_0.3.5
## [64] tidyr_1.3.1 data.table_1.15.4 tidygraph_1.3.1
## [67] xml2_1.3.6 utf8_1.2.4 XVector_0.44.0
## [70] ggrepel_0.9.5 pillar_1.9.0 stringr_1.5.1
## [73] yulab.utils_0.1.4 splines_4.4.0 dplyr_1.1.4
## [76] tweenr_2.0.3 treeio_1.28.0 lattice_0.22-6
## [79] bit_4.0.5 tidyselect_1.2.1 GO.db_3.19.1
## [82] locfit_1.5-9.9 Biostrings_2.72.0 knitr_1.47
## [85] gridExtra_2.3 svglite_2.1.3 xfun_0.44
## [88] graphlayouts_1.1.1 stringi_1.8.4 UCSC.utils_1.0.0
## [91] lazyeval_0.2.2 ggfun_0.1.5 yaml_2.3.8
## [94] evaluate_0.23 codetools_0.2-20 ggraph_2.2.1
## [97] tibble_3.2.1 qvalue_2.36.0 ggplotify_0.1.2
## [100] cli_3.6.2 systemfonts_1.1.0 munsell_0.5.1
## [103] jquerylib_0.1.4 Rcpp_1.0.12 png_0.1-8
## [106] ggplot2_3.5.1 blob_1.2.4 DOSE_3.30.1
## [109] htm2txt_2.2.2 bitops_1.0-7 viridisLite_0.4.2
## [112] tidytree_0.4.6 scales_1.3.0 purrr_1.0.2
## [115] crayon_1.5.2 rlang_1.1.4 cowplot_1.1.3
## [118] fastmatch_1.1-4 KEGGREST_1.44.0