Source: https://github.com/markziemann/SurveyEnrichmentMethods

Intro

Previously we analysed five datasets below and looked at the differences between FCS and ORA methods for gene set enrichment.

Dataset SRA accesion genes in annotation set genes detected genes differentially expressed
1. SRP128998 39297 15635 3472
2. SRP038101 39297 13926 3589
3. SRP096178 39297 15477 9488
4. SRP038101 39297 15607 5150
5. SRP247621 39297 14288 230
6. SRP253951 39297 15182 8588
7. SRP068733 39297 14255 7365
suppressPackageStartupMessages({
library("kableExtra")
library("beeswarm")
})

Summarise

Here I’m summarising the results of the five example data sets.

exdat=NULL
ex1dat <- readRDS("ex1dat.rds")
ex2dat <- readRDS("ex2dat.rds")
ex3dat <- readRDS("ex3dat.rds")
ex4dat <- readRDS("ex4dat.rds")
ex5dat <- readRDS("ex5dat.rds")
ex6dat <- readRDS("ex6dat.rds")
ex7dat <- readRDS("ex7dat.rds")

exdat <- rbind(ex1dat,ex2dat,ex3dat,ex4dat,ex5dat,ex6dat,ex7dat)
rownames(exdat) <- c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733")

exdat %>% kbl(caption="jaccard index values across seven RNA-seq studies") %>% kable_paper("hover", full_width = F)
jaccard index values across seven RNA-seq studies
FCS vs ORA ORA vs ORA* FCS vs ORA* ORA vs ORA comb ORA vs ORA* comb
SRP128998 0.6564706 0.4119360 0.5259467 0.0396040 0.0785498
SRP038101 0.5235294 0.3287858 0.5150376 0.2570423 0.3857494
SRP096178 0.6666667 0.3551724 0.4409836 0.0000000 0.2664908
SRP038101 0.6309859 0.3918440 0.5311973 0.4226804 0.4214876
SRP247621 0.4739130 0.1653905 0.3014925 0.0086207 0.0084746
SRP253951 0.5724466 0.3660031 0.5404624 0.0681004 0.3248120
SRP068733 0.5938697 0.1913580 0.2848665 0.2645503 0.1758475
means <- colMeans(exdat)
medians <- apply(exdat,2,median)
sds <- apply(exdat,2,sd)
summstats <- rbind(means,medians,sds)
summstats %>% kbl(caption="summary statistics of different enrichment analysis comparisons") %>% kable_paper("hover", full_width = F)
summary statistics of different enrichment analysis comparisons
FCS vs ORA ORA vs ORA* FCS vs ORA* ORA vs ORA comb ORA vs ORA* comb
means 0.5882688 0.3157842 0.4485695 0.1515140 0.2373445
medians 0.5938697 0.3551724 0.5150376 0.0681004 0.2664908
sds 0.0709138 0.0977862 0.1111791 0.1634530 0.1558909

Here let’s plot the jaccard scores. First as a boxplot and then overlay the dataset studies over the top.

cols <- c("#CC79A7","#56B4E9","#E69F00","#F0E442","#009E73","#0072B2","#D55E00")
coll <- list(cols, cols, cols, cols, cols)
par(mar=c(5,10,3,1))
boxplot(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),names = rev(colnames(exdat)),horizontal = TRUE, las=1 , col="white", cex=1.5,pch=19, xlab = "Jaccard index")
beeswarm(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),labels = rev(colnames(exdat)),horizontal = TRUE, las=1 , pwcol = coll,cex=1.5,pch=19, xlab = "Jaccard index",add = TRUE)
legend("bottomright", legend=c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733"), pch=19,  col=cols, cex=1,pt.cex = 1.5)

pdf("images/jacbox1.pdf",width=5,height=4)
par(mar=c(5,9,3,2))
boxplot(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),names = rev(colnames(exdat)),horizontal = TRUE, las=1 , col="white", cex=1.5,pch=19, xlab = "Jaccard index")
beeswarm(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),labels = rev(colnames(exdat)),horizontal = TRUE, las=1 , pwcol = coll,cex=1.5,pch=19, xlab = "Jaccard index",add = TRUE)
legend("topleft", legend=c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733"), pch=19,  col=cols, cex=1,pt.cex = 1.5)
dev.off()
## X11cairo 
##        2
png("images/jacbox1.png")
par(mar=c(5,9,3,1))
boxplot(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),names = rev(colnames(exdat)),horizontal = TRUE, las=1 , col="white", cex=1.5,pch=19, xlab = "Jaccard index")
beeswarm(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),labels = rev(colnames(exdat)),horizontal = TRUE, las=1 , pwcol = coll,cex=1.5,pch=19, xlab = "Jaccard index",add = TRUE)
legend("bottomright", legend=c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733"), pch=19,  col=cols, cex=1.2,pt.cex = 1.5)
dev.off()
## X11cairo 
##        2

Now plot as a simple bargraph

mymean <- colMeans(exdat)
mysd <- apply(exdat,2,sd)

par(mar=c(5,10,3,1))
barplot(rev(colMeans(exdat)),xlab="mean Jaccard index",horiz = TRUE, las =1, xlim=c(0,.7) )

pdf("images/jacbar1.pdf",width=4,height=4)
par(mar=c(5,10,3,1))
barplot(rev(colMeans(exdat)),xlab="mean Jaccard index",horiz = TRUE, las =1, xlim=c(0,.7) )
dev.off()
## X11cairo 
##        2
png("images/jacbar1.png")
par(mar=c(5,10,3,1))
barplot(rev(colMeans(exdat)),xlab="mean Jaccard index",horiz = TRUE, las =1, xlim=c(0,.7) )
dev.off()
## X11cairo 
##        2

Session information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] beeswarm_0.4.0   kableExtra_1.3.4
## 
## loaded via a namespace (and not attached):
##  [1] rstudioapi_0.13   knitr_1.36        xml2_1.3.2        magrittr_2.0.1   
##  [5] rvest_1.0.2       munsell_0.5.0     colorspace_2.0-2  viridisLite_0.4.0
##  [9] R6_2.5.1          rlang_0.4.12      fastmap_1.1.0     highr_0.9        
## [13] stringr_1.4.0     httr_1.4.2        tools_4.1.2       webshot_0.5.2    
## [17] xfun_0.28         jquerylib_0.1.4   htmltools_0.5.2   systemfonts_1.0.3
## [21] yaml_2.2.1        digest_0.6.28     lifecycle_1.0.1   sass_0.4.0       
## [25] glue_1.5.0        evaluate_0.14     rmarkdown_2.11    stringi_1.7.5    
## [29] bslib_0.3.1       compiler_4.1.2    scales_1.1.1      jsonlite_1.7.2   
## [33] svglite_2.0.0