Source: https://github.com/markziemann/SurveyEnrichmentMethods
Previously we analysed five datasets below and looked at the differences between FCS and ORA methods for gene set enrichment.
Dataset | SRA accesion | genes in annotation set | genes detected | genes differentially expressed |
---|---|---|---|---|
1. | SRP128998 | 39297 | 15635 | 3472 |
2. | SRP038101 | 39297 | 13926 | 3589 |
3. | SRP096178 | 39297 | 15477 | 9488 |
4. | SRP038101 | 39297 | 15607 | 5150 |
5. | SRP247621 | 39297 | 14288 | 230 |
6. | SRP253951 | 39297 | 15182 | 8588 |
7. | SRP068733 | 39297 | 14255 | 7365 |
suppressPackageStartupMessages({
library("kableExtra")
library("beeswarm")
})
Here I’m summarising the results of the five example data sets.
exdat=NULL
ex1dat <- readRDS("ex1dat.rds")
ex2dat <- readRDS("ex2dat.rds")
ex3dat <- readRDS("ex3dat.rds")
ex4dat <- readRDS("ex4dat.rds")
ex5dat <- readRDS("ex5dat.rds")
ex6dat <- readRDS("ex6dat.rds")
ex7dat <- readRDS("ex7dat.rds")
exdat <- rbind(ex1dat,ex2dat,ex3dat,ex4dat,ex5dat,ex6dat,ex7dat)
rownames(exdat) <- c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733")
exdat %>% kbl(caption="jaccard index values across seven RNA-seq studies") %>% kable_paper("hover", full_width = F)
FCS vs ORA | ORA vs ORA* | FCS vs ORA* | ORA vs ORA comb | ORA vs ORA* comb | |
---|---|---|---|---|---|
SRP128998 | 0.6564706 | 0.4119360 | 0.5259467 | 0.0396040 | 0.0785498 |
SRP038101 | 0.5235294 | 0.3287858 | 0.5150376 | 0.2570423 | 0.3857494 |
SRP096178 | 0.6666667 | 0.3551724 | 0.4409836 | 0.0000000 | 0.2664908 |
SRP038101 | 0.6309859 | 0.3918440 | 0.5311973 | 0.4226804 | 0.4214876 |
SRP247621 | 0.4739130 | 0.1653905 | 0.3014925 | 0.0086207 | 0.0084746 |
SRP253951 | 0.5724466 | 0.3660031 | 0.5404624 | 0.0681004 | 0.3248120 |
SRP068733 | 0.5938697 | 0.1913580 | 0.2848665 | 0.2645503 | 0.1758475 |
means <- colMeans(exdat)
medians <- apply(exdat,2,median)
sds <- apply(exdat,2,sd)
summstats <- rbind(means,medians,sds)
summstats %>% kbl(caption="summary statistics of different enrichment analysis comparisons") %>% kable_paper("hover", full_width = F)
FCS vs ORA | ORA vs ORA* | FCS vs ORA* | ORA vs ORA comb | ORA vs ORA* comb | |
---|---|---|---|---|---|
means | 0.5882688 | 0.3157842 | 0.4485695 | 0.1515140 | 0.2373445 |
medians | 0.5938697 | 0.3551724 | 0.5150376 | 0.0681004 | 0.2664908 |
sds | 0.0709138 | 0.0977862 | 0.1111791 | 0.1634530 | 0.1558909 |
Here let’s plot the jaccard scores. First as a boxplot and then overlay the dataset studies over the top.
cols <- c("#CC79A7","#56B4E9","#E69F00","#F0E442","#009E73","#0072B2","#D55E00")
coll <- list(cols, cols, cols, cols, cols)
par(mar=c(5,10,3,1))
boxplot(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),names = rev(colnames(exdat)),horizontal = TRUE, las=1 , col="white", cex=1.5,pch=19, xlab = "Jaccard index")
beeswarm(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),labels = rev(colnames(exdat)),horizontal = TRUE, las=1 , pwcol = coll,cex=1.5,pch=19, xlab = "Jaccard index",add = TRUE)
legend("bottomright", legend=c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733"), pch=19, col=cols, cex=1,pt.cex = 1.5)
pdf("images/jacbox1.pdf",width=5,height=4)
par(mar=c(5,9,3,2))
boxplot(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),names = rev(colnames(exdat)),horizontal = TRUE, las=1 , col="white", cex=1.5,pch=19, xlab = "Jaccard index")
beeswarm(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),labels = rev(colnames(exdat)),horizontal = TRUE, las=1 , pwcol = coll,cex=1.5,pch=19, xlab = "Jaccard index",add = TRUE)
legend("topleft", legend=c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733"), pch=19, col=cols, cex=1,pt.cex = 1.5)
dev.off()
## X11cairo
## 2
png("images/jacbox1.png")
par(mar=c(5,9,3,1))
boxplot(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),names = rev(colnames(exdat)),horizontal = TRUE, las=1 , col="white", cex=1.5,pch=19, xlab = "Jaccard index")
beeswarm(list(exdat[,5],exdat[,4],exdat[,3],exdat[,2],exdat[,1]),labels = rev(colnames(exdat)),horizontal = TRUE, las=1 , pwcol = coll,cex=1.5,pch=19, xlab = "Jaccard index",add = TRUE)
legend("bottomright", legend=c("SRP128998", "SRP038101", "SRP096178","SRP038101","SRP247621","SRP253951","SRP068733"), pch=19, col=cols, cex=1.2,pt.cex = 1.5)
dev.off()
## X11cairo
## 2
Now plot as a simple bargraph
mymean <- colMeans(exdat)
mysd <- apply(exdat,2,sd)
par(mar=c(5,10,3,1))
barplot(rev(colMeans(exdat)),xlab="mean Jaccard index",horiz = TRUE, las =1, xlim=c(0,.7) )
pdf("images/jacbar1.pdf",width=4,height=4)
par(mar=c(5,10,3,1))
barplot(rev(colMeans(exdat)),xlab="mean Jaccard index",horiz = TRUE, las =1, xlim=c(0,.7) )
dev.off()
## X11cairo
## 2
png("images/jacbar1.png")
par(mar=c(5,10,3,1))
barplot(rev(colMeans(exdat)),xlab="mean Jaccard index",horiz = TRUE, las =1, xlim=c(0,.7) )
dev.off()
## X11cairo
## 2
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] beeswarm_0.4.0 kableExtra_1.3.4
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.13 knitr_1.36 xml2_1.3.2 magrittr_2.0.1
## [5] rvest_1.0.2 munsell_0.5.0 colorspace_2.0-2 viridisLite_0.4.0
## [9] R6_2.5.1 rlang_0.4.12 fastmap_1.1.0 highr_0.9
## [13] stringr_1.4.0 httr_1.4.2 tools_4.1.2 webshot_0.5.2
## [17] xfun_0.28 jquerylib_0.1.4 htmltools_0.5.2 systemfonts_1.0.3
## [21] yaml_2.2.1 digest_0.6.28 lifecycle_1.0.1 sass_0.4.0
## [25] glue_1.5.0 evaluate_0.14 rmarkdown_2.11 stringi_1.7.5
## [29] bslib_0.3.1 compiler_4.1.2 scales_1.1.1 jsonlite_1.7.2
## [33] svglite_2.0.0