Source: TBA
How many parallel threads should be used for pathway enrichment analysis?
AMD Ryzen Threadripper 1900X 8-Core Processor (16 parallel threads).
BiocManager::install(c("mitch","fgsea"))
## Bioconductor version 3.11 (BiocManager 1.30.10), R 4.0.0 (2020-04-24)
## Installing package(s) 'mitch', 'fgsea'
## also installing the dependencies 'colorspace', 'formatR', 'hms', 'farver', 'labeling', 'munsell', 'viridisLite', 'bitops', 'generics', 'tidyselect', 'tidyr', 'httpuv', 'xtable', 'sourcetools', 'fastmap', 'lambda.r', 'futile.options', 'gtable', 'progress', 'RColorBrewer', 'reshape', 'scales', 'isoband', 'gtools', 'gdata', 'caTools', 'dplyr', 'countrycode', 'd3r', 'broom', 'shiny', 'corrplot', 'data.tree', 'futile.logger', 'snow', 'plyr', 'reshape2', 'GGally', 'gridExtra', 'ggplot2', 'gplots', 'beeswarm', 'echarts4r', 'data.table', 'BiocParallel', 'fastmatch'
install.packages(c("tictoc","RhpcBLASctl","peakRAM"))
## Installing packages into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)
library("mitch")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library("fgsea")
library("tictoc")
## Warning: package 'tictoc' was built under R version 4.0.3
library("RhpcBLASctl")
## Warning: package 'RhpcBLASctl' was built under R version 4.0.3
library("peakRAM")
blas_set_num_threads(1)
download.file("https://ziemann-lab.net/public/fgseatest/de.Rds",
"de.Rds")
de <- readRDS("de.Rds")
head(de)
## baseMean log2FoldChange lfcSE stat
## ENSG00000165949 IFI27 1960.1970 -3.384492 0.09388689 -36.04861
## ENSG00000090382 LYZ 7596.0299 -1.650342 0.05611430 -29.41036
## ENSG00000115461 IGFBP5 531.2217 -5.071157 0.17952391 -28.24781
## ENSG00000157601 MX1 827.1511 -2.877795 0.10478234 -27.46450
## ENSG00000111331 OAS3 2127.2010 -2.661214 0.09721242 -27.37525
## ENSG00000070915 SLC12A3 424.5509 -3.374852 0.12986708 -25.98697
## pvalue padj SRR1171523 SRR1171524
## ENSG00000165949 IFI27 1.450013e-284 1.909377e-280 12.05759 12.12946
## ENSG00000090382 LYZ 4.048160e-190 2.665308e-186 13.52939 13.52615
## ENSG00000115461 IGFBP5 1.514307e-175 6.646797e-172 10.60714 10.46316
## ENSG00000157601 MX1 4.663288e-166 1.535154e-162 10.88831 11.08737
## ENSG00000111331 OAS3 5.406541e-165 1.423867e-161 11.92053 12.26289
## ENSG00000070915 SLC12A3 6.951548e-149 1.525633e-145 10.35061 10.33824
## SRR1171525 SRR1171526 SRR1171527 SRR1171528
## ENSG00000165949 IFI27 11.82385 9.646471 9.705799 9.623453
## ENSG00000090382 LYZ 13.62313 12.080100 12.012891 12.031277
## ENSG00000115461 IGFBP5 10.69892 8.568916 8.566744 8.693134
## ENSG00000157601 MX1 10.86873 9.322793 9.356473 9.267699
## ENSG00000111331 OAS3 11.91655 10.108651 10.070989 10.012229
## ENSG00000070915 SLC12A3 10.26395 8.844934 8.904787 8.871748
download.file("https://ziemann-lab.net/public/fgseatest/c5.go.v2023.2.Hs.symbols.gmt",
"c5.go.v2023.2.Hs.symbols.gmt")
pw <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")
gt <- data.frame(rownames(de))
gt$g <- sapply(strsplit(gt[,1]," "),"[[",2)
m <- mitch_import(x=de,DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 13168
## Note: no. genes in output = 13164
## Note: estimated proportion of input genes in output = 1
corerange <- 1:16
mres <- lapply(corerange, function(cores) {
tic()
mres <- mitch_calc(x=m,genesets=pw,cores=cores)
toc()
} )
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 44.708 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 25.097 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 20.425 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 17.815 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 13.9 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 15.201 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 13.549 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 12.972 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 16.291 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 15.545 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 13.042 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.536 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 13.099 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.716 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 11.544 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.431 sec elapsed
peakRAM(mxres <- mitch_calc(x=m,genesets=pw,cores=1))
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## Function_Call Elapsed_Time_sec
## 1 mxres<-mitch_calc(x=m,genesets=pw,cores=1) 41.16
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 1 108.5
mres <- do.call(rbind,lapply(mres,unlist))
mres <- as.numeric(mres[,2]) - as.numeric(mres[,1])
names(mres) <- corerange
mres
## 1 2 3 4 5 6 7 8 9 10 11
## 44.708 25.097 20.425 17.815 13.900 15.201 13.549 12.972 16.291 15.545 13.042
## 12 13 14 15 16
## 14.536 13.099 14.716 11.544 14.431
barplot(mres,ylab="elapsed time in s",xlab="parallel threads", main="mitch")
f <- as.vector(m[,1])
names(f) <- rownames(m)
corerange <- 1:16
fres <- lapply(corerange, function(cores) {
tic()
fgseaRes <- fgsea(pathways = pw,
stats = f,
minSize = 10,
nproc=cores)
toc()
} )
## Warning in fgseaMultilevel(...): There were 1 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 654.316 sec elapsed
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 347.007 sec elapsed
## Warning in fgseaMultilevel(...): There were 3 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 247.004 sec elapsed
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 215.428 sec elapsed
## Warning in fgseaMultilevel(...): There were 6 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 118.221 sec elapsed
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 147.009 sec elapsed
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 168.877 sec elapsed
## Warning in fgseaMultilevel(...): There were 3 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 112.069 sec elapsed
## Warning in fgseaMultilevel(...): There were 1 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 121.509 sec elapsed
## Warning in fgseaMultilevel(...): There were 1 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 133.855 sec elapsed
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 113.679 sec elapsed
## Warning in fgseaMultilevel(...): There were 2 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 112.87 sec elapsed
## Warning in fgseaMultilevel(...): There were 6 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 81.215 sec elapsed
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 115.173 sec elapsed
## Warning in fgseaMultilevel(...): There were 1 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 94.948 sec elapsed
## Warning in fgseaMultilevel(...): There were 4 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## 64.685 sec elapsed
blas_set_num_threads(1)
peakRAM(fgseaRes <- fgsea(pathways = pw,
stats = f,
minSize = 10,
nproc=1))
## Warning in fgseaMultilevel(...): There were 6 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=1) 507.955
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 24.5 89.1
blas_set_num_threads(8)
peakRAM(fgseaRes <- fgsea(pathways = pw,
stats = f,
minSize = 10,
nproc=1))
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=1) 681.537
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 8.5 104.5
blas_set_num_threads(1)
peakRAM(fgseaRes <- fgsea(pathways = pw,
stats = f,
minSize = 10,
nproc=8))
## Warning in fgseaMultilevel(...): There were 10 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values.
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=8) 76.662
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 8.3 104.5
blas_set_num_threads(4)
peakRAM(fgseaRes <- fgsea(pathways = pw,
stats = f,
minSize = 10,
nproc=4))
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=4) 222.283
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 8.4 104.5
fres <- do.call(rbind,lapply(fres,unlist))
fres <- as.numeric(fres[,2]) - as.numeric(fres[,1])
names(fres) <- corerange
fres
## 1 2 3 4 5 6 7 8 9 10
## 654.316 347.007 247.004 215.428 118.221 147.009 168.877 112.069 121.509 133.855
## 11 12 13 14 15 16
## 113.679 112.870 81.215 115.173 94.948 64.685
barplot(fres,ylab="elapsed time in s",xlab="parallel threads", main="fgsea")
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] peakRAM_1.0.2 RhpcBLASctl_0.20-137 tictoc_1.0
## [4] fgsea_1.14.0 mitch_1.0.10 BiocManager_1.30.10
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.4.6 lattice_0.20-41 gtools_3.8.2
## [4] digest_0.6.25 mime_0.9 R6_2.4.1
## [7] plyr_1.8.6 evaluate_0.14 ggplot2_3.3.1
## [10] pillar_1.4.4 gplots_3.0.3 rlang_0.4.6
## [13] data.table_1.12.8 gdata_2.18.0 Matrix_1.2-18
## [16] rmarkdown_2.2 BiocParallel_1.22.0 stringr_1.4.0
## [19] htmlwidgets_1.5.1 munsell_0.5.0 shiny_1.4.0.2
## [22] compiler_4.0.0 httpuv_1.5.4 xfun_0.14
## [25] pkgconfig_2.0.3 htmltools_0.4.0 tidyselect_1.1.0
## [28] tibble_3.0.1 gridExtra_2.3 reshape_0.8.8
## [31] crayon_1.3.4 dplyr_1.0.0 later_1.1.0.1
## [34] MASS_7.3-51.6 bitops_1.0-6 grid_4.0.0
## [37] xtable_1.8-4 GGally_2.0.0 gtable_0.3.0
## [40] lifecycle_0.2.0 magrittr_1.5 scales_1.1.1
## [43] KernSmooth_2.23-17 stringi_1.4.6 reshape2_1.4.4
## [46] promises_1.1.0 ellipsis_0.3.1 generics_0.0.2
## [49] vctrs_0.3.1 fastmatch_1.1-0 RColorBrewer_1.1-2
## [52] tools_4.0.0 glue_1.4.1 beeswarm_0.2.3
## [55] purrr_0.3.4 parallel_4.0.0 fastmap_1.0.1
## [58] yaml_2.2.1 colorspace_1.4-1 caTools_1.18.0
## [61] knitr_1.28 echarts4r_0.2.3