Source: TBA
How many parallel threads should be used for pathway enrichment analysis?
Test machine: AMD Ryzen Threadripper 1900X 8-core processor (16 hardware threads).
#BiocManager::install(c("mitch","fgsea"))
#install.packages(c("tictoc","RhpcBLASctl","peakRAM"))
library("mitch")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library("fgsea")
library("tictoc")
library("RhpcBLASctl")
library("peakRAM")
# Pin BLAS to one thread before benchmarking (presumably so that BLAS-level
# threading does not interfere with the per-tool worker counts tested below).
blas_set_num_threads(1)

# Fetch the example differential-expression table only when no local copy
# exists, so repeated runs do not hit the network again.
# mode = "wb" forces a binary transfer: an .Rds file is binary, and on Windows
# the default text mode can corrupt it (the capital-R ".Rds" extension is not
# covered by download.file's automatic binary-mode rule).
if (!file.exists("de.Rds")) {
  download.file(
    "https://ziemann-lab.net/public/fgseatest/de.Rds",
    destfile = "de.Rds",
    mode = "wb"
  )
}
de <- readRDS("de.Rds")

# Preview the first rows of the table.
head(de)
## baseMean log2FoldChange lfcSE stat
## ENSG00000165949 IFI27 1960.1970 -3.384492 0.09388689 -36.04861
## ENSG00000090382 LYZ 7596.0299 -1.650342 0.05611430 -29.41036
## ENSG00000115461 IGFBP5 531.2217 -5.071157 0.17952391 -28.24781
## ENSG00000157601 MX1 827.1511 -2.877795 0.10478234 -27.46450
## ENSG00000111331 OAS3 2127.2010 -2.661214 0.09721242 -27.37525
## ENSG00000070915 SLC12A3 424.5509 -3.374852 0.12986708 -25.98697
## pvalue padj SRR1171523 SRR1171524
## ENSG00000165949 IFI27 1.450013e-284 1.909377e-280 12.05759 12.12946
## ENSG00000090382 LYZ 4.048160e-190 2.665308e-186 13.52939 13.52615
## ENSG00000115461 IGFBP5 1.514307e-175 6.646797e-172 10.60714 10.46316
## ENSG00000157601 MX1 4.663288e-166 1.535154e-162 10.88831 11.08737
## ENSG00000111331 OAS3 5.406541e-165 1.423867e-161 11.92053 12.26289
## ENSG00000070915 SLC12A3 6.951548e-149 1.525633e-145 10.35061 10.33824
## SRR1171525 SRR1171526 SRR1171527 SRR1171528
## ENSG00000165949 IFI27 11.82385 9.646471 9.705799 9.623453
## ENSG00000090382 LYZ 13.62313 12.080100 12.012891 12.031277
## ENSG00000115461 IGFBP5 10.69892 8.568916 8.566744 8.693134
## ENSG00000157601 MX1 10.86873 9.322793 9.356473 9.267699
## ENSG00000111331 OAS3 11.91655 10.108651 10.070989 10.012229
## ENSG00000070915 SLC12A3 10.26395 8.844934 8.904787 8.871748
# Fetch the gene-set collection (GMT file) only when no local copy exists, so
# repeated runs do not download it again.
if (!file.exists("c5.go.v2023.2.Hs.symbols.gmt")) {
  download.file(
    "https://ziemann-lab.net/public/fgseatest/c5.go.v2023.2.Hs.symbols.gmt",
    destfile = "c5.go.v2023.2.Hs.symbols.gmt"
  )
}
pw <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")

# Gene table for mitch: column 1 holds the row names of `de` (of the form
# "ENSG00000165949 IFI27"), column `g` holds the second space-separated token
# (the gene symbol). vapply() guarantees exactly one character value per row
# instead of relying on sapply()'s input-dependent simplification.
gt <- data.frame(rownames(de))
gt$g <- vapply(strsplit(gt[, 1], " "), "[[", character(1), 2)

# Import the DESeq2-style table into mitch, using the gene table above.
m <- mitch_import(x = de, DEtype = "deseq2", geneTable = gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 13168
## Note: no. genes in output = 13164
## Note: estimated proportion of input genes in output = 1
# Time one mitch_calc() run for every thread count from 1 to 16.
# Each list element is the value of toc(), which is post-processed further
# down to obtain elapsed seconds; the enrichment result itself is discarded.
corerange <- seq_len(16)
mres <- lapply(corerange, function(cores) {
  tic()
  mitch_calc(x = m, genesets = pw, cores = cores)
  toc()
})
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 45.769 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 25.727 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 18.463 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 18.576 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 16.529 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.73 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 15.47 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 13.163 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.203 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.987 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 13.361 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 12.997 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.734 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.239 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 12.961 sec elapsed
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## 14.601 sec elapsed
# Measure elapsed time and peak memory of a single-threaded mitch run.
peakRAM(
  mxres <- mitch_calc(x = m, genesets = pw, cores = 1)
)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
## Function_Call Elapsed_Time_sec
## 1 mxres<-mitch_calc(x=m,genesets=pw,cores=1) 41.119
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 1.1 110
# Convert the list of toc() results into elapsed seconds per thread count.
# Each element carries named `tic` and `toc` timestamps; subtracting them
# directly avoids flattening the records to a character matrix and then
# selecting the timestamps by column position, which depends on the record
# layout and round-trips the numbers through text.
mres <- vapply(
  mres,
  function(timing) unname(timing[["toc"]] - timing[["tic"]]),
  numeric(1)
)
names(mres) <- corerange
mres
## 1 2 3 4 5 6 7 8 9 10 11
## 45.769 25.727 18.463 18.576 16.529 14.730 15.470 13.163 14.203 14.987 13.361
## 12 13 14 15 16
## 12.997 14.734 14.239 12.961 14.601
# Bar chart of mitch elapsed time against the number of parallel threads.
barplot(
  mres,
  main = "mitch",
  xlab = "parallel threads",
  ylab = "elapsed time in s"
)
# Gene-level statistic vector for fgsea: the first column of the
# mitch-imported table, labelled with that table's row names.
f <- setNames(as.vector(m[, 1]), rownames(m))

# Time one fgsea() run for every thread count from 1 to 16.
# Each list element is the value of toc(); the enrichment result itself is
# discarded.
corerange <- seq_len(16)
fres <- lapply(corerange, function(cores) {
  tic()
  fgsea(pathways = pw, stats = f, minSize = 10, nproc = cores)
  toc()
})
## Warning in fgseaMultilevel(...): There were 20 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 147.233 sec elapsed
## Warning in fgseaMultilevel(...): There were 20 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 77.812 sec elapsed
## Warning in fgseaMultilevel(...): There were 24 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 54.43 sec elapsed
## Warning in fgseaMultilevel(...): There were 19 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 46.759 sec elapsed
## Warning in fgseaMultilevel(...): There were 20 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 35.989 sec elapsed
## Warning in fgseaMultilevel(...): There were 17 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 36.65 sec elapsed
## Warning in fgseaMultilevel(...): There were 12 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 32.894 sec elapsed
## Warning in fgseaMultilevel(...): There were 12 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 31.994 sec elapsed
## Warning in fgseaMultilevel(...): There were 23 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 25.354 sec elapsed
## Warning in fgseaMultilevel(...): There were 25 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 24.403 sec elapsed
## Warning in fgseaMultilevel(...): There were 25 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 22.329 sec elapsed
## Warning in fgseaMultilevel(...): There were 25 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 23.01 sec elapsed
## Warning in fgseaMultilevel(...): There were 27 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 26.257 sec elapsed
## Warning in fgseaMultilevel(...): There were 21 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 25.891 sec elapsed
## Warning in fgseaMultilevel(...): There were 26 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 23.911 sec elapsed
## Warning in fgseaMultilevel(...): There were 14 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 25.098 sec elapsed
# Time and peak memory for fgsea with 1 BLAS thread and 1 fgsea worker.
blas_set_num_threads(1)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 1)
)
## Warning in fgseaMultilevel(...): There were 22 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=1) 146.414
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 -7.2 90.5
# Same single-worker fgsea run, but with 8 BLAS threads allowed.
blas_set_num_threads(8)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 1)
)
## Warning in fgseaMultilevel(...): There were 17 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=1) 152.926
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 8.3 106
# Time and peak memory for fgsea with 1 BLAS thread and 8 fgsea workers.
blas_set_num_threads(1)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 8)
)
## Warning in fgseaMultilevel(...): There were 15 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=8) 29.22
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 9.3 106
# Time and peak memory for fgsea with 4 BLAS threads and 4 fgsea workers.
# NOTE(review): nothing visible after this call resets the BLAS thread count,
# so it stays at 4 for the rest of the script.
blas_set_num_threads(4)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 4)
)
## Warning in fgseaMultilevel(...): There were 29 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=4) 43.284
## Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1 9.2 106
# Convert the list of toc() results into elapsed seconds per thread count.
# Each element carries named `tic` and `toc` timestamps; subtracting them
# directly avoids flattening the records to a character matrix and then
# selecting the timestamps by column position, which depends on the record
# layout and round-trips the numbers through text.
fres <- vapply(
  fres,
  function(timing) unname(timing[["toc"]] - timing[["tic"]]),
  numeric(1)
)
names(fres) <- corerange
fres
## 1 2 3 4 5 6 7 8 9 10
## 147.233 77.812 54.430 46.759 35.989 36.650 32.894 31.994 25.354 24.403
## 11 12 13 14 15 16
## 22.329 23.010 26.257 25.891 23.911 25.098
# Bar chart of fgsea elapsed time against the number of parallel threads.
barplot(
  fres,
  main = "fgsea",
  xlab = "parallel threads",
  ylab = "elapsed time in s"
)
# Record the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] peakRAM_1.0.2 RhpcBLASctl_0.23-42 tictoc_1.2.1
## [4] fgsea_1.22.0 mitch_1.8.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 lattice_0.20-45 tidyr_1.3.1
## [4] gtools_3.9.5 digest_0.6.29 utf8_1.2.2
## [7] mime_0.12 R6_2.5.1 plyr_1.8.9
## [10] evaluate_0.15 highr_0.11 ggplot2_3.5.1
## [13] pillar_1.9.0 gplots_3.1.3.1 rlang_1.1.4
## [16] data.table_1.15.4 jquerylib_0.1.4 Matrix_1.4-1
## [19] rmarkdown_2.27 BiocParallel_1.30.4 stringr_1.5.1
## [22] htmlwidgets_1.5.4 munsell_0.5.1 shiny_1.7.2
## [25] compiler_4.2.1 httpuv_1.6.5 xfun_0.46
## [28] pkgconfig_2.0.3 ggstats_0.6.0 htmltools_0.5.3
## [31] tidyselect_1.2.1 tibble_3.2.1 gridExtra_2.3
## [34] codetools_0.2-18 fansi_1.0.3 dplyr_1.1.4
## [37] later_1.3.0 MASS_7.3-58.1 bitops_1.0-7
## [40] grid_4.2.1 jsonlite_1.8.0 xtable_1.8-4
## [43] GGally_2.2.1 gtable_0.3.5 lifecycle_1.0.4
## [46] magrittr_2.0.3 scales_1.3.0 KernSmooth_2.23-20
## [49] cli_3.6.3 stringi_1.7.8 cachem_1.0.6
## [52] reshape2_1.4.4 promises_1.2.0.1 bslib_0.4.0
## [55] ellipsis_0.3.2 generics_0.1.3 vctrs_0.6.5
## [58] fastmatch_1.1-4 RColorBrewer_1.1-3 tools_4.2.1
## [61] glue_1.6.2 beeswarm_0.4.0 purrr_1.0.2
## [64] parallel_4.2.1 fastmap_1.1.0 yaml_2.3.5
## [67] colorspace_2.1-0 caTools_1.18.2 knitr_1.48
## [70] echarts4r_0.4.5 sass_0.4.2