Source: TBA

Intro

How many parallel threads should be used for pathway enrichment analysis?

The benchmarks below were run on an AMD Ryzen Threadripper 1900X 8-Core Processor (16 parallel threads).

#BiocManager::install(c("mitch","fgsea"))

#install.packages(c("tictoc","RhpcBLASctl","peakRAM"))

library("mitch")
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library("fgsea")
library("tictoc")
library("RhpcBLASctl")
library("peakRAM")
# Pin BLAS to a single thread before any benchmarking starts.
# NOTE(review): presumably so that implicit BLAS multithreading does not
# confound the explicit thread counts tested below -- confirm.
blas_set_num_threads(1)

Get gene expression data

# Fetch the gene expression results table once, then load and preview it.
# The download is skipped when a local copy already exists, so re-running the
# script does not need network access. mode = "wb" forces a binary transfer:
# the file is a serialized R object, and the capitalised ".Rds" suffix may not
# be picked up by download.file()'s automatic binary detection on Windows.
de_file <- "de.Rds"
if (!file.exists(de_file)) {
  download.file("https://ziemann-lab.net/public/fgseatest/de.Rds",
    destfile = de_file, mode = "wb")
}

de <- readRDS(de_file)
head(de)
##                          baseMean log2FoldChange      lfcSE      stat
## ENSG00000165949 IFI27   1960.1970      -3.384492 0.09388689 -36.04861
## ENSG00000090382 LYZ     7596.0299      -1.650342 0.05611430 -29.41036
## ENSG00000115461 IGFBP5   531.2217      -5.071157 0.17952391 -28.24781
## ENSG00000157601 MX1      827.1511      -2.877795 0.10478234 -27.46450
## ENSG00000111331 OAS3    2127.2010      -2.661214 0.09721242 -27.37525
## ENSG00000070915 SLC12A3  424.5509      -3.374852 0.12986708 -25.98697
##                                pvalue          padj SRR1171523 SRR1171524
## ENSG00000165949 IFI27   1.450013e-284 1.909377e-280   12.05759   12.12946
## ENSG00000090382 LYZ     4.048160e-190 2.665308e-186   13.52939   13.52615
## ENSG00000115461 IGFBP5  1.514307e-175 6.646797e-172   10.60714   10.46316
## ENSG00000157601 MX1     4.663288e-166 1.535154e-162   10.88831   11.08737
## ENSG00000111331 OAS3    5.406541e-165 1.423867e-161   11.92053   12.26289
## ENSG00000070915 SLC12A3 6.951548e-149 1.525633e-145   10.35061   10.33824
##                         SRR1171525 SRR1171526 SRR1171527 SRR1171528
## ENSG00000165949 IFI27     11.82385   9.646471   9.705799   9.623453
## ENSG00000090382 LYZ       13.62313  12.080100  12.012891  12.031277
## ENSG00000115461 IGFBP5    10.69892   8.568916   8.566744   8.693134
## ENSG00000157601 MX1       10.86873   9.322793   9.356473   9.267699
## ENSG00000111331 OAS3      11.91655  10.108651  10.070989  10.012229
## ENSG00000070915 SLC12A3   10.26395   8.844934   8.904787   8.871748

Get pathways

# Fetch the gene set file (GMT format) once and import it as the pathway list.
# The download is skipped when a local copy already exists, so re-running the
# script does not need network access.
gmt_file <- "c5.go.v2023.2.Hs.symbols.gmt"
if (!file.exists(gmt_file)) {
  download.file("https://ziemann-lab.net/public/fgseatest/c5.go.v2023.2.Hs.symbols.gmt",
    destfile = gmt_file)
}

pw <- gmt_import(gmt_file)

Mitch

# Build the two-column gene table passed to mitch_import(): column 1 holds the
# row names of `de` (e.g. "ENSG00000165949 IFI27"), column `g` holds the second
# space-separated field of each row name.
gt <- data.frame(rownames(de))
id_fields <- strsplit(gt[, 1], " ", fixed = TRUE)
# Fail early with a readable condition instead of a "subscript out of bounds"
# error from `[[` when a row name has no second field.
stopifnot(lengths(id_fields) >= 2L)
# vapply() guarantees a character vector regardless of input length.
gt$g <- vapply(id_fields, `[[`, character(1), 2L)

m <- mitch_import(x = de, DEtype = "deseq2", geneTable = gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 13168
## Note: no. genes in output = 13164
## Note: estimated proportion of input genes in output = 1
# Thread counts to benchmark: 1 through 16.
corerange <- seq_len(16)

# Run mitch_calc() once per thread count, bracketed by tic()/toc().
# The enrichment result is discarded; each list element is the value returned
# by toc() for that run.
mres <- lapply(corerange, function(n_threads) {
  tic()
  mitch_calc(x = m, genesets = pw, cores = n_threads)
  toc()
})
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 45.769 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 25.727 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 18.463 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 18.576 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 16.529 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.73 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 15.47 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 13.163 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.203 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.987 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 13.361 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 12.997 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.734 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.239 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 12.961 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.601 sec elapsed
# Report elapsed time and peak memory for one mitch_calc() run with cores = 1.
peakRAM(
  mxres <- mitch_calc(x = m, genesets = pw, cores = 1)
)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
##                                Function_Call Elapsed_Time_sec
## 1 mxres<-mitch_calc(x=m,genesets=pw,cores=1)           41.119
##   Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1                1.1               110
# Reduce the list of toc() results to elapsed seconds per thread count.
# Each element carries the start ("tic") and stop ("toc") clock readings; they
# are subtracted by name as numbers, rather than by column position in a
# character matrix, which round-tripped the values through strings.
mres <- vapply(mres, function(timing) {
  unname(timing[["toc"]] - timing[["tic"]])
}, numeric(1))
names(mres) <- corerange

mres
##      1      2      3      4      5      6      7      8      9     10     11 
## 45.769 25.727 18.463 18.576 16.529 14.730 15.470 13.163 14.203 14.987 13.361 
##     12     13     14     15     16 
## 12.997 14.734 14.239 12.961 14.601
# Bar chart of mitch elapsed time against the number of parallel threads.
barplot(
  height = mres,
  main = "mitch",
  xlab = "parallel threads",
  ylab = "elapsed time in s"
)

FGSEA

# Gene-level statistic vector for fgsea: first column of `m`, named by the
# row names of `m`.
f <- setNames(as.vector(m[, 1]), rownames(m))

# Thread counts to benchmark: 1 through 16.
corerange <- seq_len(16)

# Run fgsea() once per thread count (passed as nproc), bracketed by
# tic()/toc(). The enrichment result is discarded; each list element is the
# value returned by toc() for that run.
fres <- lapply(corerange, function(n_threads) {
  tic()
  fgsea(pathways = pw, stats = f, minSize = 10, nproc = n_threads)
  toc()
})
## Warning in fgseaMultilevel(...): There were 20 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 147.233 sec elapsed
## Warning in fgseaMultilevel(...): There were 20 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 77.812 sec elapsed
## Warning in fgseaMultilevel(...): There were 24 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 54.43 sec elapsed
## Warning in fgseaMultilevel(...): There were 19 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 46.759 sec elapsed
## Warning in fgseaMultilevel(...): There were 20 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 35.989 sec elapsed
## Warning in fgseaMultilevel(...): There were 17 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 36.65 sec elapsed
## Warning in fgseaMultilevel(...): There were 12 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 32.894 sec elapsed
## Warning in fgseaMultilevel(...): There were 12 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 31.994 sec elapsed
## Warning in fgseaMultilevel(...): There were 23 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 25.354 sec elapsed
## Warning in fgseaMultilevel(...): There were 25 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 24.403 sec elapsed
## Warning in fgseaMultilevel(...): There were 25 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 22.329 sec elapsed
## Warning in fgseaMultilevel(...): There were 25 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 23.01 sec elapsed
## Warning in fgseaMultilevel(...): There were 27 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 26.257 sec elapsed
## Warning in fgseaMultilevel(...): There were 21 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 25.891 sec elapsed
## Warning in fgseaMultilevel(...): There were 26 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 23.911 sec elapsed
## Warning in fgseaMultilevel(...): There were 14 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
## 25.098 sec elapsed
# Time and peak memory of fgsea with BLAS set to 1 thread and nproc = 1.
blas_set_num_threads(1)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 1)
)
## Warning in fgseaMultilevel(...): There were 22 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
##                                             Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=1)          146.414
##   Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1               -7.2              90.5
# Time and peak memory of fgsea with BLAS set to 8 threads and nproc = 1.
blas_set_num_threads(8)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 1)
)
## Warning in fgseaMultilevel(...): There were 17 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
##                                             Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=1)          152.926
##   Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1                8.3               106
# Time and peak memory of fgsea with BLAS set to 1 thread and nproc = 8.
blas_set_num_threads(1)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 8)
)
## Warning in fgseaMultilevel(...): There were 15 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
##                                             Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=8)            29.22
##   Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1                9.3               106
# Time and peak memory of fgsea with BLAS set to 4 threads and nproc = 4.
blas_set_num_threads(4)
peakRAM(
  fgseaRes <- fgsea(pathways = pw, stats = f, minSize = 10, nproc = 4)
)
## Warning in fgseaMultilevel(...): There were 29 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are
## less than 1e-50. You can set the `eps` argument to zero for better estimation.
##                                             Function_Call Elapsed_Time_sec
## 1 fgseaRes<-fgsea(pathways=pw,stats=f,minSize=10,nproc=4)           43.284
##   Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1                9.2               106
# Reduce the list of toc() results to elapsed seconds per thread count.
# Each element carries the start ("tic") and stop ("toc") clock readings; they
# are subtracted by name as numbers, rather than by column position in a
# character matrix, which round-tripped the values through strings.
fres <- vapply(fres, function(timing) {
  unname(timing[["toc"]] - timing[["tic"]])
}, numeric(1))
names(fres) <- corerange

fres
##       1       2       3       4       5       6       7       8       9      10 
## 147.233  77.812  54.430  46.759  35.989  36.650  32.894  31.994  25.354  24.403 
##      11      12      13      14      15      16 
##  22.329  23.010  26.257  25.891  23.911  25.098
# Bar chart of fgsea elapsed time against the number of parallel threads.
barplot(
  height = fres,
  main = "fgsea",
  xlab = "parallel threads",
  ylab = "elapsed time in s"
)

Session information

# Print the R version, platform, BLAS/LAPACK libraries and package versions
# used for this run.
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] peakRAM_1.0.2       RhpcBLASctl_0.23-42 tictoc_1.2.1       
## [4] fgsea_1.22.0        mitch_1.8.0        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.9          lattice_0.20-45     tidyr_1.3.1        
##  [4] gtools_3.9.5        digest_0.6.29       utf8_1.2.2         
##  [7] mime_0.12           R6_2.5.1            plyr_1.8.9         
## [10] evaluate_0.15       highr_0.11          ggplot2_3.5.1      
## [13] pillar_1.9.0        gplots_3.1.3.1      rlang_1.1.4        
## [16] data.table_1.15.4   jquerylib_0.1.4     Matrix_1.4-1       
## [19] rmarkdown_2.27      BiocParallel_1.30.4 stringr_1.5.1      
## [22] htmlwidgets_1.5.4   munsell_0.5.1       shiny_1.7.2        
## [25] compiler_4.2.1      httpuv_1.6.5        xfun_0.46          
## [28] pkgconfig_2.0.3     ggstats_0.6.0       htmltools_0.5.3    
## [31] tidyselect_1.2.1    tibble_3.2.1        gridExtra_2.3      
## [34] codetools_0.2-18    fansi_1.0.3         dplyr_1.1.4        
## [37] later_1.3.0         MASS_7.3-58.1       bitops_1.0-7       
## [40] grid_4.2.1          jsonlite_1.8.0      xtable_1.8-4       
## [43] GGally_2.2.1        gtable_0.3.5        lifecycle_1.0.4    
## [46] magrittr_2.0.3      scales_1.3.0        KernSmooth_2.23-20 
## [49] cli_3.6.3           stringi_1.7.8       cachem_1.0.6       
## [52] reshape2_1.4.4      promises_1.2.0.1    bslib_0.4.0        
## [55] ellipsis_0.3.2      generics_0.1.3      vctrs_0.6.5        
## [58] fastmatch_1.1-4     RColorBrewer_1.1-3  tools_4.2.1        
## [61] glue_1.6.2          beeswarm_0.4.0      purrr_1.0.2        
## [64] parallel_4.2.1      fastmap_1.1.0       yaml_2.3.5         
## [67] colorspace_2.1-0    caTools_1.18.2      knitr_1.48         
## [70] echarts4r_0.4.5     sass_0.4.2