Source: TBA


How many parallel threads should be used for pathway enrichment analysis?

Test machine: AMD Ryzen Threadripper 1900X 8-Core Processor (16 parallel threads).

Get gene expression data


# Read the saved gene expression results object from disk.
de <- readRDS(file = "de.Rds")
##                          baseMean log2FoldChange      lfcSE      stat
## ENSG00000165949 IFI27   1960.1970      -3.384492 0.09388689 -36.04861
## ENSG00000090382 LYZ     7596.0299      -1.650342 0.05611430 -29.41036
## ENSG00000115461 IGFBP5   531.2217      -5.071157 0.17952391 -28.24781
## ENSG00000157601 MX1      827.1511      -2.877795 0.10478234 -27.46450
## ENSG00000111331 OAS3    2127.2010      -2.661214 0.09721242 -27.37525
## ENSG00000070915 SLC12A3  424.5509      -3.374852 0.12986708 -25.98697
##                                pvalue          padj SRR1171523 SRR1171524
## ENSG00000165949 IFI27   1.450013e-284 1.909377e-280   12.05759   12.12946
## ENSG00000090382 LYZ     4.048160e-190 2.665308e-186   13.52939   13.52615
## ENSG00000115461 IGFBP5  1.514307e-175 6.646797e-172   10.60714   10.46316
## ENSG00000157601 MX1     4.663288e-166 1.535154e-162   10.88831   11.08737
## ENSG00000111331 OAS3    5.406541e-165 1.423867e-161   11.92053   12.26289
## ENSG00000070915 SLC12A3 6.951548e-149 1.525633e-145   10.35061   10.33824
##                         SRR1171525 SRR1171526 SRR1171527 SRR1171528
## ENSG00000165949 IFI27     11.82385   9.646471   9.705799   9.623453
## ENSG00000090382 LYZ       13.62313  12.080100  12.012891  12.031277
## ENSG00000115461 IGFBP5    10.69892   8.568916   8.566744   8.693134
## ENSG00000157601 MX1       10.86873   9.322793   9.356473   9.267699
## ENSG00000111331 OAS3      11.91655  10.108651  10.070989  10.012229
## ENSG00000070915 SLC12A3   10.26395   8.844934   8.904787   8.871748

Get pathways


# Import the gene set collection from a GMT file; `pw` is later passed as the
# `genesets` / `pathways` argument of the enrichment calls.
pw <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")


# Build the gene table handed to mitch_import(): column 1 holds the row names
# of `de`, column `g` holds the second space-separated field of each row name
# (the gene symbol in row names such as "ENSG00000165949 IFI27").
gt <- data.frame(rownames(de))
# vapply() always yields a character vector; sapply() would silently return a
# list if `de` had no rows. fixed = TRUE splits on a literal space.
gt$g <- vapply(strsplit(gt[, 1], " ", fixed = TRUE), "[[", character(1), 2)

# Import the table into mitch, declaring it as DESeq2 output and supplying the
# gene table built above.
m <- mitch_import(x = de, DEtype = "deseq2", geneTable = gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 13168
## Note: no. genes in output = 13164
## Note: estimated proportion of input genes in output = 1
# Thread counts to benchmark.
corerange <- 1:16

# Run mitch_calc() once per thread count and record the wall-clock start and
# end of each run. Every element is list(tic = , toc = ), so the list can be
# stacked into a two-column matrix and the elapsed time taken as
# column 2 - column 1. The enrichment result itself is discarded: only the
# timing is of interest here.
mres <- lapply(corerange, function(cores) {
  tic <- proc.time()[["elapsed"]]
  mitch_calc(x = m, genesets = pw, cores = cores)
  toc <- proc.time()[["elapsed"]]
  message(sprintf("%.3f sec elapsed", toc - tic))
  list(tic = tic, toc = toc)
})
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 46.789 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 27.022 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 22.123 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 19.781 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 18.546 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.706 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 16.227 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 15.224 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 15.191 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.105 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 15.127 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 16.851 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.302 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 14.866 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 15.508 sec elapsed
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
## 16.329 sec elapsed
# Measure elapsed time and peak RAM of one mitch_calc() run with cores=1.
# The result is kept in `mxres`; peakRAM reports the call text verbatim.
peakRAM(mxres <- mitch_calc(x=m,genesets=pw,cores=1))
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
##                                Function_Call Elapsed_Time_sec
## 1 mxres<-mitch_calc(x=m,genesets=pw,cores=1)            43.48
##   Total_RAM_Used_MiB Peak_RAM_Used_MiB
## 1                  1             122.7
# Flatten each per-run record and stack them into a matrix with one row per
# thread count.
mres <- do.call(rbind, lapply(mres, unlist))
# Elapsed seconds per run: second column minus first column.
# NOTE(review): assumes column 1 is the start time and column 2 the end time.
# as.numeric() guards against the matrix having been coerced to character.
mres <- as.numeric(mres[, 2]) - as.numeric(mres[, 1])
# Label each timing with its thread count.
names(mres) <- corerange

##      1      2      3      4      5      6      7      8      9     10     11 
## 46.789 27.022 22.123 19.781 18.546 14.706 16.227 15.224 15.191 14.105 15.127 
##     12     13     14     15     16 
## 16.851 14.302 14.866 15.508 16.329
# Bar chart of elapsed time per thread count for the mitch benchmark.
barplot(
  height = mres,
  main = "mitch",
  xlab = "parallel threads",
  ylab = "elapsed time in s"
)


# Gene-level statistic vector passed to fgsea as `stats`: values come from the
# first column of `m`, names from the row names of `m`.
f <- setNames(as.vector(m[, 1]), rownames(m))

# Thread counts to benchmark.
corerange <- 1:16

# Run fgsea once per worker count and record the wall-clock start and end of
# each run as list(tic = , toc = ), in the same shape as the mitch benchmark.
# `nproc` passes the worker count to fgsea; without it `cores` was unused.
fres <- lapply(corerange, function(cores) {
  tic <- proc.time()[["elapsed"]]
  fgsea(pathways = pw,
        stats    = f,
        minSize  = 10,
        nproc    = cores)
  toc <- proc.time()[["elapsed"]]
  message(sprintf("%.3f sec elapsed", toc - tic))
  list(tic = tic, toc = toc)
})
# Measure elapsed time and peak RAM of one fgsea run. nproc = 1 mirrors the
# single-core mitch measurement (cores=1) so the two figures are comparable.
peakRAM(fgseaRes <- fgsea(pathways = pw,
                  stats    = f,
                  minSize  = 10,
                  nproc    = 1))
Session information

