Source: https://github.com/markziemann/background
library("parallel")
library("edgeR")
## Loading required package: limma
library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library("limma")
library("stringi")
library("kableExtra")
library("fgsea")
library("clusterProfiler")
##
## clusterProfiler v4.12.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
source("simpw_func.R")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
TODO
xxx object slots
expression counts
ground truth up genes
ground truth down genes
ground truth up gene sets
ground truth down gene sets
DE result (DESeq2)
DE genes up observed
DE genes down observed
clusterprofiler_default up gene sets
clusterprofiler_default down gene sets
clusterprofiler bg fix up gene sets
clusterprofiler bg fix down gene sets
clusterprofiler FDR fix up gene sets
clusterprofiler FDR fix down gene sets
clusterprofiler BG and FDR fix up gene sets
clusterprofiler BG and FDR fix down gene sets
fora up gene sets
fora down gene sets
fgsea up gene sets
fgsea down gene sets
# Simulation inputs ----
# Base count data and 200 random gene sets of 30 genes each.
# NOTE(review): countData() and randomGeneSets() are presumably defined in
# simpw_func.R (sourced above) - confirm.
a <- countData()
gsets <- randomGeneSets(a, setsize = 30, nsets = 200)

# Value passed to agg_dge() as SIMS for every parameter combination
# (reported in the SIMS column of the results).
SIMS <- 1000

# Parameter grid ----
# Only VARIANCE takes more than one value, so the grid has one row per
# variance level; the remaining parameters are held constant.
# NOTE(review): judging by the names, FRAC_DE is the fraction of
# differentially expressed genes and FC the fold change - confirm in agg_dge().
FRAC_DE <- 0.05
FC <- 0.5
N_REPS <- 3
DGE_FUNC <- "deseq2"
SUM_COUNT <- 2e7
VARIANCE <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6)

# Naming the arguments gives the grid its column names directly.
mygrid <- expand.grid(
  FRAC_DE = FRAC_DE,
  FC = FC,
  N_REPS = N_REPS,
  DGE_FUNC = DGE_FUNC,
  SUM_COUNT = SUM_COUNT,
  VARIANCE = VARIANCE
)
mygrid
## FRAC_DE FC N_REPS DGE_FUNC SUM_COUNT VARIANCE
## 1 0.05 0.5 3 deseq2 2e+07 0.0
## 2 0.05 0.5 3 deseq2 2e+07 0.1
## 3 0.05 0.5 3 deseq2 2e+07 0.2
## 4 0.05 0.5 3 deseq2 2e+07 0.3
## 5 0.05 0.5 3 deseq2 2e+07 0.4
## 6 0.05 0.5 3 deseq2 2e+07 0.5
## 7 0.05 0.5 3 deseq2 2e+07 0.6
Now run the analysis.
# Run agg_dge() once per row of the parameter grid and stack the results.
# seq_len() is used instead of 1:nrow() so an empty grid yields zero
# iterations rather than iterating over c(1, 0).
res <- lapply(seq_len(nrow(mygrid)), function(i) {
  params <- mygrid[i, ]
  # expand.grid() stores DGE_FUNC as a factor by default; pass it on as a
  # plain string.
  dge_func <- as.character(params$DGE_FUNC)
  # NOTE(review): cores = 32 is hard-coded; adjust to the host machine.
  x <- agg_dge(
    a, params$N_REPS, params$SUM_COUNT, params$VARIANCE, params$FRAC_DE,
    params$FC, SIMS, dge_func, gsets,
    cores = 32
  )
  # agg_dge() returns a list of pieces that are row-bound into one table.
  as.data.frame(do.call(rbind, x))
})

# Combine the per-row tables and cache them on disk.
res <- do.call(rbind, res)
saveRDS(res, file = "res.Rds")
Now show the results.
# Render the complete results table.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
res %>%
  kbl(caption = "simulation_results") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.922 | 0.280 | 18377.92 | 0.078 | 0.9725544 | 0.9922 | 0.9822790 | clusterProfiler default |
3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.949 | 0.496 | 18377.71 | 0.051 | 0.9525132 | 0.9949 | 0.9732453 | clusterProfiler BG fix |
3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.784 | 0.013 | 18378.19 | 0.216 | 0.9986731 | 0.9784 | 0.9884326 | clusterProfiler FDR fix |
3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.912 | 0.352 | 18377.85 | 0.088 | 0.9657054 | 0.9912 | 0.9782866 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.912 | 0.352 | 18377.85 | 0.088 | 0.9657054 | 0.9912 | 0.9782866 | fora |
3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.981 | 0.700 | 18377.50 | 0.019 | 0.9344631 | 0.9981 | 0.9652338 | fgsea |
3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.783 | 0.279 | 18371.67 | 0.217 | 0.9722719 | 0.9783 | 0.9752766 | clusterProfiler default |
3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.832 | 0.453 | 18371.50 | 0.168 | 0.9559553 | 0.9832 | 0.9693862 | clusterProfiler BG fix |
3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.442 | 0.024 | 18371.93 | 0.558 | 0.9974646 | 0.9442 | 0.9701017 | clusterProfiler FDR fix |
3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.739 | 0.296 | 18371.66 | 0.261 | 0.9705032 | 0.9739 | 0.9721987 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.739 | 0.296 | 18371.66 | 0.261 | 0.9705032 | 0.9739 | 0.9721987 | fora |
3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.981 | 0.557 | 18371.40 | 0.019 | 0.9471437 | 0.9981 | 0.9719544 | fgsea |
3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.642 | 0.395 | 18349.43 | 1.358 | 0.9562908 | 0.8642 | 0.9079162 | clusterProfiler default |
3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.812 | 0.485 | 18349.34 | 1.188 | 0.9478326 | 0.8812 | 0.9133026 | clusterProfiler BG fix |
3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 7.353 | 0.027 | 18349.80 | 2.647 | 0.9963415 | 0.7353 | 0.8461450 | clusterProfiler FDR fix |
3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.089 | 0.259 | 18349.56 | 1.911 | 0.9689746 | 0.8089 | 0.8817310 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.089 | 0.259 | 18349.56 | 1.911 | 0.9689746 | 0.8089 | 0.8817310 | fora |
3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 9.858 | 0.531 | 18349.29 | 0.142 | 0.9488882 | 0.9858 | 0.9669920 | fgsea |
3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 5.263 | 0.349 | 18312.24 | 4.737 | 0.9378118 | 0.5263 | 0.6742250 | clusterProfiler default |
3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 5.656 | 0.429 | 18312.16 | 4.344 | 0.9294988 | 0.5656 | 0.7032639 | clusterProfiler BG fix |
3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 3.367 | 0.026 | 18312.57 | 6.633 | 0.9923372 | 0.3367 | 0.5028000 | clusterProfiler FDR fix |
3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 3.685 | 0.087 | 18312.51 | 6.315 | 0.9769353 | 0.3685 | 0.5351438 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 3.685 | 0.087 | 18312.51 | 6.315 | 0.9769353 | 0.3685 | 0.5351438 | fora |
3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 9.169 | 0.575 | 18312.02 | 0.831 | 0.9409893 | 0.9169 | 0.9287885 | fgsea |
3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 2.146 | 0.276 | 18258.59 | 7.854 | 0.8860446 | 0.2146 | 0.3455160 | clusterProfiler default |
3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 2.560 | 0.387 | 18258.47 | 7.440 | 0.8686800 | 0.2560 | 0.3954584 | clusterProfiler BG fix |
3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 1.032 | 0.017 | 18258.85 | 8.968 | 0.9837941 | 0.1032 | 0.1868042 | clusterProfiler FDR fix |
3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 1.100 | 0.025 | 18258.84 | 8.900 | 0.9777778 | 0.1100 | 0.1977528 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 1.100 | 0.025 | 18258.84 | 8.900 | 0.9777778 | 0.1100 | 0.1977528 | fora |
3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 7.162 | 0.421 | 18258.44 | 2.838 | 0.9444811 | 0.7162 | 0.8146505 | fgsea |
3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.624 | 0.256 | 18198.76 | 9.376 | 0.7090909 | 0.0624 | 0.1147059 | clusterProfiler default |
3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.758 | 0.347 | 18198.67 | 9.242 | 0.6859729 | 0.0758 | 0.1365151 | clusterProfiler BG fix |
3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.231 | 0.015 | 18199.00 | 9.769 | 0.9390244 | 0.0231 | 0.0450908 | clusterProfiler FDR fix |
3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.242 | 0.018 | 18199.00 | 9.758 | 0.9307692 | 0.0242 | 0.0471735 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.242 | 0.018 | 18199.00 | 9.758 | 0.9307692 | 0.0242 | 0.0471735 | fora |
3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 4.391 | 0.281 | 18198.73 | 5.609 | 0.9398545 | 0.4391 | 0.5985551 | fgsea |
3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.316 | 0.255 | 18105.75 | 9.684 | 0.5534151 | 0.0316 | 0.0597862 | clusterProfiler default |
3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.369 | 0.323 | 18105.68 | 9.631 | 0.5332370 | 0.0369 | 0.0690236 | clusterProfiler BG fix |
3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.104 | 0.031 | 18105.97 | 9.896 | 0.7703704 | 0.0104 | 0.0205229 | clusterProfiler FDR fix |
3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.105 | 0.032 | 18105.97 | 9.895 | 0.7664234 | 0.0105 | 0.0207162 | clusterProfiler BG and FDR fix |
3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.105 | 0.032 | 18105.97 | 9.895 | 0.7664234 | 0.0105 | 0.0207162 | fora |
3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 2.145 | 0.180 | 18105.82 | 7.855 | 0.9225806 | 0.2145 | 0.3480730 | fgsea |
fig1_ORAjac.png
# Split the results into one table per enrichment method.
# which() drops rows where the comparison is NA, as subset() does, and the
# original row names are kept.
rows_for_method <- function(method) {
  res[which(res$PWAY_FUNC == method), , drop = FALSE]
}

cp <- rows_for_method("clusterProfiler default")
cpbg <- rows_for_method("clusterProfiler BG fix")
cpfdr <- rows_for_method("clusterProfiler FDR fix")
cpbgfdr <- rows_for_method("clusterProfiler BG and FDR fix")
fo <- rows_for_method("fora")
fg <- rows_for_method("fgsea")

# Table for the clusterProfiler default rows.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
cp %>%
  kbl(caption = "clusterProfiler") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.922 | 0.280 | 18377.92 | 0.078 | 0.9725544 | 0.9922 | 0.9822790 | clusterProfiler default |
7 | 3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.783 | 0.279 | 18371.67 | 0.217 | 0.9722719 | 0.9783 | 0.9752766 | clusterProfiler default |
13 | 3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.642 | 0.395 | 18349.43 | 1.358 | 0.9562908 | 0.8642 | 0.9079162 | clusterProfiler default |
19 | 3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 5.263 | 0.349 | 18312.24 | 4.737 | 0.9378118 | 0.5263 | 0.6742250 | clusterProfiler default |
25 | 3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 2.146 | 0.276 | 18258.59 | 7.854 | 0.8860446 | 0.2146 | 0.3455160 | clusterProfiler default |
31 | 3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.624 | 0.256 | 18198.76 | 9.376 | 0.7090909 | 0.0624 | 0.1147059 | clusterProfiler default |
37 | 3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.316 | 0.255 | 18105.75 | 9.684 | 0.5534151 | 0.0316 | 0.0597862 | clusterProfiler default |
# Table for the "clusterProfiler BG fix" rows.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
cpbg %>%
  kbl(caption = "clusterProfiler with background bug fix") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.949 | 0.496 | 18377.71 | 0.051 | 0.9525132 | 0.9949 | 0.9732453 | clusterProfiler BG fix |
8 | 3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.832 | 0.453 | 18371.50 | 0.168 | 0.9559553 | 0.9832 | 0.9693862 | clusterProfiler BG fix |
14 | 3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.812 | 0.485 | 18349.34 | 1.188 | 0.9478326 | 0.8812 | 0.9133026 | clusterProfiler BG fix |
20 | 3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 5.656 | 0.429 | 18312.16 | 4.344 | 0.9294988 | 0.5656 | 0.7032639 | clusterProfiler BG fix |
26 | 3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 2.560 | 0.387 | 18258.47 | 7.440 | 0.8686800 | 0.2560 | 0.3954584 | clusterProfiler BG fix |
32 | 3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.758 | 0.347 | 18198.67 | 9.242 | 0.6859729 | 0.0758 | 0.1365151 | clusterProfiler BG fix |
38 | 3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.369 | 0.323 | 18105.68 | 9.631 | 0.5332370 | 0.0369 | 0.0690236 | clusterProfiler BG fix |
# Table for the "clusterProfiler FDR fix" rows.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
cpfdr %>%
  kbl(caption = "clusterProfiler with FDR bug fix") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.784 | 0.013 | 18378.19 | 0.216 | 0.9986731 | 0.9784 | 0.9884326 | clusterProfiler FDR fix |
9 | 3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.442 | 0.024 | 18371.93 | 0.558 | 0.9974646 | 0.9442 | 0.9701017 | clusterProfiler FDR fix |
15 | 3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 7.353 | 0.027 | 18349.80 | 2.647 | 0.9963415 | 0.7353 | 0.8461450 | clusterProfiler FDR fix |
21 | 3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 3.367 | 0.026 | 18312.57 | 6.633 | 0.9923372 | 0.3367 | 0.5028000 | clusterProfiler FDR fix |
27 | 3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 1.032 | 0.017 | 18258.85 | 8.968 | 0.9837941 | 0.1032 | 0.1868042 | clusterProfiler FDR fix |
33 | 3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.231 | 0.015 | 18199.00 | 9.769 | 0.9390244 | 0.0231 | 0.0450908 | clusterProfiler FDR fix |
39 | 3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.104 | 0.031 | 18105.97 | 9.896 | 0.7703704 | 0.0104 | 0.0205229 | clusterProfiler FDR fix |
# Table for the "clusterProfiler BG and FDR fix" rows.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
cpbgfdr %>%
  kbl(caption = "clusterProfiler with background and FDR fixes") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4 | 3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.912 | 0.352 | 18377.85 | 0.088 | 0.9657054 | 0.9912 | 0.9782866 | clusterProfiler BG and FDR fix |
10 | 3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.739 | 0.296 | 18371.66 | 0.261 | 0.9705032 | 0.9739 | 0.9721987 | clusterProfiler BG and FDR fix |
16 | 3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.089 | 0.259 | 18349.56 | 1.911 | 0.9689746 | 0.8089 | 0.8817310 | clusterProfiler BG and FDR fix |
22 | 3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 3.685 | 0.087 | 18312.51 | 6.315 | 0.9769353 | 0.3685 | 0.5351438 | clusterProfiler BG and FDR fix |
28 | 3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 1.100 | 0.025 | 18258.84 | 8.900 | 0.9777778 | 0.1100 | 0.1977528 | clusterProfiler BG and FDR fix |
34 | 3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.242 | 0.018 | 18199.00 | 9.758 | 0.9307692 | 0.0242 | 0.0471735 | clusterProfiler BG and FDR fix |
40 | 3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.105 | 0.032 | 18105.97 | 9.895 | 0.7664234 | 0.0105 | 0.0207162 | clusterProfiler BG and FDR fix |
# Table for the "fora" rows.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
fo %>%
  kbl(caption = "fora") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | 3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.912 | 0.352 | 18377.85 | 0.088 | 0.9657054 | 0.9912 | 0.9782866 | fora |
11 | 3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.739 | 0.296 | 18371.66 | 0.261 | 0.9705032 | 0.9739 | 0.9721987 | fora |
17 | 3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 8.089 | 0.259 | 18349.56 | 1.911 | 0.9689746 | 0.8089 | 0.8817310 | fora |
23 | 3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 3.685 | 0.087 | 18312.51 | 6.315 | 0.9769353 | 0.3685 | 0.5351438 | fora |
29 | 3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 1.100 | 0.025 | 18258.84 | 8.900 | 0.9777778 | 0.1100 | 0.1977528 | fora |
35 | 3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 0.242 | 0.018 | 18199.00 | 9.758 | 0.9307692 | 0.0242 | 0.0471735 | fora |
41 | 3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 0.105 | 0.032 | 18105.97 | 9.895 | 0.7664234 | 0.0105 | 0.0207162 | fora |
# Table for the "fgsea" rows.
# The caption names the tool ("fgsea"), matching the other per-method tables,
# rather than the local variable name.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
fg %>%
  kbl(caption = "fgsea") %>%
  kable_paper("hover", full_width = FALSE)
N_REPS | SUM_COUNT | VARIANCE | FRAC_DE | FC | SIMS | DGE_FUNC | true_pos | false_pos | true_neg | false_neg | p | r | f | PWAY_FUNC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6 | 3 | 2e+07 | 0.0 | 0.05 | 0.5 | 1000 | deseq2 | 9.981 | 0.700 | 18377.50 | 0.019 | 0.9344631 | 0.9981 | 0.9652338 | fgsea |
12 | 3 | 2e+07 | 0.1 | 0.05 | 0.5 | 1000 | deseq2 | 9.981 | 0.557 | 18371.40 | 0.019 | 0.9471437 | 0.9981 | 0.9719544 | fgsea |
18 | 3 | 2e+07 | 0.2 | 0.05 | 0.5 | 1000 | deseq2 | 9.858 | 0.531 | 18349.29 | 0.142 | 0.9488882 | 0.9858 | 0.9669920 | fgsea |
24 | 3 | 2e+07 | 0.3 | 0.05 | 0.5 | 1000 | deseq2 | 9.169 | 0.575 | 18312.02 | 0.831 | 0.9409893 | 0.9169 | 0.9287885 | fgsea |
30 | 3 | 2e+07 | 0.4 | 0.05 | 0.5 | 1000 | deseq2 | 7.162 | 0.421 | 18258.44 | 2.838 | 0.9444811 | 0.7162 | 0.8146505 | fgsea |
36 | 3 | 2e+07 | 0.5 | 0.05 | 0.5 | 1000 | deseq2 | 4.391 | 0.281 | 18198.73 | 5.609 | 0.9398545 | 0.4391 | 0.5985551 | fgsea |
42 | 3 | 2e+07 | 0.6 | 0.05 | 0.5 | 1000 | deseq2 | 2.145 | 0.180 | 18105.82 | 7.855 | 0.9225806 | 0.2145 | 0.3480730 | fgsea |
# Line plots of precision, recall and F1 against the added variance, with one
# line per enrichment method. The same three panels are drawn twice: once on
# the current device and once into fig3_sim.png.

# Per-method result tables in legend order, and the colour used for each.
sim_series <- list(
  "CP default" = cp,
  "CP BG fix" = cpbg,
  "CP FDR fix" = cpfdr,
  "CP BG and FDR fix" = cpbgfdr,
  "fora" = fo,
  "fgsea" = fg
)
sim_cols <- c("black", "orange", "darkgreen", "purple", "red", "blue")

# Draw the three metric panels on the active graphics device.
#
# series: named list of data frames, each with columns VARIANCE, p, r and f;
#         the names are used as legend labels.
# cols:   one colour per element of `series`, in the same order.
#
# The caller sets par(mfrow = , mar = ); this function only draws.
draw_sim_panels <- function(series, cols) {
  # Column name -> panel title.
  panels <- c(p = "precision", r = "recall", f = "f1")
  for (metric in names(panels)) {
    # The first series opens the panel and fixes the axes.
    plot(
      series[[1]]$VARIANCE, series[[1]][[metric]],
      ylim = c(0, 1), type = "b", pch = 19, col = cols[1],
      xlab = "variance added", ylab = "index", main = panels[[metric]]
    )
    # The remaining series are overlaid on the same axes.
    for (k in seq_along(series)[-1]) {
      points(
        series[[k]]$VARIANCE, series[[k]][[metric]],
        type = "b", pch = 19, col = cols[k]
      )
    }
    grid()
    # The legend is drawn once, on the precision panel.
    if (metric == "p") {
      legend(
        "bottomleft",
        inset = .02, title = "tool", legend = names(series),
        col = cols, horiz = FALSE, cex = 1.0, pch = 19, lwd = 2
      )
    }
  }
}

# On-screen version.
par(mfrow = c(1, 3))
par(mar = c(5.1, 5.1, 2.1, 2.1))
draw_sim_panels(sim_series, sim_cols)

# PNG version; par() is set again because it applies per device.
png("fig3_sim.png", width = 7, height = 5, units = "in", res = 150, pointsize = 12)
par(mar = c(5.1, 5.1, 2.1, 2.1))
par(mfrow = c(1, 3))
draw_sim_panels(sim_series, sim_cols)
dev.off()
## png
## 2
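A barplot is simpler: it shows each method's precision and recall averaged across all variance levels, together with the F1 score computed from those two averages.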
# Bar charts of precision, recall and F1 per enrichment method, averaged over
# all variance levels. Drawn once on the current device and once into
# fig3_bars.png.

# Per-method result tables; the names become the bar labels.
bar_series <- list(
  "CP default" = cp,
  "CP BG fix" = cpbg,
  "CP FDR fix" = cpfdr,
  "CP BG and FDR fix" = cpbgfdr,
  "fora" = fo,
  "fgsea" = fg
)

# Mean precision and recall per method (named numeric vectors).
prec <- vapply(bar_series, function(d) mean(d$p), numeric(1))
rec <- vapply(bar_series, function(d) mean(d$r), numeric(1))

# F1 is the harmonic mean of the averaged precision and recall; it is not the
# mean of the per-row f column. The arithmetic is vectorised and keeps the
# method names.
f1 <- 2 / (1 / rec + 1 / prec)

# Draw the three bar charts on the active graphics device, with each value
# printed just above its bar.
#
# prec, rec, f1: named numeric vectors with one value per method.
#
# The caller sets par(mfrow = , mar = ); this function only draws.
draw_metric_bars <- function(prec, rec, f1) {
  panels <- list(precision = prec, recall = rec, F1 = f1)
  for (title in names(panels)) {
    vals <- panels[[title]]
    # barplot() returns the bar midpoints, so the labels are centred on the
    # bars for any number of methods.
    mids <- barplot(vals, ylim = c(0, 1.1), las = 2, main = title, ylab = "")
    text(mids, vals + 0.02, labels = signif(vals, 3))
  }
}

# On-screen version; the wide bottom margin leaves room for the rotated labels.
par(mfrow = c(1, 3))
par(mar = c(9.1, 3.5, 2.1, 1.1))
draw_metric_bars(prec, rec, f1)

# PNG version; par() is set again because it applies per device.
png("fig3_bars.png", width = 7, height = 5, units = "in", res = 150, pointsize = 12)
par(mar = c(9.1, 3.5, 2.1, 1.1))
par(mfrow = c(1, 3))
draw_metric_bars(prec, rec, f1)
dev.off()
## png
## 2
# Print the R version, platform and attached/loaded package versions for
# this run.
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] mitch_1.16.0 clusterProfiler_4.12.0
## [3] fgsea_1.30.0 kableExtra_1.4.0
## [5] stringi_1.8.4 DESeq2_1.44.0
## [7] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [9] MatrixGenerics_1.16.0 matrixStats_1.3.0
## [11] GenomicRanges_1.56.0 GenomeInfoDb_1.40.1
## [13] IRanges_2.38.0 S4Vectors_0.42.0
## [15] BiocGenerics_0.50.0 edgeR_4.2.0
## [17] limma_3.60.2
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8
## [4] magrittr_2.0.3 farver_2.1.2 rmarkdown_2.26
## [7] fs_1.6.4 zlibbioc_1.50.0 vctrs_0.6.5
## [10] memoise_2.0.1 ggtree_3.12.0 htmltools_0.5.8.1
## [13] S4Arrays_1.4.1 SparseArray_1.4.8 gridGraphics_0.5-1
## [16] sass_0.4.9 KernSmooth_2.23-22 bslib_0.7.0
## [19] htmlwidgets_1.6.4 echarts4r_0.4.5 plyr_1.8.9
## [22] cachem_1.0.8 igraph_2.0.3 mime_0.12
## [25] lifecycle_1.0.4 pkgconfig_2.0.3 gson_0.1.0
## [28] Matrix_1.7-0 R6_2.5.1 fastmap_1.1.1
## [31] shiny_1.8.1.1 GenomeInfoDbData_1.2.12 digest_0.6.35
## [34] aplot_0.2.2 enrichplot_1.24.0 GGally_2.2.1
## [37] colorspace_2.1-0 patchwork_1.2.0 AnnotationDbi_1.66.0
## [40] RSQLite_2.3.7 fansi_1.0.6 httr_1.4.7
## [43] polyclip_1.10-6 abind_1.4-5 compiler_4.4.0
## [46] bit64_4.0.5 withr_3.0.0 BiocParallel_1.38.0
## [49] viridis_0.6.5 DBI_1.2.3 ggstats_0.6.0
## [52] highr_0.10 gplots_3.1.3.1 ggforce_0.4.2
## [55] MASS_7.3-60.2 DelayedArray_0.30.1 HDO.db_0.99.1
## [58] caTools_1.18.2 gtools_3.9.5 tools_4.4.0
## [61] beeswarm_0.4.0 scatterpie_0.2.3 ape_5.8
## [64] httpuv_1.6.15 glue_1.7.0 promises_1.3.0
## [67] nlme_3.1-164 GOSemSim_2.30.0 shadowtext_0.1.3
## [70] grid_4.4.0 reshape2_1.4.4 generics_0.1.3
## [73] gtable_0.3.5 tidyr_1.3.1 data.table_1.15.4
## [76] tidygraph_1.3.1 xml2_1.3.6 utf8_1.2.4
## [79] XVector_0.44.0 ggrepel_0.9.5 pillar_1.9.0
## [82] stringr_1.5.1 yulab.utils_0.1.4 later_1.3.2
## [85] splines_4.4.0 dplyr_1.1.4 tweenr_2.0.3
## [88] treeio_1.28.0 lattice_0.22-6 bit_4.0.5
## [91] tidyselect_1.2.1 GO.db_3.19.1 locfit_1.5-9.9
## [94] Biostrings_2.72.1 knitr_1.46 gridExtra_2.3
## [97] svglite_2.1.3 xfun_0.43 graphlayouts_1.1.1
## [100] statmod_1.5.0 UCSC.utils_1.0.0 lazyeval_0.2.2
## [103] ggfun_0.1.5 yaml_2.3.8 evaluate_0.23
## [106] codetools_0.2-20 ggraph_2.2.1 tibble_3.2.1
## [109] qvalue_2.36.0 ggplotify_0.1.2 cli_3.6.2
## [112] xtable_1.8-4 systemfonts_1.0.6 munsell_0.5.1
## [115] jquerylib_0.1.4 Rcpp_1.0.12 png_0.1-8
## [118] ggplot2_3.5.1 blob_1.2.4 DOSE_3.30.1
## [121] bitops_1.0-7 viridisLite_0.4.2 tidytree_0.4.6
## [124] scales_1.3.0 purrr_1.0.2 crayon_1.5.2
## [127] rlang_1.1.3 cowplot_1.1.3 fastmatch_1.1-4
## [130] KEGGREST_1.44.0