Testing whether ORA biases towards gene sets based on their basemean expression levels.
library("fgsea")
library("kableExtra")
library("beeswarm")
library("eulerr")
library("tictoc")
library("parallel")
RhpcBLASctl::blas_set_num_threads(1)
Read data and make a smear plot.
x <- readRDS("dataprep/bulkrna1.Rds")
x$rank <- rank(x$stat)
head(x)
## baseMean log2FoldChange lfcSE stat
## ENSG00000000003 TSPAN6 3754.3819 -0.10925392 0.1417469 -0.7707674
## ENSG00000000419 DPM1 1718.6738 -0.06109763 0.1477350 -0.4135622
## ENSG00000000457 SCYL3 260.6222 0.28894664 0.2166482 1.3337138
## ENSG00000000460 FIRRM 327.1207 0.57058478 0.2019567 2.8252821
## ENSG00000001036 FUCA2 6164.1606 -0.47524943 0.1136307 -4.1824048
## ENSG00000001084 GCLC 4296.9247 -0.68760144 0.1235878 -5.5636659
## pvalue padj rank
## ENSG00000000003 TSPAN6 4.408448e-01 6.243711e-01 5302
## ENSG00000000419 DPM1 6.791948e-01 8.109114e-01 6296
## ENSG00000000457 SCYL3 1.822977e-01 3.508604e-01 11697
## ENSG00000000460 FIRRM 4.723901e-03 2.518387e-02 14336
## ENSG00000001036 FUCA2 2.884418e-05 4.256904e-04 649
## ENSG00000001084 GCLC 2.641654e-08 1.092526e-06 247
dim(x)
## [1] 15520 7
plot(log10(x$baseMean),x$log2FoldChange,pch=19,cex=0.5,
xlab="log10(baseMean)",ylab="log2(foldchange)",
ylim=c(-8,8))
sig <- subset(x,padj<0.01)
points(log10(sig$baseMean),sig$log2FoldChange,pch=19,cex=0.5,col="red")
plot(x$log2FoldChange,-log10(x$pvalue), pch=19,cex=0.5,
xlab="log2(foldchange)",ylab="-log10(p-value)")
sig <- subset(x,padj<0.01)
points(sig$log2FoldChange,-log10(sig$pvalue),pch=19,cex=0.5,col="red")
Break the data into deciles based on basemean expression.
xs <- x[order(-x$baseMean),]
NCHUNKS = 10
CHUNKSIZE <- round(nrow(xs)/NCHUNKS)
START = 1
xl <- as.list(1:10)
for ( i in 1:NCHUNKS ) {
START <- ( ( i - 1 ) * CHUNKSIZE ) + 1
END <- START + CHUNKSIZE - 1
if ( END > nrow(xs) ) { END <- nrow(xs) }
xl[[i]] <- rownames(xs[START:END,])
}
names(xl) <- paste("level",1:10)
str(xl)
## List of 10
## $ level 1 : chr [1:1552] "ENSG00000156508 EEF1A1" "ENSG00000163631 ALB" "ENSG00000198804 MT-CO1" "ENSG00000115414 FN1" ...
## $ level 2 : chr [1:1552] "ENSG00000182985 CADM1" "ENSG00000104824 HNRNPL" "ENSG00000282947 HNRNPL" "ENSG00000113387 SUB1" ...
## $ level 3 : chr [1:1552] "ENSG00000172845 SP3" "ENSG00000197892 KIF13B" "ENSG00000214706 IFRD2" "ENSG00000243649 CFB" ...
## $ level 4 : chr [1:1552] "ENSG00000112701 SENP6" "ENSG00000119979 DENND10" "ENSG00000100099 HPS4" "ENSG00000011451 WIZ" ...
## $ level 5 : chr [1:1552] "ENSG00000163297 ANTXR2" "ENSG00000132793 LPIN3" "ENSG00000131979 GCH1" "ENSG00000205302 SNX2" ...
## $ level 6 : chr [1:1552] "ENSG00000104518 GSDMD" "ENSG00000278718 GSDMD" "ENSG00000114735 HEMK1" "ENSG00000111653 ING4" ...
## $ level 7 : chr [1:1552] "ENSG00000172728 FUT10" "ENSG00000163607 GTPBP8" "ENSG00000237441 RGL2" "ENSG00000071539 TRIP13" ...
## $ level 8 : chr [1:1552] "ENSG00000122870 BICC1" "ENSG00000180257 ZNF816" "ENSG00000103494 RPGRIP1L" "ENSG00000166086 JAM3" ...
## $ level 9 : chr [1:1552] "ENSG00000180592 SKIDA1" "ENSG00000119917 IFIT3" "ENSG00000182389 CACNB4" "ENSG00000167619 TMEM145" ...
## $ level 10: chr [1:1552] "ENSG00000150627 WDR17" "ENSG00000004809 SLC22A16" "ENSG00000168280 KIF5C" "ENSG00000154133 ROBO4" ...
ORA p-value only
xp <- x[order(x$pvalue),]
up <- rownames(head(subset(xp,log2FoldChange>0),800))
dn <- rownames(head(subset(xp,log2FoldChange<0),800))
bg <- rownames(xp)
ora_up <- fora(pathways=xl, genes=up, universe=bg, minSize = 5, maxSize = Inf)
ora_dn <- fora(pathways=xl, genes=dn, universe=bg, minSize = 5, maxSize = Inf)
ora_up[,1:6] %>%
kbl(caption="ORA of upregulated DEGs based on p-value against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 7 | 0.0000000 | 0.0000000 | 1.7875 | 143 | 1552 |
| level 6 | 0.0000221 | 0.0001107 | 1.4500 | 116 | 1552 |
| level 5 | 0.0002043 | 0.0006811 | 1.3875 | 111 | 1552 |
| level 8 | 0.0029494 | 0.0073735 | 1.3000 | 104 | 1552 |
| level 1 | 0.1519479 | 0.3038959 | 1.1125 | 89 | 1552 |
| level 4 | 0.4701361 | 0.7835602 | 1.0125 | 81 | 1552 |
| level 3 | 0.9631128 | 1.0000000 | 0.8250 | 66 | 1552 |
| level 2 | 0.9999546 | 1.0000000 | 0.6250 | 50 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.3875 | 31 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.1125 | 9 | 1552 |
ora_dn[,1:6] %>%
kbl(caption="ORA of downregulated DEGs based on p-value against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | 2.6375 | 211 | 1552 |
| level 2 | 0.0000000 | 0.0000000 | 1.8875 | 151 | 1552 |
| level 3 | 0.0000001 | 0.0000004 | 1.5750 | 126 | 1552 |
| level 4 | 0.0420603 | 0.1051508 | 1.1875 | 95 | 1552 |
| level 5 | 0.8173203 | 1.0000000 | 0.9125 | 73 | 1552 |
| level 6 | 0.9851624 | 1.0000000 | 0.7875 | 63 | 1552 |
| level 7 | 1.0000000 | 1.0000000 | 0.4250 | 34 | 1552 |
| level 8 | 1.0000000 | 1.0000000 | 0.4125 | 33 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.1625 | 13 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.0125 | 1 | 1552 |
ORA p-value and foldchange threshold
up <- rownames(head(subset(xp,log2FoldChange>0.5),800))
dn <- rownames(head(subset(xp,log2FoldChange < -0.5),800))
ora2_up <- fora(pathways=xl, genes=up, universe=bg, minSize = 5, maxSize = Inf)
ora2_dn <- fora(pathways=xl, genes=dn, universe=bg, minSize = 5, maxSize = Inf)
ora2_up[,1:6] %>%
kbl(caption="ORA of upregulated DEGs based on p-value and logfoldchange threshold against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 7 | 0.0000000 | 0.0000000 | 1.9375 | 155 | 1552 |
| level 6 | 0.0000000 | 0.0000000 | 1.6500 | 132 | 1552 |
| level 5 | 0.0000221 | 0.0000738 | 1.4500 | 116 | 1552 |
| level 8 | 0.0004620 | 0.0011550 | 1.3625 | 109 | 1552 |
| level 4 | 0.7034202 | 1.0000000 | 0.9500 | 76 | 1552 |
| level 1 | 0.9515757 | 1.0000000 | 0.8375 | 67 | 1552 |
| level 3 | 0.9993841 | 1.0000000 | 0.6875 | 55 | 1552 |
| level 9 | 0.9999999 | 1.0000000 | 0.5125 | 41 | 1552 |
| level 2 | 1.0000000 | 1.0000000 | 0.5000 | 40 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.1125 | 9 | 1552 |
ora2_dn[,1:6] %>%
kbl(caption="ORA of downregulated DEGs based on p-value and logfoldchange threshold against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | 2.3750 | 190 | 1552 |
| level 2 | 0.0000000 | 0.0000000 | 1.9125 | 153 | 1552 |
| level 3 | 0.0000000 | 0.0000000 | 1.6375 | 131 | 1552 |
| level 4 | 0.0326960 | 0.0817399 | 1.2000 | 96 | 1552 |
| level 5 | 0.6596624 | 1.0000000 | 0.9625 | 77 | 1552 |
| level 6 | 0.9723328 | 1.0000000 | 0.8125 | 65 | 1552 |
| level 7 | 1.0000000 | 1.0000000 | 0.4625 | 37 | 1552 |
| level 8 | 1.0000000 | 1.0000000 | 0.4375 | 35 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.1875 | 15 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.0125 | 1 | 1552 |
FCS now.
stat <- x$stat
names(stat) <- rownames(x)
fcs1 <- fgsea(pathways=xl, stats=stat,minSize=5,
maxSize=100000,nPermSimple = 10000)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (2.85% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-50. You can
## set the `eps` argument to zero for better estimation.
fcs1 <- fcs1[order(fcs1$pval),]
fcs1[,1:7] %>%
kbl(caption="FCS of based on Walt test stat against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | log2err | ES | NES | size |
|---|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | NA | -0.4653125 | -2.355585 | 1552 |
| level 7 | 0.0000000 | 0.0000000 | 1.8202395 | 0.3838806 | 2.215987 | 1552 |
| level 2 | 0.0000000 | 0.0000000 | 1.7506503 | -0.4252321 | -2.152683 | 1552 |
| level 8 | 0.0000000 | 0.0000000 | 1.4885397 | 0.3401864 | 1.963758 | 1552 |
| level 3 | 0.0000000 | 0.0000000 | 1.3110148 | -0.3705234 | -1.875728 | 1552 |
| level 6 | 0.0000000 | 0.0000000 | 0.8986712 | 0.2626457 | 1.516147 | 1552 |
| level 9 | 0.0000000 | 0.0000000 | 0.8012156 | 0.2504243 | 1.445598 | 1552 |
| level 4 | 0.0000026 | 0.0000032 | 0.6272567 | -0.2748962 | -1.391627 | 1552 |
| level 5 | 0.0000227 | 0.0000252 | 0.5756103 | 0.2202370 | 1.271338 | 1552 |
| level 10 | 0.9951338 | 0.9951338 | 0.0699072 | 0.1532952 | 0.884911 | 1552 |
Idea for calculating amount of significant terms:
ora1bias <- sum(-log10(ora_up$pval)) + sum(-log10(ora_dn$pval))
ora2bias <- sum(-log10(ora2_up$pval)) + sum(-log10(ora2_dn$pval))
fcs1bias <- sum(-log10(fcs1$pval))
barplot(c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"FCS"=fcs1bias),ylab="Bias measure")
Visualising deciles in a gene rank
START = 1
xr <- as.list(1:10)
for ( i in 1:NCHUNKS ) {
START <- ( ( i - 1 ) * CHUNKSIZE ) + 1
END <- START + CHUNKSIZE - 1
if ( END > nrow(xs) ) { END <- nrow(xs) }
xr[[i]] <- xs[START:END,"rank"]
}
names(xr) <- paste("level",1:10)
str(xr)
## List of 10
## $ level 1 : num [1:1552] 3170 10966 14586 4257 8923 ...
## $ level 2 : num [1:1552] 9628 3278 3278 844 1058 ...
## $ level 3 : num [1:1552] 2612 1947 7581 15475 1883 ...
## $ level 4 : num [1:1552] 5991 2456 10212 6152 11956 ...
## $ level 5 : num [1:1552] 71 15318 1882 497 15396 ...
## $ level 6 : num [1:1552] 13448 13448 14901 13696 10257 ...
## $ level 7 : num [1:1552] 12287 6210 4363 12937 8286 ...
## $ level 8 : num [1:1552] 9757 12012 14936 11967 15333 ...
## $ level 9 : num [1:1552] 6178 14010 9784 8721 4953 ...
## $ level 10: num [1:1552] 5727 10543 10085 11057 7818 ...
boxplot(xr,cex=0,col="white",ylab="Position in gene rank")
beeswarm(xr,cex=0.25,add=TRUE,pch=19,col="darkgray")
Now quantify the data skew.
Pearson’s Coefficient of Skewness.
lapply(xr,summary)
## $`level 1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1733 4232 5498 8405 15516
##
## $`level 2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12 2053 4920 5987 9630 15518
##
## $`level 3`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 2313 5653 6472 10390 15513
##
## $`level 4`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22 2918 6602 7089 11058 15520
##
## $`level 5`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 26 3816 8150 8067 12520 15490
##
## $`level 6`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 4600 9358 8655 12720 15499
##
## $`level 7`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36 5926 10454 9505 13365 15515
##
## $`level 8`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 54 5892 9922 9227 12892 15506
##
## $`level 9`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 189.5 6238.2 9049.5 8832.3 11597.8 15424.0
##
## $`level 10`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 264 6255 8294 8272 10372 15293
pcs <- unlist(lapply(xr, function(x) { ( mean(x) - median(x) ) / sd(x) } ) )
pcs
## level 1 level 2 level 3 level 4 level 5 level 6
## 0.279484286 0.237876159 0.178036079 0.106847741 -0.017634570 -0.153418323
## level 7 level 8 level 9 level 10
## -0.216134585 -0.165202127 -0.062582695 -0.007822581
barplot(pcs)
sum(abs(pcs))
## [1] 1.425039
Consider running Rank ANOVA test for enrichment. Optionally can correct for basemean bias.
df <- x[,"rank",drop=FALSE]
df$baseMean <- rank(x$baseMean)
plot(df)
i=1
gs <- xl[[i]]
df$inset <- as.numeric(rownames(df) %in% gs)
mdl0 <- aov(rank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(rank ~ baseMean + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
summary(mdl0)
## Df Sum Sq Mean Sq F value Pr(>F)
## inset 1 8.824e+09 8.824e+09 452.4 <2e-16 ***
## Residuals 15518 3.027e+11 1.951e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mdl1)
## Df Sum Sq Mean Sq F value Pr(>F)
## baseMean 1 2.076e+10 2.076e+10 1109.83 < 2e-16 ***
## inset 1 4.980e+08 4.980e+08 26.62 2.5e-07 ***
## Residuals 15517 2.903e+11 1.871e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p0
## [1] 5.679257e-99
p1
## [1] 2.503612e-07
myinset <- subset(df,inset==1)
points(myinset[,1:2],col="red")
df <- x[,"rank",drop=FALSE]
df$baseMean <- scale(log10(x$baseMean))
mclapply(1:length(xl), function(i) {
gs <- xl[[i]]
df$inset <- as.numeric(rownames(df) %in% gs)
mdl0 <- aov(rank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(rank ~ baseMean + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
res <- c("p0"=p0,"p1"=p1)
return(res)
},mc.cores=10)
## [[1]]
## p0 p1
## 5.679257e-99 8.546526e-13
##
## [[2]]
## p0 p1
## 3.131147e-61 2.082626e-14
##
## [[3]]
## p0 p1
## 5.357706e-33 7.918281e-08
##
## [[4]]
## p0 p1
## 4.760753e-10 1.119533e-01
##
## [[5]]
## p0 p1
## 4.462087e-03 2.069197e-08
##
## [[6]]
## p0 p1
## 1.026852e-16 1.866350e-18
##
## [[7]]
## p0 p1
## 2.844267e-59 8.605915e-44
##
## [[8]]
## p0 p1
## 2.697317e-42 3.448919e-12
##
## [[9]]
## p0 p1
## 2.539164e-23 9.822761e-03
##
## [[10]]
## p0 p1
## 2.118204e-06 4.932374e-54
Run enrichment test on Reactome gene sets with and without correction.
Relate gene symbols to gene IDs.
x$derank <- rank(x$stat)
x$bmrank <- rank(x$baseMean)
x$symbol <- lapply(strsplit(rownames(x)," "), function(x) { x[length(x) ]} )
Plot DE rank vs basemean rank.
Testing code.
df <- x
plot(df[,c("derank","bmrank")])
reactome <- gmtPathways("c2.cp.reactome.v2023.2.Hs.symbols.gmt")
i=1
gs <- reactome[[i]]
df$inset <- as.numeric(df$symbol %in% gs)
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
# basic es calc
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
summary(mdl0)
## Df Sum Sq Mean Sq F value Pr(>F)
## inset 1 1.834e+07 18341832 0.914 0.339
## Residuals 15518 3.115e+11 20073938
summary(mdl1)
## Df Sum Sq Mean Sq F value Pr(>F)
## bmrank 1 2.076e+10 2.076e+10 1108.092 <2e-16 ***
## inset 1 4.389e+07 4.389e+07 2.343 0.126
## Residuals 15517 2.907e+11 1.874e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p0
## [1] 0.3391459
p1
## [1] 0.1259062
myinset <- subset(df,inset==1)
points(myinset[,c("derank","bmrank")],col="red",pch=19)
# corrected es calc
bmranks <- mean(subset(df,inset==1)$bmrank)
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mysubsets <- lapply(idx, function(i) {
df3 <- df2[(i-block):(i+block),]
df3 <- subset(df3,inset!=1)
return(df3)
} )
# background based on genes in bmrank vicinity rather than all genes
mrbg1 <- mean(unlist(lapply(idx, function(i) {
df3 <- df2[(i-block):(i+block),]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
Now run for all Reactome gene sets.
RhpcBLASctl::blas_set_num_threads(1)
tic()
myres <- mclapply(1:length(reactome), function(i) {
gs <- reactome[[i]]
gsname <- names(reactome)[i]
df$inset <- as.numeric(df$symbol %in% gs)
NGENES <- length(unique(unlist(subset( df,inset==1 )$symbol)))
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
# corrected es
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mrbg1 <- mean(unlist(lapply(idx, function(j) {
START=j-block
if (START < 1) { START=1 }
END=j+block
if (END > tot) { END=tot }
df3 <- df2[START:END,]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
res <- c("GeneSet"=gsname,"Ngenes"=NGENES,"ES0"=es0,"ES1"=es1,"p0"=p0,"p1"=p1)
return(res)
},mc.cores=8)
toc()
## 10.034 sec elapsed
myres <- do.call(rbind,myres)
myrownames <- myres[,1]
myres <- myres[,-1]
myres <- apply(myres,2,as.numeric)
rownames(myres) <- myrownames
myres <- as.data.frame(myres)
myres <- subset(myres,Ngenes>=5)
# FDR correction is vital
myres$fdr0 <- p.adjust(myres$p0,method="fdr")
myres$fdr1 <- p.adjust(myres$p1,method="fdr")
# show top results
myres <- myres[order(myres[,"p0"]),]
head(myres,10) %>%
kbl(caption="Top results for normal ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| REACTOME_ASPARAGINE_N_LINKED_GLYCOSYLATION | 269 | -0.2977667 | -0.1861863 | 0 | 0.0000000 | 0 | 0.0000006 |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | 109 | -0.4493381 | -0.2290008 | 0 | 0.0000044 | 0 | 0.0001691 |
| REACTOME_TRANSLATION | 288 | -0.2786627 | -0.1244989 | 0 | 0.0001513 | 0 | 0.0032057 |
| REACTOME_CELLULAR_RESPONSES_TO_STIMULI | 733 | -0.1816134 | -0.0733189 | 0 | 0.0004015 | 0 | 0.0075636 |
| REACTOME_CELLULAR_RESPONSE_TO_STARVATION | 152 | -0.3440493 | -0.1913212 | 0 | 0.0000416 | 0 | 0.0011137 |
| REACTOME_METABOLISM_OF_RNA | 705 | -0.1550091 | -0.0495496 | 0 | 0.0352369 | 0 | 0.1696262 |
| REACTOME_COMPLEMENT_CASCADE | 36 | 0.5760350 | 0.6735232 | 0 | 0.0000000 | 0 | 0.0000000 |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | 116 | -0.3687748 | -0.1475434 | 0 | 0.0022283 | 0 | 0.0257729 |
| REACTOME_INFECTIOUS_DISEASE | 804 | -0.1423396 | -0.0443787 | 0 | 0.0419093 | 0 | 0.1863454 |
| REACTOME_CHOLESTEROL_BIOSYNTHESIS | 26 | 0.7932543 | 0.9182751 | 0 | 0.0000000 | 0 | 0.0000000 |
myres <- myres[order(myres[,"p1"]),]
head(myres,10) %>%
kbl(caption="Top results for adjusted ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| REACTOME_COMPLEMENT_CASCADE | 36 | 0.5760350 | 0.6735232 | 0.0e+00 | 0 | 0.00e+00 | 0e+00 |
| REACTOME_CELL_CYCLE | 633 | 0.1322389 | 0.1714455 | 0.0e+00 | 0 | 2.00e-07 | 0e+00 |
| REACTOME_CHOLESTEROL_BIOSYNTHESIS | 26 | 0.7932543 | 0.9182751 | 0.0e+00 | 0 | 0.00e+00 | 0e+00 |
| REACTOME_ACTIVATION_OF_C3_AND_C5 | 7 | 0.7583427 | 0.9149586 | 0.0e+00 | 0 | 0.00e+00 | 0e+00 |
| REACTOME_CELL_CYCLE_MITOTIC | 514 | 0.1289858 | 0.1813587 | 3.0e-07 | 0 | 8.90e-06 | 0e+00 |
| REACTOME_MITOTIC_PROMETAPHASE | 194 | 0.2395851 | 0.2741629 | 0.0e+00 | 0 | 1.00e-07 | 0e+00 |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 13 | 0.6715199 | 0.7511741 | 0.0e+00 | 0 | 0.00e+00 | 0e+00 |
| REACTOME_DNA_STRAND_ELONGATION | 32 | 0.6037779 | 0.5562249 | 0.0e+00 | 0 | 2.00e-07 | 1e-07 |
| REACTOME_BIOLOGICAL_OXIDATIONS | 158 | 0.2515345 | 0.2748675 | 0.0e+00 | 0 | 9.00e-07 | 1e-07 |
| REACTOME_FATTY_ACID_METABOLISM | 134 | 0.2310791 | 0.2648147 | 1.3e-06 | 0 | 3.35e-05 | 2e-07 |
Now make an Euler plot.
fdr0set <- rownames(subset(myres,fdr0<0.01))
fdr1set <- rownames(subset(myres,fdr1<0.01))
FDR0LEN <- length(fdr0set)
FDR1LEN <- length(fdr1set)
JACCARD <- signif(length(intersect(fdr0set,fdr1set)) / length(union(fdr0set,fdr1set)),3)
HEADER <- paste("N sig (uncorr)=",FDR0LEN,", N sig (corr)",FDR1LEN,", Jaccard=",JACCARD)
HEADER
## [1] "N sig (uncorr)= 175 , N sig (corr) 94 , Jaccard= 0.387"
v1 <- list("fdr0set"=fdr0set, "fdr1set"=fdr1set)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
#mtext(HEADER,cex=0.5)
Now separate up and down.
fdr0up <- rownames(subset(myres,fdr0<0.01&ES0>0))
fdr0dn <- rownames(subset(myres,fdr0<0.01&ES0<0))
fdr1up <- rownames(subset(myres,fdr1<0.01&ES1>0))
fdr1dn <- rownames(subset(myres,fdr1<0.01&ES1<0))
v1 <- list("fdr0up"=fdr0up, "fdr0dn"=fdr0dn, "fdr1up"=fdr1up,"fdr1dn"=fdr1dn)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
#mtext(HEADER,cex=0.5)
setdiff(fdr0up,fdr1up)
## [1] "REACTOME_BASE_EXCISION_REPAIR"
## [2] "REACTOME_COLLAGEN_CHAIN_TRIMERIZATION"
## [3] "REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE"
## [4] "REACTOME_MEIOTIC_RECOMBINATION"
setdiff(fdr1up,fdr0up)
## [1] "REACTOME_SEPARATION_OF_SISTER_CHROMATIDS"
## [2] "REACTOME_ORGANELLE_BIOGENESIS_AND_MAINTENANCE"
## [3] "REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION"
## [4] "REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE"
## [5] "REACTOME_RHO_GTPASES_ACTIVATE_FORMINS"
## [6] "REACTOME_MITOTIC_G2_G2_M_PHASES"
## [7] "REACTOME_METABOLISM_OF_VITAMINS_AND_COFACTORS"
## [8] "REACTOME_S_PHASE"
## [9] "REACTOME_SYNTHESIS_OF_DNA"
## [10] "REACTOME_DNA_REPLICATION"
## [11] "REACTOME_REGULATION_OF_CHOLESTEROL_BIOSYNTHESIS_BY_SREBP_SREBF"
## [12] "REACTOME_COMMON_PATHWAY_OF_FIBRIN_CLOT_FORMATION"
## [13] "REACTOME_MITOCHONDRIAL_FATTY_ACID_BETA_OXIDATION_OF_SATURATED_FATTY_ACIDS"
## [14] "REACTOME_INHIBITION_OF_THE_PROTEOLYTIC_ACTIVITY_OF_APC_C_REQUIRED_FOR_THE_ONSET_OF_ANAPHASE_BY_MITOTIC_SPINDLE_CHECKPOINT_COMPONENTS"
## [15] "REACTOME_BETA_OXIDATION_OF_VERY_LONG_CHAIN_FATTY_ACIDS"
## [16] "REACTOME_CHREBP_ACTIVATES_METABOLIC_GENE_EXPRESSION"
## [17] "REACTOME_RHO_GTPASE_EFFECTORS"
## [18] "REACTOME_ASPIRIN_ADME"
## [19] "REACTOME_TERMINAL_PATHWAY_OF_COMPLEMENT"
setdiff(fdr0dn,fdr1dn)
## [1] "REACTOME_TRANSFERRIN_ENDOCYTOSIS_AND_RECYCLING"
## [2] "REACTOME_INTERLEUKIN_17_SIGNALING"
## [3] "REACTOME_RAC2_GTPASE_CYCLE"
## [4] "REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION"
## [5] "REACTOME_MAPK_FAMILY_SIGNALING_CASCADES"
## [6] "REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY"
## [7] "REACTOME_NERVOUS_SYSTEM_DEVELOPMENT"
## [8] "REACTOME_BACTERIAL_INFECTION_PATHWAYS"
## [9] "REACTOME_RUNX3_REGULATES_P14_ARF"
## [10] "REACTOME_INTRACELLULAR_SIGNALING_BY_SECOND_MESSENGERS"
## [11] "REACTOME_NEGATIVE_REGULATION_OF_THE_PI3K_AKT_NETWORK"
## [12] "REACTOME_NEGATIVE_REGULATION_OF_MAPK_PATHWAY"
## [13] "REACTOME_EUKARYOTIC_TRANSLATION_INITIATION"
## [14] "REACTOME_NUCLEAR_RECEPTOR_TRANSCRIPTION_PATHWAY"
## [15] "REACTOME_TOLL_LIKE_RECEPTOR_9_TLR9_CASCADE"
## [16] "REACTOME_INSULIN_RECEPTOR_RECYCLING"
## [17] "REACTOME_MYD88_INDEPENDENT_TLR4_CASCADE"
## [18] "REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR"
## [19] "REACTOME_IRON_UPTAKE_AND_TRANSPORT"
## [20] "REACTOME_INFECTION_WITH_MYCOBACTERIUM_TUBERCULOSIS"
## [21] "REACTOME_RAC1_GTPASE_CYCLE"
## [22] "REACTOME_NONSENSE_MEDIATED_DECAY_NMD"
## [23] "REACTOME_KEAP1_NFE2L2_PATHWAY"
## [24] "REACTOME_LYSOSOME_VESICLE_BIOGENESIS"
## [25] "REACTOME_TRANSPORT_TO_THE_GOLGI_AND_SUBSEQUENT_MODIFICATION"
## [26] "REACTOME_AMINO_ACID_TRANSPORT_ACROSS_THE_PLASMA_MEMBRANE"
## [27] "REACTOME_SPHINGOLIPID_METABOLISM"
## [28] "REACTOME_RESPONSE_OF_MTB_TO_PHAGOCYTOSIS"
## [29] "REACTOME_CELL_EXTRACELLULAR_MATRIX_INTERACTIONS"
## [30] "REACTOME_UPTAKE_AND_ACTIONS_OF_BACTERIAL_TOXINS"
## [31] "REACTOME_TRANS_GOLGI_NETWORK_VESICLE_BUDDING"
## [32] "REACTOME_MAPK_TARGETS_NUCLEAR_EVENTS_MEDIATED_BY_MAP_KINASES"
## [33] "REACTOME_PI3K_AKT_SIGNALING_IN_CANCER"
## [34] "REACTOME_SIGNALLING_TO_ERKS"
## [35] "REACTOME_ABC_FAMILY_PROTEINS_MEDIATED_TRANSPORT"
## [36] "REACTOME_DEFECTIVE_CFTR_CAUSES_CYSTIC_FIBROSIS"
## [37] "REACTOME_NUCLEAR_EVENTS_MEDIATED_BY_NFE2L2"
## [38] "REACTOME_REGULATION_OF_EXPRESSION_OF_SLITS_AND_ROBOS"
## [39] "REACTOME_INFLUENZA_INFECTION"
## [40] "REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S"
## [41] "REACTOME_SIGNALING_BY_ROBO_RECEPTORS"
## [42] "REACTOME_CARGO_CONCENTRATION_IN_THE_ER"
## [43] "REACTOME_RHO_GTPASE_CYCLE"
## [44] "REACTOME_SIGNALING_BY_EGFR"
## [45] "REACTOME_SIGNALING_BY_VEGF"
## [46] "REACTOME_SIGNALING_BY_INTERLEUKINS"
## [47] "REACTOME_SIGNALING_BY_EGFR_IN_CANCER"
## [48] "REACTOME_COPII_MEDIATED_VESICLE_TRANSPORT"
## [49] "REACTOME_PI_METABOLISM"
## [50] "REACTOME_DEVELOPMENTAL_BIOLOGY"
## [51] "REACTOME_RHOG_GTPASE_CYCLE"
## [52] "REACTOME_HEME_SIGNALING"
## [53] "REACTOME_ER_TO_GOLGI_ANTEROGRADE_TRANSPORT"
## [54] "REACTOME_AKT_PHOSPHORYLATES_TARGETS_IN_THE_CYTOSOL"
## [55] "REACTOME_CIRCADIAN_CLOCK"
## [56] "REACTOME_DOWNSTREAM_SIGNALING_EVENTS_OF_B_CELL_RECEPTOR_BCR"
## [57] "REACTOME_SARS_COV_1_INFECTION"
## [58] "REACTOME_TCR_SIGNALING"
## [59] "REACTOME_FC_EPSILON_RECEPTOR_FCERI_SIGNALING"
## [60] "REACTOME_RRNA_PROCESSING"
## [61] "REACTOME_SARS_COV_1_MODULATES_HOST_TRANSLATION_MACHINERY"
## [62] "REACTOME_RHOV_GTPASE_CYCLE"
## [63] "REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS"
## [64] "REACTOME_VESICLE_MEDIATED_TRANSPORT"
## [65] "REACTOME_METABOLISM_OF_RNA"
## [66] "REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING"
## [67] "REACTOME_C_TYPE_LECTIN_RECEPTORS_CLRS"
## [68] "REACTOME_MEMBRANE_TRAFFICKING"
## [69] "REACTOME_DEATH_RECEPTOR_SIGNALING"
## [70] "REACTOME_INFECTIOUS_DISEASE"
## [71] "REACTOME_AUTOPHAGY"
## [72] "REACTOME_ABC_TRANSPORTER_DISORDERS"
## [73] "REACTOME_SELENOAMINO_ACID_METABOLISM"
## [74] "REACTOME_REGULATION_OF_PTEN_STABILITY_AND_ACTIVITY"
## [75] "REACTOME_RRNA_MODIFICATION_IN_THE_NUCLEUS_AND_CYTOSOL"
## [76] "REACTOME_MRNA_SPLICING"
## [77] "REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS"
## [78] "REACTOME_CHROMATIN_MODIFYING_ENZYMES"
## [79] "REACTOME_HEDGEHOG_LIGAND_BIOGENESIS"
## [80] "REACTOME_PTEN_REGULATION"
## [81] "REACTOME_COPI_MEDIATED_ANTEROGRADE_TRANSPORT"
## [82] "REACTOME_SIGNALING_BY_WNT"
## [83] "REACTOME_SARS_COV_INFECTIONS"
## [84] "REACTOME_VIRAL_INFECTION_PATHWAYS"
## [85] "REACTOME_SARS_COV_1_HOST_INTERACTIONS"
## [86] "REACTOME_POST_TRANSLATIONAL_PROTEIN_MODIFICATION"
## [87] "REACTOME_DEGRADATION_OF_BETA_CATENIN_BY_THE_DESTRUCTION_COMPLEX"
## [88] "REACTOME_SIGNALING_BY_TGF_BETA_RECEPTOR_COMPLEX"
## [89] "REACTOME_CLATHRIN_MEDIATED_ENDOCYTOSIS"
## [90] "REACTOME_DEUBIQUITINATION"
## [91] "REACTOME_REGULATION_OF_RUNX2_EXPRESSION_AND_ACTIVITY"
## [92] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_RUNX3"
## [93] "REACTOME_SARS_COV_2_MODULATES_HOST_TRANSLATION_MACHINERY"
## [94] "REACTOME_SARS_COV_2_INFECTION"
## [95] "REACTOME_PROCESSING_OF_CAPPED_INTRON_CONTAINING_PRE_MRNA"
## [96] "REACTOME_INNATE_IMMUNE_SYSTEM"
setdiff(fdr1dn,fdr0dn)
## character(0)
Now take a look at the top results.
myres <- myres[order(myres[,"p0"]),]
top0dn <- rownames(head(subset(myres,ES0<0),10))
top0up <- rownames(head(subset(myres,ES0>0),10))
myres <- myres[order(myres[,"p1"]),]
top1dn <- rownames(head(subset(myres,ES1<0),10))
top1up <- rownames(head(subset(myres,ES1>0),10))
v1 <- list("up0"=top0up, "up1"=top1up, "dn0"=top0dn, "dn1"=top1dn)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
Now look at the top pathway results by ES after removing non-significant results (FDR>0.01).
top <- subset(myres,fdr0<0.01)
top <- top[order(top$ES0),]
dn0 <- head(top,15)$ES0
names(dn0) <- rownames(head(top,15))
up0 <- tail(top,15)$ES0
names(up0) <- rownames(tail(top,15))
top0 <- c(dn0,up0)
names(top0) <- gsub("REACTOME_","",names(top0))
names(top0) <- gsub("_"," ",names(top0))
par(mar=c(5.1,20.1,4.1,2.1))
barplot(top0,horiz=TRUE,las=1,cex.names=0.6,main="Uncorr",xlab="ES0")
top <- subset(myres,fdr1<0.01)
top <- top[order(top$ES1),]
dn0 <- head(top,15)$ES1
names(dn0) <- rownames(head(top,15))
up0 <- tail(top,15)$ES1
names(up0) <- rownames(tail(top,15))
top0 <- c(dn0,up0)
names(top0) <- gsub("REACTOME_","",names(top0))
names(top0) <- gsub("_"," ",names(top0))
barplot(top0,horiz=TRUE,las=1,cex.names=0.6,main="Corr",xlab="ES1")
par(mar=c(5.1,4.1,4.1,2.1))
Now plot ES values with both approaches.
plot(myres$ES0,myres$ES1,pch=19,col="gray",cex=0.5)
sig0 <- subset(myres,fdr0<0.01)
sig1 <- subset(myres,fdr1<0.01)
sig2 <- subset(myres,fdr0<0.01 & fdr1<0.01)
points(sig0$ES0,sig0$ES1,pch=19,col="blue",cex=0.5)
points(sig1$ES0,sig1$ES1,pch=19,col="orange",cex=0.5)
points(sig2$ES0,sig2$ES1,pch=19,col="black",cex=0.5)
abline(0,1)
Go back andcheck whether the adjustment reduces bias on basemean gene sets.
RhpcBLASctl::blas_set_num_threads(1)
tic()
myres <- mclapply(1:length(xl), function(i) {
gs <- xl[[i]]
gsname <- names(xl)[i]
df$inset <- as.numeric(rownames(df) %in% gs)
NGENES <- length(unique(unlist(subset( df,inset==1 )$symbol)))
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
# corrected es
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mrbg1 <- mean(unlist(lapply(idx, function(j) {
START=j-block
if (START < 1) { START=1 }
END=j+block
if (END > tot) { END=tot }
df3 <- df2[START:END,]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
res <- c("GeneSet"=gsname,"Ngenes"=NGENES,"ES0"=es0,"ES1"=es1,"p0"=p0,"p1"=p1)
return(res)
},mc.cores=8)
toc()
## 1.734 sec elapsed
myres <- do.call(rbind,myres)
myrownames <- myres[,1]
myres <- myres[,-1]
myres <- apply(myres,2,as.numeric)
rownames(myres) <- myrownames
myres <- as.data.frame(myres)
myres <- subset(myres,Ngenes>=5)
# FDR correction is vital
myres$fdr0 <- p.adjust(myres$p0,method="fdr")
myres$fdr1 <- p.adjust(myres$p1,method="fdr")
# show top results
myres <- myres[order(myres[,"p0"]),]
head(myres,10) %>%
kbl(caption="Top results for normal ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| level 1 | 1525 | -0.3238955 | -0.0566294 | 0.0000000 | 0.0000003 | 0.0000000 | 0.0000006 |
| level 2 | 1518 | -0.2539444 | 0.0203558 | 0.0000000 | 0.0000983 | 0.0000000 | 0.0001405 |
| level 7 | 1455 | 0.2497599 | 0.0043189 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| level 8 | 1450 | 0.2099272 | -0.0070585 | 0.0000000 | 0.0000032 | 0.0000000 | 0.0000054 |
| level 3 | 1513 | -0.1844393 | -0.0052342 | 0.0000000 | 0.0040929 | 0.0000000 | 0.0045477 |
| level 9 | 1448 | 0.1534646 | -0.0109516 | 0.0000000 | 0.0005270 | 0.0000000 | 0.0006588 |
| level 6 | 1472 | 0.1281118 | 0.0087921 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| level 4 | 1519 | -0.0961392 | -0.0611298 | 0.0000000 | 0.4925352 | 0.0000000 | 0.4925352 |
| level 10 | 1477 | 0.0732308 | -0.0713551 | 0.0000021 | 0.0000000 | 0.0000024 | 0.0000000 |
| level 5 | 1496 | 0.0439242 | 0.0103533 | 0.0044621 | 0.0000011 | 0.0044621 | 0.0000022 |
myres <- myres[order(myres[,"p1"]),]
head(myres,10) %>%
kbl(caption="Top results for adjusted ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| level 10 | 1477 | 0.0732308 | -0.0713551 | 0.0000021 | 0.0000000 | 0.0000024 | 0.0000000 |
| level 7 | 1455 | 0.2497599 | 0.0043189 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| level 6 | 1472 | 0.1281118 | 0.0087921 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| level 1 | 1525 | -0.3238955 | -0.0566294 | 0.0000000 | 0.0000003 | 0.0000000 | 0.0000006 |
| level 5 | 1496 | 0.0439242 | 0.0103533 | 0.0044621 | 0.0000011 | 0.0044621 | 0.0000022 |
| level 8 | 1450 | 0.2099272 | -0.0070585 | 0.0000000 | 0.0000032 | 0.0000000 | 0.0000054 |
| level 2 | 1518 | -0.2539444 | 0.0203558 | 0.0000000 | 0.0000983 | 0.0000000 | 0.0001405 |
| level 9 | 1448 | 0.1534646 | -0.0109516 | 0.0000000 | 0.0005270 | 0.0000000 | 0.0006588 |
| level 3 | 1513 | -0.1844393 | -0.0052342 | 0.0000000 | 0.0040929 | 0.0000000 | 0.0045477 |
| level 4 | 1519 | -0.0961392 | -0.0611298 | 0.0000000 | 0.4925352 | 0.0000000 | 0.4925352 |
Plot the result.
ora1bias <- sum(-log10(ora_up$pval)) + sum(-log10(ora_dn$pval))
ora2bias <- sum(-log10(ora2_up$pval)) + sum(-log10(ora2_dn$pval))
fcs1bias <- sum(-log10(fcs1$pval))
anova0bias <- sum(-log10(myres$p0))
anova1bias <- sum(-log10(myres$p1))
res_p <- c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"fgsea"=fcs1bias,"ANOVA"=anova0bias, "ANOVA adj"=anova1bias)
res_p
## ORA p ORA p FC fgsea ANOVA ANOVA adj
## 88.27657 88.23577 230.30257 347.06674 114.06945
barplot(res_p, ylab="Bias measure (p)")
ora1bias <- sum(-log10(ora_up$padj)) + sum(-log10(ora_dn$padj))
ora2bias <- sum(-log10(ora2_up$padj)) + sum(-log10(ora2_dn$padj))
fcs1bias <- sum(-log10(fcs1$padj))
anova0bias <- sum(-log10(myres$fdr0))
anova1bias <- sum(-log10(myres$fdr1))
res_fdr <- c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"fgsea"=fcs1bias,"ANOVA"=anova0bias, "ANOVA adj"=anova1bias)
res_fdr
## ORA p ORA p FC fgsea ANOVA ANOVA adj
## 82.40367 82.62872 226.86233 343.62651 110.62922
barplot(res_fdr, ylab="Bias measure (FDR)")
For reproducibility.
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tictoc_1.2.1 eulerr_7.0.4 beeswarm_0.4.0 kableExtra_1.4.0
## [5] fgsea_1.34.2
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 xml2_1.5.2
## [4] polylabelr_1.0.0 stringi_1.8.7 lattice_0.22-7
## [7] digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5
## [10] grid_4.5.2 RColorBrewer_1.1-3 fastmap_1.2.0
## [13] jsonlite_2.0.0 Matrix_1.7-4 viridisLite_0.4.2
## [16] scales_1.4.0 RhpcBLASctl_0.23-42 codetools_0.2-20
## [19] textshaping_1.0.4 jquerylib_0.1.4 cli_3.6.5
## [22] rlang_1.1.7 polyclip_1.10-7 cowplot_1.2.0
## [25] cachem_1.1.0 yaml_2.3.12 otel_0.2.0
## [28] tools_4.5.2 BiocParallel_1.42.2 dplyr_1.1.4
## [31] ggplot2_4.0.1 fastmatch_1.1-8 vctrs_0.7.0
## [34] R6_2.6.1 lifecycle_1.0.5 stringr_1.6.0
## [37] pkgconfig_2.0.3 pillar_1.11.1 bslib_0.9.0
## [40] gtable_0.3.6 glue_1.8.0 data.table_1.18.0
## [43] Rcpp_1.1.1 systemfonts_1.3.1 xfun_0.56
## [46] tibble_3.3.1 tidyselect_1.2.1 rstudioapi_0.18.0
## [49] knitr_1.51 dichromat_2.0-0.1 farver_2.1.2
## [52] htmltools_0.5.9 rmarkdown_2.30 svglite_2.2.2
## [55] compiler_4.5.2 S7_0.2.1