Testing whether ORA biases towards gene sets based on their basemean expression levels.
library("fgsea")
library("kableExtra")
library("beeswarm")
library("eulerr")
library("tictoc")
library("parallel")
RhpcBLASctl::blas_set_num_threads(1)
Read data and make a smear plot.
x <- readRDS("dataprep/bulkrna1_adj.Rds")
x$rank <- rank(x$stat)
head(x)
## baseMean log2FoldChange lfcSE stat
## ENSG00000184164 CRELD2 5883.34 -2.477030 0.2551468 -9.708252
## ENSG00000090520 DNAJB11 125332.88 -2.537699 0.2760598 -9.192570
## ENSG00000149428 HYOU1 3237795.73 -2.678771 0.2992302 -8.952210
## ENSG00000280682 HYOU1 3235618.62 -2.677649 0.2991539 -8.950742
## ENSG00000179218 CALR 50996694.94 -2.608080 0.3028466 -8.611887
## ENSG00000044574 HSPA5 151811844.68 -2.311628 0.2695218 -8.576776
## pvalue padj rank
## ENSG00000184164 CRELD2 2.780646e-22 2.976125e-18 1
## ENSG00000090520 DNAJB11 3.835664e-20 2.052655e-16 2
## ENSG00000149428 HYOU1 3.484367e-19 9.448053e-16 3
## ENSG00000280682 HYOU1 3.530992e-19 9.448053e-16 4
## ENSG00000179218 CALR 7.186763e-18 1.525214e-14 5
## ENSG00000044574 HSPA5 9.756946e-18 1.525214e-14 6
dim(x)
## [1] 15517 7
plot(log10(x$baseMean),x$log2FoldChange,pch=19,cex=0.5,
xlab="log10(baseMean)",ylab="log2(foldchange)",
ylim=c(-8,8))
sig <- subset(x,padj<0.01)
points(log10(sig$baseMean),sig$log2FoldChange,pch=19,cex=0.5,col="red")
plot(x$log2FoldChange,-log10(x$pvalue), pch=19,cex=0.5,
xlab="log2(foldchange)",ylab="-log10(p-value)")
sig <- subset(x,padj<0.01)
points(sig$log2FoldChange,-log10(sig$pvalue),pch=19,cex=0.5,col="red")
Break the data into deciles based on basemean expression.
xs <- x[order(-x$baseMean),]
NCHUNKS = 10
CHUNKSIZE <- round(nrow(xs)/NCHUNKS)
START = 1
xl <- as.list(1:10)
for ( i in 1:NCHUNKS ) {
START <- ( ( i - 1 ) * CHUNKSIZE ) + 1
END <- START + CHUNKSIZE - 1
if ( END > nrow(xs) ) { END <- nrow(xs) }
xl[[i]] <- rownames(xs[START:END,])
}
names(xl) <- paste("level",1:10)
str(xl)
## List of 10
## $ level 1 : chr [1:1552] "ENSG00000156508 EEF1A1" "ENSG00000163631 ALB" "ENSG00000198804 MT-CO1" "ENSG00000115414 FN1" ...
## $ level 2 : chr [1:1552] "ENSG00000184432 COPB2" "ENSG00000152492 CCDC50" "ENSG00000117519 CNN3" "ENSG00000182985 CADM1" ...
## $ level 3 : chr [1:1552] "ENSG00000143889 HNRNPLL" "ENSG00000129933 MAU2" "ENSG00000139726 DENR" "ENSG00000156711 MAPK13" ...
## $ level 4 : chr [1:1552] "ENSG00000183098 GPC6" "ENSG00000181704 YIPF6" "ENSG00000113580 NR3C1" "ENSG00000112992 NNT" ...
## $ level 5 : chr [1:1552] "ENSG00000122042 UBL3" "ENSG00000102763 VWA8" "ENSG00000178105 DDX10" "ENSG00000101057 MYBL2" ...
## $ level 6 : chr [1:1552] "ENSG00000177485 ZBTB33" "ENSG00000128915 ICE2" "ENSG00000158526 TSR2" "ENSG00000263956 NBPF11" ...
## $ level 7 : chr [1:1552] "ENSG00000166529 ZSCAN21" "ENSG00000180346 TIGD2" "ENSG00000205572 SERF1B" "ENSG00000206149 HERC2P9" ...
## $ level 8 : chr [1:1552] "ENSG00000228425 VPS52" "ENSG00000099994 SUSD2" "ENSG00000132825 PPP1R3D" "ENSG00000260103 " ...
## $ level 9 : chr [1:1552] "ENSG00000173275 ZNF449" "ENSG00000186862 PDZD7" "ENSG00000182070 OR52A1" "ENSG00000234618 RPSAP9" ...
## $ level 10: chr [1:1549] "ENSG00000213305 HNRNPCP6" "ENSG00000253958 CLDN23" "ENSG00000154134 ROBO3" "ENSG00000267656 " ...
ORA p-value only
xp <- x[order(x$pvalue),]
up <- rownames(head(subset(xp,log2FoldChange>0),800))
dn <- rownames(head(subset(xp,log2FoldChange<0),800))
bg <- rownames(xp)
ora_up <- fora(pathways=xl, genes=up, universe=bg, minSize = 5, maxSize = Inf)
ora_dn <- fora(pathways=xl, genes=dn, universe=bg, minSize = 5, maxSize = Inf)
ora_up[,1:6] %>%
kbl(caption="ORA of upregulated DEGs based on p-value against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 7 | 0.0000000 | 0.0000000 | 3.8742510 | 310 | 1552 |
| level 6 | 0.0000000 | 0.0000000 | 2.6744829 | 214 | 1552 |
| level 5 | 0.0002058 | 0.0006862 | 1.3872318 | 111 | 1552 |
| level 8 | 0.9999927 | 1.0000000 | 0.5873864 | 47 | 1552 |
| level 4 | 0.9999999 | 1.0000000 | 0.5124009 | 41 | 1552 |
| level 3 | 1.0000000 | 1.0000000 | 0.3499323 | 28 | 1552 |
| level 1 | 1.0000000 | 1.0000000 | 0.3249372 | 26 | 1552 |
| level 2 | 1.0000000 | 1.0000000 | 0.2499517 | 20 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.0000000 | 0 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.0375654 | 3 | 1549 |
ora_dn[,1:6] %>%
kbl(caption="ORA of downregulated DEGs based on p-value against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | 3.1868839 | 255 | 1552 |
| level 2 | 0.0000000 | 0.0000000 | 2.5370095 | 203 | 1552 |
| level 3 | 0.0000000 | 0.0000000 | 1.8121496 | 145 | 1552 |
| level 4 | 0.1037115 | 0.2592787 | 1.1372801 | 91 | 1552 |
| level 5 | 0.9997709 | 1.0000000 | 0.6623719 | 53 | 1552 |
| level 6 | 1.0000000 | 1.0000000 | 0.4874058 | 39 | 1552 |
| level 7 | 1.0000000 | 1.0000000 | 0.1499710 | 12 | 1552 |
| level 8 | 1.0000000 | 1.0000000 | 0.0249952 | 2 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.0000000 | 0 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.0000000 | 0 | 1549 |
ORA p-value and foldchange threshold
up <- rownames(head(subset(xp,log2FoldChange>0.5),800))
dn <- rownames(head(subset(xp,log2FoldChange < -0.5),800))
ora2_up <- fora(pathways=xl, genes=up, universe=bg, minSize = 5, maxSize = Inf)
ora2_dn <- fora(pathways=xl, genes=dn, universe=bg, minSize = 5, maxSize = Inf)
ora2_up[,1:6] %>%
kbl(caption="ORA of upregulated DEGs based on p-value and logfoldchange threshold against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 7 | 0.0000000 | 0.0000000 | 3.8742510 | 310 | 1552 |
| level 6 | 0.0000000 | 0.0000000 | 2.6744829 | 214 | 1552 |
| level 5 | 0.0002058 | 0.0006862 | 1.3872318 | 111 | 1552 |
| level 8 | 0.9999927 | 1.0000000 | 0.5873864 | 47 | 1552 |
| level 4 | 0.9999999 | 1.0000000 | 0.5124009 | 41 | 1552 |
| level 3 | 1.0000000 | 1.0000000 | 0.3499323 | 28 | 1552 |
| level 1 | 1.0000000 | 1.0000000 | 0.3249372 | 26 | 1552 |
| level 2 | 1.0000000 | 1.0000000 | 0.2499517 | 20 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.0000000 | 0 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.0375654 | 3 | 1549 |
ora2_dn[,1:6] %>%
kbl(caption="ORA of downregulated DEGs based on p-value and logfoldchange threshold against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | 3.1868839 | 255 | 1552 |
| level 2 | 0.0000000 | 0.0000000 | 2.5370095 | 203 | 1552 |
| level 3 | 0.0000000 | 0.0000000 | 1.8121496 | 145 | 1552 |
| level 4 | 0.1037115 | 0.2592787 | 1.1372801 | 91 | 1552 |
| level 5 | 0.9997709 | 1.0000000 | 0.6623719 | 53 | 1552 |
| level 6 | 1.0000000 | 1.0000000 | 0.4874058 | 39 | 1552 |
| level 7 | 1.0000000 | 1.0000000 | 0.1499710 | 12 | 1552 |
| level 8 | 1.0000000 | 1.0000000 | 0.0249952 | 2 | 1552 |
| level 9 | 1.0000000 | 1.0000000 | 0.0000000 | 0 | 1552 |
| level 10 | 1.0000000 | 1.0000000 | 0.0000000 | 0 | 1549 |
FCS now.
stat <- x$stat
names(stat) <- rownames(x)
fcs1 <- fgsea(pathways=xl, stats=stat,minSize=5,
maxSize=100000,nPermSimple = 10000)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (2.17% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 6 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 100000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-50. You can
## set the `eps` argument to zero for better estimation.
fcs1 <- fcs1[order(fcs1$pval),]
fcs1[,1:7] %>%
kbl(caption="FCS of based on Walt test stat against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | log2err | ES | NES | size |
|---|---|---|---|---|---|---|
| level 1 | 0 | 0 | NA | -0.6677480 | -2.829889 | 1552 |
| level 2 | 0 | 0 | NA | -0.6216479 | -2.634518 | 1552 |
| level 3 | 0 | 0 | NA | -0.5216395 | -2.210687 | 1552 |
| level 4 | 0 | 0 | 0.9545416 | -0.3617866 | -1.533237 | 1552 |
| level 10 | NA | NA | NA | 0.4200957 | NA | 1549 |
| level 5 | NA | NA | NA | 0.2543887 | NA | 1552 |
| level 6 | NA | NA | NA | 0.4741847 | NA | 1552 |
| level 7 | NA | NA | NA | 0.6149518 | NA | 1552 |
| level 8 | NA | NA | NA | 0.5019174 | NA | 1552 |
| level 9 | NA | NA | NA | 0.3736610 | NA | 1552 |
Idea for calculating amount of significant terms:
ora1bias <- sum(-log10(ora_up$pval)) + sum(-log10(ora_dn$pval))
ora2bias <- sum(-log10(ora2_up$pval)) + sum(-log10(ora2_dn$pval))
fcs1bias <- sum(-log10(fcs1$pval))
barplot(c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"FCS"=fcs1bias),ylab="Bias measure")
Visualising deciles in a gene rank
START = 1
xr <- as.list(1:10)
for ( i in 1:NCHUNKS ) {
START <- ( ( i - 1 ) * CHUNKSIZE ) + 1
END <- START + CHUNKSIZE - 1
if ( END > nrow(xs) ) { END <- nrow(xs) }
xr[[i]] <- xs[START:END,"rank"]
}
names(xr) <- paste("level",1:10)
str(xr)
## List of 10
## $ level 1 : num [1:1552] 5995 13060 14148 4735 10163 ...
## $ level 2 : num [1:1552] 748 2951 3189 5176 2415 ...
## $ level 3 : num [1:1552] 1852 5873 4194 3505 4610 ...
## $ level 4 : num [1:1552] 9680 2674 2902 11715 9727 ...
## $ level 5 : num [1:1552] 2924 13252 6973 14201 11313 ...
## $ level 6 : num [1:1552] 12467 13432 4695 4706 3654 ...
## $ level 7 : num [1:1552] 14246 14962 14264 13511 12122 ...
## $ level 8 : num [1:1552] 14021 11824 11192 11317 10331 ...
## $ level 9 : num [1:1552] 10982 12325 10868 5982 7985 ...
## $ level 10: num [1:1549] 11280 8106 10626 10051 6800 ...
boxplot(xr,cex=0,col="white",ylab="Position in gene rank")
beeswarm(xr,cex=0.25,add=TRUE,pch=19,col="darkgray")
Now quantify the data skew.
Pearson’s Coefficient of Skewness.
lapply(xr,summary)
## $`level 1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 1214 2642 3658 4482 15504
##
## $`level 2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8 1538 3277 4202 5618 15517
##
## $`level 3`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2021 4222 5179 7159 15515
##
## $`level 4`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 3047 5440 6508 10411 15516
##
## $`level 5`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14 4389 8668 8539 13069 15506
##
## $`level 6`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24 6052 11904 10209 13987 15499
##
## $`level 7`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32 9732 12994 11495 14464 15514
##
## $`level 8`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 358 7862 11185 10314 12820 15510
##
## $`level 9`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2613 7278 8859 8836 10398 15062
##
## $`level 10`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1285 7597 8594 8653 9725 15267
pcs <- unlist(lapply(xr, function(x) { ( mean(x) - median(x) ) / sd(x) } ) )
pcs
## level 1 level 2 level 3 level 4 level 5 level 6
## 0.28512320 0.25958295 0.23849748 0.24717896 -0.02789239 -0.37997955
## level 7 level 8 level 9 level 10
## -0.38551961 -0.26774139 -0.01132231 0.03868135
barplot(pcs)
sum(abs(pcs))
## [1] 2.141519
Consider running Rank ANOVA test for enrichment. Optionally can correct for basemean bias.
df <- x[,"rank",drop=FALSE]
df$baseMean <- rank(x$baseMean)
plot(df)
i=1
gs <- xl[[i]]
df$inset <- as.numeric(rownames(df) %in% gs)
mdl0 <- aov(rank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(rank ~ baseMean + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
summary(mdl0)
## Df Sum Sq Mean Sq F value Pr(>F)
## inset 1 2.901e+10 2.901e+10 1594 <2e-16 ***
## Residuals 15515 2.823e+11 1.820e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mdl1)
## Df Sum Sq Mean Sq F value Pr(>F)
## baseMean 1 6.730e+10 6.730e+10 4308.4 <2e-16 ***
## inset 1 1.727e+09 1.727e+09 110.6 <2e-16 ***
## Residuals 15514 2.423e+11 1.562e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p0
## [1] 0
p1
## [1] 8.869726e-26
myinset <- subset(df,inset==1)
points(myinset[,1:2],col="red")
df <- x[,"rank",drop=FALSE]
df$baseMean <- scale(log10(x$baseMean))
mclapply(1:length(xl), function(i) {
gs <- xl[[i]]
df$inset <- as.numeric(rownames(df) %in% gs)
mdl0 <- aov(rank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(rank ~ baseMean + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
res <- c("p0"=p0,"p1"=p1)
return(res)
},mc.cores=10)
## [[1]]
## p0 p1
## 0.000000e+00 2.172461e-29
##
## [[2]]
## p0 p1
## 4.367544e-247 5.455178e-69
##
## [[3]]
## p0 p1
## 8.594235e-129 3.856289e-36
##
## [[4]]
## p0 p1
## 3.156710e-31 5.647792e-04
##
## [[5]]
## p0 p1
## 4.756777e-13 3.002158e-37
##
## [[6]]
## p0 p1
## 4.660119e-116 2.317980e-135
##
## [[7]]
## p0 p1
## 2.216403e-273 2.431321e-221
##
## [[8]]
## p0 p1
## 2.813087e-126 1.442341e-22
##
## [[9]]
## p0 p1
## 1.516935e-23 8.993893e-68
##
## [[10]]
## p0 p1
## 1.119790e-16 5.669304e-132
Run enrichment test on Reactome gene sets with and without correction.
Relate gene symbols to gene IDs.
x$derank <- rank(x$stat)
x$bmrank <- rank(x$baseMean)
x$symbol <- lapply(strsplit(rownames(x)," "), function(x) { x[length(x) ]} )
Plot DE rank vs basemean rank.
Testing code.
df <- x
plot(df[,c("derank","bmrank")])
reactome <- gmtPathways("c2.cp.reactome.v2023.2.Hs.symbols.gmt")
i=1
gs <- reactome[[i]]
df$inset <- as.numeric(df$symbol %in% gs)
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
# basic es calc
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
summary(mdl0)
## Df Sum Sq Mean Sq F value Pr(>F)
## inset 1 5.601e+04 56008 0.003 0.958
## Residuals 15515 3.113e+11 20067357
summary(mdl1)
## Df Sum Sq Mean Sq F value Pr(>F)
## bmrank 1 6.730e+10 6.730e+10 4278.299 <2e-16 ***
## inset 1 1.984e+07 1.984e+07 1.261 0.261
## Residuals 15514 2.440e+11 1.573e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p0
## [1] 0.9578683
p1
## [1] 0.2614122
myinset <- subset(df,inset==1)
points(myinset[,c("derank","bmrank")],col="red",pch=19)
# corrected es calc
bmranks <- mean(subset(df,inset==1)$bmrank)
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mysubsets <- lapply(idx, function(i) {
df3 <- df2[(i-block):(i+block),]
df3 <- subset(df3,inset!=1)
return(df3)
} )
# background based on genes in bmrank vicinity rather than all genes
mrbg1 <- mean(unlist(lapply(idx, function(i) {
df3 <- df2[(i-block):(i+block),]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
Now run for all Reactome gene sets.
RhpcBLASctl::blas_set_num_threads(1)
tic()
myres <- mclapply(1:length(reactome), function(i) {
gs <- reactome[[i]]
gsname <- names(reactome)[i]
df$inset <- as.numeric(df$symbol %in% gs)
NGENES <- length(unique(unlist(subset( df,inset==1 )$symbol)))
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
# corrected es
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mrbg1 <- mean(unlist(lapply(idx, function(j) {
START=j-block
if (START < 1) { START=1 }
END=j+block
if (END > tot) { END=tot }
df3 <- df2[START:END,]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
res <- c("GeneSet"=gsname,"Ngenes"=NGENES,"ES0"=es0,"ES1"=es1,"p0"=p0,"p1"=p1)
return(res)
},mc.cores=8)
toc()
## 9.608 sec elapsed
myres <- do.call(rbind,myres)
myrownames <- myres[,1]
myres <- myres[,-1]
myres <- apply(myres,2,as.numeric)
rownames(myres) <- myrownames
myres <- as.data.frame(myres)
myres <- subset(myres,Ngenes>=5)
# FDR correction is vital
myres$fdr0 <- p.adjust(myres$p0,method="fdr")
myres$fdr1 <- p.adjust(myres$p1,method="fdr")
# show top results
myres <- myres[order(myres[,"p0"]),]
head(myres,10) %>%
kbl(caption="Top results for normal ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| REACTOME_CELLULAR_RESPONSES_TO_STIMULI | 732 | -0.2869156 | -0.0809741 | 0 | 0.0000020 | 0 | 0.0000663 |
| REACTOME_INFECTIOUS_DISEASE | 804 | -0.2506137 | -0.0695032 | 0 | 0.0002558 | 0 | 0.0043368 |
| REACTOME_TRANSLATION | 286 | -0.4036663 | -0.1184580 | 0 | 0.0000948 | 0 | 0.0019181 |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | 109 | -0.6188420 | -0.2406677 | 0 | 0.0000013 | 0 | 0.0000481 |
| REACTOME_METABOLISM_OF_RNA | 704 | -0.2437267 | -0.0573382 | 0 | 0.0263616 | 0 | 0.1261059 |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | 116 | -0.5710405 | -0.1890998 | 0 | 0.0000955 | 0 | 0.0019181 |
| REACTOME_VIRAL_INFECTION_PATHWAYS | 662 | -0.2431956 | -0.0639578 | 0 | 0.0040268 | 0 | 0.0333958 |
| REACTOME_DEVELOPMENTAL_BIOLOGY | 787 | -0.2279278 | -0.0775218 | 0 | 0.0000020 | 0 | 0.0000663 |
| REACTOME_SIGNALING_BY_ROBO_RECEPTORS | 194 | -0.4358706 | -0.1351939 | 0 | 0.0000919 | 0 | 0.0018943 |
| REACTOME_NERVOUS_SYSTEM_DEVELOPMENT | 459 | -0.2909668 | -0.1028581 | 0 | 0.0000030 | 0 | 0.0000914 |
myres <- myres[order(myres[,"p1"]),]
head(myres,10) %>%
kbl(caption="Top results for adjusted ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| REACTOME_CHOLESTEROL_BIOSYNTHESIS | 26 | 0.7144825 | 0.9422283 | 0.0000000 | 0 | 0.0000000 | 0 |
| REACTOME_ACTIVATION_OF_C3_AND_C5 | 7 | 0.6664395 | 0.9296139 | 0.0000000 | 0 | 0.0000002 | 0 |
| REACTOME_COMPLEMENT_CASCADE | 36 | 0.4506368 | 0.6218277 | 0.0000000 | 0 | 0.0000001 | 0 |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 13 | 0.5995036 | 0.7285496 | 0.0000000 | 0 | 0.0000001 | 0 |
| REACTOME_CELL_CYCLE | 632 | 0.0619000 | 0.1279676 | 0.0060782 | 0 | 0.0214707 | 0 |
| REACTOME_MITOTIC_PROMETAPHASE | 193 | 0.1623782 | 0.2199831 | 0.0000475 | 0 | 0.0003754 | 0 |
| REACTOME_CELL_CYCLE_MITOTIC | 513 | 0.0441162 | 0.1361668 | 0.0797421 | 0 | 0.1596935 | 0 |
| REACTOME_DNA_STRAND_ELONGATION | 32 | 0.6020625 | 0.4754586 | 0.0000000 | 0 | 0.0000001 | 0 |
| REACTOME_CILIUM_ASSEMBLY | 184 | 0.2053925 | 0.1675311 | 0.0000005 | 0 | 0.0000071 | 0 |
| REACTOME_FATTY_ACID_METABOLISM | 134 | 0.1785201 | 0.2400829 | 0.0001810 | 0 | 0.0011652 | 0 |
Now make an Euler plot.
fdr0set <- rownames(subset(myres,fdr0<0.01))
fdr1set <- rownames(subset(myres,fdr1<0.01))
FDR0LEN <- length(fdr0set)
FDR1LEN <- length(fdr1set)
JACCARD <- signif(length(intersect(fdr0set,fdr1set)) / length(union(fdr0set,fdr1set)),3)
HEADER <- paste("N sig (uncorr)=",FDR0LEN,", N sig (corr)",FDR1LEN,", Jaccard=",JACCARD)
HEADER
## [1] "N sig (uncorr)= 356 , N sig (corr) 126 , Jaccard= 0.217"
v1 <- list("fdr0set"=fdr0set, "fdr1set"=fdr1set)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
#mtext(HEADER,cex=0.5)
Now separate up and down.
fdr0up <- rownames(subset(myres,fdr0<0.01&ES0>0))
fdr0dn <- rownames(subset(myres,fdr0<0.01&ES0<0))
fdr1up <- rownames(subset(myres,fdr1<0.01&ES1>0))
fdr1dn <- rownames(subset(myres,fdr1<0.01&ES1<0))
v1 <- list("fdr0up"=fdr0up, "fdr0dn"=fdr0dn, "fdr1up"=fdr1up,"fdr1dn"=fdr1dn)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
#mtext(HEADER,cex=0.5)
setdiff(fdr0up,fdr1up)
## [1] "REACTOME_MISCELLANEOUS_SUBSTRATES"
## [2] "REACTOME_G2_M_DNA_DAMAGE_CHECKPOINT"
## [3] "REACTOME_PROCESSING_OF_DNA_DOUBLE_STRAND_BREAK_ENDS"
## [4] "REACTOME_POST_TRANSLATIONAL_MODIFICATION_SYNTHESIS_OF_GPI_ANCHORED_PROTEINS"
## [5] "REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE"
## [6] "REACTOME_MEIOTIC_RECOMBINATION"
setdiff(fdr1up,fdr0up)
## [1] "REACTOME_CELL_CYCLE"
## [2] "REACTOME_CELL_CYCLE_MITOTIC"
## [3] "REACTOME_ACTIVATION_OF_GENE_EXPRESSION_BY_SREBF_SREBP"
## [4] "REACTOME_CELL_CYCLE_CHECKPOINTS"
## [5] "REACTOME_PEROXISOMAL_PROTEIN_IMPORT"
## [6] "REACTOME_M_PHASE"
## [7] "REACTOME_ORGANELLE_BIOGENESIS_AND_MAINTENANCE"
## [8] "REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION"
## [9] "REACTOME_MITOTIC_SPINDLE_CHECKPOINT"
## [10] "REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION"
## [11] "REACTOME_FORMATION_OF_FIBRIN_CLOT_CLOTTING_CASCADE"
## [12] "REACTOME_METABOLISM_OF_VITAMINS_AND_COFACTORS"
## [13] "REACTOME_METABOLISM_OF_STEROIDS"
## [14] "REACTOME_PEROXISOMAL_LIPID_METABOLISM"
## [15] "REACTOME_MITOTIC_G2_G2_M_PHASES"
## [16] "REACTOME_REGULATION_OF_CHOLESTEROL_BIOSYNTHESIS_BY_SREBP_SREBF"
## [17] "REACTOME_SYNTHESIS_OF_DNA"
## [18] "REACTOME_SEPARATION_OF_SISTER_CHROMATIDS"
## [19] "REACTOME_GLUTATHIONE_CONJUGATION"
## [20] "REACTOME_RHO_GTPASES_ACTIVATE_FORMINS"
## [21] "REACTOME_PROCESSIVE_SYNTHESIS_ON_THE_C_STRAND_OF_THE_TELOMERE"
## [22] "REACTOME_FORMATION_OF_INCISION_COMPLEX_IN_GG_NER"
## [23] "REACTOME_REGULATION_OF_INSULIN_LIKE_GROWTH_FACTOR_IGF_TRANSPORT_AND_UPTAKE_BY_INSULIN_LIKE_GROWTH_FACTOR_BINDING_PROTEINS_IGFBPS"
## [24] "REACTOME_MISMATCH_REPAIR"
## [25] "REACTOME_GLYOXYLATE_METABOLISM_AND_GLYCINE_DEGRADATION"
## [26] "REACTOME_HISTIDINE_CATABOLISM"
## [27] "REACTOME_BETA_OXIDATION_OF_VERY_LONG_CHAIN_FATTY_ACIDS"
## [28] "REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA"
## [29] "REACTOME_DUAL_INCISION_IN_GG_NER"
## [30] "REACTOME_COMMON_PATHWAY_OF_FIBRIN_CLOT_FORMATION"
## [31] "REACTOME_ETHANOL_OXIDATION"
## [32] "REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE"
## [33] "REACTOME_S_PHASE"
## [34] "REACTOME_TELOMERE_MAINTENANCE"
## [35] "REACTOME_DNA_REPLICATION"
## [36] "REACTOME_HDR_THROUGH_MMEJ_ALT_NHEJ"
## [37] "REACTOME_INTEGRIN_CELL_SURFACE_INTERACTIONS"
## [38] "REACTOME_MITOCHONDRIAL_FATTY_ACID_BETA_OXIDATION"
## [39] "REACTOME_RESOLUTION_OF_AP_SITES_VIA_THE_MULTIPLE_NUCLEOTIDE_PATCH_REPLACEMENT_PATHWAY"
setdiff(fdr0dn,fdr1dn)
## [1] "REACTOME_CHROMATIN_MODIFYING_ENZYMES"
## [2] "REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR"
## [3] "REACTOME_INSULIN_RECEPTOR_RECYCLING"
## [4] "REACTOME_SIGNALLING_TO_ERKS"
## [5] "REACTOME_FC_EPSILON_RECEPTOR_FCERI_SIGNALING"
## [6] "REACTOME_NUCLEAR_RECEPTOR_TRANSCRIPTION_PATHWAY"
## [7] "REACTOME_KEAP1_NFE2L2_PATHWAY"
## [8] "REACTOME_RAC1_GTPASE_CYCLE"
## [9] "REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S"
## [10] "REACTOME_RHO_GTPASE_CYCLE"
## [11] "REACTOME_CELL_EXTRACELLULAR_MATRIX_INTERACTIONS"
## [12] "REACTOME_MYD88_INDEPENDENT_TLR4_CASCADE"
## [13] "REACTOME_SPHINGOLIPID_METABOLISM"
## [14] "REACTOME_SARS_COV_1_INFECTION"
## [15] "REACTOME_TOLL_LIKE_RECEPTOR_9_TLR9_CASCADE"
## [16] "REACTOME_CONSTITUTIVE_SIGNALING_BY_ABERRANT_PI3K_IN_CANCER"
## [17] "REACTOME_RUNX3_REGULATES_P14_ARF"
## [18] "REACTOME_MAPK_TARGETS_NUCLEAR_EVENTS_MEDIATED_BY_MAP_KINASES"
## [19] "REACTOME_TCR_SIGNALING"
## [20] "REACTOME_TRANSFERRIN_ENDOCYTOSIS_AND_RECYCLING"
## [21] "REACTOME_TRANSPORT_TO_THE_GOLGI_AND_SUBSEQUENT_MODIFICATION"
## [22] "REACTOME_DOWNSTREAM_SIGNALING_EVENTS_OF_B_CELL_RECEPTOR_BCR"
## [23] "REACTOME_NUCLEAR_EVENTS_MEDIATED_BY_NFE2L2"
## [24] "REACTOME_BACTERIAL_INFECTION_PATHWAYS"
## [25] "REACTOME_AKT_PHOSPHORYLATES_TARGETS_IN_THE_CYTOSOL"
## [26] "REACTOME_PTEN_REGULATION"
## [27] "REACTOME_VIRAL_INFECTION_PATHWAYS"
## [28] "REACTOME_CELL_CELL_COMMUNICATION"
## [29] "REACTOME_MEMBRANE_TRAFFICKING"
## [30] "REACTOME_TCF_DEPENDENT_SIGNALING_IN_RESPONSE_TO_WNT"
## [31] "REACTOME_RHOG_GTPASE_CYCLE"
## [32] "REACTOME_HEME_SIGNALING"
## [33] "REACTOME_VESICLE_MEDIATED_TRANSPORT"
## [34] "REACTOME_DEGRADATION_OF_BETA_CATENIN_BY_THE_DESTRUCTION_COMPLEX"
## [35] "REACTOME_SARS_COV_INFECTIONS"
## [36] "REACTOME_SIGNALING_BY_EGFR_IN_CANCER"
## [37] "REACTOME_IRON_UPTAKE_AND_TRANSPORT"
## [38] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_RUNX3"
## [39] "REACTOME_ESR_MEDIATED_SIGNALING"
## [40] "REACTOME_AUTOPHAGY"
## [41] "REACTOME_CELL_JUNCTION_ORGANIZATION"
## [42] "REACTOME_SIGNALING_BY_MET"
## [43] "REACTOME_EGFR_DOWNREGULATION"
## [44] "REACTOME_ABC_FAMILY_PROTEINS_MEDIATED_TRANSPORT"
## [45] "REACTOME_CIRCADIAN_CLOCK"
## [46] "REACTOME_BETA_CATENIN_INDEPENDENT_WNT_SIGNALING"
## [47] "REACTOME_SIGNALLING_TO_RAS"
## [48] "REACTOME_SIGNALING_BY_NUCLEAR_RECEPTORS"
## [49] "REACTOME_SELENOAMINO_ACID_METABOLISM"
## [50] "REACTOME_CARGO_CONCENTRATION_IN_THE_ER"
## [51] "REACTOME_SYNTHESIS_SECRETION_AND_INACTIVATION_OF_GLUCOSE_DEPENDENT_INSULINOTROPIC_POLYPEPTIDE_GIP"
## [52] "REACTOME_CDC42_GTPASE_CYCLE"
## [53] "REACTOME_DAP12_SIGNALING"
## [54] "REACTOME_MAP3K8_TPL2_DEPENDENT_MAPK1_3_ACTIVATION"
## [55] "REACTOME_NFE2L2_REGULATING_TUMORIGENIC_GENES"
## [56] "REACTOME_REGULATION_OF_PTEN_STABILITY_AND_ACTIVITY"
## [57] "REACTOME_DEUBIQUITINATION"
## [58] "REACTOME_GASTRULATION"
## [59] "REACTOME_INFECTION_WITH_MYCOBACTERIUM_TUBERCULOSIS"
## [60] "REACTOME_FCERI_MEDIATED_MAPK_ACTIVATION"
## [61] "REACTOME_SIGNALING_BY_NOTCH"
## [62] "REACTOME_INCRETIN_SYNTHESIS_SECRETION_AND_INACTIVATION"
## [63] "REACTOME_SIGNALING_BY_NOTCH1"
## [64] "REACTOME_PI_METABOLISM"
## [65] "REACTOME_CLATHRIN_MEDIATED_ENDOCYTOSIS"
## [66] "REACTOME_RESPONSE_OF_MTB_TO_PHAGOCYTOSIS"
## [67] "REACTOME_TBC_RABGAPS"
## [68] "REACTOME_RAF_ACTIVATION"
## [69] "REACTOME_SARS_COV_1_MODULATES_HOST_TRANSLATION_MACHINERY"
## [70] "REACTOME_SIGNALING_BY_PDGFR_IN_DISEASE"
## [71] "REACTOME_PCP_CE_PATHWAY"
## [72] "REACTOME_MRNA_SPLICING"
## [73] "REACTOME_RHOV_GTPASE_CYCLE"
## [74] "REACTOME_AMINO_ACID_TRANSPORT_ACROSS_THE_PLASMA_MEMBRANE"
## [75] "REACTOME_ABC_TRANSPORTER_DISORDERS"
## [76] "REACTOME_UCH_PROTEINASES"
## [77] "REACTOME_RHOQ_GTPASE_CYCLE"
## [78] "REACTOME_FLT3_SIGNALING"
## [79] "REACTOME_C_TYPE_LECTIN_RECEPTORS_CLRS"
## [80] "REACTOME_ER_TO_GOLGI_ANTEROGRADE_TRANSPORT"
## [81] "REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM"
## [82] "REACTOME_ERK_MAPK_TARGETS"
## [83] "REACTOME_REGULATION_OF_TP53_ACTIVITY_THROUGH_ACETYLATION"
## [84] "REACTOME_SIGNALING_BY_FGFR3"
## [85] "REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS"
## [86] "REACTOME_INTRA_GOLGI_TRAFFIC"
## [87] "REACTOME_TRANS_GOLGI_NETWORK_VESICLE_BUDDING"
## [88] "REACTOME_INSULIN_RECEPTOR_SIGNALLING_CASCADE"
## [89] "REACTOME_ESTROGEN_DEPENDENT_NUCLEAR_EVENTS_DOWNSTREAM_OF_ESR_MEMBRANE_SIGNALING"
## [90] "REACTOME_REGULATION_OF_MRNA_STABILITY_BY_PROTEINS_THAT_BIND_AU_RICH_ELEMENTS"
## [91] "REACTOME_CONSTITUTIVE_SIGNALING_BY_LIGAND_RESPONSIVE_EGFR_CANCER_VARIANTS"
## [92] "REACTOME_HSF1_DEPENDENT_TRANSACTIVATION"
## [93] "REACTOME_METABOLISM_OF_RNA"
## [94] "REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS"
## [95] "REACTOME_COPII_MEDIATED_VESICLE_TRANSPORT"
## [96] "REACTOME_HEDGEHOG_LIGAND_BIOGENESIS"
## [97] "REACTOME_RND2_GTPASE_CYCLE"
## [98] "REACTOME_CONSTITUTIVE_SIGNALING_BY_EGFRVIII"
## [99] "REACTOME_UB_SPECIFIC_PROCESSING_PROTEASES"
## [100] "REACTOME_UPTAKE_AND_ACTIONS_OF_BACTERIAL_TOXINS"
## [101] "REACTOME_SARS_COV_2_INFECTION"
## [102] "REACTOME_P75_NTR_RECEPTOR_MEDIATED_SIGNALLING"
## [103] "REACTOME_REGULATION_OF_SIGNALING_BY_CBL"
## [104] "REACTOME_SIGNALING_BY_FGFR4"
## [105] "REACTOME_NEPHRIN_FAMILY_INTERACTIONS"
## [106] "REACTOME_SARS_COV_1_HOST_INTERACTIONS"
## [107] "REACTOME_RRNA_PROCESSING"
## [108] "REACTOME_RESPONSE_OF_EIF2AK1_HRI_TO_HEME_DEFICIENCY"
## [109] "REACTOME_CONSTITUTIVE_SIGNALING_BY_AKT1_E17K_IN_CANCER"
## [110] "REACTOME_NEGATIVE_REGULATION_OF_MET_ACTIVITY"
## [111] "REACTOME_EPH_EPHRIN_SIGNALING"
## [112] "REACTOME_FOXO_MEDIATED_TRANSCRIPTION_OF_CELL_CYCLE_GENES"
## [113] "REACTOME_AMINO_ACIDS_REGULATE_MTORC1"
## [114] "REACTOME_PARASITE_INFECTION"
## [115] "REACTOME_CYTOSOLIC_TRNA_AMINOACYLATION"
## [116] "REACTOME_ATTENUATION_PHASE"
## [117] "REACTOME_REGULATION_OF_RUNX3_EXPRESSION_AND_ACTIVITY"
## [118] "REACTOME_FCERI_MEDIATED_NF_KB_ACTIVATION"
## [119] "REACTOME_DECTIN_1_MEDIATED_NONCANONICAL_NF_KB_SIGNALING"
## [120] "REACTOME_ACTIVATED_NOTCH1_TRANSMITS_SIGNAL_TO_THE_NUCLEUS"
## [121] "REACTOME_HATS_ACETYLATE_HISTONES"
## [122] "REACTOME_CLEC7A_DECTIN_1_SIGNALING"
## [123] "REACTOME_LYSOSOME_VESICLE_BIOGENESIS"
## [124] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_RUNX1"
## [125] "REACTOME_PROCESSING_OF_CAPPED_INTRON_CONTAINING_PRE_MRNA"
## [126] "REACTOME_SIGNALING_BY_FGFR1"
## [127] "REACTOME_UPTAKE_AND_FUNCTION_OF_ANTHRAX_TOXINS"
## [128] "REACTOME_NFE2L2_REGULATING_ER_STRESS_ASSOCIATED_GENES"
## [129] "REACTOME_REGULATION_OF_RUNX2_EXPRESSION_AND_ACTIVITY"
## [130] "REACTOME_RHOA_GTPASE_CYCLE"
## [131] "REACTOME_SIGNALING_BY_CSF1_M_CSF_IN_MYELOID_CELLS"
## [132] "REACTOME_NEGATIVE_REGULATION_OF_FGFR1_SIGNALING"
## [133] "REACTOME_SIGNALING_BY_TGF_BETA_RECEPTOR_COMPLEX"
## [134] "REACTOME_POTENTIAL_THERAPEUTICS_FOR_SARS"
## [135] "REACTOME_FOXO_MEDIATED_TRANSCRIPTION"
## [136] "REACTOME_SIGNALING_BY_ERBB2"
## [137] "REACTOME_TGF_BETA_RECEPTOR_SIGNALING_ACTIVATES_SMADS"
## [138] "REACTOME_REGULATION_OF_NFE2L2_GENE_EXPRESSION"
## [139] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_RUNX2"
## [140] "REACTOME_CONSTITUTIVE_SIGNALING_BY_OVEREXPRESSED_ERBB2"
## [141] "REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS"
## [142] "REACTOME_AUF1_HNRNP_D0_BINDS_AND_DESTABILIZES_MRNA"
## [143] "REACTOME_FOLDING_OF_ACTIN_BY_CCT_TRIC"
## [144] "REACTOME_DEGRADATION_OF_AXIN"
## [145] "REACTOME_CD28_CO_STIMULATION"
## [146] "REACTOME_MAPK6_MAPK4_SIGNALING"
## [147] "REACTOME_LOSS_OF_FUNCTION_OF_MECP2_IN_RETT_SYNDROME"
## [148] "REACTOME_DEATH_RECEPTOR_SIGNALING"
## [149] "REACTOME_FORMATION_OF_PARAXIAL_MESODERM"
## [150] "REACTOME_NEGATIVE_REGULATION_OF_FGFR3_SIGNALING"
## [151] "REACTOME_SIGNALING_BY_ERBB4"
## [152] "REACTOME_COPI_MEDIATED_ANTEROGRADE_TRANSPORT"
## [153] "REACTOME_NOTCH1_INTRACELLULAR_DOMAIN_REGULATES_TRANSCRIPTION"
## [154] "REACTOME_NEGATIVE_REGULATION_OF_FGFR4_SIGNALING"
## [155] "REACTOME_TOLL_LIKE_RECEPTOR_TLR1_TLR2_CASCADE"
## [156] "REACTOME_CARGO_RECOGNITION_FOR_CLATHRIN_MEDIATED_ENDOCYTOSIS"
## [157] "REACTOME_MATURATION_OF_SARS_COV_1_SPIKE_PROTEIN"
## [158] "REACTOME_EPHB_MEDIATED_FORWARD_SIGNALING"
## [159] "REACTOME_ACTIVATION_OF_BAD_AND_TRANSLOCATION_TO_MITOCHONDRIA"
## [160] "REACTOME_REGULATION_OF_RAS_BY_GAPS"
## [161] "REACTOME_SIGNALING_BY_NOTCH1_PEST_DOMAIN_MUTANTS_IN_CANCER"
## [162] "REACTOME_DEGRADATION_OF_GLI1_BY_THE_PROTEASOME"
## [163] "REACTOME_NF_KB_IS_ACTIVATED_AND_SIGNALS_SURVIVAL"
## [164] "REACTOME_MITOPHAGY"
## [165] "REACTOME_PROLONGED_ERK_ACTIVATION_EVENTS"
## [166] "REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING"
## [167] "REACTOME_POST_TRANSLATIONAL_PROTEIN_MODIFICATION"
## [168] "REACTOME_DEACTIVATION_OF_THE_BETA_CATENIN_TRANSACTIVATING_COMPLEX"
## [169] "REACTOME_INTERLEUKIN_1_SIGNALING"
## [170] "REACTOME_FLT3_SIGNALING_IN_DISEASE"
## [171] "REACTOME_INTERLEUKIN_1_FAMILY_SIGNALING"
## [172] "REACTOME_INTERLEUKIN_12_FAMILY_SIGNALING"
## [173] "REACTOME_CYTOSOLIC_SENSORS_OF_PATHOGEN_ASSOCIATED_DNA"
## [174] "REACTOME_TOLL_LIKE_RECEPTOR_CASCADES"
## [175] "REACTOME_SIGNALING_BY_FGFR"
## [176] "REACTOME_REGULATION_OF_PTEN_GENE_TRANSCRIPTION"
## [177] "REACTOME_TRANSLATION_OF_SARS_COV_1_STRUCTURAL_PROTEINS"
## [178] "REACTOME_NEGATIVE_REGULATION_OF_NOTCH4_SIGNALING"
## [179] "REACTOME_SUPPRESSION_OF_PHAGOSOMAL_MATURATION"
## [180] "REACTOME_RAB_REGULATION_OF_TRAFFICKING"
## [181] "REACTOME_DEGRADATION_OF_DVL"
## [182] "REACTOME_SPRY_REGULATION_OF_FGF_SIGNALING"
## [183] "REACTOME_TGF_BETA_RECEPTOR_SIGNALING_IN_EMT_EPITHELIAL_TO_MESENCHYMAL_TRANSITION"
## [184] "REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA"
## [185] "REACTOME_SIGNALING_BY_NOTCH4"
## [186] "REACTOME_FORMATION_OF_WDR5_CONTAINING_HISTONE_MODIFYING_COMPLEXES"
## [187] "REACTOME_INTERLEUKIN_12_SIGNALING"
## [188] "REACTOME_M_DECAY_DEGRADATION_OF_MATERNAL_MRNAS_BY_MATERNALLY_STORED_FACTORS"
## [189] "REACTOME_ACTIVATION_OF_BH3_ONLY_PROTEINS"
## [190] "REACTOME_SIGNALING_BY_PTK6"
## [191] "REACTOME_DISORDERS_OF_TRANSMEMBRANE_TRANSPORTERS"
## [192] "REACTOME_DISASSEMBLY_OF_THE_DESTRUCTION_COMPLEX_AND_RECRUITMENT_OF_AXIN_TO_THE_MEMBRANE"
## [193] "REACTOME_SCF_BETA_TRCP_MEDIATED_DEGRADATION_OF_EMI1"
## [194] "REACTOME_DEADENYLATION_OF_MRNA"
## [195] "REACTOME_RRNA_MODIFICATION_IN_THE_NUCLEUS_AND_CYTOSOL"
## [196] "REACTOME_PINK1_PRKN_MEDIATED_MITOPHAGY"
## [197] "REACTOME_ATF6_ATF6_ALPHA_ACTIVATES_CHAPERONE_GENES"
## [198] "REACTOME_SARS_COV_2_MODULATES_HOST_TRANSLATION_MACHINERY"
## [199] "REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS"
## [200] "REACTOME_RHO_GTPASES_ACTIVATE_WASPS_AND_WAVES"
## [201] "REACTOME_SIGNALING_BY_FGFR2"
## [202] "REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS"
## [203] "REACTOME_RHOJ_GTPASE_CYCLE"
## [204] "REACTOME_PHOSPHOLIPID_METABOLISM"
## [205] "REACTOME_HEDGEHOG_ON_STATE"
## [206] "REACTOME_SIGNALING_BY_TGFB_FAMILY_MEMBERS"
## [207] "REACTOME_PERK_REGULATES_GENE_EXPRESSION"
## [208] "REACTOME_CROSS_PRESENTATION_OF_SOLUBLE_EXOGENOUS_ANTIGENS_ENDOSOMES"
## [209] "REACTOME_GENE_AND_PROTEIN_EXPRESSION_BY_JAK_STAT_SIGNALING_AFTER_INTERLEUKIN_12_STIMULATION"
## [210] "REACTOME_METABOLISM_OF_POLYAMINES"
## [211] "REACTOME_RHOBTB1_GTPASE_CYCLE"
## [212] "REACTOME_CELLULAR_RESPONSE_TO_HEAT_STRESS"
## [213] "REACTOME_VEGFR2_MEDIATED_CELL_PROLIFERATION"
## [214] "REACTOME_RHOB_GTPASE_CYCLE"
## [215] "REACTOME_LEISHMANIA_INFECTION"
## [216] "REACTOME_RHOU_GTPASE_CYCLE"
## [217] "REACTOME_INLB_MEDIATED_ENTRY_OF_LISTERIA_MONOCYTOGENES_INTO_HOST_CELL"
## [218] "REACTOME_INNATE_IMMUNE_SYSTEM"
## [219] "REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES"
## [220] "REACTOME_SIGNALING_BY_ALK_IN_CANCER"
## [221] "REACTOME_TP53_REGULATES_METABOLIC_GENES"
## [222] "REACTOME_RHOBTB_GTPASE_CYCLE"
## [223] "REACTOME_RHOF_GTPASE_CYCLE"
## [224] "REACTOME_ESTROGEN_DEPENDENT_GENE_EXPRESSION"
## [225] "REACTOME_ONCOGENIC_MAPK_SIGNALING"
## [226] "REACTOME_TNFR2_NON_CANONICAL_NF_KB_PATHWAY"
## [227] "REACTOME_STABILIZATION_OF_P53"
## [228] "REACTOME_HOST_INTERACTIONS_OF_HIV_FACTORS"
## [229] "REACTOME_RHOC_GTPASE_CYCLE"
## [230] "REACTOME_SOMITOGENESIS"
## [231] "REACTOME_REGULATION_OF_HSF1_MEDIATED_HEAT_SHOCK_RESPONSE"
## [232] "REACTOME_SCF_SKP2_MEDIATED_DEGRADATION_OF_P27_P21"
## [233] "REACTOME_RHOBTB2_GTPASE_CYCLE"
## [234] "REACTOME_ADAPTIVE_IMMUNE_SYSTEM"
## [235] "REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES_AND_RHOBTB3"
## [236] "REACTOME_TRANSCRIPTIONAL_ACTIVITY_OF_SMAD2_SMAD3_SMAD4_HETEROTRIMER"
## [237] "REACTOME_NFE2L2_REGULATING_ANTI_OXIDANT_DETOXIFICATION_ENZYMES"
## [238] "REACTOME_SARS_COV_1_TARGETS_HOST_INTRACELLULAR_SIGNALLING_AND_REGULATORY_PATHWAYS"
## [239] "REACTOME_MITOCHONDRIAL_BIOGENESIS"
## [240] "REACTOME_INTRA_GOLGI_AND_RETROGRADE_GOLGI_TO_ER_TRAFFIC"
## [241] "REACTOME_SIGNALING_BY_MODERATE_KINASE_ACTIVITY_BRAF_MUTANTS"
## [242] "REACTOME_CYTOPROTECTION_BY_HMOX1"
## [243] "REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY"
## [244] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_TP53"
## [245] "REACTOME_SARS_COV_2_HOST_INTERACTIONS"
## [246] "REACTOME_CYCLIN_A_CDK2_ASSOCIATED_EVENTS_AT_S_PHASE_ENTRY"
## [247] "REACTOME_G1_S_DNA_DAMAGE_CHECKPOINTS"
## [248] "REACTOME_PLASMA_LIPOPROTEIN_CLEARANCE"
## [249] "REACTOME_ANTIGEN_PROCESSING_UBIQUITINATION_PROTEASOME_DEGRADATION"
## [250] "REACTOME_NEDDYLATION"
## [251] "REACTOME_NEUTROPHIL_DEGRANULATION"
## [252] "REACTOME_HEMOSTASIS"
## [253] "REACTOME_SIGNALING_BY_BRAF_AND_RAF1_FUSIONS"
## [254] "REACTOME_SELECTIVE_AUTOPHAGY"
## [255] "REACTOME_APOPTOSIS"
## [256] "REACTOME_TRANSPORT_OF_MATURE_TRANSCRIPT_TO_CYTOPLASM"
## [257] "REACTOME_PLATELET_ACTIVATION_SIGNALING_AND_AGGREGATION"
## [258] "REACTOME_HIV_INFECTION"
## [259] "REACTOME_ISG15_ANTIVIRAL_MECHANISM"
## [260] "REACTOME_CLASS_I_MHC_MEDIATED_ANTIGEN_PROCESSING_PRESENTATION"
## [261] "REACTOME_THE_ROLE_OF_GTSE1_IN_G2_M_PROGRESSION_AFTER_G2_CHECKPOINT"
## [262] "REACTOME_PROGRAMMED_CELL_DEATH"
## [263] "REACTOME_RNA_POLYMERASE_II_TRANSCRIPTION_TERMINATION"
## [264] "REACTOME_MITOCHONDRIAL_TRANSLATION"
setdiff(fdr1dn,fdr0dn)
## [1] "REACTOME_SIGNALING_BY_GPCR"
Now take a look at the top results.
myres <- myres[order(myres[,"p0"]),]
top0dn <- rownames(head(subset(myres,ES0<0),10))
top0up <- rownames(head(subset(myres,ES0>0),10))
myres <- myres[order(myres[,"p1"]),]
top1dn <- rownames(head(subset(myres,ES1<0),10))
top1up <- rownames(head(subset(myres,ES1>0),10))
v1 <- list("up0"=top0up, "up1"=top1up, "dn0"=top0dn, "dn1"=top1dn)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
Now look at the top pathway results by ES after removing non-significant results (FDR>0.01).
top <- subset(myres,fdr0<0.01)
top <- top[order(top$ES0),]
dn0 <- head(top,15)$ES0
names(dn0) <- rownames(head(top,15))
up0 <- tail(top,15)$ES0
names(up0) <- rownames(tail(top,15))
top0 <- c(dn0,up0)
names(top0) <- gsub("REACTOME_","",names(top0))
names(top0) <- gsub("_"," ",names(top0))
par(mar=c(5.1,20.1,4.1,2.1))
barplot(top0,horiz=TRUE,las=1,cex.names=0.6,main="Uncorr",xlab="ES0")
top <- subset(myres,fdr1<0.01)
top <- top[order(top$ES1),]
dn0 <- head(top,15)$ES1
names(dn0) <- rownames(head(top,15))
up0 <- tail(top,15)$ES1
names(up0) <- rownames(tail(top,15))
top0 <- c(dn0,up0)
names(top0) <- gsub("REACTOME_","",names(top0))
names(top0) <- gsub("_"," ",names(top0))
barplot(top0,horiz=TRUE,las=1,cex.names=0.6,main="Corr",xlab="ES1")
par(mar=c(5.1,4.1,4.1,2.1))
Now plot ES values with both approaches.
plot(myres$ES0,myres$ES1,pch=19,col="gray",cex=0.5)
sig0 <- subset(myres,fdr0<0.01)
sig1 <- subset(myres,fdr1<0.01)
sig2 <- subset(myres,fdr0<0.01 & fdr1<0.01)
points(sig0$ES0,sig0$ES1,pch=19,col="blue",cex=0.5)
points(sig1$ES0,sig1$ES1,pch=19,col="orange",cex=0.5)
points(sig2$ES0,sig2$ES1,pch=19,col="black",cex=0.5)
abline(0,1)
Go back andcheck whether the adjustment reduces bias on basemean gene sets.
RhpcBLASctl::blas_set_num_threads(1)
tic()
myres <- mclapply(1:length(xl), function(i) {
gs <- xl[[i]]
gsname <- names(xl)[i]
df$inset <- as.numeric(rownames(df) %in% gs)
NGENES <- length(unique(unlist(subset( df,inset==1 )$symbol)))
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
# corrected es
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mrbg1 <- mean(unlist(lapply(idx, function(j) {
START=j-block
if (START < 1) { START=1 }
END=j+block
if (END > tot) { END=tot }
df3 <- df2[START:END,]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
res <- c("GeneSet"=gsname,"Ngenes"=NGENES,"ES0"=es0,"ES1"=es1,"p0"=p0,"p1"=p1)
return(res)
},mc.cores=8)
toc()
## 1.378 sec elapsed
myres <- do.call(rbind,myres)
myrownames <- myres[,1]
myres <- myres[,-1]
myres <- apply(myres,2,as.numeric)
rownames(myres) <- myrownames
myres <- as.data.frame(myres)
myres <- subset(myres,Ngenes>=5)
# FDR correction is vital
myres$fdr0 <- p.adjust(myres$p0,method="fdr")
myres$fdr1 <- p.adjust(myres$p1,method="fdr")
# show top results
myres <- myres[order(myres[,"p0"]),]
head(myres,10) %>%
kbl(caption="Top results for normal ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| level 1 | 1524 | -0.5873784 | -0.0516501 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 7 | 1456 | 0.5349955 | 0.0208394 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 2 | 1519 | -0.5093819 | 0.0004480 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 3 | 1513 | -0.3695368 | -0.0146797 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 8 | 1450 | 0.3659215 | -0.0122493 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 6 | 1472 | 0.3508208 | 0.0033589 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 4 | 1518 | -0.1791750 | -0.0589030 | 0 | 0.0716092 | 0 | 0.0716092 |
| level 9 | 1448 | 0.1542525 | -0.0355512 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 10 | 1476 | 0.1280646 | -0.0439250 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 5 | 1498 | 0.1116372 | 0.0601581 | 0 | 0.0000000 | 0 | 0.0000000 |
myres <- myres[order(myres[,"p1"]),]
head(myres,10) %>%
kbl(caption="Top results for adjusted ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| level 10 | 1476 | 0.1280646 | -0.0439250 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 7 | 1456 | 0.5349955 | 0.0208394 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 6 | 1472 | 0.3508208 | 0.0033589 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 9 | 1448 | 0.1542525 | -0.0355512 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 5 | 1498 | 0.1116372 | 0.0601581 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 2 | 1519 | -0.5093819 | 0.0004480 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 1 | 1524 | -0.5873784 | -0.0516501 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 3 | 1513 | -0.3695368 | -0.0146797 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 8 | 1450 | 0.3659215 | -0.0122493 | 0 | 0.0000000 | 0 | 0.0000000 |
| level 4 | 1518 | -0.1791750 | -0.0589030 | 0 | 0.0716092 | 0 | 0.0716092 |
Plot the result.
ora1bias <- sum(-log10(ora_up$pval)) + sum(-log10(ora_dn$pval))
ora2bias <- sum(-log10(ora2_up$pval)) + sum(-log10(ora2_dn$pval))
fcs1bias <- sum(-log10(fcs1$pval))
anova0bias <- sum(-log10(myres$p0))
anova1bias <- sum(-log10(myres$p1))
res_p <- c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"fgsea"=fcs1bias,"ANOVA"=anova0bias, "ANOVA adj"=anova1bias)
res_p
## ORA p ORA p FC fgsea ANOVA ANOVA adj
## 277.8098 277.8098 NA Inf 656.7638
barplot(res_p, ylab="Bias measure (p)",ylim=c(0,400))
ora1bias <- sum(-log10(ora_up$padj)) + sum(-log10(ora_dn$padj))
ora2bias <- sum(-log10(ora2_up$padj)) + sum(-log10(ora2_dn$padj))
fcs1bias <- sum(-log10(fcs1$padj))
anova0bias <- sum(-log10(myres$fdr0))
anova1bias <- sum(-log10(myres$fdr1))
res_fdr <- c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"fgsea"=fcs1bias,"ANOVA"=anova0bias, "ANOVA adj"=anova1bias)
res_fdr
## ORA p ORA p FC fgsea ANOVA ANOVA adj
## 272.9681 272.9681 NA Inf 653.3236
barplot(res_fdr, ylab="Bias measure (FDR)",ylim=c(0,400))
For reproducibility.
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tictoc_1.2.1 eulerr_7.0.4 beeswarm_0.4.0 kableExtra_1.4.0
## [5] fgsea_1.34.2
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 xml2_1.5.2
## [4] polylabelr_1.0.0 stringi_1.8.7 lattice_0.22-9
## [7] digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5
## [10] grid_4.5.2 RColorBrewer_1.1-3 fastmap_1.2.0
## [13] jsonlite_2.0.0 Matrix_1.7-4 viridisLite_0.4.3
## [16] scales_1.4.0 RhpcBLASctl_0.23-42 codetools_0.2-20
## [19] textshaping_1.0.4 jquerylib_0.1.4 cli_3.6.5
## [22] rlang_1.1.7 polyclip_1.10-7 cowplot_1.2.0
## [25] cachem_1.1.0 yaml_2.3.12 otel_0.2.0
## [28] tools_4.5.2 BiocParallel_1.42.2 dplyr_1.2.0
## [31] ggplot2_4.0.2 fastmatch_1.1-8 vctrs_0.7.1
## [34] R6_2.6.1 lifecycle_1.0.5 stringr_1.6.0
## [37] pkgconfig_2.0.3 pillar_1.11.1 bslib_0.10.0
## [40] gtable_0.3.6 glue_1.8.0 data.table_1.18.2.1
## [43] Rcpp_1.1.1 systemfonts_1.3.1 xfun_0.56
## [46] tibble_3.3.1 tidyselect_1.2.1 rstudioapi_0.18.0
## [49] knitr_1.51 dichromat_2.0-0.1 farver_2.1.2
## [52] htmltools_0.5.9 rmarkdown_2.30 svglite_2.2.2
## [55] compiler_4.5.2 S7_0.2.1