Testing whether ORA biases towards gene sets based on their basemean expression levels.
library("fgsea")
library("kableExtra")
library("beeswarm")
library("eulerr")
library("tictoc")
library("parallel")
RhpcBLASctl::blas_set_num_threads(1)
Read data and make a smear plot.
x <- readRDS("dataprep/bulkrna2.Rds")
x$rank <- rank(x$stat)
head(x)
## baseMean log2FoldChange lfcSE stat
## ENSG00000000419 DPM1 589.41607 0.1843400 0.09395830 1.961934
## ENSG00000000457 SCYL3 519.86126 -0.1170847 0.10545097 -1.110323
## ENSG00000000460 FIRRM 600.40694 0.1209053 0.09547212 1.266394
## ENSG00000000938 FGR 49.67164 -1.8982836 0.34307729 -5.533108
## ENSG00000000971 CFH 60.31278 -2.3243491 0.32538459 -7.143390
## ENSG00000001036 FUCA2 1880.52699 0.1397832 0.06103558 2.290192
## pvalue padj rank
## ENSG00000000419 DPM1 4.977014e-02 1.444029e-01 11451
## ENSG00000000457 SCYL3 2.668597e-01 4.744703e-01 3849
## ENSG00000000460 FIRRM 2.053722e-01 3.986603e-01 10233
## ENSG00000000938 FGR 3.146060e-08 5.822722e-07 536
## ENSG00000000971 CFH 9.105673e-13 3.208424e-11 324
## ENSG00000001036 FUCA2 2.201018e-02 7.702733e-02 11891
dim(x)
## [1] 13816 7
plot(log10(x$baseMean),x$log2FoldChange,pch=19,cex=0.5,
xlab="log10(baseMean)",ylab="log2(foldchange)",
ylim=c(-8,8))
sig <- subset(x,padj<0.01)
points(log10(sig$baseMean),sig$log2FoldChange,pch=19,cex=0.5,col="red")
plot(x$log2FoldChange,-log10(x$pvalue), pch=19,cex=0.5,
xlab="log2(foldchange)",ylab="-log10(p-value)")
sig <- subset(x,padj<0.01)
points(sig$log2FoldChange,-log10(sig$pvalue),pch=19,cex=0.5,col="red")
Break the data into deciles based on basemean expression.
xs <- x[order(-x$baseMean),]
NCHUNKS = 10
CHUNKSIZE <- round(nrow(xs)/NCHUNKS)
START = 1
xl <- as.list(1:10)
for ( i in 1:NCHUNKS ) {
START <- ( ( i - 1 ) * CHUNKSIZE ) + 1
END <- START + CHUNKSIZE - 1
if ( END > nrow(xs) ) { END <- nrow(xs) }
xl[[i]] <- rownames(xs[START:END,])
}
names(xl) <- paste("level",1:10)
str(xl)
## List of 10
## $ level 1 : chr [1:1382] "ENSG00000156508 EEF1A1" "ENSG00000075624 ACTB" "ENSG00000111640 GAPDH" "ENSG00000184009 ACTG1" ...
## $ level 2 : chr [1:1382] "ENSG00000179051 RCC2" "ENSG00000281540 RCC2" "ENSG00000100911 PSME2" "ENSG00000115234 SNX17" ...
## $ level 3 : chr [1:1382] "ENSG00000168297 PXK" "ENSG00000106615 RHEB" "ENSG00000090372 STRN4" "ENSG00000182511 FES" ...
## $ level 4 : chr [1:1382] "ENSG00000087502 ERGIC2" "ENSG00000273772 SMN2" "ENSG00000151366 NDUFC2" "ENSG00000160959 LRRC14" ...
## $ level 5 : chr [1:1382] "ENSG00000276561 IRF7" "ENSG00000180694 TMEM64" "ENSG00000181788 SIAH2" "ENSG00000163378 EOGT" ...
## $ level 6 : chr [1:1382] "ENSG00000204946 ZNF783" "ENSG00000133059 DSTYK" "ENSG00000125482 TTF1" "ENSG00000010318 PHF7" ...
## $ level 7 : chr [1:1382] "ENSG00000196693 ZNF33B" "ENSG00000150433 TMEM218" "ENSG00000180229 HERC2P3" "ENSG00000277072 STAG3L2" ...
## $ level 8 : chr [1:1382] "ENSG00000273136 NBPF26" "ENSG00000164066 INTU" "ENSG00000144426 NBEAL1" "ENSG00000129474 AJUBA" ...
## $ level 9 : chr [1:1382] "ENSG00000117010 ZNF684" "ENSG00000113555 PCDH12" "ENSG00000110171 TRIM3" "ENSG00000182389 CACNB4" ...
## $ level 10: chr [1:1378] "ENSG00000233450 KIFC1" "ENSG00000237649 KIFC1" "ENSG00000107165 TYRP1" "ENSG00000235297 FAUP1" ...
ORA p-value only
xp <- x[order(x$pvalue),]
up <- rownames(head(subset(xp,log2FoldChange>0),800))
dn <- rownames(head(subset(xp,log2FoldChange<0),800))
bg <- rownames(xp)
ora_up <- fora(pathways=xl, genes=up, universe=bg, minSize = 5, maxSize = Inf)
ora_dn <- fora(pathways=xl, genes=dn, universe=bg, minSize = 5, maxSize = Inf)
ora_up[,1:6] %>%
kbl(caption="ORA of upregulated DEGs based on p-value against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.00e+00 | 3.6364472 | 291 | 1382 |
| level 2 | 0.0000000 | 0.00e+00 | 2.3618162 | 189 | 1382 |
| level 3 | 0.0000048 | 1.61e-05 | 1.4870695 | 119 | 1382 |
| level 4 | 0.4234360 | 1.00e+00 | 1.0247033 | 82 | 1382 |
| level 5 | 0.9996368 | 1.00e+00 | 0.6748046 | 54 | 1382 |
| level 6 | 1.0000000 | 1.00e+00 | 0.3124096 | 25 | 1382 |
| level 7 | 1.0000000 | 1.00e+00 | 0.2624240 | 21 | 1382 |
| level 8 | 1.0000000 | 1.00e+00 | 0.1249638 | 10 | 1382 |
| level 9 | 1.0000000 | 1.00e+00 | 0.0499855 | 4 | 1382 |
| level 10 | 1.0000000 | 1.00e+00 | 0.0626633 | 5 | 1378 |
ora_dn[,1:6] %>%
kbl(caption="ORA of downregulated DEGs based on p-value against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | 1.8119754 | 145 | 1382 |
| level 2 | 0.0040592 | 0.0202962 | 1.2871274 | 103 | 1382 |
| level 3 | 0.3322204 | 0.8305509 | 1.0496961 | 84 | 1382 |
| level 4 | 0.3322204 | 0.8305509 | 1.0496961 | 84 | 1382 |
| level 8 | 0.4234360 | 0.8468720 | 1.0247033 | 82 | 1382 |
| level 7 | 0.5678923 | 0.8790435 | 0.9872142 | 79 | 1382 |
| level 9 | 0.6153305 | 0.8790435 | 0.9747178 | 78 | 1382 |
| level 5 | 0.9728987 | 1.0000000 | 0.8122648 | 65 | 1382 |
| level 6 | 0.9985358 | 1.0000000 | 0.7122938 | 57 | 1382 |
| level 10 | 1.0000000 | 1.0000000 | 0.2882511 | 23 | 1378 |
ORA p-value and foldchange threshold
up <- rownames(head(subset(xp,log2FoldChange>0.5),800))
dn <- rownames(head(subset(xp,log2FoldChange < -0.5),800))
ora2_up <- fora(pathways=xl, genes=up, universe=bg, minSize = 5, maxSize = Inf)
ora2_dn <- fora(pathways=xl, genes=dn, universe=bg, minSize = 5, maxSize = Inf)
ora2_up[,1:6] %>%
kbl(caption="ORA of upregulated DEGs based on p-value and logfoldchange threshold against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 10 | 0.0000000 | 0.0000000 | 3.5736683 | 180 | 1378 |
| level 9 | 0.0000000 | 0.0000000 | 2.2369761 | 113 | 1382 |
| level 8 | 0.0019200 | 0.0063999 | 1.4055337 | 71 | 1382 |
| level 7 | 0.8889625 | 1.0000000 | 0.8512387 | 43 | 1382 |
| level 5 | 0.9999998 | 1.0000000 | 0.4157212 | 21 | 1382 |
| level 2 | 1.0000000 | 1.0000000 | 0.3761287 | 19 | 1382 |
| level 4 | 1.0000000 | 1.0000000 | 0.3761287 | 19 | 1382 |
| level 6 | 1.0000000 | 1.0000000 | 0.3365362 | 17 | 1382 |
| level 3 | 1.0000000 | 1.0000000 | 0.2771475 | 14 | 1382 |
| level 1 | 1.0000000 | 1.0000000 | 0.1583700 | 8 | 1382 |
ora2_dn[,1:6] %>%
kbl(caption="ORA of downregulated DEGs based on p-value and logfoldchange threshold against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | foldEnrichment | overlap | size |
|---|---|---|---|---|---|
| level 8 | 0.0000000 | 0.0000000 | 1.6995080 | 136 | 1382 |
| level 7 | 0.0000000 | 0.0000000 | 1.6620188 | 133 | 1382 |
| level 9 | 0.0000003 | 0.0000011 | 1.5495514 | 124 | 1382 |
| level 6 | 0.5196256 | 0.9999996 | 0.9997106 | 80 | 1382 |
| level 4 | 0.9728987 | 0.9999996 | 0.8122648 | 65 | 1382 |
| level 10 | 0.9844982 | 0.9999996 | 0.7895573 | 63 | 1378 |
| level 5 | 0.9966255 | 0.9999996 | 0.7372865 | 59 | 1382 |
| level 1 | 0.9999247 | 0.9999996 | 0.6373155 | 51 | 1382 |
| level 3 | 0.9999964 | 0.9999996 | 0.5748336 | 46 | 1382 |
| level 2 | 0.9999996 | 0.9999996 | 0.5373444 | 43 | 1382 |
FCS now.
stat <- x$stat
names(stat) <- rownames(x)
fcs1 <- fgsea(pathways=xl, stats=stat,minSize=5,
maxSize=100000,nPermSimple = 10000)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (2.82% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fcs1 <- fcs1[order(fcs1$pval),]
fcs1[,1:7] %>%
kbl(caption="FCS of based on Walt test stat against basemean gene categories",row.names=FALSE) %>%
kable_paper("hover", full_width = F)
| pathway | pval | padj | log2err | ES | NES | size |
|---|---|---|---|---|---|---|
| level 1 | 0.0000000 | 0.0000000 | 1.4025887 | 0.3565445 | 1.8795747 | 1382 |
| level 2 | 0.0000000 | 0.0000000 | 1.0476265 | 0.3036576 | 1.6007741 | 1382 |
| level 9 | 0.0000000 | 0.0000000 | 0.9759947 | -0.3936404 | -1.6469853 | 1382 |
| level 8 | 0.0000000 | 0.0000000 | 0.8390889 | -0.3722041 | -1.5572958 | 1382 |
| level 3 | 0.0000142 | 0.0000284 | 0.5933255 | 0.2402212 | 1.2663602 | 1382 |
| level 10 | 0.0004093 | 0.0006822 | 0.4984931 | -0.3077118 | -1.2873429 | 1378 |
| level 7 | 0.0028026 | 0.0040037 | 0.4317077 | -0.2999701 | -1.2550700 | 1382 |
| level 6 | 0.1810795 | 0.2263493 | 0.0307458 | -0.2578135 | -1.0786877 | 1382 |
| level 4 | 0.9042937 | 0.9497392 | 0.0047739 | -0.2119484 | -0.8867887 | 1382 |
| level 5 | 0.9497392 | 0.9497392 | 0.0034264 | -0.2043486 | -0.8549913 | 1382 |
Idea for calculating amount of significant terms:
ora1bias <- sum(-log10(ora_up$pval)) + sum(-log10(ora_dn$pval))
ora2bias <- sum(-log10(ora2_up$pval)) + sum(-log10(ora2_dn$pval))
fcs1bias <- sum(-log10(fcs1$pval))
barplot(c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"FCS"=fcs1bias),ylab="Bias measure")
Visualising deciles in a gene rank
START = 1
xr <- as.list(1:10)
for ( i in 1:NCHUNKS ) {
START <- ( ( i - 1 ) * CHUNKSIZE ) + 1
END <- START + CHUNKSIZE - 1
if ( END > nrow(xs) ) { END <- nrow(xs) }
xr[[i]] <- xs[START:END,"rank"]
}
names(xr) <- paste("level",1:10)
str(xr)
## List of 10
## $ level 1 : num [1:1382] 11592 1309 9756 247 2209 ...
## $ level 2 : num [1:1382] 13178 13178 1044 8120 3997 ...
## $ level 3 : num [1:1382] 993 5781 3652 1018 12179 ...
## $ level 4 : num [1:1382] 12347 5718 3995 9941 7028 ...
## $ level 5 : num [1:1382] 67 9429 9648 928 8852 ...
## $ level 6 : num [1:1382] 1563 9551 11422 9866 5259 ...
## $ level 7 : num [1:1382] 4005 7618 7036 12332 11857 ...
## $ level 8 : num [1:1382] 1418 9420 9510 1612 7322 ...
## $ level 9 : num [1:1382] 7053 8015 2963 12394 4744 ...
## $ level 10: num [1:1378] 12218 12218 4527 13202 9414 ...
boxplot(xr,cex=0,col="white",ylab="Position in gene rank")
beeswarm(xr,cex=0.25,add=TRUE,pch=19,col="darkgray")
Now quantify the data skew.
Pearson’s Coefficient of Skewness.
lapply(xr,summary)
## $`level 1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 3307 9794 8196 12692 13815
##
## $`level 2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5 3587 8822 7853 12125 13805
##
## $`level 3`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 3476 8656 7664 11684 13816
##
## $`level 4`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4 3712 7881 7473 11325 13812
##
## $`level 5`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8 3991 7550 7253 10536 13808
##
## $`level 6`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20 3597 6848 6775 10064 13728
##
## $`level 7`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19 3374 6474 6414 9481 13744
##
## $`level 8`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 63 3032 5811 5926 8706 13595
##
## $`level 9`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 160 3025 5412 5611 8026 13687
##
## $`level 10`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 488 3779 5977 5916 7996 13676
pcs <- unlist(lapply(xr, function(x) { ( mean(x) - median(x) ) / sd(x) } ) )
pcs
## level 1 level 2 level 3 level 4 level 5 level 6
## -0.33084997 -0.21350532 -0.22823534 -0.09799732 -0.07764381 -0.01947470
## level 7 level 8 level 9 level 10
## -0.01656189 0.03367986 0.06295465 -0.02171975
barplot(pcs)
sum(abs(pcs))
## [1] 1.102623
Consider running Rank ANOVA test for enrichment. Optionally can correct for basemean bias.
df <- x[,"rank",drop=FALSE]
df$baseMean <- rank(x$baseMean)
plot(df)
i=1
gs <- xl[[i]]
df$inset <- as.numeric(rownames(df) %in% gs)
mdl0 <- aov(rank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(rank ~ baseMean + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
summary(mdl0)
## Df Sum Sq Mean Sq F value Pr(>F)
## inset 1 2.547e+09 2.547e+09 162 <2e-16 ***
## Residuals 13814 2.172e+11 1.572e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mdl1)
## Df Sum Sq Mean Sq F value Pr(>F)
## baseMean 1 9.992e+09 9.992e+09 657.961 <2e-16 ***
## inset 1 2.994e+06 2.994e+06 0.197 0.657
## Residuals 13813 2.098e+11 1.519e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p0
## [1] 6.712575e-37
p1
## [1] 0.657034
myinset <- subset(df,inset==1)
points(myinset[,1:2],col="red")
df <- x[,"rank",drop=FALSE]
df$baseMean <- scale(log10(x$baseMean))
mclapply(1:length(xl), function(i) {
gs <- xl[[i]]
df$inset <- as.numeric(rownames(df) %in% gs)
mdl0 <- aov(rank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(rank ~ baseMean + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
res <- c("p0"=p0,"p1"=p1)
return(res)
},mc.cores=10)
## [[1]]
## p0 p1
## 6.712575e-37 8.457594e-01
##
## [[2]]
## p0 p1
## 1.441658e-20 1.662103e-01
##
## [[3]]
## p0 p1
## 1.065758e-13 7.109287e-02
##
## [[4]]
## p0 p1
## 2.895052e-08 6.187235e-02
##
## [[5]]
## p0 p1
## 0.0007155697 0.1420623413
##
## [[6]]
## p0 p1
## 0.1901358 0.1719877
##
## [[7]]
## p0 p1
## 1.174347e-06 9.805682e-03
##
## [[8]]
## p0 p1
## 4.239804e-22 1.529406e-05
##
## [[9]]
## p0 p1
## 1.957065e-37 1.386998e-03
##
## [[10]]
## p0 p1
## 1.802813e-22 1.046265e-09
Run enrichment test on Reactome gene sets with and without correction.
Relate gene symbols to gene IDs.
x$derank <- rank(x$stat)
x$bmrank <- rank(x$baseMean)
x$symbol <- lapply(strsplit(rownames(x)," "), function(x) { x[length(x) ]} )
Plot DE rank vs basemean rank.
Testing code.
df <- x
plot(df[,c("derank","bmrank")])
reactome <- gmtPathways("c2.cp.reactome.v2023.2.Hs.symbols.gmt")
i=1
gs <- reactome[[i]]
df$inset <- as.numeric(df$symbol %in% gs)
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
# basic es calc
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
summary(mdl0)
## Df Sum Sq Mean Sq F value Pr(>F)
## inset 1 2.745e+07 27450768 1.726 0.189
## Residuals 13814 2.197e+11 15907137
summary(mdl1)
## Df Sum Sq Mean Sq F value Pr(>F)
## bmrank 1 9.992e+09 9.992e+09 657.979 <2e-16 ***
## inset 1 8.858e+06 8.858e+06 0.583 0.445
## Residuals 13813 2.098e+11 1.519e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p0
## [1] 0.1889842
p1
## [1] 0.4450452
myinset <- subset(df,inset==1)
points(myinset[,c("derank","bmrank")],col="red",pch=19)
# corrected es calc
bmranks <- mean(subset(df,inset==1)$bmrank)
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mysubsets <- lapply(idx, function(i) {
df3 <- df2[(i-block):(i+block),]
df3 <- subset(df3,inset!=1)
return(df3)
} )
# background based on genes in bmrank vicinity rather than all genes
mrbg1 <- mean(unlist(lapply(idx, function(i) {
df3 <- df2[(i-block):(i+block),]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
Now run for all Reactome gene sets.
RhpcBLASctl::blas_set_num_threads(1)
tic()
myres <- mclapply(1:length(reactome), function(i) {
gs <- reactome[[i]]
gsname <- names(reactome)[i]
df$inset <- as.numeric(df$symbol %in% gs)
NGENES <- length(unique(unlist(subset( df,inset==1 )$symbol)))
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
# corrected es
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mrbg1 <- mean(unlist(lapply(idx, function(j) {
START=j-block
if (START < 1) { START=1 }
END=j+block
if (END > tot) { END=tot }
df3 <- df2[START:END,]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
res <- c("GeneSet"=gsname,"Ngenes"=NGENES,"ES0"=es0,"ES1"=es1,"p0"=p0,"p1"=p1)
return(res)
},mc.cores=8)
toc()
## 8.997 sec elapsed
myres <- do.call(rbind,myres)
myrownames <- myres[,1]
myres <- myres[,-1]
myres <- apply(myres,2,as.numeric)
rownames(myres) <- myrownames
myres <- as.data.frame(myres)
myres <- subset(myres,Ngenes>=5)
# FDR correction is vital
myres$fdr0 <- p.adjust(myres$p0,method="fdr")
myres$fdr1 <- p.adjust(myres$p1,method="fdr")
# show top results
myres <- myres[order(myres[,"p0"]),]
head(myres,10) %>%
kbl(caption="Top results for normal ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| REACTOME_CELL_CYCLE | 620 | 0.3215053 | 0.2539856 | 0 | 0 | 0 | 0 |
| REACTOME_METABOLISM_OF_RNA | 704 | 0.2839469 | 0.2040418 | 0 | 0 | 0 | 0 |
| REACTOME_CELL_CYCLE_MITOTIC | 501 | 0.3193013 | 0.2437830 | 0 | 0 | 0 | 0 |
| REACTOME_INTERFERON_ALPHA_BETA_SIGNALING | 59 | -0.7126585 | -0.7580538 | 0 | 0 | 0 | 0 |
| REACTOME_INNATE_IMMUNE_SYSTEM | 785 | -0.2297866 | -0.2940845 | 0 | 0 | 0 | 0 |
| REACTOME_MITOTIC_PROMETAPHASE | 193 | 0.4380085 | 0.3591849 | 0 | 0 | 0 | 0 |
| REACTOME_PROCESSING_OF_CAPPED_INTRON_CONTAINING_PRE_MRNA | 277 | 0.3502446 | 0.2536392 | 0 | 0 | 0 | 0 |
| REACTOME_INTERFERON_GAMMA_SIGNALING | 70 | -0.5987944 | -0.6131345 | 0 | 0 | 0 | 0 |
| REACTOME_M_PHASE | 361 | 0.3019828 | 0.2242104 | 0 | 0 | 0 | 0 |
| REACTOME_CELL_CYCLE_CHECKPOINTS | 265 | 0.3352443 | 0.2571510 | 0 | 0 | 0 | 0 |
myres <- myres[order(myres[,"p1"]),]
head(myres,10) %>%
kbl(caption="Top results for adjusted ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| REACTOME_INNATE_IMMUNE_SYSTEM | 785 | -0.2297866 | -0.2940845 | 0 | 0 | 0 | 0 |
| REACTOME_INTERFERON_ALPHA_BETA_SIGNALING | 59 | -0.7126585 | -0.7580538 | 0 | 0 | 0 | 0 |
| REACTOME_NEUTROPHIL_DEGRANULATION | 396 | -0.2380202 | -0.3079980 | 0 | 0 | 0 | 0 |
| REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 595 | -0.2004849 | -0.2446540 | 0 | 0 | 0 | 0 |
| REACTOME_CELL_CYCLE | 620 | 0.3215053 | 0.2539856 | 0 | 0 | 0 | 0 |
| REACTOME_INTERFERON_GAMMA_SIGNALING | 70 | -0.5987944 | -0.6131345 | 0 | 0 | 0 | 0 |
| REACTOME_CELL_CYCLE_MITOTIC | 501 | 0.3193013 | 0.2437830 | 0 | 0 | 0 | 0 |
| REACTOME_MITOTIC_PROMETAPHASE | 193 | 0.4380085 | 0.3591849 | 0 | 0 | 0 | 0 |
| REACTOME_METABOLISM_OF_RNA | 704 | 0.2839469 | 0.2040418 | 0 | 0 | 0 | 0 |
| REACTOME_SIGNALING_BY_INTERLEUKINS | 349 | -0.2101842 | -0.2573284 | 0 | 0 | 0 | 0 |
Now make an Euler plot.
fdr0set <- rownames(subset(myres,fdr0<0.01))
fdr1set <- rownames(subset(myres,fdr1<0.01))
FDR0LEN <- length(fdr0set)
FDR1LEN <- length(fdr1set)
JACCARD <- signif(length(intersect(fdr0set,fdr1set)) / length(union(fdr0set,fdr1set)),3)
HEADER <- paste("N sig (uncorr)=",FDR0LEN,", N sig (corr)",FDR1LEN,", Jaccard=",JACCARD)
HEADER
## [1] "N sig (uncorr)= 244 , N sig (corr) 264 , Jaccard= 0.588"
v1 <- list("fdr0set"=fdr0set, "fdr1set"=fdr1set)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
#mtext(HEADER,cex=0.5)
Now separate up and down.
fdr0up <- rownames(subset(myres,fdr0<0.01&ES0>0))
fdr0dn <- rownames(subset(myres,fdr0<0.01&ES0<0))
fdr1up <- rownames(subset(myres,fdr1<0.01&ES1>0))
fdr1dn <- rownames(subset(myres,fdr1<0.01&ES1<0))
v1 <- list("fdr0up"=fdr0up, "fdr0dn"=fdr0dn, "fdr1up"=fdr1up,"fdr1dn"=fdr1dn)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
#mtext(HEADER,cex=0.5)
setdiff(fdr0up,fdr1up)
## [1] "REACTOME_ESTABLISHMENT_OF_SISTER_CHROMATID_COHESION"
## [2] "REACTOME_GLUTAMATE_AND_GLUTAMINE_METABOLISM"
## [3] "REACTOME_INHIBITION_OF_THE_PROTEOLYTIC_ACTIVITY_OF_APC_C_REQUIRED_FOR_THE_ONSET_OF_ANAPHASE_BY_MITOTIC_SPINDLE_CHECKPOINT_COMPONENTS"
## [4] "REACTOME_REGULATION_OF_TP53_ACTIVITY"
## [5] "REACTOME_RNA_POLYMERASE_III_TRANSCRIPTION_INITIATION_FROM_TYPE_1_PROMOTER"
## [6] "REACTOME_PERK_REGULATES_GENE_EXPRESSION"
## [7] "REACTOME_ACTIVATION_OF_NOXA_AND_TRANSLOCATION_TO_MITOCHONDRIA"
## [8] "REACTOME_DNA_REPLICATION"
## [9] "REACTOME_FORMATION_OF_TC_NER_PRE_INCISION_COMPLEX"
## [10] "REACTOME_RESOLUTION_OF_AP_SITES_VIA_THE_MULTIPLE_NUCLEOTIDE_PATCH_REPLACEMENT_PATHWAY"
## [11] "REACTOME_EPIGENETIC_REGULATION_OF_GENE_EXPRESSION"
## [12] "REACTOME_G2_M_DNA_REPLICATION_CHECKPOINT"
## [13] "REACTOME_PCNA_DEPENDENT_LONG_PATCH_BASE_EXCISION_REPAIR"
## [14] "REACTOME_HDR_THROUGH_MMEJ_ALT_NHEJ"
## [15] "REACTOME_GENE_SILENCING_BY_RNA"
## [16] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_SMALL_RNAS"
## [17] "REACTOME_FORMATION_OF_RNA_POL_II_ELONGATION_COMPLEX"
## [18] "REACTOME_DUAL_INCISION_IN_TC_NER"
## [19] "REACTOME_REGULATION_OF_PLK1_ACTIVITY_AT_G2_M_TRANSITION"
## [20] "REACTOME_PHOSPHORYLATION_OF_THE_APC_C"
## [21] "REACTOME_UNWINDING_OF_DNA"
## [22] "REACTOME_RRNA_PROCESSING"
## [23] "REACTOME_PHOSPHORYLATION_OF_EMI1"
## [24] "REACTOME_HIV_TRANSCRIPTION_ELONGATION"
## [25] "REACTOME_TRNA_AMINOACYLATION"
## [26] "REACTOME_POLYMERASE_SWITCHING"
## [27] "REACTOME_RNA_POLYMERASE_II_PRE_TRANSCRIPTION_EVENTS"
## [28] "REACTOME_TELOMERE_C_STRAND_LAGGING_STRAND_SYNTHESIS"
## [29] "REACTOME_DISEASES_OF_MITOTIC_CELL_CYCLE"
## [30] "REACTOME_RESOLUTION_OF_ABASIC_SITES_AP_SITES"
## [31] "REACTOME_TRANSCRIPTION_OF_THE_HIV_GENOME"
## [32] "REACTOME_COHESIN_LOADING_ONTO_CHROMATIN"
## [33] "REACTOME_FOLDING_OF_ACTIN_BY_CCT_TRIC"
## [34] "REACTOME_RECOGNITION_OF_DNA_DAMAGE_BY_PCNA_CONTAINING_REPLICATION_COMPLEX"
## [35] "REACTOME_MISMATCH_REPAIR"
## [36] "REACTOME_HCMV_EARLY_EVENTS"
## [37] "REACTOME_CYTOSOLIC_TRNA_AMINOACYLATION"
## [38] "REACTOME_MITOTIC_PROPHASE"
## [39] "REACTOME_APC_CDC20_MEDIATED_DEGRADATION_OF_NEK2A"
## [40] "REACTOME_HIV_INFECTION"
## [41] "REACTOME_GLUCOSE_METABOLISM"
## [42] "REACTOME_RHOBTB_GTPASE_CYCLE"
## [43] "REACTOME_DNA_DAMAGE_BYPASS"
## [44] "REACTOME_GLYCOLYSIS"
## [45] "REACTOME_TRANSLATION"
## [46] "REACTOME_HOST_INTERACTIONS_OF_HIV_FACTORS"
## [47] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_TP53"
## [48] "REACTOME_SWITCHING_OF_ORIGINS_TO_A_POST_REPLICATIVE_STATE"
## [49] "REACTOME_ISG15_ANTIVIRAL_MECHANISM"
setdiff(fdr1up,fdr0up)
## character(0)
setdiff(fdr0dn,fdr1dn)
## [1] "REACTOME_G_ALPHA_Q_SIGNALLING_EVENTS"
## [2] "REACTOME_VISUAL_PHOTOTRANSDUCTION"
## [3] "REACTOME_GPCR_LIGAND_BINDING"
## [4] "REACTOME_ECM_PROTEOGLYCANS"
## [5] "REACTOME_PROSTANOID_LIGAND_RECEPTORS"
## [6] "REACTOME_CLASS_A_1_RHODOPSIN_LIKE_RECEPTORS"
## [7] "REACTOME_SENSORY_PERCEPTION"
setdiff(fdr1dn,fdr0dn)
## [1] "REACTOME_INFECTIOUS_DISEASE"
## [2] "REACTOME_NERVOUS_SYSTEM_DEVELOPMENT"
## [3] "REACTOME_DISEASES_OF_SIGNAL_TRANSDUCTION_BY_GROWTH_FACTOR_RECEPTORS_AND_SECOND_MESSENGERS"
## [4] "REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION"
## [5] "REACTOME_SIGNALING_BY_NOTCH"
## [6] "REACTOME_SARS_COV_INFECTIONS"
## [7] "REACTOME_SARS_COV_2_INFECTION"
## [8] "REACTOME_CLATHRIN_MEDIATED_ENDOCYTOSIS"
## [9] "REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE"
## [10] "REACTOME_RHO_GTPASE_CYCLE"
## [11] "REACTOME_SIGNALING_BY_NOTCH1"
## [12] "REACTOME_SELENOAMINO_ACID_METABOLISM"
## [13] "REACTOME_INTERLEUKIN_1_FAMILY_SIGNALING"
## [14] "REACTOME_DDX58_IFIH1_MEDIATED_INDUCTION_OF_INTERFERON_ALPHA_BETA"
## [15] "REACTOME_ANTIGEN_ACTIVATES_B_CELL_RECEPTOR_BCR_LEADING_TO_GENERATION_OF_SECOND_MESSENGERS"
## [16] "REACTOME_CARGO_RECOGNITION_FOR_CLATHRIN_MEDIATED_ENDOCYTOSIS"
## [17] "REACTOME_SARS_COV_1_INFECTION"
## [18] "REACTOME_CDC42_GTPASE_CYCLE"
## [19] "REACTOME_CELLULAR_RESPONSE_TO_STARVATION"
## [20] "REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY"
## [21] "REACTOME_DETOXIFICATION_OF_REACTIVE_OXYGEN_SPECIES"
## [22] "REACTOME_SIGNALING_BY_CSF1_M_CSF_IN_MYELOID_CELLS"
## [23] "REACTOME_CELL_EXTRACELLULAR_MATRIX_INTERACTIONS"
## [24] "REACTOME_SARS_COV_1_HOST_INTERACTIONS"
## [25] "REACTOME_VIRAL_INFECTION_PATHWAYS"
## [26] "REACTOME_INTERLEUKIN_17_SIGNALING"
## [27] "REACTOME_SARS_COV_1_MODULATES_HOST_TRANSLATION_MACHINERY"
## [28] "REACTOME_NEGATIVE_REGULATION_OF_THE_PI3K_AKT_NETWORK"
## [29] "REACTOME_TRANSCRIPTIONAL_REGULATION_BY_RUNX3"
## [30] "REACTOME_SARS_COV_2_HOST_INTERACTIONS"
## [31] "REACTOME_P75NTR_SIGNALS_VIA_NF_KB"
## [32] "REACTOME_RUNX3_REGULATES_NOTCH_SIGNALING"
## [33] "REACTOME_SIGNALING_BY_NOTCH1_PEST_DOMAIN_MUTANTS_IN_CANCER"
## [34] "REACTOME_SIGNAL_REGULATORY_PROTEIN_FAMILY_INTERACTIONS"
## [35] "REACTOME_FGFR1_MUTANT_RECEPTOR_ACTIVATION"
## [36] "REACTOME_SYNTHESIS_OF_PIPS_AT_THE_PLASMA_MEMBRANE"
## [37] "REACTOME_EGFR_DOWNREGULATION"
## [38] "REACTOME_MAPK_FAMILY_SIGNALING_CASCADES"
## [39] "REACTOME_GPVI_MEDIATED_ACTIVATION_CASCADE"
## [40] "REACTOME_NEGATIVE_REGULATORS_OF_DDX58_IFIH1_SIGNALING"
## [41] "REACTOME_NOTCH4_INTRACELLULAR_DOMAIN_REGULATES_TRANSCRIPTION"
## [42] "REACTOME_TNF_SIGNALING"
## [43] "REACTOME_RAC2_GTPASE_CYCLE"
## [44] "REACTOME_THE_ROLE_OF_NEF_IN_HIV_1_REPLICATION_AND_DISEASE_PATHOGENESIS"
## [45] "REACTOME_RAC3_GTPASE_CYCLE"
## [46] "REACTOME_NUCLEAR_EVENTS_KINASE_AND_TRANSCRIPTION_FACTOR_ACTIVATION"
## [47] "REACTOME_NEF_MEDIATES_DOWN_MODULATION_OF_CELL_SURFACE_RECEPTORS_BY_RECRUITING_THEM_TO_CLATHRIN_ADAPTERS"
## [48] "REACTOME_FREE_FATTY_ACIDS_REGULATE_INSULIN_SECRETION"
## [49] "REACTOME_MODULATION_BY_MTB_OF_HOST_IMMUNE_SYSTEM"
## [50] "REACTOME_REGULATION_OF_INNATE_IMMUNE_RESPONSES_TO_CYTOSOLIC_DNA"
## [51] "REACTOME_INTERLEUKIN_6_SIGNALING"
## [52] "REACTOME_PEPTIDE_HORMONE_METABOLISM"
## [53] "REACTOME_OAS_ANTIVIRAL_RESPONSE"
## [54] "REACTOME_PI_METABOLISM"
## [55] "REACTOME_INTERLEUKIN_RECEPTOR_SHC_SIGNALING"
## [56] "REACTOME_SIGNALING_BY_ROBO_RECEPTORS"
## [57] "REACTOME_PTK6_REGULATES_RTKS_AND_THEIR_EFFECTORS_AKT1_AND_DOK1"
## [58] "REACTOME_NOTCH_HLH_TRANSCRIPTION_PATHWAY"
## [59] "REACTOME_SCAVENGING_BY_CLASS_A_RECEPTORS"
## [60] "REACTOME_REGULATION_OF_IFNA_IFNB_SIGNALING"
## [61] "REACTOME_STAT5_ACTIVATION_DOWNSTREAM_OF_FLT3_ITD_MUTANTS"
## [62] "REACTOME_SIGNALING_BY_NOTCH1_HD_DOMAIN_MUTANTS_IN_CANCER"
## [63] "REACTOME_TRAIL_SIGNALING"
## [64] "REACTOME_REGULATION_OF_SIGNALING_BY_CBL"
## [65] "REACTOME_BACTERIAL_INFECTION_PATHWAYS"
## [66] "REACTOME_SIGNALING_BY_NOTCH4"
## [67] "REACTOME_CLASS_I_MHC_MEDIATED_ANTIGEN_PROCESSING_PRESENTATION"
## [68] "REACTOME_NF_KB_IS_ACTIVATED_AND_SIGNALS_SURVIVAL"
## [69] "REACTOME_INTERLEUKIN_1_SIGNALING"
## [70] "REACTOME_METABOLISM_OF_STEROID_HORMONES"
## [71] "REACTOME_SARS_COV_2_ACTIVATES_MODULATES_INNATE_AND_ADAPTIVE_IMMUNE_RESPONSES"
## [72] "REACTOME_TRANSCRIPTIONAL_REGULATION_OF_GRANULOPOIESIS"
## [73] "REACTOME_RAF_INDEPENDENT_MAPK1_3_ACTIVATION"
## [74] "REACTOME_SARS_COV_1_ACTIVATES_MODULATES_INNATE_IMMUNE_RESPONSES"
## [75] "REACTOME_SIGNALING_BY_NTRKS"
## [76] "REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION"
Now take a look at the top results.
myres <- myres[order(myres[,"p0"]),]
top0dn <- rownames(head(subset(myres,ES0<0),10))
top0up <- rownames(head(subset(myres,ES0>0),10))
myres <- myres[order(myres[,"p1"]),]
top1dn <- rownames(head(subset(myres,ES1<0),10))
top1up <- rownames(head(subset(myres,ES1>0),10))
v1 <- list("up0"=top0up, "up1"=top1up, "dn0"=top0dn, "dn1"=top1dn)
plot(euler(v1),quantities = list(cex = 2), labels = list(cex = 2))
Now look at the top pathway results by ES after removing non-significant results (FDR>0.01).
top <- subset(myres,fdr0<0.01)
top <- top[order(top$ES0),]
dn0 <- head(top,15)$ES0
names(dn0) <- rownames(head(top,15))
up0 <- tail(top,15)$ES0
names(up0) <- rownames(tail(top,15))
top0 <- c(dn0,up0)
names(top0) <- gsub("REACTOME_","",names(top0))
names(top0) <- gsub("_"," ",names(top0))
par(mar=c(5.1,20.1,4.1,2.1))
barplot(top0,horiz=TRUE,las=1,cex.names=0.6,main="Uncorr",xlab="ES0")
top <- subset(myres,fdr1<0.01)
top <- top[order(top$ES1),]
dn0 <- head(top,15)$ES1
names(dn0) <- rownames(head(top,15))
up0 <- tail(top,15)$ES1
names(up0) <- rownames(tail(top,15))
top0 <- c(dn0,up0)
names(top0) <- gsub("REACTOME_","",names(top0))
names(top0) <- gsub("_"," ",names(top0))
barplot(top0,horiz=TRUE,las=1,cex.names=0.6,main="Corr",xlab="ES1")
par(mar=c(5.1,4.1,4.1,2.1))
Now plot ES values with both approaches.
plot(myres$ES0,myres$ES1,pch=19,col="gray",cex=0.5)
sig0 <- subset(myres,fdr0<0.01)
sig1 <- subset(myres,fdr1<0.01)
sig2 <- subset(myres,fdr0<0.01 & fdr1<0.01)
points(sig0$ES0,sig0$ES1,pch=19,col="blue",cex=0.5)
points(sig1$ES0,sig1$ES1,pch=19,col="orange",cex=0.5)
points(sig2$ES0,sig2$ES1,pch=19,col="black",cex=0.5)
abline(0,1)
Go back andcheck whether the adjustment reduces bias on basemean gene sets.
RhpcBLASctl::blas_set_num_threads(1)
tic()
myres <- mclapply(1:length(xl), function(i) {
gs <- xl[[i]]
gsname <- names(xl)[i]
df$inset <- as.numeric(rownames(df) %in% gs)
NGENES <- length(unique(unlist(subset( df,inset==1 )$symbol)))
mdl0 <- aov(derank ~ inset , data = df)
p0 <- summary(mdl0)[[1]][1,5]
mdl1 <- aov(derank ~ bmrank + inset, data = df)
p1 <- summary(mdl1)[[1]][2,5]
mrset <- mean(subset(df,inset==1)$derank)
mrbg <- mean(subset(df,inset!=1)$derank)
tot <- nrow(df)
es0 <- (mrset - mrbg) / (tot/2)
# corrected es
df2 <- df[order(df$bmrank),]
idx <- which(df2$inset==1)
block <- round(nrow(df)/100)
mrbg1 <- mean(unlist(lapply(idx, function(j) {
START=j-block
if (START < 1) { START=1 }
END=j+block
if (END > tot) { END=tot }
df3 <- df2[START:END,]
df3 <- subset(df3,inset!=1)
return(df3$derank)
} )))
tot <- nrow(df)
es1 <- (mrset - mrbg1) / (tot/2)
res <- c("GeneSet"=gsname,"Ngenes"=NGENES,"ES0"=es0,"ES1"=es1,"p0"=p0,"p1"=p1)
return(res)
},mc.cores=8)
toc()
## 1.236 sec elapsed
myres <- do.call(rbind,myres)
myrownames <- myres[,1]
myres <- myres[,-1]
myres <- apply(myres,2,as.numeric)
rownames(myres) <- myrownames
myres <- as.data.frame(myres)
myres <- subset(myres,Ngenes>=5)
# FDR correction is vital
myres$fdr0 <- p.adjust(myres$p0,method="fdr")
myres$fdr1 <- p.adjust(myres$p1,method="fdr")
# show top results
myres <- myres[order(myres[,"p0"]),]
head(myres,10) %>%
kbl(caption="Top results for normal ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| level 9 | 1269 | -0.2087227 | -0.1137706 | 0.0000000 | 0.0034666 | 0.0000000 | 0.0173330 |
| level 1 | 1358 | 0.2071619 | -0.0018121 | 0.0000000 | 0.6570340 | 0.0000000 | 0.8212924 |
| level 10 | 1285 | -0.1595994 | 0.0740805 | 0.0000000 | 0.0000892 | 0.0000000 | 0.0008915 |
| level 8 | 1283 | -0.1579739 | -0.0316899 | 0.0000000 | 0.0100668 | 0.0000000 | 0.0335560 |
| level 2 | 1361 | 0.1519793 | -0.0784217 | 0.0000000 | 0.3438113 | 0.0000000 | 0.5730189 |
| level 3 | 1354 | 0.1215655 | -0.0537725 | 0.0000000 | 0.8378175 | 0.0000000 | 0.8845685 |
| level 4 | 1361 | 0.0907777 | -0.0109786 | 0.0000000 | 0.2100569 | 0.0000000 | 0.4201139 |
| level 7 | 1318 | -0.0795342 | 0.0037123 | 0.0000012 | 0.5953866 | 0.0000015 | 0.8212924 |
| level 5 | 1341 | 0.0553850 | 0.0249408 | 0.0007156 | 0.0467562 | 0.0007951 | 0.1168906 |
| level 6 | 1333 | -0.0214500 | -0.0089738 | 0.1901358 | 0.8845685 | 0.1901358 | 0.8845685 |
myres <- myres[order(myres[,"p1"]),]
head(myres,10) %>%
kbl(caption="Top results for adjusted ES calc",row.names=TRUE) %>%
kable_paper("hover", full_width = F)
| Ngenes | ES0 | ES1 | p0 | p1 | fdr0 | fdr1 | |
|---|---|---|---|---|---|---|---|
| level 10 | 1285 | -0.1595994 | 0.0740805 | 0.0000000 | 0.0000892 | 0.0000000 | 0.0008915 |
| level 9 | 1269 | -0.2087227 | -0.1137706 | 0.0000000 | 0.0034666 | 0.0000000 | 0.0173330 |
| level 8 | 1283 | -0.1579739 | -0.0316899 | 0.0000000 | 0.0100668 | 0.0000000 | 0.0335560 |
| level 5 | 1341 | 0.0553850 | 0.0249408 | 0.0007156 | 0.0467562 | 0.0007951 | 0.1168906 |
| level 4 | 1361 | 0.0907777 | -0.0109786 | 0.0000000 | 0.2100569 | 0.0000000 | 0.4201139 |
| level 2 | 1361 | 0.1519793 | -0.0784217 | 0.0000000 | 0.3438113 | 0.0000000 | 0.5730189 |
| level 7 | 1318 | -0.0795342 | 0.0037123 | 0.0000012 | 0.5953866 | 0.0000015 | 0.8212924 |
| level 1 | 1358 | 0.2071619 | -0.0018121 | 0.0000000 | 0.6570340 | 0.0000000 | 0.8212924 |
| level 3 | 1354 | 0.1215655 | -0.0537725 | 0.0000000 | 0.8378175 | 0.0000000 | 0.8845685 |
| level 6 | 1333 | -0.0214500 | -0.0089738 | 0.1901358 | 0.8845685 | 0.1901358 | 0.8845685 |
Plot the result.
ora1bias <- sum(-log10(ora_up$pval)) + sum(-log10(ora_dn$pval))
ora2bias <- sum(-log10(ora2_up$pval)) + sum(-log10(ora2_dn$pval))
fcs1bias <- sum(-log10(fcs1$pval))
anova0bias <- sum(-log10(myres$p0))
anova1bias <- sum(-log10(myres$p1))
res_p <- c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"fgsea"=fcs1bias,"ANOVA"=anova0bias, "ANOVA adj"=anova1bias)
res_p
## ORA p ORA p FC fgsea ANOVA ANOVA adj
## 149.46383 100.87570 79.80545 166.14652 11.51631
barplot(res_p, ylab="Bias measure (p)")
ora1bias <- sum(-log10(ora_up$padj)) + sum(-log10(ora_dn$padj))
ora2bias <- sum(-log10(ora2_up$padj)) + sum(-log10(ora2_dn$padj))
fcs1bias <- sum(-log10(fcs1$padj))
anova0bias <- sum(-log10(myres$fdr0))
anova1bias <- sum(-log10(myres$fdr1))
res_fdr <- c("ORA p"=ora1bias,"ORA p FC"=ora2bias,"fgsea"=fcs1bias,"ANOVA"=anova0bias, "ANOVA adj"=anova1bias)
res_fdr
## ORA p ORA p FC fgsea ANOVA ANOVA adj
## 143.715514 96.076358 76.389672 162.706282 8.113455
barplot(res_fdr, ylab="Bias measure (FDR)")
For reproducibility.
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tictoc_1.2.1 eulerr_7.0.4 beeswarm_0.4.0 kableExtra_1.4.0
## [5] fgsea_1.34.2
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 xml2_1.5.2
## [4] polylabelr_1.0.0 stringi_1.8.7 lattice_0.22-7
## [7] digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5
## [10] grid_4.5.2 RColorBrewer_1.1-3 fastmap_1.2.0
## [13] jsonlite_2.0.0 Matrix_1.7-4 viridisLite_0.4.2
## [16] scales_1.4.0 RhpcBLASctl_0.23-42 codetools_0.2-20
## [19] textshaping_1.0.4 jquerylib_0.1.4 cli_3.6.5
## [22] rlang_1.1.7 polyclip_1.10-7 cowplot_1.2.0
## [25] cachem_1.1.0 yaml_2.3.12 otel_0.2.0
## [28] tools_4.5.2 BiocParallel_1.42.2 dplyr_1.1.4
## [31] ggplot2_4.0.1 fastmatch_1.1-8 vctrs_0.7.0
## [34] R6_2.6.1 lifecycle_1.0.5 stringr_1.6.0
## [37] pkgconfig_2.0.3 pillar_1.11.1 bslib_0.9.0
## [40] gtable_0.3.6 glue_1.8.0 data.table_1.18.0
## [43] Rcpp_1.1.1 systemfonts_1.3.1 xfun_0.56
## [46] tibble_3.3.1 tidyselect_1.2.1 rstudioapi_0.18.0
## [49] knitr_1.51 dichromat_2.0-0.1 farver_2.1.2
## [52] htmltools_0.5.9 rmarkdown_2.30 svglite_2.2.2
## [55] compiler_4.5.2 S7_0.2.1