Source: https://github.com/markziemann/asd_meth
Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.
Here are the files that I’m using:
limma_blood_motor.csv
limma_guthrie_motor.csv
suppressPackageStartupMessages({
library("parallel")
library("mitch")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
source("https://raw.githubusercontent.com/markziemann/gmea/main/meth_functions.R")
library("data.table")
library("kableExtra")
library("eulerr")
library("RIdeogram")
library("GenomicRanges")
library("tictoc")
library("globaltest")
library("ebGSEA")
data("dualmap850kEID")
})
source("meth_functions.R")
bl_mvals <- readRDS(file="bl_mvals.rds")
gu_mvals <- readRDS(file="gu_mvals.rds")
bl_design <- readRDS(file="bl_design_motor.rds")
gu_design <- readRDS(file="gu_design_motor.rds")
bl_lim <- read.csv("limma_blood_motor.csv")
gu_lim <- read.csv("limma_guthrie_motor.csv")
For each gene, extract out the probes.
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
promoters <- myann[promoters,]
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
Effect size is estimated with the median t-statistic for each gene. Significance is estimated using the fry test. Results are merged for the user.
PMEA step 196.063 sec elapsed on 8 cores
Fry step 293.569 sec elapsed on 8 cores
Fry single core 1205.519 sec
tic()
res_bl <- main(mval=bl_mvals,design=bl_design,sets,cores=detectCores()/2)
res_bl[[1]] <- subset(res_bl[[1]],nprobes!=0) # remove genes with no probes obvs
res_bl_df <- res_bl[[1]] # enrichment results
res_bl_top <- res_bl[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 495.648 sec elapsed
head(res_bl_df,30) %>%
kbl(caption="Top DM genes in blood identified with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
TPRG1L | 17 | -0.6355437 | 0.0002821 | 0.9186771 |
GINS3 | 13 | -0.6966986 | 0.0003210 | 0.9186771 |
UBA52 | 17 | -0.5000111 | 0.0003226 | 0.9186771 |
C14orf179 | 15 | -0.6636516 | 0.0005103 | 0.9186771 |
ZRANB2-AS2 | 8 | -0.7418282 | 0.0006058 | 0.9186771 |
ZWINT | 17 | -0.7368550 | 0.0006343 | 0.9186771 |
RPUSD4 | 27 | -0.5210532 | 0.0006696 | 0.9186771 |
HIST1H2AI | 11 | -1.3082756 | 0.0006768 | 0.9186771 |
LOC644656 | 6 | -1.4397800 | 0.0008174 | 0.9186771 |
RTP4 | 7 | -0.6703340 | 0.0010852 | 0.9186771 |
LOC100996579 | 3 | -0.8305690 | 0.0011972 | 0.9186771 |
RPUSD2 | 18 | -0.7759266 | 0.0012952 | 0.9186771 |
SOX21-AS1 | 1 | -3.8748299 | 0.0013233 | 0.9186771 |
PLA2G16 | 22 | -0.4459525 | 0.0013957 | 0.9186771 |
DNAJB11 | 22 | -0.4218696 | 0.0014732 | 0.9186771 |
VIPAS39 | 9 | -1.1581975 | 0.0015019 | 0.9186771 |
IER3IP1 | 12 | -0.7492204 | 0.0015446 | 0.9186771 |
FAM66B | 4 | -1.0302264 | 0.0015791 | 0.9186771 |
C11orf58 | 24 | -0.7667586 | 0.0016136 | 0.9186771 |
LOC101927204 | 8 | -1.3230913 | 0.0016454 | 0.9186771 |
MFN1 | 20 | -0.5435028 | 0.0016879 | 0.9186771 |
TUBB8 | 4 | 0.9379241 | 0.0017941 | 0.9186771 |
GOLGA8C | 1 | -3.7755589 | 0.0018270 | 0.9186771 |
C16orf67 | 9 | -0.6150654 | 0.0018550 | 0.9186771 |
RPL36 | 14 | -0.6997137 | 0.0018852 | 0.9186771 |
IFIT5 | 20 | -0.4758531 | 0.0019680 | 0.9186771 |
TMEM70 | 17 | -0.7535128 | 0.0022142 | 0.9186771 |
LOC100287010 | 1 | 4.1070979 | 0.0022815 | 0.9186771 |
ILK | 34 | -0.6143204 | 0.0023583 | 0.9186771 |
MSH5 | 68 | -0.3508799 | 0.0023738 | 0.9186771 |
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
pdf("hist_bl_motor.pdf")
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png
## 2
par(mfrow=c(1,1))
Now some visualisations.
volcano_plot(res_bl_df)
probe_bias(res_bl_df)
gmea_boxplot(res_bl,sets)
tic()
res_gu <- main(mval=gu_mvals,design=gu_design,sets,cores=detectCores()/2)
res_gu[[1]] <- subset(res_gu[[1]],nprobes!=0) # remove genes with no probes obvs
res_gu_df <- res_gu[[1]] # enrichment results
res_gu_top <- res_gu[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 495.937 sec elapsed
head(res_gu_df,30) %>%
kbl(caption="Top DM genes in Guthrie card identified with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
LSM8 | 4 | 1.3231445 | 0.0000008 | 0.0232247 |
POM121L2 | 17 | -0.4393981 | 0.0000320 | 0.4308053 |
C14orf19 | 1 | -3.5147773 | 0.0000494 | 0.4308053 |
APOC4-APOC2 | 7 | -0.6850403 | 0.0000630 | 0.4308053 |
FAAHP1 | 6 | -1.2446245 | 0.0001329 | 0.6551585 |
LOC646498 | 3 | -2.6925552 | 0.0001437 | 0.6551585 |
LOC100507506 | 5 | 1.7340565 | 0.0002048 | 0.6594549 |
DCDC2B | 9 | -0.5088355 | 0.0002094 | 0.6594549 |
AZI2 | 22 | -0.6699400 | 0.0003001 | 0.6594549 |
ZNF333 | 19 | -0.6357455 | 0.0003065 | 0.6594549 |
SSC4D | 6 | -1.0832995 | 0.0003110 | 0.6594549 |
LOC102724009 | 5 | -0.9792010 | 0.0003296 | 0.6594549 |
ATF4 | 15 | -0.6034059 | 0.0003352 | 0.6594549 |
LOC441601 | 11 | -1.0120389 | 0.0003958 | 0.6594549 |
MIR1269B | 1 | -4.6207998 | 0.0004030 | 0.6594549 |
PNRC1 | 14 | 0.7147979 | 0.0004398 | 0.6594549 |
C8orf37 | 17 | -0.5386501 | 0.0005319 | 0.6594549 |
LOC100133669 | 34 | -0.5986539 | 0.0005646 | 0.6594549 |
INHBE | 7 | -1.7598959 | 0.0005855 | 0.6594549 |
LOC101929372 | 6 | -1.5508283 | 0.0006088 | 0.6594549 |
C17orf63 | 21 | 0.5334405 | 0.0006157 | 0.6594549 |
ZNF205-AS1 | 8 | -1.5604329 | 0.0006519 | 0.6594549 |
MIR324 | 2 | -2.4743898 | 0.0007137 | 0.6594549 |
HAS3 | 35 | -0.7158813 | 0.0007231 | 0.6594549 |
POU4F3 | 17 | -0.8323352 | 0.0007393 | 0.6594549 |
APBA3 | 28 | -0.6457784 | 0.0007550 | 0.6594549 |
MT1IP | 5 | 1.2281307 | 0.0007627 | 0.6594549 |
PSMA6 | 25 | -0.7844647 | 0.0008198 | 0.6594549 |
C20orf72 | 25 | 0.4852870 | 0.0008209 | 0.6594549 |
LMF2 | 21 | 0.4426317 | 0.0008501 | 0.6594549 |
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
pdf("hist_gu_motor.pdf")
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png
## 2
par(mfrow=c(1,1))
Now some visualisations.
volcano_plot(res_gu_df)
probe_bias(res_gu_df)
gmea_boxplot(res_gu,sets)
Now compare blood and guthrie card
res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
m1 <- merge(res_bl_df,res_gu_df,by=0)
m2 <- m1[,c("Row.names","bl","gu")]
rownames(m2) <- m2$Row.names
m2$Row.names=NULL
plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
pch=19,cex=0.9)
grid()
abline(v=0,lty=1,lwd=3,col="black") ; abline(h=0,lty=1,lwd=3,col="black")
mtext("aggregated t-statistic for each gene (median)")
#plot(rank(m1$bl),rank(m1$gu),xlab="bl rank",ylab="gu rank",cex=0.6,pch=19) ; grid()
rm2 <- apply(m2,2,rank)
mydiff <- apply(m2,2,function(x) { length(which(x<0)) } )
rm3 <- rm2
rm3[,1] <- rm3[,1] - mydiff[1]
rm3[,2] <- rm3[,2] - mydiff[2]
rnk <- rm3
palette <- colorRampPalette(c("white", "yellow", "orange", "red",
"darkred", "black"))
xmin = min(rnk[, 1])
xmax = max(rnk[, 1])
ymin = min(rnk[, 2])
ymax = max(rnk[, 2])
k <- MASS::kde2d(rnk[, 1], rnk[, 2])
X_AXIS = paste("Rank in contrast", colnames(rnk)[1])
Y_AXIS = paste("Rank in contrast", colnames(rnk)[2])
filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
color.palette = palette, plot.title = {
abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
title(main = "Rank-rank plot of all genes", xlab = X_AXIS,
ylab = Y_AXIS)
})
pdf("genedens_motor.pdf")
par(mfrow=c(2,2))
plot(1)
plot(1)
plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
pch=19,cex=0.9)
grid()
abline(v=0, h=0, lty=2, lwd=2, col="blue")
mtext("aggregated t-statistic for each gene (median)")
filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
color.palette = palette, plot.title = {
abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
title(main = "Rank-rank plot of all genes", xlab = X_AXIS,
ylab = Y_AXIS)
})
dev.off()
## png
## 2
Now we need a table of genes with top scores.
m3 <- m2
m3$med <- apply(rnk,1,median)
mg <- merge(res_gu_df,res_bl_df,by=0)
mg$gu = mg$bl = NULL
mg$med <- m3$med
mg2 <- data.frame(mg,rnk)
mg2$med <- apply(rnk,1,median)
mg2 <- mg2[order(mg2$med),]
mg2 <-subset(mg2,nprobes.x>=5)
head(mg2,20) %>%
kbl(caption="Genes with coordinated hypomethylation in neonatal Guthrie card and blood at assessment") %>%
kable_paper("hover", full_width = F)
Row.names | nprobes.x | median.x | PValue.x | FDR.x | nprobes.y | median.y | PValue.y | FDR.y | med | bl | gu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
LOC101927143 | LOC101927143 | 5 | -2.090386 | 0.0374357 | 0.6594549 | 5 | -1.1943342 | 0.4085102 | 0.9186771 | -21518.0 | -22098.0 | -20938.0 |
OR6A2 | OR6A2 | 5 | -1.515784 | 0.2153602 | 0.6594549 | 5 | -1.3175067 | 0.0848329 | 0.9186771 | -21276.0 | -22151.0 | -20401.0 |
MCART2 | MCART2 | 5 | -1.830232 | 0.1051698 | 0.6594549 | 5 | -0.9443246 | 0.4307318 | 0.9186771 | -21248.5 | -21681.0 | -20816.0 |
GLYATL2 | GLYATL2 | 8 | -1.753183 | 0.1194711 | 0.6594549 | 8 | -0.9554996 | 0.2543987 | 0.9186771 | -21232.0 | -21717.0 | -20747.0 |
RPS15AP10 | RPS15AP10 | 6 | -1.762994 | 0.0790808 | 0.6594549 | 6 | -0.9453806 | 0.3516051 | 0.9186771 | -21222.5 | -21684.0 | -20761.0 |
LOC102723895 | LOC102723895 | 5 | -1.728030 | 0.1498544 | 0.6594549 | 5 | -0.9518350 | 0.4520350 | 0.9186771 | -21214.0 | -21704.0 | -20724.0 |
MIR449B | MIR449B | 5 | -1.734902 | 0.2209143 | 0.6594549 | 5 | -0.9443159 | 0.6186069 | 0.9190031 | -21205.5 | -21680.0 | -20731.0 |
PSMD6-AS2 | PSMD6-AS2 | 7 | -1.560276 | 0.1603024 | 0.6594549 | 7 | -1.0555830 | 0.3613774 | 0.9186771 | -21202.0 | -21920.0 | -20484.0 |
LOC101929372 | LOC101929372 | 6 | -1.550828 | 0.0006088 | 0.6594549 | 6 | -1.0600487 | 0.1465587 | 0.9186771 | -21196.0 | -21926.0 | -20466.0 |
DEFB133 | DEFB133 | 5 | -1.624038 | 0.1668213 | 0.6594549 | 6 | -0.9864976 | 0.3301852 | 0.9186771 | -21192.0 | -21790.0 | -20594.0 |
DRAIC | DRAIC | 7 | -1.568262 | 0.1172452 | 0.6594549 | 8 | -1.0268274 | 0.1901104 | 0.9186771 | -21188.0 | -21870.0 | -20506.0 |
MIR302A | MIR302A | 5 | -1.673894 | 0.2025755 | 0.6594549 | 6 | -0.9358474 | 0.4971149 | 0.9186771 | -21160.5 | -21651.5 | -20669.5 |
MIR302D | MIR302D | 5 | -1.673894 | 0.2025755 | 0.6594549 | 6 | -0.9358474 | 0.4971149 | 0.9186771 | -21160.5 | -21651.5 | -20669.5 |
LOC101927543 | LOC101927543 | 5 | -1.985720 | 0.4713332 | 0.7369614 | 7 | -0.8773635 | 0.3828223 | 0.9186771 | -21150.5 | -21402.0 | -20899.0 |
LINC00501 | LINC00501 | 5 | -1.543238 | 0.0356477 | 0.6594549 | 6 | -0.9855288 | 0.3115906 | 0.9186771 | -21118.0 | -21786.0 | -20450.0 |
OGN | OGN | 8 | -1.551842 | 0.2060268 | 0.6594549 | 10 | -0.9739525 | 0.4376277 | 0.9186771 | -21110.5 | -21754.0 | -20467.0 |
OR51L1 | OR51L1 | 6 | -1.863689 | 0.1055407 | 0.6594549 | 7 | -0.8710924 | 0.3792742 | 0.9186771 | -21103.0 | -21370.0 | -20836.0 |
SPIC | SPIC | 5 | -1.628939 | 0.0855942 | 0.6594549 | 6 | -0.8942722 | 0.4156001 | 0.9186771 | -21044.0 | -21483.0 | -20605.0 |
LOC729799 | LOC729799 | 8 | -1.366704 | 0.1755766 | 0.6594549 | 8 | -1.0918766 | 0.3714490 | 0.9186771 | -21005.5 | -21980.0 | -20031.0 |
LYZL2 | LYZL2 | 5 | -1.429065 | 0.3185155 | 0.6630048 | 5 | -0.9839387 | 0.2598811 | 0.9186771 | -21000.5 | -21783.0 | -20218.0 |
mg2 <- mg2[order(-mg2$med),]
head(mg2,20) %>%
kbl(caption="Genes with coordinated hypermethylation in neonatal Guthrie card and blood at assessment") %>%
kable_paper("hover", full_width = F)
Row.names | nprobes.x | median.x | PValue.x | FDR.x | nprobes.y | median.y | PValue.y | FDR.y | med | bl | gu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
MLLT4-AS1 | MLLT4-AS1 | 5 | 1.0350117 | 0.2945828 | 0.6596709 | 5 | 0.7850532 | 0.6624548 | 0.9246793 | 3880.50 | 3232 | 4529.0 |
KMT2E-AS1 | KMT2E-AS1 | 6 | 0.8663427 | 0.2539192 | 0.6594549 | 6 | 0.8307623 | 0.4339020 | 0.9186771 | 3863.50 | 3263 | 4464.0 |
LOC100129518 | LOC100129518 | 6 | 1.3443317 | 0.0302121 | 0.6594549 | 6 | 0.5620233 | 0.2451148 | 0.9186771 | 3821.00 | 3034 | 4608.0 |
KLF14 | KLF14 | 20 | 0.7392867 | 0.1704994 | 0.6594549 | 22 | 0.6057389 | 0.3255789 | 0.9186771 | 3736.50 | 3086 | 4387.0 |
LOC729338 | LOC729338 | 8 | 0.6941762 | 0.1703178 | 0.6594549 | 9 | 0.5503189 | 0.8216427 | 0.9813971 | 3683.50 | 3018 | 4349.0 |
LOC100506860 | LOC100506860 | 6 | 0.5571802 | 0.7963209 | 0.9418042 | 7 | 0.7806567 | 0.5077716 | 0.9186771 | 3681.00 | 3228 | 4134.0 |
RAB29 | RAB29 | 6 | 0.8797933 | 0.0026008 | 0.6594549 | 6 | 0.4657133 | 0.7420125 | 0.9471233 | 3681.00 | 2893 | 4469.0 |
GRINL1A | GRINL1A | 9 | 1.0127984 | 0.0320878 | 0.6594549 | 9 | 0.4239308 | 0.3881600 | 0.9186771 | 3663.00 | 2806 | 4520.0 |
NR2F1-AS1 | NR2F1-AS1 | 8 | 0.5951104 | 0.8953767 | 1.0000000 | 8 | 0.6170555 | 0.3410755 | 0.9186771 | 3651.50 | 3098 | 4205.0 |
MMP23A | MMP23A | 9 | 0.5878208 | 0.7029395 | 0.8850297 | 9 | 0.6095957 | 0.4379323 | 0.9186771 | 3640.50 | 3089 | 4192.0 |
LOC400752 | LOC400752 | 10 | 0.6656023 | 0.2494047 | 0.6594549 | 10 | 0.5054270 | 0.5043968 | 0.9186771 | 3630.50 | 2946 | 4315.0 |
MMP23B | MMP23B | 10 | 0.5380137 | 0.6978101 | 0.8819966 | 10 | 0.6382949 | 0.4757799 | 0.9186771 | 3607.00 | 3116 | 4098.0 |
PNRC1 | PNRC1 | 14 | 0.7147979 | 0.0004398 | 0.6594549 | 19 | 0.4172245 | 0.9548980 | 1.0000000 | 3577.00 | 2790 | 4364.0 |
TMPO-AS1 | TMPO-AS1 | 5 | 0.6115742 | 0.3338573 | 0.6663287 | 6 | 0.4629967 | 0.6730809 | 0.9265521 | 3563.00 | 2887 | 4239.0 |
LOC100507506 | LOC100507506 | 5 | 1.7340565 | 0.0002048 | 0.6594549 | 6 | 0.2890279 | 0.8837533 | 1.0000000 | 3541.25 | 2436 | 4646.5 |
ID2 | ID2 | 14 | 0.4638333 | 0.3761181 | 0.6854529 | 15 | 0.7071760 | 0.3552842 | 0.9186771 | 3540.50 | 3164 | 3917.0 |
BBS12 | BBS12 | 13 | 0.6052677 | 0.1535549 | 0.6594549 | 14 | 0.4407824 | 0.8521841 | 0.9949820 | 3535.00 | 2847 | 4223.0 |
PHF23 | PHF23 | 18 | 0.7959005 | 0.0199328 | 0.6594549 | 18 | 0.3503241 | 0.6721306 | 0.9263866 | 3529.00 | 2639 | 4419.0 |
LOC100233209 | LOC100233209 | 5 | 0.9030243 | 0.1530730 | 0.6594549 | 5 | 0.3221347 | 0.9831494 | 1.0000000 | 3521.50 | 2566 | 4477.0 |
LOC642366 | LOC642366 | 10 | 1.0650878 | 0.1438339 | 0.6594549 | 11 | 0.3003432 | 0.5525855 | 0.9186771 | 3510.50 | 2478 | 4543.0 |
genesets <- gmt_import("ReactomePathways.gmt")
cores = 8
Here I’m using the median t-statistic for downstream enrichment. Pathways are from REACTOME and analysis using mitch.
res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
bl <- res_bl_df[,"bl",drop=FALSE]
mitch_bl <- mitch_calc(bl,genesets,priority="effect")
head( mitch_bl$enrichment_result,30) %>%
kbl(caption="BLOOD: Top effect size pathways found with mitch") %>%
kable_paper("hover", full_width = F)
sig <- subset(mitch_bl$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="BLOOD: Top effect size pathways found with mitch after 1% FDR filtering") %>%
kable_paper("hover", full_width = F)
gu <- res_gu_df[,"gu",drop=FALSE]
mitch_gu <- mitch_calc(gu,genesets,priority="effect")
head( mitch_gu$enrichment_result,30) %>%
kbl(caption="GUTHRIE: Top effect size pathways found with mitch") %>%
kable_paper("hover", full_width = F)
sig <- subset(mitch_gu$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="GUTHRIE: Top effect size pathways found with mitch after 1% FDR filtering") %>%
kable_paper("hover", full_width = F)
Mitch joint analysis of blood and guthrie card. As using the median value is apparently the most sensitive, we’ll go with that.
m <- merge(res_bl_df,res_gu_df,by=0)
rownames(m) <- m$Row.names
m2 <- m[,c("bl","gu")]
m2[is.na(m2)] <- 0
res <- mitch_calc(m2, genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mitch_report(res,"blgu_mitch.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmpJmgNI1/blgu_mitch.rds ".
##
##
## processing file: mitch.Rmd
##
|
| | 0%
|
|. | 3%
|
|.. | 6% (checklibraries)
|
|.... | 9%
|
|..... | 12% (peek)
|
|...... | 15%
|
|....... | 18% (metrics)
|
|........ | 21%
|
|.......... | 24% (scatterplot)
|
|........... | 26%
|
|............ | 29% (contourplot)
|
|............. | 32%
|
|.............. | 35% (input_geneset_metrics1)
|
|................ | 38%
|
|................. | 41% (input_geneset_metrics2)
|
|.................. | 44%
|
|................... | 47% (input_geneset_metrics3)
|
|.................... | 50%
|
|...................... | 53% (echart1d)
|
|....................... | 56% (echart2d)
|
|........................ | 59%
|
|......................... | 62% (heatmap)
|
|........................... | 65%
|
|............................ | 68% (effectsize)
|
|............................. | 71%
|
|.............................. | 74% (results_table)
|
|............................... | 76%
|
|................................. | 79% (results_table_complete)
|
|.................................. | 82%
|
|................................... | 85% (detailed_geneset_reports1d)
|
|.................................... | 88%
|
|..................................... | 91% (detailed_geneset_reports2d)
|
|....................................... | 94%
|
|........................................ | 97% (session_info)
|
|.........................................| 100%
## output file: /home/mdz/projects/asd_meth/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mdz/projects/asd_meth/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpJmgNI1/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpJmgNI1/rmarkdown-str3a6ca04b85adaf.html
##
## Output created: /tmp/RtmpJmgNI1/mitch_report.html
## [1] TRUE
mitch_plots(res=res,outfile="blgu_mitch_motor.pdf")
## png
## 2
sig <- subset(res$enrichment_result,p.adjustMANOVA<0.01)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="Top pathways found with mitch (effect size after 1% FDR filtering)") %>%
kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.bl | s.gu | p.bl | p.gu | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
212 | Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | 0.0000000 | -0.3702575 | -0.5241916 | 0.0000627 | 0.0000000 | 0.6417690 | 0.1088478 | 0.0000001 |
90 | Apoptotic factor-mediated response | 17 | 0.0009289 | 0.4643898 | 0.3551958 | 0.0009148 | 0.0112190 | 0.5846554 | 0.0772118 | 0.0083490 |
1163 | SARS-CoV-1 modulates host translation machinery | 34 | 0.0000008 | 0.2892332 | 0.4998328 | 0.0035084 | 0.0000005 | 0.5774848 | 0.1489164 | 0.0000235 |
397 | Eukaryotic Translation Termination | 87 | 0.0000000 | 0.2242965 | 0.4888818 | 0.0002972 | 0.0000000 | 0.5378795 | 0.1870901 | 0.0000000 |
903 | Peptide chain elongation | 84 | 0.0000000 | 0.2197476 | 0.4882671 | 0.0004960 | 0.0000000 | 0.5354379 | 0.1898719 | 0.0000000 |
395 | Eukaryotic Translation Elongation | 88 | 0.0000000 | 0.2257164 | 0.4850794 | 0.0002508 | 0.0000000 | 0.5350232 | 0.1833973 | 0.0000000 |
1169 | SARS-CoV-2 modulates host translation machinery | 46 | 0.0000002 | 0.2787944 | 0.4416871 | 0.0010666 | 0.0000002 | 0.5223158 | 0.1151826 | 0.0000066 |
399 | Expression and translocation of olfactory receptors | 356 | 0.0000000 | -0.0979980 | -0.5116389 | 0.0014701 | 0.0000000 | 0.5209395 | 0.2924883 | 0.0000000 |
1496 | Viral mRNA Translation | 84 | 0.0000000 | 0.2035625 | 0.4689274 | 0.0012538 | 0.0000000 | 0.5112051 | 0.1876413 | 0.0000000 |
853 | Olfactory Signaling Pathway | 363 | 0.0000000 | -0.0910373 | -0.5009511 | 0.0028538 | 0.0000000 | 0.5091560 | 0.2898529 | 0.0000000 |
1145 | Response of EIF2AK4 (GCN2) to amino acid deficiency | 95 | 0.0000000 | 0.2122975 | 0.4496069 | 0.0003467 | 0.0000000 | 0.4972088 | 0.1678031 | 0.0000000 |
1208 | Selenocysteine synthesis | 87 | 0.0000000 | 0.1970608 | 0.4558203 | 0.0014810 | 0.0000000 | 0.4965935 | 0.1829706 | 0.0000000 |
829 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 89 | 0.0000000 | 0.1926072 | 0.4518203 | 0.0016784 | 0.0000000 | 0.4911609 | 0.1832913 | 0.0000000 |
13 | APC/C:Cdc20 mediated degradation of Cyclin B | 24 | 0.0010592 | 0.3646563 | 0.3273019 | 0.0019821 | 0.0055028 | 0.4900008 | 0.0264135 | 0.0094657 |
779 | NGF-stimulated transcription | 39 | 0.0000230 | 0.2931807 | 0.3774015 | 0.0015297 | 0.0000451 | 0.4778983 | 0.0595531 | 0.0004871 |
1215 | Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | 0.0000149 | -0.2816548 | -0.3817693 | 0.0018002 | 0.0000232 | 0.4744231 | 0.0707916 | 0.0003284 |
449 | Formation of a pool of free 40S subunits | 94 | 0.0000000 | 0.1741655 | 0.4379431 | 0.0035061 | 0.0000000 | 0.4713044 | 0.1865190 | 0.0000000 |
1187 | SRP-dependent cotranslational protein targeting to membrane | 105 | 0.0000000 | 0.1716610 | 0.4288923 | 0.0023612 | 0.0000000 | 0.4619698 | 0.1818900 | 0.0000000 |
1213 | Senescence-Associated Secretory Phenotype (SASP) | 53 | 0.0000022 | 0.3510009 | 0.2877199 | 0.0000098 | 0.0002895 | 0.4538550 | 0.0447464 | 0.0000631 |
828 | Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) | 108 | 0.0000000 | 0.1796697 | 0.4065902 | 0.0012491 | 0.0000000 | 0.4445186 | 0.1604571 | 0.0000000 |
830 | Nonsense-Mediated Decay (NMD) | 108 | 0.0000000 | 0.1796697 | 0.4065902 | 0.0012491 | 0.0000000 | 0.4445186 | 0.1604571 | 0.0000000 |
493 | GTP hydrolysis and joining of the 60S ribosomal subunit | 104 | 0.0000000 | 0.1756741 | 0.4058892 | 0.0019557 | 0.0000000 | 0.4422753 | 0.1627867 | 0.0000000 |
663 | L13a-mediated translational silencing of Ceruloplasmin expression | 103 | 0.0000000 | 0.1633686 | 0.4064907 | 0.0041558 | 0.0000000 | 0.4380913 | 0.1719133 | 0.0000000 |
553 | HSF1 activation | 27 | 0.0005242 | 0.0293900 | 0.4235694 | 0.7915037 | 0.0001387 | 0.4245878 | 0.2787269 | 0.0052289 |
455 | Formation of the ternary complex, and subsequently, the 43S complex | 46 | 0.0000204 | 0.1670712 | 0.3903017 | 0.0498949 | 0.0000046 | 0.4245564 | 0.1578478 | 0.0004375 |
1207 | Selenoamino acid metabolism | 103 | 0.0000000 | 0.1693842 | 0.3887426 | 0.0029621 | 0.0000000 | 0.4240422 | 0.1551098 | 0.0000000 |
165 | Cap-dependent Translation Initiation | 111 | 0.0000000 | 0.1564598 | 0.3936647 | 0.0043851 | 0.0000000 | 0.4236172 | 0.1677292 | 0.0000000 |
396 | Eukaryotic Translation Initiation | 111 | 0.0000000 | 0.1564598 | 0.3936647 | 0.0043851 | 0.0000000 | 0.4236172 | 0.1677292 | 0.0000000 |
835 | Nuclear Events (kinase and transcription factor activation) | 60 | 0.0000059 | 0.2499539 | 0.3240762 | 0.0008088 | 0.0000140 | 0.4092705 | 0.0524124 | 0.0001535 |
703 | Major pathway of rRNA processing in the nucleolus and cytosol | 171 | 0.0000000 | 0.1644688 | 0.3705178 | 0.0002048 | 0.0000000 | 0.4053806 | 0.1456986 | 0.0000000 |
Camera is designed to control for correlation bias. Using the median value gives better results than the directional significance or the combined metric. Blood dataset. Need to filter by FDR then sort by effect size.
stat <- res_bl_df$median
names(stat) <- rownames(res_bl_df)
stat[is.na(stat)] <- 0
tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
mystats <- stat[names(stat) %in% myset]
median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.145 sec elapsed
sig <- subset(cres,FDR<0.05)
head(sig[order(-abs(sig$median)),],20) %>%
kbl(caption = "BLOOD: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
kable_paper("hover", full_width = F)
NGenes | Direction | PValue | FDR | median |
---|---|---|---|---|
nrow(subset(cres,FDR<0.05 & Direction=="Up"))
## [1] 0
nrow(subset(cres,FDR<0.05 & Direction=="Down"))
## [1] 0
cres_bl <- cres
Guthrie dataset.
stat <- res_gu_df$median
names(stat) <- rownames(res_gu_df)
stat[is.na(stat)] <- 0
tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
mystats <- stat[names(stat) %in% myset]
median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.108 sec elapsed
sig <- subset(cres,FDR<0.05)
head(sig[order(-abs(sig$median)),],20) %>%
kbl(caption = "GUTHRIE: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
kable_paper("hover", full_width = F)
NGenes | Direction | PValue | FDR | median | |
---|---|---|---|---|---|
Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | Down | 0.0000006 | 0.0002757 | -0.9326218 |
Expression and translocation of olfactory receptors | 356 | Down | 0.0000000 | 0.0000000 | -0.9135639 |
Olfactory Signaling Pathway | 363 | Down | 0.0000000 | 0.0000000 | -0.8990948 |
Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | Down | 0.0001442 | 0.0089876 | -0.8132978 |
Sensory Perception | 567 | Down | 0.0000000 | 0.0000004 | -0.7422755 |
Processing of Capped Intron-Containing Pre-mRNA | 232 | Up | 0.0009409 | 0.0432823 | -0.2188573 |
mRNA Splicing - Major Pathway | 174 | Up | 0.0005819 | 0.0294950 | -0.2067553 |
mRNA Splicing | 184 | Up | 0.0003303 | 0.0187685 | -0.2004759 |
Metabolism of RNA | 639 | Up | 0.0001194 | 0.0077354 | -0.1995168 |
Mitochondrial translation termination | 87 | Up | 0.0010338 | 0.0464478 | -0.1880202 |
Signaling by ROBO receptors | 203 | Up | 0.0005118 | 0.0267264 | -0.1869627 |
Mitochondrial translation initiation | 87 | Up | 0.0005954 | 0.0294950 | -0.1807847 |
Mitochondrial translation elongation | 87 | Up | 0.0007729 | 0.0373330 | -0.1807847 |
mRNA Splicing - Minor Pathway | 51 | Up | 0.0002918 | 0.0170846 | -0.1802930 |
Translation | 262 | Up | 0.0000206 | 0.0018296 | -0.1745762 |
rRNA processing | 186 | Up | 0.0000947 | 0.0065357 | -0.1622758 |
rRNA processing in the nucleus and cytosol | 180 | Up | 0.0000313 | 0.0025235 | -0.1583304 |
Major pathway of rRNA processing in the nucleolus and cytosol | 171 | Up | 0.0000194 | 0.0018296 | -0.1555345 |
Cellular response to starvation | 147 | Up | 0.0000845 | 0.0060479 | -0.1555345 |
Regulation of mRNA stability by proteins that bind AU-rich elements | 87 | Up | 0.0003883 | 0.0211553 | -0.1451538 |
cres_gu <- cres
Compare median set score for both contrasts. In the graph below, the larger points represent larger gene sets. Red fill means significant in blood contrast. Blue ring means significant in guthrie contrast.
m4 <- merge(cres_bl,cres_gu,by=0)
sigx <- subset(m4,FDR.x<0.05)
sigy <- subset(m4,FDR.y<0.05)
plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)
par(mfrow=c(1,1))
pdf("camera_pw_motor.pdf")
plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)
dev.off()
## png
## 2
subset(m4,FDR.x<0.05 & FDR.y<0.05 ) %>%
kbl(caption = "Pathways significant in blood (x) and guthrie cards (y) at 5%FDR") %>%
kable_paper("hover", full_width = F)
Row.names | NGenes.x | Direction.x | PValue.x | FDR.x | median.x | NGenes.y | Direction.y | PValue.y | FDR.y | median.y |
---|---|---|---|---|---|---|---|---|---|---|
make scatterplots of the top sets
XMAX=max(res$ranked_profile[,1])
XMIN=min(res$ranked_profile[,1])
YMAX=max(res$ranked_profile[,2])
YMIN=min(res$ranked_profile[,2])
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)
pdf("c3_metabotropic_motor.pdf")
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)
dev.off()
## png
## 2
Here I will use MDS plot to cluster samples.
First I will use all probes, then I will focus on a subset of candidate probes.
plot(cmdscale(dist(t(bl_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
mtext("Numbers indicate motor impairment score")
text(cmdscale(dist(t(bl_mvals))), labels=bl_design[,ncol(bl_design)], )
plot(cmdscale(dist(t(gu_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie card")
mtext("Numbers indicate motor impairment score")
text(cmdscale(dist(t(gu_mvals))), labels=gu_design[,ncol(gu_design)], )
There is no apparent clustering by motor impairment score, indicating a subtle effect.
myprobes2 <- head(bl_lim$Name,50)
myprobedat2 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat2))), labels=bl_design[,ncol(bl_design)], )
myprobes2 <- head(gu_lim$Name,50)
myprobedat2 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n", main="Guthrie")
text(cmdscale(dist(t(myprobedat2))), labels=gu_design[,ncol(gu_design)], )
MDS analysis of top probes identified without correction for covariates.
bl_design2 <- bl_design[,c(1,10)]
fit <- lmFit(bl_mvals, bl_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
## (Intercept) motor
## Down 254485 0
## NotSig 23396 802647
## Up 524766 0
top <- topTable(fit,coef="motor",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg25801113 -0.4924234 2.3647080 -7.749753 2.135837e-07 0.1043483 1.2721636
## cg08944170 -1.6603593 -2.7264259 -7.535346 3.246676e-07 0.1043483 1.1501603
## cg21945639 -1.5798387 -2.7769770 -7.442438 3.900157e-07 0.1043483 1.0956014
## cg20507276 -1.5677433 -2.6588302 -7.217419 6.110390e-07 0.1071385 0.9590817
## cg13004509 -1.9133511 -3.2148072 -7.055087 8.483567e-07 0.1071385 0.8566325
## cg24284539 -1.4953010 -0.7225866 -7.053989 8.502532e-07 0.1071385 0.8559276
myprobes3 <- rownames(top)[1:20]
myprobedat3 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat3))), labels=bl_design[,ncol(bl_design)], )
gu_design2 <- gu_design[,c(1,12)]
fit <- lmFit(gu_mvals, gu_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
## (Intercept) motor
## Down 259787 6
## NotSig 31841 790649
## Up 499030 3
top <- topTable(fit,coef="motor",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg09301498 0.5997376 -0.08065059 8.809223 6.337772e-09 0.002242356 2.345151
## cg00616572 -1.5197315 1.57436451 -8.780731 6.724791e-09 0.002242356 2.329842
## cg13004509 -1.8984782 -3.29215572 -8.668162 8.508189e-09 0.002242356 2.268537
## cg08944170 -1.4144672 -3.40324443 -8.188820 2.360476e-08 0.004665822 1.992227
## cg06825310 -1.9496169 1.66197257 -7.622831 8.191739e-08 0.012953727 1.631875
## cg05724451 1.3640864 -0.31197776 7.503716 1.070179e-07 0.013798347 1.551022
myprobes3 <- rownames(top)[1:20]
myprobedat3 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie")
text(cmdscale(dist(t(myprobedat3))), labels=gu_design[,ncol(gu_design)], )
Again it was unsuccessful.
ebGSEA uses globaltest to identify DMGs from infinium data. Some points to note:
only 1 sided test - does not distinguish between up and down methylated genes.
cannot use complex experiment designs.
Therefore I modify how the ebGSEA code uses globaltest.
item mapEIDto850k.lv : A list mapping Entrez Gene ID to 850k probes
#bl_gt <- doGT(bl_design,bl_mvals,array="850k",ncores=16)
data.m <- bl_mvals
subsets <- mclapply(mapEIDto850k.lv,intersect,rownames(data.m),mc.cores = 16)
nrep.v <- unlist(lapply(subsets,length))
selG.idx <- which(nrep.v>0)
gt.o <- gt(response=bl_design[,ncol(bl_design)], alternative=t(data.m), model="linear",
directional = TRUE, standardize = FALSE, permutations = 0,
subsets=subsets[selG.idx],trace=F)
gt_res <- result(gt.o)
gt_res <- gt_res[order(gt_res$`p-value`),]
genesets <- gmt_import("ReactomePathways.gmt")
cores = 8
Aggregate mval to genes and examine pathway enrichment in blood and guthries.
tic()
bl_gsmea <- gsmea(mval=bl_mvals,design=bl_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 210.349 sec elapsed
bl_gsmea$res <- subset(bl_gsmea$res,nprobes>=5)
bl_gsmea$res$FDR <- p.adjust(bl_gsmea$res$PValue,method="fdr")
head(bl_gsmea$res,20) %>% kbl(caption="Top DM pathways in blood identified with GSMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
Degradation of GLI2 by the proteasome | 58 | -0.4110899 | 0.0012575 | 0.4711747 |
Activation of NF-kappaB in B cells | 64 | -0.4207849 | 0.0013288 | 0.4711747 |
Folding of actin by CCT/TriC | 10 | -0.7679933 | 0.0015657 | 0.4711747 |
HSP90 chaperone cycle for steroid hormone receptors (SHR) in the presence of ligand | 37 | -0.7389222 | 0.0018685 | 0.4711747 |
GLI3 is processed to GLI3R by the proteasome | 58 | -0.3916729 | 0.0020684 | 0.4711747 |
Attenuation phase | 25 | -0.8717547 | 0.0024193 | 0.4711747 |
NF-kB is activated and signals survival | 12 | -0.3558659 | 0.0024675 | 0.4711747 |
Asymmetric localization of PCP proteins | 62 | -0.3692910 | 0.0027270 | 0.4711747 |
FCERI mediated NF-kB activation | 74 | -0.3967175 | 0.0028493 | 0.4711747 |
Prefoldin mediated transfer of substrate to CCT/TriC | 27 | -0.5818032 | 0.0029000 | 0.4711747 |
Dectin-1 mediated noncanonical NF-kB signaling | 60 | -0.4110899 | 0.0034556 | 0.4711747 |
Gene and protein expression by JAK-STAT signaling after Interleukin-12 stimulation | 35 | -0.6021849 | 0.0040709 | 0.4711747 |
Uptake and function of diphtheria toxin | 6 | -0.8337081 | 0.0044657 | 0.4711747 |
SARS-CoV-1-host interactions | 90 | -0.4160035 | 0.0045561 | 0.4711747 |
Regulation of TP53 Activity through Methylation | 18 | -0.7376858 | 0.0049371 | 0.4711747 |
Degradation of GLI1 by the proteasome | 58 | -0.4110899 | 0.0054780 | 0.4711747 |
Metabolism of polyamines | 55 | -0.3691072 | 0.0057719 | 0.4711747 |
InlA-mediated entry of Listeria monocytogenes into host cells | 9 | -0.9311193 | 0.0059441 | 0.4711747 |
HSF1 activation | 27 | -0.6086286 | 0.0059862 | 0.4711747 |
Oxygen-dependent proline hydroxylation of Hypoxia-inducible Factor Alpha | 62 | -0.3916729 | 0.0068445 | 0.4711747 |
tic()
gu_gsmea <- gsmea(mval=gu_mvals,design=gu_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 208.163 sec elapsed
gu_gsmea$res <- subset(gu_gsmea$res,nprobes>=5)
gu_gsmea$res$FDR <- p.adjust(gu_gsmea$res$PValue,method="fdr")
head(gu_gsmea$res,20) %>% kbl(caption="Top DM pathways in Guthrie card identified with GSMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
Regulation of PTEN localization | 8 | -1.0481811 | 0.0007245 | 0.3714598 |
Synthesis of PI | 5 | -0.8951218 | 0.0013194 | 0.3714598 |
Highly calcium permeable postsynaptic nicotinic acetylcholine receptors | 11 | -1.1574584 | 0.0032461 | 0.3714598 |
Activation of IRF3/IRF7 mediated by TBK1/IKK epsilon | 17 | -0.7968845 | 0.0034191 | 0.3714598 |
Myoclonic epilepsy of Lafora | 9 | -1.3384903 | 0.0043225 | 0.3714598 |
Downregulation of ERBB2:ERBB3 signaling | 13 | -1.0646736 | 0.0047794 | 0.3714598 |
LGI-ADAM interactions | 14 | -1.4657720 | 0.0059084 | 0.3714598 |
Metal sequestration by antimicrobial proteins | 6 | -1.2099954 | 0.0064916 | 0.3714598 |
SUMO is transferred from E1 to E2 (UBE2I, UBC9) | 7 | -1.5000829 | 0.0071677 | 0.3714598 |
Negative regulation of FLT3 | 15 | -0.9452527 | 0.0078427 | 0.3714598 |
Signaling by MAPK mutants | 6 | -1.3864107 | 0.0080855 | 0.3714598 |
Fanconi Anemia Pathway | 36 | -0.8802185 | 0.0100903 | 0.3714598 |
Prevention of phagosomal-lysosomal fusion | 9 | -1.0353350 | 0.0104192 | 0.3714598 |
Highly calcium permeable nicotinic acetylcholine receptors | 9 | -0.2403340 | 0.0109494 | 0.3714598 |
TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) | 16 | -1.0434237 | 0.0140169 | 0.3714598 |
Transcription of E2F targets under negative control by DREAM complex | 19 | -0.1265416 | 0.0141615 | 0.3714598 |
Pexophagy | 11 | -0.3898584 | 0.0144110 | 0.3714598 |
GLI proteins bind promoters of Hh responsive genes to promote transcription | 7 | -1.0466395 | 0.0148046 | 0.3714598 |
Ligand-receptor interactions | 8 | -0.9912733 | 0.0152208 | 0.3714598 |
Downregulation of ERBB4 signaling | 9 | -1.2246832 | 0.0157631 | 0.3714598 |
For reproducibility
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] pkgload_1.3.2
## [2] GGally_2.1.2
## [3] ggplot2_3.4.1
## [4] gtools_3.9.4
## [5] tibble_3.2.0
## [6] dplyr_1.1.0
## [7] echarts4r_0.4.4
## [8] ebGSEA_0.1.0
## [9] globaltest_5.52.0
## [10] survival_3.5-5
## [11] tictoc_1.1
## [12] RIdeogram_0.2.2
## [13] kableExtra_1.3.4
## [14] data.table_1.14.8
## [15] ENmix_1.34.0
## [16] doParallel_1.0.17
## [17] qqman_0.1.8
## [18] RCircos_1.2.2
## [19] beeswarm_0.4.0
## [20] forestplot_3.1.1
## [21] abind_1.4-5
## [22] checkmate_2.1.0
## [23] reshape2_1.4.4
## [24] gplots_3.1.3
## [25] eulerr_7.0.0
## [26] GEOquery_2.66.0
## [27] RColorBrewer_1.1-3
## [28] IlluminaHumanMethylation450kmanifest_0.4.0
## [29] topconfects_1.14.0
## [30] DMRcatedata_2.16.0
## [31] ExperimentHub_2.6.0
## [32] AnnotationHub_3.6.0
## [33] BiocFileCache_2.6.0
## [34] dbplyr_2.3.1
## [35] DMRcate_2.12.0
## [36] limma_3.54.0
## [37] missMethyl_1.32.0
## [38] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [39] R.utils_2.12.2
## [40] R.oo_1.25.0
## [41] R.methodsS3_1.8.2
## [42] plyr_1.8.8
## [43] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [44] minfi_1.44.0
## [45] bumphunter_1.40.0
## [46] locfit_1.5-9.7
## [47] iterators_1.0.14
## [48] foreach_1.5.2
## [49] Biostrings_2.66.0
## [50] XVector_0.38.0
## [51] SummarizedExperiment_1.28.0
## [52] Biobase_2.58.0
## [53] MatrixGenerics_1.10.0
## [54] matrixStats_0.63.0
## [55] GenomicRanges_1.50.2
## [56] GenomeInfoDb_1.34.6
## [57] IRanges_2.32.0
## [58] S4Vectors_0.36.1
## [59] BiocGenerics_0.44.0
## [60] mitch_1.10.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.58.0
## [3] tidyr_1.3.0 bit64_4.0.5
## [5] knitr_1.42 DelayedArray_0.24.0
## [7] rpart_4.1.19 KEGGREST_1.38.0
## [9] RCurl_1.98-1.10 AnnotationFilter_1.22.0
## [11] generics_0.1.3 GenomicFeatures_1.50.3
## [13] preprocessCore_1.60.2 RSQLite_2.3.0
## [15] bit_4.0.5 tzdb_0.3.0
## [17] webshot_0.5.4 xml2_1.3.3
## [19] httpuv_1.6.9 assertthat_0.2.1
## [21] xfun_0.37 hms_1.1.2
## [23] jquerylib_0.1.4 evaluate_0.20
## [25] promises_1.2.0.1 fansi_1.0.4
## [27] restfulr_0.0.15 scrime_1.3.5
## [29] progress_1.2.2 caTools_1.18.2
## [31] readxl_1.4.2 DBI_1.1.3
## [33] geneplotter_1.76.0 htmlwidgets_1.6.2
## [35] reshape_0.8.9 purrr_1.0.1
## [37] ellipsis_0.3.2 backports_1.4.1
## [39] permute_0.9-7 calibrate_1.7.7
## [41] grImport2_0.2-0 annotate_1.76.0
## [43] biomaRt_2.54.0 deldir_1.0-6
## [45] sparseMatrixStats_1.10.0 vctrs_0.6.0
## [47] ensembldb_2.22.0 withr_2.5.0
## [49] cachem_1.0.7 Gviz_1.42.0
## [51] BSgenome_1.66.2 GenomicAlignments_1.34.0
## [53] prettyunits_1.1.1 mclust_6.0.0
## [55] svglite_2.1.1 cluster_2.1.4
## [57] RPMM_1.25 lazyeval_0.2.2
## [59] crayon_1.5.2 genefilter_1.80.3
## [61] labeling_0.4.2 edgeR_3.40.2
## [63] pkgconfig_2.0.3 nlme_3.1-162
## [65] ProtGenerics_1.30.0 nnet_7.3-18
## [67] rlang_1.1.0 lifecycle_1.0.3
## [69] filelock_1.0.2 dichromat_2.0-0.1
## [71] rsvg_2.4.0 cellranger_1.1.0
## [73] rngtools_1.5.2 base64_2.0.1
## [75] Matrix_1.5-3 Rhdf5lib_1.20.0
## [77] base64enc_0.1-3 viridisLite_0.4.1
## [79] png_0.1-8 rjson_0.2.21
## [81] bitops_1.0-7 KernSmooth_2.23-20
## [83] rhdf5filters_1.10.0 blob_1.2.4
## [85] DelayedMatrixStats_1.20.0 doRNG_1.8.6
## [87] stringr_1.5.0 nor1mix_1.3-0
## [89] readr_2.1.4 jpeg_0.1-10
## [91] scales_1.2.1 memoise_2.0.1
## [93] magrittr_2.0.3 zlibbioc_1.44.0
## [95] compiler_4.2.2 BiocIO_1.8.0
## [97] illuminaio_0.40.0 Rsamtools_2.14.0
## [99] cli_3.6.0 DSS_2.46.0
## [101] htmlTable_2.4.1 Formula_1.2-5
## [103] MASS_7.3-58.3 tidyselect_1.2.0
## [105] stringi_1.7.12 highr_0.10
## [107] yaml_2.3.7 askpass_1.1
## [109] latticeExtra_0.6-30 sass_0.4.5
## [111] VariantAnnotation_1.44.0 tools_4.2.2
## [113] rstudioapi_0.14 foreign_0.8-84
## [115] bsseq_1.34.0 gridExtra_2.3
## [117] farver_2.1.1 digest_0.6.31
## [119] BiocManager_1.30.20 shiny_1.7.4
## [121] quadprog_1.5-8 Rcpp_1.0.10
## [123] siggenes_1.72.0 BiocVersion_3.16.0
## [125] later_1.3.0 org.Hs.eg.db_3.16.0
## [127] httr_1.4.5 AnnotationDbi_1.60.0
## [129] biovizBase_1.46.0 colorspace_2.1-0
## [131] rvest_1.0.3 XML_3.99-0.13
## [133] splines_4.2.2 statmod_1.5.0
## [135] kpmt_0.1.0 multtest_2.54.0
## [137] systemfonts_1.0.4 xtable_1.8-4
## [139] jsonlite_1.8.4 dynamicTreeCut_1.63-1
## [141] R6_2.5.1 Hmisc_5.0-1
## [143] pillar_1.8.1 htmltools_0.5.4
## [145] mime_0.12 glue_1.6.2
## [147] fastmap_1.1.1 BiocParallel_1.32.5
## [149] interactiveDisplayBase_1.36.0 beanplot_1.3.1
## [151] codetools_0.2-19 utf8_1.2.3
## [153] lattice_0.20-45 bslib_0.4.2
## [155] curl_5.0.0 openssl_2.0.6
## [157] interp_1.1-3 rmarkdown_2.20
## [159] munsell_0.5.0 rhdf5_2.42.0
## [161] GenomeInfoDbData_1.2.9 HDF5Array_1.26.0
## [163] impute_1.72.3 gtable_0.3.2