Source: https://github.com/markziemann/asd_meth

Introduction

Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.

Here are the files that I’m using:

  • limma_blood_ADOS.csv

  • limma_guthrie_ADOS.csv

  • limma_buccal_ADOS.csv

suppressPackageStartupMessages({
  library("limma")
  library("parallel")
  library("mitch")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  source("meth_functions.R")
  library("data.table")
  library("kableExtra")
  library("eulerr")
  library("RIdeogram")
  library("GenomicRanges")
  library("tictoc")
  library("globaltest")
  library("ebGSEA")
  data("dualmap850kEID")
})

source("meth_functions.R")

Load data

bl_mvals <- readRDS(file="bl_mvals.rds")
gu_mvals <- readRDS(file="gu_mvals.rds")
bu_mvals <- readRDS(file="buccal_mvals.rds")

bl_design <- readRDS(file="bl_design_ados.rds")
gu_design <- readRDS(file="gu_design_ados.rds")
bu_design <- readRDS(file="buccal_design_ados.rds")

bl_lim <- read.csv("limma_blood_ADOS.csv")
gu_lim <- read.csv("limma_guthrie_ADOS.csv")
bu_lim <- read.csv("limma_buccal_ADOS.csv")

Probe sets

For each gene, extract out the probes.

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
promoters <- myann[promoters,]
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   24.00   49.68   55.00 6778.00

Gene level enrichment for blood

Effect size is estimated with the median t-statistic for each gene. Significance is estimated using the fry test. Results are merged for the user.

  • PMEA step 196.063 sec elapsed on 8 cores

  • Fry step 293.569 sec elapsed on 8 cores

  • Fry single core 1205.519 sec

tic()
res_bl <- main(mval=bl_mvals,design=bl_design,sets,cores=detectCores()/2)
res_bl[[1]] <- subset(res_bl[[1]],nprobes!=0) # remove genes with no probes obvs
res_bl_df <- res_bl[[1]] # enrichment results
res_bl_top <- res_bl[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 416.685 sec elapsed
head(res_bl_df,30) %>%
  kbl(caption="Top DM genes in blood identified with GMEA") %>%
  kable_paper("hover", full_width = F)
Top DM genes in blood identified with GMEA
nprobes median PValue FDR
ANKRD29 31 -0.5119264 0.0000190 0.5185575
NUP43 20 0.5781906 0.0001144 0.7930777
CXADRP3 2 -2.2005853 0.0004065 0.7930777
LOC101926941 18 -0.5469809 0.0004103 0.7930777
OR10W1 6 -1.7758388 0.0007027 0.7930777
ERO1LB 13 -0.7489410 0.0007159 0.7930777
ATP5I 24 -0.5934763 0.0008169 0.7930777
C12orf10 29 -0.8328921 0.0008862 0.7930777
LOC101929284 6 -2.0125155 0.0009618 0.7930777
GNL2 20 -0.8487822 0.0009900 0.7930777
C9orf89 24 -0.7213163 0.0012176 0.7930777
SLC52A2 6 -0.8921214 0.0013174 0.7930777
TRPC1 25 -0.2836119 0.0014168 0.7930777
POLR3G 22 -0.6011733 0.0014716 0.7930777
TTC29 29 -1.0539440 0.0015060 0.7930777
LOC101928663 2 -1.9067665 0.0015507 0.7930777
CEP135 25 -0.5072602 0.0016338 0.7930777
LMO4 29 -0.7910709 0.0016870 0.7930777
LINC01220 2 -1.4641912 0.0016962 0.7930777
SURF1 17 -0.3737872 0.0017982 0.7930777
C5orf24 22 -0.5231647 0.0018726 0.7930777
RELN 63 -0.6171020 0.0018891 0.7930777
ZCCHC2 30 -0.5628906 0.0018896 0.7930777
C3orf54 8 -0.9636677 0.0018978 0.7930777
CPLX4 13 -1.4227283 0.0019430 0.7930777
SNORA28 5 -0.9776395 0.0019468 0.7930777
NOL3 20 -0.7747351 0.0022516 0.7930777
C1orf114 12 -0.7054945 0.0022650 0.7930777
SNF8 27 -0.5223357 0.0023644 0.7930777
LOC101928823 11 0.5870690 0.0024001 0.7930777
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)

pdf("hist_bl_ados.pdf")
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png 
##   2
par(mfrow=c(1,1))

Now some visualisations.

volcano_plot(res_bl_df)

probe_bias(res_bl_df)

gmea_boxplot(res_bl,sets)

Gene level enrichment for guthrie cards

tic()
res_gu <- main(mval=gu_mvals,design=gu_design,sets,cores=detectCores()/2)
res_gu[[1]] <- subset(res_gu[[1]],nprobes!=0) # remove genes with no probes obvs
res_gu_df <- res_gu[[1]] # enrichment results
res_gu_top <- res_gu[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 425.216 sec elapsed
head(res_gu_df,30) %>%
  kbl(caption="Top DM genes in Guthrie card identified with GMEA") %>%
  kable_paper("hover", full_width = F)
Top DM genes in Guthrie card identified with GMEA
nprobes median PValue FDR
MIR3917 5 -0.8808877 0.0000710 1
ARL5A 15 1.0478366 0.0001918 1
ADAMTS1 19 0.6102581 0.0002838 1
CHMP1B 19 0.4557641 0.0004875 1
FOXD1 11 0.7472067 0.0005569 1
ZNF322B 1 -4.0851785 0.0006000 1
NAA30 21 -0.3732929 0.0006911 1
ALPP 14 -0.6916978 0.0009992 1
CAAP1 7 0.7857603 0.0010091 1
RPS14 21 0.4654341 0.0010451 1
LEPROTL1 19 -0.4020884 0.0010934 1
ZNF226 17 0.7738140 0.0016195 1
MEOX1 28 -0.8202385 0.0017083 1
SFRS12IP1 16 0.8951516 0.0018488 1
MIR196A2 9 0.6807623 0.0021186 1
PCP4 17 -0.7132310 0.0022340 1
SEC14L4 21 -0.8930770 0.0025559 1
GZMA 7 0.4791645 0.0026386 1
ARL16 20 0.4894063 0.0027294 1
LINC00866 8 0.9125732 0.0029358 1
RECQL 20 0.4446618 0.0029434 1
GNG10 10 -0.8812882 0.0036663 1
LSM1 26 0.6178880 0.0036880 1
CSN1S2B 4 -1.8839162 0.0037939 1
LOC441046 11 0.5893277 0.0038516 1
ZNF600 5 0.5349287 0.0038972 1
LOC286135 4 -1.5617343 0.0039589 1
C1QTNF9B-AS1 3 1.7431989 0.0040010 1
TMEM223 14 1.0364105 0.0040902 1
LOC101928766 8 -1.5651393 0.0043081 1
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)

pdf("hist_gu_ados.pdf")
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png 
##   2
par(mfrow=c(1,1))

Now some visualisations.

volcano_plot(res_gu_df)

probe_bias(res_gu_df)

gmea_boxplot(res_gu,sets)

Gene level enrichment for buccal samples

tic()
res_bu <- main(mval=bu_mvals,design=bu_design,sets,cores=detectCores()/2)
res_bu[[1]] <- subset(res_bu[[1]],nprobes!=0) # remove genes with no probes obvs
res_bu_df <- res_bu[[1]] # enrichment results
res_bu_top <- res_bu[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 371.233 sec elapsed
head(res_bu_df,30) %>%
  kbl(caption="Top DM genes in buccal swabs identified with GMEA") %>%
  kable_paper("hover", full_width = F)
Top DM genes in buccal swabs identified with GMEA
nprobes median PValue FDR
FLNC 48 0.4697347 0.0000205 0.4499072
TMEM91 49 0.4610404 0.0000329 0.4499072
CSNK1A1 42 0.5673118 0.0000964 0.7125237
LINC01273 6 -0.1819099 0.0001122 0.7125237
MIR124-2 11 -0.8328384 0.0002250 0.7125237
CPN2 20 0.7720343 0.0002613 0.7125237
ZNF154 17 0.6673729 0.0002750 0.7125237
C9orf106 7 1.2531123 0.0002877 0.7125237
KIAA0494 11 1.1242410 0.0003401 0.7125237
PMEPA1 89 0.4176987 0.0004135 0.7125237
WFDC9 13 -0.4421977 0.0004853 0.7125237
C1orf213 18 0.9337948 0.0004854 0.7125237
LINC01005 2 1.8301673 0.0005469 0.7125237
NECAP1 18 0.7010852 0.0005647 0.7125237
MIR921 8 0.7528839 0.0005965 0.7125237
POLI 19 0.8062953 0.0006081 0.7125237
TRIM8 23 -0.3634760 0.0006409 0.7125237
RD3 31 0.4803811 0.0006475 0.7125237
FCGBP 39 0.2747381 0.0006479 0.7125237
GSC2 12 0.6249996 0.0006662 0.7125237
MRPL9 22 1.0153298 0.0006944 0.7125237
C3orf1 13 0.4552082 0.0007571 0.7125237
ZNF655 29 0.4184367 0.0007743 0.7125237
RPS11 16 0.7328382 0.0008096 0.7125237
C5orf55 13 0.7806143 0.0008323 0.7125237
ZFP82 13 -1.2048232 0.0008722 0.7125237
EIF2B5 30 0.9196388 0.0008773 0.7125237
LOC642852 30 0.6675120 0.0008923 0.7125237
THSD1 16 1.3856603 0.0008998 0.7125237
IGFBP3 47 0.4958188 0.0009103 0.7125237
par(mfrow=c(2,1))
hist(res_bu_top$t,xlab="probe t-stat",main="buccal swab") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_bu_df$median,xlab="gene median t-stat",main="buccal swab") ; grid() ; abline(v=0,lty=1,lwd=3)

pdf("hist_bu_ados.pdf")
par(mfrow=c(2,1))
hist(res_bu_top$t,xlab="probe t-stat",main="buccal swab") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_bu_df$median,xlab="gene median t-stat",main="buccal swab") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png 
##   2
par(mfrow=c(1,1))

Now some visualisations.

volcano_plot(res_bu_df)

probe_bias(res_bu_df)

gmea_boxplot(res_bu,sets)

Comparison of blood and guthrie card

Now compare blood and guthrie card

res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
m1 <- merge(res_bl_df,res_gu_df,by=0)
m2 <- m1[,c("Row.names","bl","gu")]
rownames(m2) <- m2$Row.names
m2$Row.names=NULL

plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
  col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
  pch=19,cex=0.9)
grid()
abline(v=0,lty=1,lwd=3,col="black") ; abline(h=0,lty=1,lwd=3,col="black")
mtext("aggregated t-statistic for each gene (median)")

#plot(rank(m1$bl),rank(m1$gu),xlab="bl rank",ylab="gu rank",cex=0.6,pch=19) ; grid()
rm2 <- apply(m2,2,rank)
mydiff <- apply(m2,2,function(x) { length(which(x<0)) } )
rm3 <- rm2
rm3[,1] <- rm3[,1] - mydiff[1]
rm3[,2] <- rm3[,2] - mydiff[2]
rnk <- rm3
    palette <- colorRampPalette(c("white", "yellow", "orange", "red",
        "darkred", "black"))
    xmin = min(rnk[, 1])
    xmax = max(rnk[, 1])
    ymin = min(rnk[, 2])
    ymax = max(rnk[, 2])
    k <- MASS::kde2d(rnk[, 1], rnk[, 2])
    X_AXIS = paste("Rank in contrast", colnames(rnk)[1])
    Y_AXIS = paste("Rank in contrast", colnames(rnk)[2])
    filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax), 
        color.palette = palette, plot.title = {
            abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
            title(main = "Rank-rank plot of all genes", xlab = X_AXIS, 
            ylab = Y_AXIS)
        })

pdf("genedens_ados.pdf")
par(mfrow=c(2,2))
plot(1)
plot(1)

plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
  col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
  pch=19,cex=0.9)
grid()
abline(v=0, h=0, lty=2, lwd=2, col="blue")
mtext("aggregated t-statistic for each gene (median)")

    filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax), 
        color.palette = palette, plot.title = {
            abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
            title(main = "Rank-rank plot of all genes", xlab = X_AXIS, 
            ylab = Y_AXIS)
        })

dev.off()
## png 
##   2

Now we need a table of genes with top scores.

m3 <- m2
m3$med <- apply(rnk,1,median)
mg <- merge(res_gu_df,res_bl_df,by=0)
mg$gu = mg$bl = NULL
mg$med <- m3$med
mg2 <- data.frame(mg,rnk)
mg2$med <- apply(rnk,1,median)
mg2 <- mg2[order(mg2$med),]
mg2 <-subset(mg2,nprobes.x>=5)

head(mg2,20) %>%
  kbl(caption="Genes with coordinated hypomethylation in neonatal Guthrie card and blood at assessment") %>%
  kable_paper("hover", full_width = F)
Genes with coordinated hypomethylation in neonatal Guthrie card and blood at assessment
Row.names nprobes.x median.x PValue.x FDR.x nprobes.y median.y PValue.y FDR.y med bl gu
MIR1251 MIR1251 6 -1.4274015 0.1774551 1 6 -1.792361 0.1506930 0.7930777 -19529.5 -21220.0 -17839.0
SNORD114-16 SNORD114-16 5 -1.1449573 0.5203532 1 5 -1.957956 0.0827893 0.7930777 -19468.5 -21307.0 -17630.0
LOC101928273 LOC101928273 5 -1.2127118 0.3877187 1 5 -1.669061 0.0145385 0.7930777 -19415.5 -21132.0 -17699.0
LINC00375 LINC00375 5 -1.3832814 0.2943887 1 6 -1.512424 0.2761556 0.7930777 -19355.0 -20893.0 -17817.0
MIR568 MIR568 6 -0.9928982 0.4417367 1 6 -1.914089 0.0818591 0.7930777 -19332.5 -21290.0 -17375.0
BZW1L1 BZW1L1 5 -1.1110601 0.2114301 1 5 -1.596645 0.1350618 0.7930777 -19312.0 -21044.0 -17580.0
XIRP2-AS1 XIRP2-AS1 8 -1.2066650 0.3419524 1 8 -1.502519 0.0140950 0.7930777 -19283.5 -20874.0 -17693.0
LOC400655 LOC400655 9 -0.9293551 0.3457388 1 9 -2.085337 0.0094342 0.7930777 -19278.0 -21341.0 -17215.0
HISPPD1 HISPPD1 5 -1.1073882 0.4421820 1 5 -1.551529 0.1827707 0.7930777 -19268.0 -20964.0 -17572.0
PTH PTH 5 -0.9789510 0.4148407 1 6 -1.698846 0.0563605 0.7930777 -19255.5 -21159.0 -17352.0
TAAR9 TAAR9 5 -1.1557224 0.2896427 1 6 -1.454520 0.1644642 0.7930777 -19211.0 -20778.0 -17644.0
OR6T1 OR6T1 7 -1.0586676 0.3326189 1 8 -1.510381 0.1210877 0.7930777 -19199.0 -20889.0 -17509.0
LOC101927356 LOC101927356 5 -0.9179596 0.5002259 1 6 -1.785762 0.0042609 0.7930777 -19193.5 -21215.0 -17172.0
SLC13A1 SLC13A1 8 -1.0568968 0.4250764 1 9 -1.477957 0.1210331 0.7930777 -19168.0 -20834.0 -17502.0
TMPRSS11F TMPRSS11F 11 -0.8784111 0.3799398 1 12 -1.767057 0.0298130 0.7930777 -19133.5 -21205.0 -17062.0
DDTL DDTL 5 -1.2451143 0.2211730 1 5 -1.337351 0.1534973 0.7930777 -19107.5 -20488.0 -17727.0
CRYGC CRYGC 9 -1.0422063 0.1633172 1 9 -1.435018 0.0982049 0.7930777 -19106.0 -20736.0 -17476.0
SNORD114-26 SNORD114-26 5 -0.8547406 0.4835089 1 5 -1.817857 0.0957504 0.7930777 -19104.5 -21237.5 -16971.5
ADGRL4 ADGRL4 5 -1.2355909 0.1427681 1 5 -1.336956 0.3894606 0.8010867 -19103.0 -20484.0 -17722.0
RMST RMST 18 -0.9256270 0.2833991 1 18 -1.569118 0.0336580 0.7930777 -19101.5 -21000.0 -17203.0
mg2 <- mg2[order(-mg2$med),]

head(mg2,20) %>%
  kbl(caption="Genes with coordinated hypermethylation in neonatal Guthrie card and blood at assessment") %>%
  kable_paper("hover", full_width = F)
Genes with coordinated hypermethylation in neonatal Guthrie card and blood at assessment
Row.names nprobes.x median.x PValue.x FDR.x nprobes.y median.y PValue.y FDR.y med bl gu
GPX8 GPX8 9 0.7312060 0.2336383 1 9 1.4334798 0.0458087 0.7930777 5749.0 4227.0 7271.0
LOC400027 LOC400027 10 0.8814881 0.1105989 1 10 0.9710774 0.1825455 0.7930777 5741.5 4053.0 7430.0
CARD6 CARD6 5 1.0981604 0.0363444 1 7 0.7900422 0.4244594 0.8070399 5714.5 3887.0 7542.0
LOC728819 LOC728819 9 1.3835518 0.0238339 1 9 0.6906842 0.2130265 0.7930777 5712.5 3786.0 7639.0
HIST1H3E HIST1H3E 15 0.6501439 0.4228645 1 15 1.1093279 0.0751159 0.7930777 5625.0 4115.0 7135.0
FLJ35776 FLJ35776 9 0.7476523 0.3584935 1 9 0.8601087 0.1781379 0.7930777 5621.0 3953.0 7289.0
DNAJB4 DNAJB4 8 0.6526773 0.0212003 1 9 1.0688812 0.2063266 0.7930777 5618.5 4096.0 7141.0
ZSCAN16 ZSCAN16 8 0.7482512 0.1566899 1 8 0.8110517 0.1540819 0.7930777 5601.5 3912.0 7291.0
CAAP1 CAAP1 7 0.7857603 0.0010091 1 8 0.7258485 0.2840578 0.7930777 5584.0 3825.0 7343.0
SMEK2 SMEK2 14 0.6589292 0.4376319 1 14 0.9050071 0.1995825 0.7930777 5573.5 3993.0 7154.0
HIST1H2BD HIST1H2BD 19 0.7566297 0.1791553 1 20 0.7336926 0.2696345 0.7930777 5572.0 3841.0 7303.0
HIST1H2AD HIST1H2AD 8 0.8016445 0.1408507 1 8 0.6835344 0.1461026 0.7930777 5569.5 3777.5 7361.5
HIST1H2BF HIST1H2BF 8 0.8016445 0.1408507 1 8 0.6835344 0.1461026 0.7930777 5569.5 3777.5 7361.5
HIST1H2AC HIST1H2AC 16 0.7649495 0.0425652 1 16 0.6878030 0.4154244 0.8047133 5549.0 3782.0 7316.0
LOC100128822 LOC100128822 9 0.6226731 0.2257377 1 9 0.9326637 0.2320806 0.7930777 5541.5 4016.0 7067.0
RBBP5 RBBP5 11 0.7479680 0.1374463 1 11 0.6800551 0.1405112 0.7930777 5531.5 3773.0 7290.0
SIRT4 SIRT4 10 0.7740743 0.1589140 1 13 0.6493541 0.3554988 0.7937780 5528.5 3730.0 7327.0
ARMC1 ARMC1 15 0.6852645 0.0916985 1 16 0.6967156 0.4803700 0.8219941 5496.0 3792.0 7200.0
ZEB2-AS1 ZEB2-AS1 11 0.5886547 0.2622867 1 11 0.8717299 0.2232118 0.7930777 5472.5 3966.0 6979.0
PRR15L PRR15L 14 0.6222995 0.1417170 1 14 0.7687025 0.4987191 0.8275530 5469.5 3874.0 7065.0

Gene set level enrichment

genesets <- gmt_import("ReactomePathways.gmt")

cores = 8

Mitch enrichment analysis

Here I’m using the median t-statistic for downstream enrichment. Pathways are from REACTOME and analysis using mitch.

res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
res_bu_df$bu <- res_bu_df$median

bl <-  res_bl_df[,"bl",drop=FALSE]
mitch_bl <- mitch_calc(bl,genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head( mitch_bl$enrichment_result,30) %>%
  kbl(caption="BLOOD: Top effect size pathways found with mitch") %>%
  kable_paper("hover", full_width = F)
BLOOD: Top effect size pathways found with mitch
set setSize pANOVA s.dist p.adjustANOVA
380 Eicosanoids 12 0.0000956 0.6503155 0.0013443
1204 Scavenging of heme from plasma 12 0.0009330 -0.5517614 0.0096161
1163 SARS-CoV-1 modulates host translation machinery 34 0.0000001 0.5293145 0.0000039
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000000 -0.5253078 0.0000007
516 Glucuronidation 25 0.0000280 -0.4839107 0.0004994
903 Peptide chain elongation 84 0.0000000 0.4682615 0.0000000
87 Apoptotic cleavage of cell adhesion proteins 11 0.0074916 -0.4656017 0.0554165
395 Eukaryotic Translation Elongation 88 0.0000000 0.4641716 0.0000000
397 Eukaryotic Translation Termination 87 0.0000000 0.4564440 0.0000000
1208 Selenocysteine synthesis 87 0.0000000 0.4556688 0.0000000
224 Competing endogenous RNAs (ceRNAs) regulate PTEN translation 16 0.0016185 -0.4551520 0.0155414
1215 Sensory perception of sweet, bitter, and umami (glutamate) taste 41 0.0000007 -0.4467520 0.0000299
1496 Viral mRNA Translation 84 0.0000000 0.4451676 0.0000000
1145 Response of EIF2AK4 (GCN2) to amino acid deficiency 95 0.0000000 0.4352367 0.0000000
449 Formation of a pool of free 40S subunits 94 0.0000000 0.4286585 0.0000000
829 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 89 0.0000000 0.4263920 0.0000000
47 Activation of RAC1 11 0.0147850 -0.4244191 0.1006942
910 Pexophagy 11 0.0157812 0.4202991 0.1038199
436 Fatty acids 15 0.0049146 0.4193853 0.0404144
455 Formation of the ternary complex, and subsequently, the 43S complex 46 0.0000011 0.4147611 0.0000445
493 GTP hydrolysis and joining of the 60S ribosomal subunit 104 0.0000000 0.4100392 0.0000000
663 L13a-mediated translational silencing of Ceruloplasmin expression 103 0.0000000 0.4021987 0.0000000
1169 SARS-CoV-2 modulates host translation machinery 46 0.0000026 0.4005633 0.0000916
165 Cap-dependent Translation Initiation 111 0.0000000 0.3994923 0.0000000
396 Eukaryotic Translation Initiation 111 0.0000000 0.3994923 0.0000000
1207 Selenoamino acid metabolism 103 0.0000000 0.3981056 0.0000000
1187 SRP-dependent cotranslational protein targeting to membrane 105 0.0000000 0.3898505 0.0000000
697 MET activates RAS signaling 11 0.0262410 -0.3869926 0.1554350
828 Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) 108 0.0000000 0.3744789 0.0000000
830 Nonsense-Mediated Decay (NMD) 108 0.0000000 0.3744789 0.0000000
sig <- subset(mitch_bl$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
  kbl(caption="BLOOD: Top effect size pathways found with mitch after 1% FDR filtering") %>%
  kable_paper("hover", full_width = F)
BLOOD: Top effect size pathways found with mitch after 1% FDR filtering
set setSize pANOVA s.dist p.adjustANOVA
380 Eicosanoids 12 0.0000956 0.6503155 0.0013443
1204 Scavenging of heme from plasma 12 0.0009330 -0.5517614 0.0096161
1163 SARS-CoV-1 modulates host translation machinery 34 0.0000001 0.5293145 0.0000039
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000000 -0.5253078 0.0000007
516 Glucuronidation 25 0.0000280 -0.4839107 0.0004994
903 Peptide chain elongation 84 0.0000000 0.4682615 0.0000000
395 Eukaryotic Translation Elongation 88 0.0000000 0.4641716 0.0000000
397 Eukaryotic Translation Termination 87 0.0000000 0.4564440 0.0000000
1208 Selenocysteine synthesis 87 0.0000000 0.4556688 0.0000000
224 Competing endogenous RNAs (ceRNAs) regulate PTEN translation 16 0.0016185 -0.4551520 0.0155414
1215 Sensory perception of sweet, bitter, and umami (glutamate) taste 41 0.0000007 -0.4467520 0.0000299
1496 Viral mRNA Translation 84 0.0000000 0.4451676 0.0000000
1145 Response of EIF2AK4 (GCN2) to amino acid deficiency 95 0.0000000 0.4352367 0.0000000
449 Formation of a pool of free 40S subunits 94 0.0000000 0.4286585 0.0000000
829 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 89 0.0000000 0.4263920 0.0000000
436 Fatty acids 15 0.0049146 0.4193853 0.0404144
455 Formation of the ternary complex, and subsequently, the 43S complex 46 0.0000011 0.4147611 0.0000445
493 GTP hydrolysis and joining of the 60S ribosomal subunit 104 0.0000000 0.4100392 0.0000000
663 L13a-mediated translational silencing of Ceruloplasmin expression 103 0.0000000 0.4021987 0.0000000
1169 SARS-CoV-2 modulates host translation machinery 46 0.0000026 0.4005633 0.0000916
165 Cap-dependent Translation Initiation 111 0.0000000 0.3994923 0.0000000
396 Eukaryotic Translation Initiation 111 0.0000000 0.3994923 0.0000000
1207 Selenoamino acid metabolism 103 0.0000000 0.3981056 0.0000000
1187 SRP-dependent cotranslational protein targeting to membrane 105 0.0000000 0.3898505 0.0000000
828 Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) 108 0.0000000 0.3744789 0.0000000
830 Nonsense-Mediated Decay (NMD) 108 0.0000000 0.3744789 0.0000000
1156 Ribosomal scanning and start codon recognition 53 0.0000025 0.3733970 0.0000916
95 Aspirin ADME 44 0.0000199 -0.3715916 0.0003942
703 Major pathway of rRNA processing in the nucleolus and cytosol 171 0.0000000 0.3705310 0.0000000
107 Autodegradation of the E3 ubiquitin ligase COP1 50 0.0000068 0.3676975 0.0001949
gu <-  res_gu_df[,"gu",drop=FALSE]
mitch_gu <- mitch_calc(gu,genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head( mitch_gu$enrichment_result,30) %>%
  kbl(caption="GUTHRIE: Top effect size pathways found with mitch") %>%
  kable_paper("hover", full_width = F)
GUTHRIE: Top effect size pathways found with mitch
set setSize pANOVA s.dist p.adjustANOVA
952 Prednisone ADME 10 0.0034275 -0.5343966 0.0197722
1517 cGMP effects 15 0.0018264 -0.4647835 0.0119141
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000015 -0.4451724 0.0000378
24 Aberrant regulation of mitotic G1/S transition in cancer due to RB1 defects 17 0.0015979 0.4420932 0.0109309
293 Defective binding of RB1 mutants to E2F1,(E2F2, E2F3) 17 0.0015979 0.4420932 0.0109309
316 Digestion 18 0.0013865 -0.4352479 0.0098326
25 Aberrant regulation of mitotic cell cycle due to RB1 defects 36 0.0000064 0.4345373 0.0001409
614 Interaction With Cumulus Cells And The Zona Pellucida 11 0.0167020 -0.4166894 0.0692260
334 Diseases of mitotic cell cycle 37 0.0000116 0.4165313 0.0002171
26 Aberrant regulation of mitotic exit in cancer due to RB1 defects 20 0.0015741 0.4081783 0.0108643
977 Purine ribonucleoside monophosphate biosynthesis 11 0.0205079 0.4034093 0.0802665
1213 Senescence-Associated Secretory Phenotype (SASP) 53 0.0000005 0.3980270 0.0000161
923 Phosphorylation of the APC/C 20 0.0022972 0.3937422 0.0144366
959 Processing and activation of SUMO 10 0.0312647 0.3932856 0.1078911
1365 TICAM1-dependent activation of IRF3/IRF7 12 0.0183542 0.3931554 0.0742815
683 Lysosome Vesicle Biogenesis 33 0.0001060 0.3898157 0.0013319
372 ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression 16 0.0070605 0.3889543 0.0350984
95 Aspirin ADME 44 0.0000084 -0.3879403 0.0001715
532 Golgi Cisternae Pericentriolar Stack Reorganization 14 0.0125524 0.3852757 0.0556046
13 APC/C:Cdc20 mediated degradation of Cyclin B 24 0.0011729 0.3826321 0.0085129
882 PINK1-PRKN Mediated Mitophagy 21 0.0024618 0.3816325 0.0150431
516 Glucuronidation 25 0.0009624 -0.3813647 0.0073293
1440 Transcriptional regulation of granulopoiesis 31 0.0002402 0.3810169 0.0025231
1364 TICAM1,TRAF6-dependent induction of TAK1 complex 10 0.0372397 0.3804015 0.1202029
376 Early Phase of HIV Life Cycle 14 0.0137477 0.3802706 0.0600530
824 Nitric oxide stimulates guanylate cyclase 22 0.0020371 -0.3798552 0.0130137
399 Expression and translocation of olfactory receptors 356 0.0000000 -0.3772402 0.0000000
317 Digestion and absorption 23 0.0017570 -0.3767783 0.0116082
513 Glucocorticoid biosynthesis 10 0.0394607 -0.3760601 0.1260270
853 Olfactory Signaling Pathway 363 0.0000000 -0.3740650 0.0000000
sig <- subset(mitch_gu$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
  kbl(caption="GUTHRIE: Top effect size pathways found with mitch after 1% FDR filtering") %>%
  kable_paper("hover", full_width = F)
GUTHRIE: Top effect size pathways found with mitch after 1% FDR filtering
set setSize pANOVA s.dist p.adjustANOVA
952 Prednisone ADME 10 0.0034275 -0.5343966 0.0197722
1517 cGMP effects 15 0.0018264 -0.4647835 0.0119141
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000015 -0.4451724 0.0000378
24 Aberrant regulation of mitotic G1/S transition in cancer due to RB1 defects 17 0.0015979 0.4420932 0.0109309
293 Defective binding of RB1 mutants to E2F1,(E2F2, E2F3) 17 0.0015979 0.4420932 0.0109309
316 Digestion 18 0.0013865 -0.4352479 0.0098326
25 Aberrant regulation of mitotic cell cycle due to RB1 defects 36 0.0000064 0.4345373 0.0001409
334 Diseases of mitotic cell cycle 37 0.0000116 0.4165313 0.0002171
26 Aberrant regulation of mitotic exit in cancer due to RB1 defects 20 0.0015741 0.4081783 0.0108643
1213 Senescence-Associated Secretory Phenotype (SASP) 53 0.0000005 0.3980270 0.0000161
923 Phosphorylation of the APC/C 20 0.0022972 0.3937422 0.0144366
683 Lysosome Vesicle Biogenesis 33 0.0001060 0.3898157 0.0013319
372 ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression 16 0.0070605 0.3889543 0.0350984
95 Aspirin ADME 44 0.0000084 -0.3879403 0.0001715
13 APC/C:Cdc20 mediated degradation of Cyclin B 24 0.0011729 0.3826321 0.0085129
882 PINK1-PRKN Mediated Mitophagy 21 0.0024618 0.3816325 0.0150431
516 Glucuronidation 25 0.0009624 -0.3813647 0.0073293
1440 Transcriptional regulation of granulopoiesis 31 0.0002402 0.3810169 0.0025231
824 Nitric oxide stimulates guanylate cyclase 22 0.0020371 -0.3798552 0.0130137
399 Expression and translocation of olfactory receptors 356 0.0000000 -0.3772402 0.0000000
317 Digestion and absorption 23 0.0017570 -0.3767783 0.0116082
853 Olfactory Signaling Pathway 363 0.0000000 -0.3740650 0.0000000
560 Heme degradation 16 0.0099994 -0.3719018 0.0461465
106 Autodegradation of Cdh1 by Cdh1:APC/C 62 0.0000005 0.3683680 0.0000161
1172 SCF(Skp2)-mediated degradation of p27/p21 58 0.0000012 0.3678922 0.0000326
237 Conversion from APC/C:Cdc20 to APC/C:Cdh1 in late anaphase 20 0.0047245 0.3648681 0.0257275
753 Mitophagy 27 0.0010537 0.3641463 0.0077573
1201 SUMOylation of transcription factors 20 0.0052162 0.3607526 0.0274296
42 Activation of IRF3/IRF7 mediated by TBK1/IKK epsilon 17 0.0105932 0.3580020 0.0481680
1142 Respiratory electron transport 90 0.0000000 0.3539521 0.0000004
bu <-  res_bu_df[,"bu",drop=FALSE]
mitch_bu <- mitch_calc(bu,genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head( mitch_bu$enrichment_result,30) %>%
  kbl(caption="BUCCAL: Top effect size pathways found with mitch") %>%
  kable_paper("hover", full_width = F)
BUCCAL: Top effect size pathways found with mitch
set setSize pANOVA s.dist p.adjustANOVA
206 Chylomicron remodeling 10 0.0030515 0.5409616 0.0813383
1384 TRAF6 mediated IRF7 activation 26 0.0000237 -0.4787252 0.0012660
1163 SARS-CoV-1 modulates host translation machinery 34 0.0000086 0.4408939 0.0005094
777 NF-kB activation through FADD/RIP-1 pathway mediated by caspase-8 and -10 12 0.0098912 -0.4300281 0.2066466
614 Interaction With Cumulus Cells And The Zona Pellucida 11 0.0170097 -0.4155218 0.2914470
399 Expression and translocation of olfactory receptors 358 0.0000000 -0.3913890 0.0000000
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000237 -0.3910386 0.0012660
172 Caspase activation via Death Receptors in the presence of ligand 16 0.0081250 -0.3821483 0.1752464
853 Olfactory Signaling Pathway 365 0.0000000 -0.3813864 0.0000000
516 Glucuronidation 25 0.0011585 -0.3753131 0.0416525
324 Diseases associated with glycosylation precursor biosynthesis 15 0.0119136 0.3749728 0.2302303
205 Chylomicron assembly 10 0.0422714 0.3708539 0.4913650
913 Phase 4 - resting membrane potential 19 0.0063288 0.3617541 0.1460350
1521 mRNA Editing 10 0.0526736 -0.3538264 0.5694643
778 NF-kB is activated and signals survival 12 0.0344219 0.3525997 0.4376782
1310 Signaling by cytosolic FGFR1 fusion mutants 17 0.0123680 -0.3503867 0.2360616
1342 Synthesis of PIPs at the late endosome membrane 10 0.0656223 -0.3361887 0.6111570
1496 Viral mRNA Translation 84 0.0000001 0.3321835 0.0000270
252 Cytosolic iron-sulfur cluster assembly 10 0.0701270 0.3307280 0.6230824
1091 Regulation of NF-kappa B signaling 17 0.0183290 0.3304198 0.3046957
397 Eukaryotic Translation Termination 87 0.0000001 0.3294139 0.0000270
903 Peptide chain elongation 84 0.0000002 0.3268420 0.0000345
1086 Regulation of IFNA/IFNB signaling 22 0.0081401 -0.3258580 0.1752464
910 Pexophagy 11 0.0644946 0.3218990 0.6079792
1394 Telomere C-strand synthesis initiation 11 0.0668574 -0.3190935 0.6170875
1536 p75NTR recruits signalling complexes 12 0.0556852 0.3189937 0.5825462
1537 p75NTR signals via NF-kB 15 0.0327239 0.3184290 0.4287383
646 Intrinsic Pathway of Fibrin Clot Formation 21 0.0115290 -0.3184022 0.2285111
829 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 89 0.0000003 0.3153523 0.0000345
1208 Selenocysteine synthesis 87 0.0000004 0.3141580 0.0000461
sig <- subset(mitch_bu$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
  kbl(caption="BUCCAL: Top effect size pathways found with mitch after 1% FDR filtering") %>%
  kable_paper("hover", full_width = F)
BUCCAL: Top effect size pathways found with mitch after 1% FDR filtering
set setSize pANOVA s.dist p.adjustANOVA
1384 TRAF6 mediated IRF7 activation 26 0.0000237 -0.4787252 0.0012660
1163 SARS-CoV-1 modulates host translation machinery 34 0.0000086 0.4408939 0.0005094
399 Expression and translocation of olfactory receptors 358 0.0000000 -0.3913890 0.0000000
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000237 -0.3910386 0.0012660
853 Olfactory Signaling Pathway 365 0.0000000 -0.3813864 0.0000000
516 Glucuronidation 25 0.0011585 -0.3753131 0.0416525
1496 Viral mRNA Translation 84 0.0000001 0.3321835 0.0000270
397 Eukaryotic Translation Termination 87 0.0000001 0.3294139 0.0000270
903 Peptide chain elongation 84 0.0000002 0.3268420 0.0000345
829 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 89 0.0000003 0.3153523 0.0000345
1208 Selenocysteine synthesis 87 0.0000004 0.3141580 0.0000461
395 Eukaryotic Translation Elongation 88 0.0000004 0.3119438 0.0000461
54 Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S 54 0.0001484 0.2983511 0.0065546
1446 Translation initiation complex formation 53 0.0001779 0.2975558 0.0076396
1207 Selenoamino acid metabolism 103 0.0000003 0.2934261 0.0000345
455 Formation of the ternary complex, and subsequently, the 43S complex 46 0.0005761 0.2932993 0.0232558
1169 SARS-CoV-2 modulates host translation machinery 46 0.0005867 0.2928796 0.0232558
1156 Ribosomal scanning and start codon recognition 53 0.0003121 0.2861701 0.0130414
449 Formation of a pool of free 40S subunits 94 0.0000022 0.2822424 0.0001496
1145 Response of EIF2AK4 (GCN2) to amino acid deficiency 95 0.0000020 0.2821614 0.0001391
663 L13a-mediated translational silencing of Ceruloplasmin expression 103 0.0000017 0.2730596 0.0001221
828 Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) 108 0.0000013 0.2692334 0.0001133
830 Nonsense-Mediated Decay (NMD) 108 0.0000013 0.2692334 0.0001133
493 GTP hydrolysis and joining of the 60S ribosomal subunit 104 0.0000024 0.2674934 0.0001549
165 Cap-dependent Translation Initiation 111 0.0000016 0.2637072 0.0001210
396 Eukaryotic Translation Initiation 111 0.0000016 0.2637072 0.0001210
703 Major pathway of rRNA processing in the nucleolus and cytosol 171 0.0000000 0.2506125 0.0000059
1187 SRP-dependent cotranslational protein targeting to membrane 105 0.0000106 0.2485995 0.0006090
1214 Sensory Perception 569 0.0000000 -0.2409909 0.0000000
191 Cellular response to starvation 147 0.0000008 0.2356707 0.0000822
mitch_plots(res=mitch_bu,outfile="buccal_mitch_ados.pdf")
## png 
##   2
#mitch_report(res=mitch_bu,outfile="buccal_mitch_ados.html")

subset(mitch_bu$enrichment_result,p.adjustANOVA<0.05 & s.dist<0) %>%
  kbl(caption="Reactome pathways with an inverse methylation association with ADOS in buccal swab samples.") %>%
  kable_paper("hover", full_width = F)
Reactome pathways with an inverse methylation association with ADOS in buccal swab samples.
set setSize pANOVA s.dist p.adjustANOVA
1384 TRAF6 mediated IRF7 activation 26 0.0000237 -0.4787252 0.0012660
399 Expression and translocation of olfactory receptors 358 0.0000000 -0.3913890 0.0000000
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000237 -0.3910386 0.0012660
853 Olfactory Signaling Pathway 365 0.0000000 -0.3813864 0.0000000
516 Glucuronidation 25 0.0011585 -0.3753131 0.0416525
1214 Sensory Perception 569 0.0000000 -0.2409909 0.0000000
460 G alpha (i) signalling events 305 0.0010391 -0.1090662 0.0391819
584 Immune System 1870 0.0000426 -0.0567350 0.0020605

Mitch joint analysis of blood and guthrie card. As using the median value is apparently the most sensitive, we’ll go with that.

m <- merge(res_bl_df,res_gu_df,by=0)
rownames(m) <- m$Row.names
m2 <- m[,c("bl","gu")]
m2[is.na(m2)] <- 0
res <- mitch_calc(m2, genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
mitch_report(res,"blgu_mitch.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmpP5I8W6/blgu_mitch.rds ".
## 
## 
## processing file: mitch.Rmd
## 1/34                             
## 2/34 [checklibraries]            
## 3/34                             
## 4/34 [peek]                      
## 5/34                             
## 6/34 [metrics]                   
## 7/34                             
## 8/34 [scatterplot]
## 9/34                             
## 10/34 [contourplot]
## 11/34                             
## 12/34 [input_geneset_metrics1]    
## 13/34                             
## 14/34 [input_geneset_metrics2]
## 15/34                             
## 16/34 [input_geneset_metrics3]
## 17/34                             
## 18/34 [echart1d]                  
## 19/34 [echart2d]                  
## 20/34                             
## 21/34 [heatmap]
## 22/34                             
## 23/34 [effectsize]                
## 24/34                             
## 25/34 [results_table]             
## 26/34                             
## 27/34 [results_table_complete]    
## 28/34                             
## 29/34 [detailed_geneset_reports1d]
## 30/34                             
## 31/34 [detailed_geneset_reports2d]
## 32/34                             
## 33/34 [session_info]              
## 34/34
## output file: /home/mdz/projects/asd_meth/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mdz/projects/asd_meth/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpP5I8W6/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpP5I8W6/rmarkdown-str9cc35a10ebae.html
## 
## Output created: /tmp/RtmpP5I8W6/mitch_report.html
## [1] TRUE
mitch_plots(res=res,outfile="blgu_mitch_ados.pdf")
## png 
##   2
sig <- subset(res$enrichment_result,p.adjustMANOVA<0.01)

head(sig[order(-abs(sig$s.dist)),],30) %>%
  kbl(caption="Top pathways found with mitch (effect size after 1% FDR filtering)") %>%
  kable_paper("hover", full_width = F)
Top pathways found with mitch (effect size after 1% FDR filtering)
set setSize pMANOVA s.bl s.gu p.bl p.gu s.dist SD p.adjustMANOVA
212 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 0.0000000 -0.5260969 -0.4451724 0.0000000 0.0000015 0.6891708 0.0572222 0.0000000
380 Eicosanoids 12 0.0000841 0.6502295 -0.1591905 0.0000959 0.3396372 0.6694327 0.5723464 0.0007698
516 Glucuronidation 25 0.0000081 -0.4846569 -0.3813647 0.0000272 0.0009624 0.6167101 0.0730386 0.0000914
1163 SARS-CoV-1 modulates host translation machinery 34 0.0000001 0.5291174 0.3014517 0.0000001 0.0023452 0.6089650 0.1609840 0.0000022
1215 Sensory perception of sweet, bitter, and umami (glutamate) taste 41 0.0000002 -0.4475161 -0.3285582 0.0000007 0.0002713 0.5551767 0.0841159 0.0000030
903 Peptide chain elongation 84 0.0000000 0.4680166 0.2845095 0.0000000 0.0000065 0.5477091 0.1297591 0.0000000
395 Eukaryotic Translation Elongation 88 0.0000000 0.4639213 0.2776535 0.0000000 0.0000067 0.5406611 0.1317112 0.0000000
95 Aspirin ADME 44 0.0000002 -0.3723122 -0.3879403 0.0000192 0.0000084 0.5376933 0.0110507 0.0000033
397 Eukaryotic Translation Termination 87 0.0000000 0.4561885 0.2622115 0.0000000 0.0000234 0.5261776 0.1371625 0.0000000
1208 Selenocysteine synthesis 87 0.0000000 0.4554126 0.2617756 0.0000000 0.0000242 0.5252876 0.1369221 0.0000000
1496 Viral mRNA Translation 84 0.0000000 0.4449038 0.2721301 0.0000000 0.0000161 0.5215306 0.1221695 0.0000000
882 PINK1-PRKN Mediated Mitophagy 21 0.0010561 0.3476404 0.3816325 0.0058111 0.0024618 0.5162337 0.0240361 0.0077998
1172 SCF(Skp2)-mediated degradation of p27/p21 58 0.0000000 0.3548917 0.3678922 0.0000029 0.0000012 0.5111681 0.0091928 0.0000003
1145 Response of EIF2AK4 (GCN2) to amino acid deficiency 95 0.0000000 0.4349631 0.2537865 0.0000000 0.0000189 0.5035876 0.1281112 0.0000000
829 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 89 0.0000000 0.4261122 0.2632858 0.0000000 0.0000175 0.5008902 0.1151357 0.0000000
106 Autodegradation of Cdh1 by Cdh1:APC/C 62 0.0000000 0.3209240 0.3683680 0.0000123 0.0000005 0.4885562 0.0335480 0.0000003
449 Formation of a pool of free 40S subunits 94 0.0000000 0.4283806 0.2339271 0.0000000 0.0000880 0.4880900 0.1374994 0.0000000
399 Expression and translocation of olfactory receptors 356 0.0000000 -0.3013619 -0.3772402 0.0000000 0.0000000 0.4828345 0.0536541 0.0000000
1216 Sensory perception of taste 47 0.0000015 -0.3564503 -0.3256706 0.0000234 0.0001116 0.4828231 0.0217646 0.0000220
224 Competing endogenous RNAs (ceRNAs) regulate PTEN translation 16 0.0012294 -0.4560554 0.1573322 0.0015841 0.2758674 0.4824313 0.4337306 0.0086791
753 Mitophagy 27 0.0004874 0.3124424 0.3641463 0.0049452 0.0010537 0.4798154 0.0365602 0.0038445
1213 Senescence-Associated Secretory Phenotype (SASP) 53 0.0000003 0.2653677 0.3980270 0.0008290 0.0000005 0.4783780 0.0938042 0.0000047
13 APC/C:Cdc20 mediated degradation of Cyclin B 24 0.0011445 0.2857018 0.3826321 0.0153866 0.0011729 0.4775278 0.0685401 0.0082466
853 Olfactory Signaling Pathway 363 0.0000000 -0.2929109 -0.3740650 0.0000000 0.0000000 0.4751015 0.0573846 0.0000000
1169 SARS-CoV-2 modulates host translation machinery 46 0.0000023 0.4002583 0.2532630 0.0000026 0.0029527 0.4736548 0.1039414 0.0000312
492 GSK3B and BTRC:CUL1-mediated-degradation of NFE2L2 50 0.0000013 0.3210559 0.3452131 0.0000854 0.0000240 0.4714329 0.0170818 0.0000186
1404 The role of GTSE1 in G2/M progression after G2 checkpoint 58 0.0000001 0.3562880 0.3075098 0.0000027 0.0000507 0.4706415 0.0344914 0.0000028
493 GTP hydrolysis and joining of the 60S ribosomal subunit 104 0.0000000 0.4097452 0.2233373 0.0000000 0.0000824 0.4666591 0.1318103 0.0000000
565 Hh mutants are degraded by ERAD 54 0.0000006 0.3489444 0.3087141 0.0000091 0.0000865 0.4659041 0.0284471 0.0000093
21 AUF1 (hnRNP D0) binds and destabilizes mRNA 53 0.0000008 0.3526228 0.3039961 0.0000089 0.0001283 0.4655711 0.0343842 0.0000117

Camera enrichment analysis

Camera is designed to control for correlation bias. Using the median value gives better results than the directional significance or the combined metric. Blood dataset. Need to filter by FDR then sort by effect size.

stat <- res_bl_df$median
names(stat) <- rownames(res_bl_df)
stat[is.na(stat)] <- 0

tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
  myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
  mystats <- stat[names(stat) %in% myset]
  median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.505 sec elapsed
sig <- subset(cres,FDR<0.05)

head(sig[order(-abs(sig$median)),],20) %>%
  kbl(caption = "BLOOD: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
  kable_paper("hover", full_width = F)
BLOOD: Top effect size pathways found with CAMERA after 5% FDR filtering
NGenes Direction PValue FDR median
Expression and translocation of olfactory receptors 359 Down 0.0000000 0.0000557 -0.8031927
Class C/3 (Metabotropic glutamate/pheromone receptors) 39 Down 0.0000005 0.0002979 -0.8029345
Olfactory Signaling Pathway 366 Down 0.0000001 0.0000559 -0.7970944
Sensory perception of sweet, bitter, and umami (glutamate) taste 41 Down 0.0000123 0.0053857 -0.7224652
Aspirin ADME 44 Down 0.0000705 0.0085143 -0.6442641
Glucuronidation 25 Down 0.0000223 0.0053857 -0.6094985
Sensory perception of taste 47 Down 0.0002054 0.0150070 -0.5983857
Sensory Perception 570 Down 0.0000196 0.0053857 -0.5921152
Metabolism of RNA 639 Up 0.0007046 0.0367915 -0.2416861
Processing of Capped Intron-Containing Pre-mRNA 232 Up 0.0004330 0.0246030 -0.2258106
mRNA Splicing - Major Pathway 174 Up 0.0003687 0.0215874 -0.2159036
mRNA Splicing 184 Up 0.0001359 0.0115705 -0.2019545
Translation 262 Up 0.0003148 0.0196209 -0.1866647
Influenza Infection 148 Up 0.0005360 0.0295889 -0.1749798
mRNA Splicing - Minor Pathway 51 Up 0.0006806 0.0365282 -0.1682665
Regulation of expression of SLITs and ROBOs 159 Up 0.0003659 0.0215874 -0.1597353
rRNA processing 186 Up 0.0002097 0.0150070 -0.1578202
Influenza Viral RNA Transcription and Replication 129 Up 0.0002691 0.0173319 -0.1559051
rRNA processing in the nucleus and cytosol 180 Up 0.0001005 0.0092445 -0.1512278
Selenoamino acid metabolism 103 Up 0.0001377 0.0115705 -0.1483371
nrow(subset(cres,FDR<0.05 & Direction=="Up"))
## [1] 31
nrow(subset(cres,FDR<0.05 & Direction=="Down"))
## [1] 8
cres_bl <- cres

Now drilling down to the top set.

myset <- rownames(head(sig[order(-abs(sig$median)),],1))
myset_members <- genesets[[which(names(genesets) == myset)]]
mystats <- stat[which(names(stat) %in% myset_members)]
myl <- list("allgenes"=stat,myset=mystats)
boxplot(myl,cex=0)

library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-5.7: type help(sm) for summary information
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
vioplot(myl,main=myset)

mygenes <- head(mystats[order(mystats)],5)
mygenes
##     OR2L2    OR5AK2    OR4C11     OR5T3     OR5R1 
## -2.861307 -2.405921 -2.359792 -2.336945 -2.259373
myprobes <- unique(unlist(sets[which(names(sets) %in% names(mygenes))]))
myprobedat <- res_bl_top[which(rownames(res_bl_top) %in% myprobes),]

myprobedat %>%
  kbl(caption = "Probe data for top 5 genes") %>%
  kable_paper("hover", full_width = F)
Probe data for top 5 genes
logFC AveExpr t P.Value adj.P.Val B
cg00938877 -0.0813302 2.8547659 -3.111247 0.0090933 0.8764716 -2.674653
cg08959365 -0.0535718 2.6306172 -2.861307 0.0144397 0.8764716 -3.113687
cg04277698 -0.0944025 2.2013011 -2.657212 0.0210447 0.8764716 -3.468232
cg23984446 -0.0365309 3.5304323 -2.430747 0.0318648 0.8764716 -3.854519
cg14696931 -0.0743728 2.3521299 -2.381095 0.0348756 0.8764716 -3.937880
cg15141401 -0.0855296 1.5597086 -2.359792 0.0362497 0.8764716 -3.973475
cg14284630 -0.0444939 2.0070866 -2.336945 0.0377813 0.8764716 -4.011531
cg17381444 -0.0404639 2.5490545 -2.316046 0.0392364 0.8764716 -4.046229
cg15434232 -0.0611575 0.7671502 -2.202700 0.0481086 0.8764716 -4.232419
cg07804888 0.0168582 2.7121774 1.406162 0.1852768 0.8764716 -5.401324
cg20184856 -0.0877620 2.8980060 -1.227698 0.2433189 0.8764716 -5.616157

Guthrie dataset.

stat <- res_gu_df$median
names(stat) <- rownames(res_gu_df)
stat[is.na(stat)] <- 0

tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
  myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
  mystats <- stat[names(stat) %in% myset]
  median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 2.92 sec elapsed
sig <- subset(cres,FDR<0.05)

head(sig[order(-abs(sig$median)),],20) %>%
  kbl(caption = "GUTHRIE: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
  kable_paper("hover", full_width = F)
GUTHRIE: Top effect size pathways found with CAMERA after 5% FDR filtering
NGenes Direction PValue FDR median
Class C/3 (Metabotropic glutamate/pheromone receptors) 39 Down 7.67e-05 0.0370595 -0.4694532
Expression and translocation of olfactory receptors 356 Down 0.00e+00 0.0000504 -0.4371060
Olfactory Signaling Pathway 363 Down 1.00e-07 0.0000504 -0.4354951
Sensory Perception 567 Down 5.50e-06 0.0035182 -0.3662568
cres_gu <- cres

Compare CAMERA data

Compare median set score for both contrasts. In the graph below, the larger points represent larger gene sets. Red fill means significant in blood contrast. Blue ring means significant in guthrie contrast.

m4 <- merge(cres_bl,cres_gu,by=0)
sigx <- subset(m4,FDR.x<0.05)
sigy <- subset(m4,FDR.y<0.05)

plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
  xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)

par(mfrow=c(1,1))
pdf("camera_pw_ados.pdf")

plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
  xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)

dev.off()
## png 
##   2
subset(m4,FDR.x<0.05 & FDR.y<0.05 ) %>%
  kbl(caption = "Pathways significant in blood (x) and guthrie cards (y) at 5%FDR") %>%
  kable_paper("hover", full_width = F)
Pathways significant in blood (x) and guthrie cards (y) at 5%FDR
Row.names NGenes.x Direction.x PValue.x FDR.x median.x NGenes.y Direction.y PValue.y FDR.y median.y
262 Class C/3 (Metabotropic glutamate/pheromone receptors) 39 Down 5.00e-07 0.0002979 -0.8029345 39 Down 7.67e-05 0.0370595 -0.4694532
522 Expression and translocation of olfactory receptors 359 Down 0.00e+00 0.0000557 -0.8031927 356 Down 0.00e+00 0.0000504 -0.4371060
1096 Olfactory Signaling Pathway 366 Down 1.00e-07 0.0000559 -0.7970944 363 Down 1.00e-07 0.0000504 -0.4354951
1512 Sensory Perception 570 Down 1.96e-05 0.0053857 -0.5921152 567 Down 5.50e-06 0.0035182 -0.3662568

make scatterplots of the top sets

XMAX=max(res$ranked_profile[,1])
XMIN=min(res$ranked_profile[,1])
YMAX=max(res$ranked_profile[,2])
YMIN=min(res$ranked_profile[,2])
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
  xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
  xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)

pdf("c3_metabotropic_ados.pdf")

plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
  xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX), 
  xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)

dev.off()
## png 
##   2

Make heatmap of top set.

topset<- rownames(res$detailed_sets[[1]])
probes <- unlist(sets[which(names(sets) %in% topset)])
topm <- bl_mvals[which(rownames(bl_mvals) %in% probes),]

colfunc <- colorRampPalette(c("yellow", "red"))
col_pal <- colfunc(length(min(bl_design[,ncol(bl_design)]):max(bl_design[,ncol(bl_design)])))
mycols <- col_pal[bl_design[,ncol(bl_design)]]

colfunc2 <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(topm,scale="row",trace="none",ColSideColors=mycols,col=colfunc2(25),
  main="C/3 receptor methylation in blood")

topset<- rownames(res$detailed_sets[[1]])
probes <- unlist(sets[which(names(sets) %in% topset)])
topm <- gu_mvals[which(rownames(gu_mvals) %in% probes),]

colfunc <- colorRampPalette(c("yellow", "red"))
col_pal <- colfunc(length(min(gu_design[,ncol(gu_design)]):max(gu_design[,ncol(gu_design)])))
mycols <- col_pal[gu_design[,ncol(gu_design)]]

colfunc2 <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(topm,scale="row",trace="none",ColSideColors=mycols,col=colfunc2(25),
  main="C/3 receptor methylation in Guthrie cards")

Distinguishing ASD from birth

Here I will use MDS plot to cluster samples.

First I will use all probes, then I will focus on a subset of candidate probes.

plot(cmdscale(dist(t(bl_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n",main="Blood")
mtext("Numbers indicate ADOS score")
text(cmdscale(dist(t(bl_mvals))), labels=bl_design[,ncol(bl_design)], )

plot(cmdscale(dist(t(gu_mvals))), xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "n",main="Guthrie card")
mtext("Numbers indicate ADOS score")
text(cmdscale(dist(t(gu_mvals))), labels=gu_design[,ncol(gu_design)], )

There is no apparent clustering by ADOS score, indicating a subtle effect.

Now I will examine the probes associated with the pathway of interest.

myprobes
##  [1] "cg20184856" "cg00938877" "cg08959365" "cg10435092" "cg15141401"
##  [6] "cg23984446" "cg22123267" "cg14696931" "cg15434232" "cg04277698"
## [11] "cg07804888" "cg17381444" "cg14284630" "cg17626801"
myprobedat <- bl_mvals[which(rownames(bl_mvals) %in% myprobes),]
plot(cmdscale(dist(t(myprobedat))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat))), labels=bl_design[,ncol(bl_design)], )

myprobedat <- gu_mvals[which(rownames(gu_mvals) %in% myprobes),]
plot(cmdscale(dist(t(myprobedat))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n",main="Guthrie card")
text(cmdscale(dist(t(myprobedat))), labels=gu_design[,ncol(gu_design)], )

Again, there is no apparent clustering by ADOS score. This indicates that the top probes cannot be used diagnostically.

myprobes2 <- head(bl_lim$Name,50)
myprobedat2 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat2))), labels=bl_design[,ncol(bl_design)], )

myprobes2 <- head(gu_lim$Name,50)
myprobedat2 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n", main="Guthrie")
text(cmdscale(dist(t(myprobedat2))), labels=gu_design[,ncol(gu_design)], )

MDS analysis of top probes identified without correction for covariates.

bl_design2 <- bl_design[,c(1,10)]
fit <- lmFit(bl_mvals, bl_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
##        (Intercept)   ADOS
## Down        248755      0
## NotSig       39844 802647
## Up          514048      0
top <- topTable(fit,coef="ADOS",num=Inf, sort.by = "P")
head(top)
##                  logFC   AveExpr         t      P.Value adj.P.Val        B
## cg13784557  0.04779472 -4.739442  5.456349 2.595342e-05 0.9999998 2.647442
## cg03716591  0.08873078 -3.321034  5.282177 3.833417e-05 0.9999998 2.267504
## cg11078128 -0.05723514  2.941155 -5.154247 5.114886e-05 0.9999998 1.986608
## cg06091288  0.44586721 -1.482468  5.126156 5.450383e-05 0.9999998 1.924737
## cg00320749  0.07116909 -3.962859  5.116515 5.570628e-05 0.9999998 1.903487
## cg00572586 -0.07365197 -3.390343 -5.061720 6.307166e-05 0.9999998 1.782568
myprobes3 <- rownames(top)[1:20]

myprobedat3 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat3))), labels=bl_design[,ncol(bl_design)], )

gu_design2 <- gu_design[,c(1,12)]
fit <- lmFit(gu_mvals, gu_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
##        (Intercept)   ADOS
## Down        251749      0
## NotSig       57584 790658
## Up          481325      0
top <- topTable(fit,coef="ADOS",num=Inf, sort.by = "P")
head(top)
##                  logFC    AveExpr         t      P.Value adj.P.Val        B
## cg24046505  0.05799757 -1.9843389  5.232968 2.418307e-05 0.9999981 2.662615
## cg04098297  0.06458003  2.5127605  5.024096 4.082524e-05 0.9999981 2.158205
## cg00779672  0.06285769  2.1754652  4.996222 4.378776e-05 0.9999981 2.090748
## cg09303399 -0.08027977 -0.3905299 -4.884562 5.799307e-05 0.9999981 1.820269
## cg15310518 -0.04903224 -0.3222181 -4.883002 5.822148e-05 0.9999981 1.816486
## cg01692639 -0.05839269 -3.8720322 -4.783392 7.483579e-05 0.9999981 1.574938
myprobes3 <- rownames(top)[1:20]

myprobedat3 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "n",main="Guthrie")
text(cmdscale(dist(t(myprobedat3))), labels=gu_design[,ncol(gu_design)], )

Again it was unsuccessful.

Try ebGSEA

ebGSEA uses globaltest to identify DMGs from infinium data. Some points to note:

  • only 1 sided test - does not distinguish between up and down methylated genes.

  • cannot use complex experiment designs.

Therefore I modify how the ebGSEA code uses globaltest.

item mapEIDto850k.lv : A list mapping Entrez Gene ID to 850k probes

#bl_gt <- doGT(bl_design,bl_mvals,array="850k",ncores=16)
data.m <- bl_mvals

subsets <- mclapply(mapEIDto850k.lv,intersect,rownames(data.m),mc.cores = 16)
nrep.v <- unlist(lapply(subsets,length))
selG.idx <- which(nrep.v>0)

gt.o <- gt(response=bl_design[,"ADOS"], alternative=t(data.m), model="linear",
  directional = TRUE, standardize = FALSE, permutations = 0,
  subsets=subsets[selG.idx],trace=F)

gt_res <- result(gt.o)
gt_res <- gt_res[order(gt_res$`p-value`),]

Gene set level enrichment - ADOS

genesets <- gmt_import("ReactomePathways.gmt")
cores = 8

Aggregate mval to genes and examine pathway enrichment in blood and guthries.

tic()
bl_gsmea <- gsmea(mval=bl_mvals,design=bl_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 142.764 sec elapsed
bl_gsmea$res <- subset(bl_gsmea$res,nprobes>=5)
bl_gsmea$res$FDR <- p.adjust(bl_gsmea$res$PValue,method="fdr")
head(bl_gsmea$res,20)  %>%  kbl(caption="Top DM pathways in blood identified with GSMEA") %>%
  kable_paper("hover", full_width = F)
Top DM pathways in blood identified with GSMEA
nprobes median PValue FDR
RAS processing 19 -0.6367022 0.0030380 0.5514271
Signaling by MAPK mutants 6 -0.8910984 0.0058475 0.5514271
LDL remodeling 6 -1.4384307 0.0065210 0.5514271
Packaging Of Telomere Ends 7 -0.9230999 0.0067477 0.5514271
Removal of the Flap Intermediate from the C-strand 17 -0.9620088 0.0107276 0.5514271
POLB-Dependent Long Patch Base Excision Repair 7 -0.9952393 0.0116273 0.5514271
Integration of provirus 9 -0.7583022 0.0120982 0.5514271
Early Phase of HIV Life Cycle 14 -0.6760405 0.0123859 0.5514271
Protein repair 6 -0.9225705 0.0154445 0.5514271
2-LTR circle formation 7 -0.7583022 0.0155229 0.5514271
Phase 1 - inactivation of fast Na+ channels 6 -0.5750352 0.0155815 0.5514271
Pyroptosis 26 -0.7617448 0.0167240 0.5514271
Processive synthesis on the C-strand of the telomere 19 -0.9952393 0.0171426 0.5514271
Synthesis of Ketone Bodies 8 -1.2918676 0.0179524 0.5514271
tRNA modification in the mitochondrion 7 -0.2242023 0.0181240 0.5514271
APOBEC3G mediated resistance to HIV-1 infection 5 -1.1021444 0.0193878 0.5514271
Synthesis of diphthamide-EEF2 8 -0.8318361 0.0194151 0.5514271
PCNA-Dependent Long Patch Base Excision Repair 21 -0.9538704 0.0194669 0.5514271
Resolution of AP sites via the multiple-nucleotide patch replacement pathway 24 -0.9745548 0.0221451 0.5514271
Activation of RAS in B cells 5 -0.6367022 0.0234223 0.5514271
tic()
gu_gsmea <- gsmea(mval=gu_mvals,design=gu_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 128.032 sec elapsed
gu_gsmea$res <- subset(gu_gsmea$res,nprobes>=5)
gu_gsmea$res$FDR <- p.adjust(gu_gsmea$res$PValue,method="fdr")
head(gu_gsmea$res,20)  %>%  kbl(caption="Top DM pathways in Guthrie card identified with GSMEA") %>%
  kable_paper("hover", full_width = F)
Top DM pathways in Guthrie card identified with GSMEA
nprobes median PValue FDR
APC-Cdc20 mediated degradation of Nek2A 26 0.3593637 0.0121005 0.9999532
Inactivation of APC/C via direct inhibition of the APC/C complex 21 0.3512109 0.0180940 0.9999532
Inhibition of the proteolytic activity of APC/C required for the onset of anaphase by mitotic spindle checkpoint components 21 0.3512109 0.0180940 0.9999532
KSRP (KHSRP) binds and destabilizes mRNA 17 -0.7596917 0.0286669 0.9999532
Signaling by NTRK3 (TRKC) 17 -0.6409496 0.0293964 0.9999532
Phosphorylation of Emi1 6 0.5763514 0.0329307 0.9999532
APC/C:Cdc20 mediated degradation of Cyclin B 24 0.3355649 0.0334188 0.9999532
POLB-Dependent Long Patch Base Excision Repair 7 0.3560613 0.0382982 0.9999532
Phosphorylation of the APC/C 20 0.3355649 0.0406966 0.9999532
RUNX3 regulates WNT signaling 8 -1.0689451 0.0421895 0.9999532
Transport and synthesis of PAPS 6 -0.9923302 0.0484766 0.9999532
Activated NTRK3 signals through PI3K 6 -1.1584291 0.0509890 0.9999532
Binding of TCF/LEF:CTNNB1 to target gene promoters 8 -1.2414620 0.0524543 0.9999532
Diseases associated with surfactant metabolism 9 -0.8414110 0.0544975 0.9999532
Negative regulation of TCF-dependent signaling by DVL-interacting proteins 5 -0.7514719 0.0670212 0.9999532
Formation of apoptosome 10 -0.6161805 0.0693845 0.9999532
Regulation of the apoptosome activity 10 -0.6161805 0.0693845 0.9999532
Serine biosynthesis 9 0.7882701 0.0773532 0.9999532
Sodium-coupled phosphate cotransporters 5 -1.0204257 0.0784883 0.9999532
CLEC7A/inflammasome pathway 6 0.7691143 0.0889579 0.9999532

Gene level enrichment for blood

tic()
res_bu <- main(mval=bu_mvals,design=bu_design,sets,cores=detectCores()/2)
res_bu[[1]] <- subset(res_bu[[1]],nprobes!=0) # remove genes with no probes obvs
res_bu_df <- res_bu[[1]] # enrichment results
res_bu_top <- res_bu[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 369.062 sec elapsed
head(res_bu_df,30) %>%
  kbl(caption="Top DM genes in blood identified with GMEA") %>%
  kable_paper("hover", full_width = F)
Top DM genes in blood identified with GMEA
nprobes median PValue FDR
FLNC 48 0.4697347 0.0000205 0.4499072
TMEM91 49 0.4610404 0.0000329 0.4499072
CSNK1A1 42 0.5673118 0.0000964 0.7125237
LINC01273 6 -0.1819099 0.0001122 0.7125237
MIR124-2 11 -0.8328384 0.0002250 0.7125237
CPN2 20 0.7720343 0.0002613 0.7125237
ZNF154 17 0.6673729 0.0002750 0.7125237
C9orf106 7 1.2531123 0.0002877 0.7125237
KIAA0494 11 1.1242410 0.0003401 0.7125237
PMEPA1 89 0.4176987 0.0004135 0.7125237
WFDC9 13 -0.4421977 0.0004853 0.7125237
C1orf213 18 0.9337948 0.0004854 0.7125237
LINC01005 2 1.8301673 0.0005469 0.7125237
NECAP1 18 0.7010852 0.0005647 0.7125237
MIR921 8 0.7528839 0.0005965 0.7125237
POLI 19 0.8062953 0.0006081 0.7125237
TRIM8 23 -0.3634760 0.0006409 0.7125237
RD3 31 0.4803811 0.0006475 0.7125237
FCGBP 39 0.2747381 0.0006479 0.7125237
GSC2 12 0.6249996 0.0006662 0.7125237
MRPL9 22 1.0153298 0.0006944 0.7125237
C3orf1 13 0.4552082 0.0007571 0.7125237
ZNF655 29 0.4184367 0.0007743 0.7125237
RPS11 16 0.7328382 0.0008096 0.7125237
C5orf55 13 0.7806143 0.0008323 0.7125237
ZFP82 13 -1.2048232 0.0008722 0.7125237
EIF2B5 30 0.9196388 0.0008773 0.7125237
LOC642852 30 0.6675120 0.0008923 0.7125237
THSD1 16 1.3856603 0.0008998 0.7125237
IGFBP3 47 0.4958188 0.0009103 0.7125237
par(mfrow=c(2,1))
hist(res_bu_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_bu_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)

pdf("hist_buccal_ados.pdf")
par(mfrow=c(2,1))
hist(res_bu_top$t,xlab="probe t-stat",main="buccal samples") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_bu_df$median,xlab="gene median t-stat",main="buccal samples") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png 
##   2
par(mfrow=c(1,1))

Now some visualisations.

volcano_plot(res_bu_df)

probe_bias(res_bu_df)

gmea_boxplot(res_bu,sets)

Session information

For reproducibility

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] vioplot_0.4.0                                      
##  [2] zoo_1.8-12                                         
##  [3] sm_2.2-5.7.1                                       
##  [4] pkgload_1.3.2.1                                    
##  [5] GGally_2.1.2                                       
##  [6] ggplot2_3.4.3                                      
##  [7] reshape2_1.4.4                                     
##  [8] beeswarm_0.4.0                                     
##  [9] gplots_3.1.3                                       
## [10] gtools_3.9.4                                       
## [11] tibble_3.2.1                                       
## [12] dplyr_1.1.3                                        
## [13] echarts4r_0.4.5                                    
## [14] ebGSEA_0.1.0                                       
## [15] globaltest_5.54.0                                  
## [16] survival_3.5-7                                     
## [17] tictoc_1.2                                         
## [18] RIdeogram_0.2.2                                    
## [19] eulerr_7.0.0                                       
## [20] kableExtra_1.3.4                                   
## [21] data.table_1.14.8                                  
## [22] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [23] minfi_1.46.0                                       
## [24] bumphunter_1.42.0                                  
## [25] locfit_1.5-9.8                                     
## [26] iterators_1.0.14                                   
## [27] foreach_1.5.2                                      
## [28] Biostrings_2.68.1                                  
## [29] XVector_0.40.0                                     
## [30] SummarizedExperiment_1.30.2                        
## [31] Biobase_2.60.0                                     
## [32] MatrixGenerics_1.12.3                              
## [33] matrixStats_1.0.0                                  
## [34] GenomicRanges_1.52.0                               
## [35] GenomeInfoDb_1.36.3                                
## [36] IRanges_2.34.1                                     
## [37] S4Vectors_0.38.1                                   
## [38] BiocGenerics_0.46.0                                
## [39] mitch_1.12.0                                       
## [40] limma_3.56.2                                       
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1             later_1.3.1              
##   [3] BiocIO_1.10.0             bitops_1.0-7             
##   [5] filelock_1.0.2            preprocessCore_1.62.1    
##   [7] XML_3.99-0.14             lifecycle_1.0.3          
##   [9] lattice_0.21-8            MASS_7.3-60              
##  [11] base64_2.0.1              scrime_1.3.5             
##  [13] magrittr_2.0.3            sass_0.4.7               
##  [15] rmarkdown_2.24            jquerylib_0.1.4          
##  [17] yaml_2.3.7                httpuv_1.6.11            
##  [19] grImport2_0.2-0           doRNG_1.8.6              
##  [21] askpass_1.2.0             DBI_1.1.3                
##  [23] RColorBrewer_1.1-3        abind_1.4-5              
##  [25] zlibbioc_1.46.0           rvest_1.0.3              
##  [27] quadprog_1.5-8            purrr_1.0.2              
##  [29] RCurl_1.98-1.12           rappdirs_0.3.3           
##  [31] GenomeInfoDbData_1.2.10   genefilter_1.82.1        
##  [33] annotate_1.78.0           svglite_2.1.1            
##  [35] DelayedMatrixStats_1.22.6 codetools_0.2-19         
##  [37] DelayedArray_0.26.7       xml2_1.3.5               
##  [39] tidyselect_1.2.0          farver_2.1.1             
##  [41] beanplot_1.3.1            base64enc_0.1-3          
##  [43] BiocFileCache_2.8.0       illuminaio_0.42.0        
##  [45] webshot_0.5.5             GenomicAlignments_1.36.0 
##  [47] jsonlite_1.8.7            multtest_2.56.0          
##  [49] ellipsis_0.3.2            systemfonts_1.0.4        
##  [51] tools_4.3.1               progress_1.2.2           
##  [53] Rcpp_1.0.11               glue_1.6.2               
##  [55] gridExtra_2.3             xfun_0.40                
##  [57] HDF5Array_1.28.1          withr_2.5.0              
##  [59] fastmap_1.1.1             rhdf5filters_1.12.1      
##  [61] fansi_1.0.4               openssl_2.1.0            
##  [63] caTools_1.18.2            digest_0.6.33            
##  [65] R6_2.5.1                  mime_0.12                
##  [67] colorspace_2.1-0          rsvg_2.4.0               
##  [69] jpeg_0.1-10               biomaRt_2.56.1           
##  [71] RSQLite_2.3.1             tidyr_1.3.0              
##  [73] utf8_1.2.3                generics_0.1.3           
##  [75] rtracklayer_1.60.1        prettyunits_1.1.1        
##  [77] httr_1.4.7                htmlwidgets_1.6.2        
##  [79] S4Arrays_1.0.6            pkgconfig_2.0.3          
##  [81] gtable_0.3.4              blob_1.2.4               
##  [83] siggenes_1.74.0           htmltools_0.5.6          
##  [85] scales_1.2.1              png_0.1-8                
##  [87] knitr_1.44                rstudioapi_0.15.0        
##  [89] tzdb_0.4.0                rjson_0.2.21             
##  [91] nlme_3.1-163              curl_5.0.2               
##  [93] org.Hs.eg.db_3.17.0       cachem_1.0.8             
##  [95] rhdf5_2.44.0              stringr_1.5.0            
##  [97] KernSmooth_2.23-22        AnnotationDbi_1.62.2     
##  [99] restfulr_0.0.15           GEOquery_2.68.0          
## [101] pillar_1.9.0              grid_4.3.1               
## [103] reshape_0.8.9             vctrs_0.6.3              
## [105] promises_1.2.1            dbplyr_2.3.3             
## [107] xtable_1.8-4              evaluate_0.21            
## [109] readr_2.1.4               GenomicFeatures_1.52.2   
## [111] cli_3.6.1                 compiler_4.3.1           
## [113] Rsamtools_2.16.0          rlang_1.1.1              
## [115] crayon_1.5.2              rngtools_1.5.2           
## [117] kpmt_0.1.0                labeling_0.4.3           
## [119] nor1mix_1.3-0             mclust_6.0.0             
## [121] plyr_1.8.8                stringi_1.7.12           
## [123] viridisLite_0.4.2         BiocParallel_1.34.2      
## [125] munsell_0.5.0             Matrix_1.6-1             
## [127] hms_1.1.3                 sparseMatrixStats_1.12.2 
## [129] bit64_4.0.5               Rhdf5lib_1.22.1          
## [131] KEGGREST_1.40.0           shiny_1.7.5              
## [133] highr_0.10                memoise_2.0.1            
## [135] bslib_0.5.1               bit_4.0.5