Source: https://github.com/markziemann/asd_meth
Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.
Here are the files that I’m using:
limma_blood_diagnosis.csv
limma_guthrie_diagnosis.csv
suppressPackageStartupMessages({
library("parallel")
library("mitch")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
source("https://raw.githubusercontent.com/markziemann/gmea/main/meth_functions.R")
library("data.table")
library("kableExtra")
library("eulerr")
library("RIdeogram")
library("GenomicRanges")
library("tictoc")
library("globaltest")
library("ebGSEA")
data("dualmap850kEID")
})
source("meth_functions.R")
bl_mvals <- readRDS(file="bl_mvals.rds")
gu_mvals <- readRDS(file="gu_mvals.rds")
bl_design <- readRDS(file="bl_design_diagnosis.rds")
gu_design <- readRDS(file="gu_design_diagnosis.rds")
bl_lim <- read.csv("limma_blood_diagnosis.csv")
gu_lim <- read.csv("limma_guthrie_diagnosis.csv")
For each gene, extract out the probes.
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
promoters <- myann[promoters,]
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
Effect size is estimated with the median t-statistic for each gene. Significance is estimated using the fry test. Results are merged for the user.
tic()
res_bl <- main(mval=bl_mvals,design=bl_design,sets,cores=detectCores()/2)
res_bl[[1]] <- subset(res_bl[[1]],nprobes!=0) # remove genes with no probes obvs
res_bl_df <- res_bl[[1]] # enrichment results
res_bl_top <- res_bl[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 507.496 sec elapsed
head(res_bl_df,30) %>%
kbl(caption="Top DM genes in blood identified with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
C3orf54 | 8 | -0.9044234 | 0.0001561 | 1 |
LOC101928847 | 4 | -1.4793045 | 0.0001931 | 1 |
LINC00673 | 38 | 0.3194164 | 0.0002642 | 1 |
PPTC7 | 26 | -0.4309561 | 0.0003817 | 1 |
PCDHB18 | 14 | 0.5299674 | 0.0008029 | 1 |
NUDT22 | 20 | -0.3263249 | 0.0008510 | 1 |
NIPAL1 | 12 | -1.0274454 | 0.0014222 | 1 |
ADIPOR1 | 27 | -0.5216030 | 0.0015982 | 1 |
ZNF706 | 22 | -0.5697666 | 0.0016864 | 1 |
MIR4531 | 1 | -3.4647240 | 0.0018542 | 1 |
KRTAP11-1 | 8 | -1.1430822 | 0.0018866 | 1 |
ACSM5 | 11 | 0.2839467 | 0.0020262 | 1 |
SAP30L | 19 | -0.3014361 | 0.0021212 | 1 |
ANKRD29 | 31 | -0.4156703 | 0.0022162 | 1 |
NCDN | 28 | -0.5819567 | 0.0023078 | 1 |
OR5D14 | 3 | 1.4739149 | 0.0023104 | 1 |
POLR3G | 22 | -0.5451601 | 0.0024278 | 1 |
FBXL20 | 20 | -0.7433867 | 0.0030754 | 1 |
RNF185 | 26 | -0.3027213 | 0.0031567 | 1 |
OMA1 | 20 | -0.6416995 | 0.0032053 | 1 |
DPY19L3 | 22 | -0.6194942 | 0.0032297 | 1 |
ANKRD13A | 36 | -0.3940235 | 0.0032316 | 1 |
NUP43 | 20 | 0.3600712 | 0.0033938 | 1 |
CFHR5 | 7 | -0.9567053 | 0.0034446 | 1 |
E2F2 | 22 | -0.4367487 | 0.0034847 | 1 |
ATP5I | 24 | -0.2404663 | 0.0039088 | 1 |
CCAT2 | 2 | -2.2335283 | 0.0040488 | 1 |
CAPZA2 | 22 | -0.5462040 | 0.0045328 | 1 |
ATP12A | 21 | -0.3769933 | 0.0049507 | 1 |
TMEM49 | 26 | 0.7849556 | 0.0051787 | 1 |
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
pdf("hist_bl_diagnosis.pdf")
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png
## 2
par(mfrow=c(1,1))
Now some visualisations.
volcano_plot(res_bl_df)
probe_bias(res_bl_df)
gmea_boxplot(res_bl,sets)
tic()
res_gu <- main(mval=gu_mvals,design=gu_design,sets,cores=detectCores()/2)
res_gu[[1]] <- subset(res_gu[[1]],nprobes!=0) # remove genes with no probes obvs
res_gu_df <- res_gu[[1]] # enrichment results
res_gu_top <- res_gu[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 452.848 sec elapsed
head(res_gu_df,30) %>%
kbl(caption="Top DM genes in Guthrie card identified with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
ADAMTS1 | 19 | 0.8669193 | 0.0001470 | 1 |
GJD2 | 15 | -0.4571973 | 0.0001763 | 1 |
ALPP | 14 | -1.0726971 | 0.0007022 | 1 |
MIR3917 | 5 | -1.0870554 | 0.0009539 | 1 |
MANEA-AS1 | 4 | 1.2165059 | 0.0010707 | 1 |
C9orf78 | 14 | 0.4162091 | 0.0012923 | 1 |
RECQL | 20 | 0.5239215 | 0.0013199 | 1 |
ABCE1 | 30 | 0.7000192 | 0.0014764 | 1 |
LBXCOR1 | 30 | -0.3231650 | 0.0016304 | 1 |
C1QTNF9B-AS1 | 3 | 1.3276091 | 0.0019835 | 1 |
SLC16A1-AS1 | 4 | 0.8014960 | 0.0019982 | 1 |
CKAP2 | 18 | 0.4642038 | 0.0020919 | 1 |
CSN1S2B | 4 | -1.7450171 | 0.0026604 | 1 |
C2orf49 | 14 | 0.8043325 | 0.0032103 | 1 |
ZNF226 | 17 | 1.2747112 | 0.0033589 | 1 |
DEFB126 | 3 | -1.3481040 | 0.0033914 | 1 |
CTXN1 | 12 | -0.5202831 | 0.0037788 | 1 |
KIAA0319 | 36 | -0.4014418 | 0.0040176 | 1 |
PDAP1 | 29 | -0.4107303 | 0.0040198 | 1 |
TREML2 | 11 | 1.0063143 | 0.0041780 | 1 |
SNORD43 | 17 | 0.2903430 | 0.0043494 | 1 |
PANO1 | 7 | -1.2116568 | 0.0044939 | 1 |
LOC100131347 | 10 | -0.9358699 | 0.0046679 | 1 |
LOC101927081 | 4 | 0.7901791 | 0.0046888 | 1 |
CREB3L3 | 17 | -0.4427179 | 0.0048418 | 1 |
C17orf89 | 17 | -0.2162679 | 0.0050077 | 1 |
KRTAP10-10 | 3 | 2.2820638 | 0.0053858 | 1 |
NAA30 | 21 | -0.5090546 | 0.0062021 | 1 |
MIR196A2 | 9 | 0.5188170 | 0.0062503 | 1 |
IZUMO1 | 16 | 0.5952444 | 0.0064170 | 1 |
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
pdf("hist_gu_diagnosis.pdf")
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png
## 2
par(mfrow=c(1,1))
Now some visualisations.
volcano_plot(res_gu_df)
probe_bias(res_gu_df)
gmea_boxplot(res_gu,sets)
Now compare blood and guthrie card
res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
m1 <- merge(res_bl_df,res_gu_df,by=0)
m2 <- m1[,c("Row.names","bl","gu")]
rownames(m2) <- m2$Row.names
m2$Row.names=NULL
plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
pch=19,cex=0.9)
grid()
abline(v=0,lty=1,lwd=3,col="black") ; abline(h=0,lty=1,lwd=3,col="black")
mtext("aggregated t-statistic for each gene (median)")
#plot(rank(m1$bl),rank(m1$gu),xlab="bl rank",ylab="gu rank",cex=0.6,pch=19) ; grid()
rm2 <- apply(m2,2,rank)
mydiff <- apply(m2,2,function(x) { length(which(x<0)) } )
rm3 <- rm2
rm3[,1] <- rm3[,1] - mydiff[1]
rm3[,2] <- rm3[,2] - mydiff[2]
rnk <- rm3
palette <- colorRampPalette(c("white", "yellow", "orange", "red",
"darkred", "black"))
xmin = min(rnk[, 1])
xmax = max(rnk[, 1])
ymin = min(rnk[, 2])
ymax = max(rnk[, 2])
k <- MASS::kde2d(rnk[, 1], rnk[, 2])
X_AXIS = paste("Rank in contrast", colnames(rnk)[1])
Y_AXIS = paste("Rank in contrast", colnames(rnk)[2])
filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
color.palette = palette, plot.title = {
abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
title(main = "Rank-rank plot of all genes", xlab = X_AXIS,
ylab = Y_AXIS)
})
pdf("genedens_diagnosis.pdf")
par(mfrow=c(2,2))
plot(1)
plot(1)
plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
pch=19,cex=0.9)
grid()
abline(v=0, h=0, lty=2, lwd=2, col="blue")
mtext("aggregated t-statistic for each gene (median)")
filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
color.palette = palette, plot.title = {
abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
title(main = "Rank-rank plot of all genes", xlab = X_AXIS,
ylab = Y_AXIS)
})
dev.off()
## png
## 2
Now we need a table of genes with top scores.
m3 <- m2
m3$med <- apply(rnk,1,median)
mg <- merge(res_gu_df,res_bl_df,by=0)
mg$gu = mg$bl = NULL
mg$med <- m3$med
mg2 <- data.frame(mg,rnk)
mg2$med <- apply(rnk,1,median)
mg2 <- mg2[order(mg2$med),]
mg2 <-subset(mg2,nprobes.x>=5)
head(mg2,20) %>%
kbl(caption="Genes with coordinated hypomethylation in neonatal Guthrie card and blood at assessment") %>%
kable_paper("hover", full_width = F)
Row.names | nprobes.x | median.x | PValue.x | FDR.x | nprobes.y | median.y | PValue.y | FDR.y | med | bl | gu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
LOC400655 | LOC400655 | 9 | -1.127263 | 0.3332732 | 1 | 9 | -1.4148972 | 0.0298523 | 1 | -18820.0 | -18843 | -18797 |
XIRP2-AS1 | XIRP2-AS1 | 8 | -1.291468 | 0.2252772 | 1 | 8 | -0.9847965 | 0.0213955 | 1 | -18811.5 | -18499 | -19124 |
TMPRSS11F | TMPRSS11F | 11 | -1.158840 | 0.2229900 | 1 | 12 | -1.1521011 | 0.2480429 | 1 | -18792.5 | -18708 | -18877 |
NAALADL2-AS2 | NAALADL2-AS2 | 7 | -1.154844 | 0.2080389 | 1 | 7 | -1.0414086 | 0.1515643 | 1 | -18726.0 | -18587 | -18865 |
SNORD114-16 | SNORD114-16 | 5 | -1.353351 | 0.3358379 | 1 | 5 | -0.8319295 | 0.3237850 | 1 | -18679.5 | -18152 | -19207 |
C4orf11 | C4orf11 | 5 | -1.245429 | 0.2235954 | 1 | 5 | -0.8842424 | 0.4174746 | 1 | -18667.5 | -18280 | -19055 |
GPR110 | GPR110 | 5 | -1.272922 | 0.2103689 | 1 | 7 | -0.8596281 | 0.2910221 | 1 | -18666.0 | -18228 | -19104 |
LINC01069 | LINC01069 | 5 | -1.152820 | 0.2951570 | 1 | 5 | -0.9000697 | 0.2196716 | 1 | -18586.5 | -18314 | -18859 |
SNORD20 | SNORD20 | 5 | -1.089067 | 0.2554593 | 1 | 5 | -0.9659791 | 0.1472310 | 1 | -18572.0 | -18469 | -18675 |
CFI | CFI | 7 | -1.438957 | 0.1965863 | 1 | 7 | -0.7402263 | 0.3249857 | 1 | -18559.5 | -17828 | -19291 |
LOC101927356 | LOC101927356 | 5 | -1.195284 | 0.4292489 | 1 | 6 | -0.8339990 | 0.1147637 | 1 | -18559.0 | -18159 | -18959 |
AGR2 | AGR2 | 10 | -1.102617 | 0.3126984 | 1 | 10 | -0.9285832 | 0.0930195 | 1 | -18555.0 | -18390 | -18720 |
SNORD114-5 | SNORD114-5 | 5 | -1.058992 | 0.3592665 | 1 | 5 | -0.9999453 | 0.4012486 | 1 | -18540.0 | -18526 | -18554 |
LINC01312 | LINC01312 | 5 | -1.033366 | 0.3893068 | 1 | 6 | -1.0758788 | 0.2008889 | 1 | -18532.0 | -18633 | -18431 |
LINC01515 | LINC01515 | 7 | -1.395551 | 0.1503052 | 1 | 7 | -0.7260144 | 0.2176163 | 1 | -18512.5 | -17771 | -19254 |
LOC101927973 | LOC101927973 | 11 | -1.112481 | 0.2593231 | 1 | 12 | -0.8811257 | 0.2564275 | 1 | -18509.0 | -18270 | -18748 |
LOC101927378 | LOC101927378 | 6 | -1.078993 | 0.3036075 | 1 | 6 | -0.9226369 | 0.3741215 | 1 | -18507.0 | -18380 | -18634 |
TAAR6 | TAAR6 | 8 | -1.035386 | 0.3027282 | 1 | 8 | -1.0017654 | 0.3595211 | 1 | -18486.0 | -18530 | -18442 |
CMA1 | CMA1 | 5 | -1.336377 | 0.3245436 | 1 | 5 | -0.7255235 | 0.1232123 | 1 | -18476.5 | -17768 | -19185 |
CTD-2350J17.1 | CTD-2350J17.1 | 10 | -1.269042 | 0.1537812 | 1 | 11 | -0.7494309 | 0.2364757 | 1 | -18474.5 | -17852 | -19097 |
mg2 <- mg2[order(-mg2$med),]
head(mg2,20) %>%
kbl(caption="Genes with coordinated hypermethylation in neonatal Guthrie card and blood at assessment") %>%
kable_paper("hover", full_width = F)
Row.names | nprobes.x | median.x | PValue.x | FDR.x | nprobes.y | median.y | PValue.y | FDR.y | med | bl | gu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
OR4X1 | OR4X1 | 5 | 0.9034306 | 0.1495478 | 1 | 5 | 1.2468121 | 0.0883466 | 1 | 6315.0 | 6640 | 5990 |
HIST1H3E | HIST1H3E | 15 | 0.8690408 | 0.1473113 | 1 | 15 | 1.0493102 | 0.0552320 | 1 | 6263.5 | 6560 | 5967 |
C16orf78 | C16orf78 | 8 | 0.8999253 | 0.2988178 | 1 | 8 | 0.8881172 | 0.1580520 | 1 | 6218.5 | 6449 | 5988 |
C2orf27A | C2orf27A | 5 | 0.8292660 | 0.2422787 | 1 | 5 | 0.9273510 | 0.2297358 | 1 | 6212.5 | 6488 | 5937 |
PCDHB10 | PCDHB10 | 7 | 1.0158782 | 0.0350900 | 1 | 8 | 0.7909626 | 0.0329233 | 1 | 6204.0 | 6345 | 6063 |
CD69 | CD69 | 7 | 0.6436311 | 0.4013197 | 1 | 7 | 1.2511092 | 0.0399936 | 1 | 6168.5 | 6642 | 5695 |
LOC388789 | LOC388789 | 11 | 0.6825516 | 0.2469335 | 1 | 11 | 1.1011536 | 0.0152685 | 1 | 6168.5 | 6592 | 5745 |
GAS8-AS1 | GAS8-AS1 | 5 | 0.7780589 | 0.0743387 | 1 | 5 | 0.8449410 | 0.4061573 | 1 | 6138.5 | 6403 | 5874 |
EBLN3 | EBLN3 | 6 | 0.7345526 | 0.6318585 | 1 | 6 | 0.7828374 | 0.2539262 | 1 | 6073.0 | 6328 | 5818 |
GPX8 | GPX8 | 9 | 0.7171386 | 0.1373756 | 1 | 9 | 0.7766114 | 0.3279487 | 1 | 6059.0 | 6322 | 5796 |
LINC00854 | LINC00854 | 6 | 0.7746833 | 0.2327944 | 1 | 6 | 0.7104553 | 0.1498368 | 1 | 6050.0 | 6231 | 5869 |
LOC100287792 | LOC100287792 | 5 | 0.5551406 | 0.1215728 | 1 | 5 | 0.8566904 | 0.1048241 | 1 | 5956.0 | 6410 | 5502 |
TMEM211 | TMEM211 | 9 | 0.6655984 | 0.2939329 | 1 | 9 | 0.6647075 | 0.3595003 | 1 | 5942.5 | 6165 | 5720 |
FAM83H-AS1 | FAM83H-AS1 | 7 | 0.6013584 | 0.3686851 | 1 | 7 | 0.6716334 | 0.0966210 | 1 | 5894.0 | 6172 | 5616 |
MIR6733 | MIR6733 | 6 | 1.0236287 | 0.0802506 | 1 | 6 | 0.4956075 | 0.6370533 | 1 | 5891.0 | 5716 | 6066 |
ARMT1 | ARMT1 | 5 | 0.6118869 | 0.1349583 | 1 | 5 | 0.6503945 | 0.3790928 | 1 | 5888.0 | 6140 | 5636 |
LOC400027 | LOC400027 | 10 | 1.0585911 | 0.1647647 | 1 | 10 | 0.4855248 | 0.6541409 | 1 | 5882.5 | 5676 | 6089 |
LOC101927142 | LOC101927142 | 12 | 0.6734128 | 0.3953720 | 1 | 12 | 0.5902037 | 0.3949349 | 1 | 5863.0 | 5996 | 5730 |
PCDHB18 | PCDHB18 | 13 | 0.6815695 | 0.6029691 | 1 | 14 | 0.5299674 | 0.0008029 | 1 | 5788.5 | 5834 | 5743 |
LOC728819 | LOC728819 | 9 | 1.1823831 | 0.0203475 | 1 | 9 | 0.4198157 | 0.0805504 | 1 | 5778.0 | 5417 | 6139 |
genesets <- gmt_import("ReactomePathways.gmt")
cores = 8
Here I’m using the median t-statistic for downstream enrichment. Pathways are from REACTOME and analysis using mitch.
res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
bl <- res_bl_df[,"bl",drop=FALSE]
mitch_bl <- mitch_calc(bl,genesets,priority="effect")
head( mitch_bl$enrichment_result,30) %>%
kbl(caption="BLOOD: Top effect size pathways found with mitch") %>%
kable_paper("hover", full_width = F)
sig <- subset(mitch_bl$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="BLOOD: Top effect size pathways found with mitch after 1% FDR filtering") %>%
kable_paper("hover", full_width = F)
gu <- res_gu_df[,"gu",drop=FALSE]
mitch_gu <- mitch_calc(gu,genesets,priority="effect")
head( mitch_gu$enrichment_result,30) %>%
kbl(caption="GUTHRIE: Top effect size pathways found with mitch") %>%
kable_paper("hover", full_width = F)
sig <- subset(mitch_gu$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="GUTHRIE: Top effect size pathways found with mitch after 1% FDR filtering") %>%
kable_paper("hover", full_width = F)
Mitch joint analysis of blood and guthrie card. As using the median value is apparently the most sensitive, we’ll go with that.
m <- merge(res_bl_df,res_gu_df,by=0)
rownames(m) <- m$Row.names
m2 <- m[,c("bl","gu")]
m2[is.na(m2)] <- 0
res <- mitch_calc(m2, genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mitch_report(res,"blgu_mitch.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmpQdgvaM/blgu_mitch.rds ".
##
##
## processing file: mitch.Rmd
##
|
| | 0%
|
|. | 3%
|
|.. | 6% (checklibraries)
|
|.... | 9%
|
|..... | 12% (peek)
|
|...... | 15%
|
|....... | 18% (metrics)
|
|........ | 21%
|
|.......... | 24% (scatterplot)
|
|........... | 26%
|
|............ | 29% (contourplot)
|
|............. | 32%
|
|.............. | 35% (input_geneset_metrics1)
|
|................ | 38%
|
|................. | 41% (input_geneset_metrics2)
|
|.................. | 44%
|
|................... | 47% (input_geneset_metrics3)
|
|.................... | 50%
|
|...................... | 53% (echart1d)
|
|....................... | 56% (echart2d)
|
|........................ | 59%
|
|......................... | 62% (heatmap)
|
|........................... | 65%
|
|............................ | 68% (effectsize)
|
|............................. | 71%
|
|.............................. | 74% (results_table)
|
|............................... | 76%
|
|................................. | 79% (results_table_complete)
|
|.................................. | 82%
|
|................................... | 85% (detailed_geneset_reports1d)
|
|.................................... | 88%
|
|..................................... | 91% (detailed_geneset_reports2d)
|
|....................................... | 94%
|
|........................................ | 97% (session_info)
|
|.........................................| 100%
## output file: /home/mdz/projects/asd_meth/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mdz/projects/asd_meth/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpQdgvaM/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpQdgvaM/rmarkdown-str3a5ba44cc9575d.html
##
## Output created: /tmp/RtmpQdgvaM/mitch_report.html
## [1] TRUE
mitch_plots(res=res,outfile="blgu_mitch_diagnosis.pdf")
## png
## 2
sig <- subset(res$enrichment_result,p.adjustMANOVA<0.01)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="Top pathways found with mitch (effect size after 1% FDR filtering)") %>%
kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.bl | s.gu | p.bl | p.gu | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
380 | Eicosanoids | 12 | 0.0000523 | 0.7316955 | -0.0553286 | 0.0000113 | 0.7399811 | 0.7337844 | 0.5565101 | 0.0008425 |
399 | Expression and translocation of olfactory receptors | 356 | 0.0000000 | -0.0615262 | -0.4968133 | 0.0458572 | 0.0000000 | 0.5006086 | 0.3077945 | 0.0000000 |
1163 | SARS-CoV-1 modulates host translation machinery | 34 | 0.0000060 | 0.1694744 | 0.4670495 | 0.0871843 | 0.0000024 | 0.4968468 | 0.2104174 | 0.0001353 |
212 | Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | 0.0000013 | -0.1703740 | -0.4623943 | 0.0655557 | 0.0000006 | 0.4927837 | 0.2064895 | 0.0000348 |
853 | Olfactory Signaling Pathway | 363 | 0.0000000 | -0.0533421 | -0.4868438 | 0.0805006 | 0.0000000 | 0.4897573 | 0.3065320 | 0.0000000 |
602 | Initial triggering of complement | 21 | 0.0008658 | 0.4723487 | 0.0060805 | 0.0001783 | 0.9615247 | 0.4723878 | 0.3297014 | 0.0079201 |
516 | Glucuronidation | 25 | 0.0003667 | -0.1703429 | -0.4384960 | 0.1403591 | 0.0001471 | 0.4704205 | 0.1896128 | 0.0037545 |
683 | Lysosome Vesicle Biogenesis | 33 | 0.0001064 | -0.2296744 | 0.3450779 | 0.0223851 | 0.0006002 | 0.4145227 | 0.4064113 | 0.0014559 |
903 | Peptide chain elongation | 84 | 0.0000000 | 0.1475393 | 0.3824995 | 0.0193719 | 0.0000000 | 0.4099679 | 0.1661420 | 0.0000001 |
1172 | SCF(Skp2)-mediated degradation of p27/p21 | 58 | 0.0000012 | 0.1580968 | 0.3738311 | 0.0372445 | 0.0000008 | 0.4058871 | 0.1525472 | 0.0000342 |
395 | Eukaryotic Translation Elongation | 88 | 0.0000000 | 0.1564619 | 0.3725759 | 0.0111524 | 0.0000000 | 0.4040955 | 0.1528156 | 0.0000001 |
1496 | Viral mRNA Translation | 84 | 0.0000000 | 0.1323388 | 0.3731157 | 0.0359611 | 0.0000000 | 0.3958900 | 0.1702550 | 0.0000004 |
397 | Eukaryotic Translation Termination | 87 | 0.0000000 | 0.1304646 | 0.3695737 | 0.0353662 | 0.0000000 | 0.3919257 | 0.1690756 | 0.0000003 |
1208 | Selenocysteine synthesis | 87 | 0.0000000 | 0.1480650 | 0.3625311 | 0.0169389 | 0.0000000 | 0.3916019 | 0.1516504 | 0.0000003 |
1404 | The role of GTSE1 in G2/M progression after G2 checkpoint | 58 | 0.0000055 | 0.2073454 | 0.3256731 | 0.0062941 | 0.0000177 | 0.3860765 | 0.0836703 | 0.0001259 |
1502 | Vpu mediated degradation of CD4 | 50 | 0.0000331 | 0.2220697 | 0.3134887 | 0.0065836 | 0.0001250 | 0.3841745 | 0.0646430 | 0.0006245 |
1145 | Response of EIF2AK4 (GCN2) to amino acid deficiency | 95 | 0.0000000 | 0.1488754 | 0.3521182 | 0.0121179 | 0.0000000 | 0.3822972 | 0.1437144 | 0.0000002 |
106 | Autodegradation of Cdh1 by Cdh1:APC/C | 62 | 0.0000035 | 0.1641233 | 0.3406518 | 0.0253751 | 0.0000035 | 0.3781271 | 0.1248245 | 0.0000897 |
107 | Autodegradation of the E3 ubiquitin ligase COP1 | 50 | 0.0000533 | 0.2383885 | 0.2905213 | 0.0035352 | 0.0003781 | 0.3758081 | 0.0368634 | 0.0008425 |
492 | GSK3B and BTRC:CUL1-mediated-degradation of NFE2L2 | 50 | 0.0000473 | 0.1758887 | 0.3319988 | 0.0313906 | 0.0000485 | 0.3757127 | 0.1103865 | 0.0007941 |
1482 | Ubiquitin-dependent degradation of Cyclin D | 50 | 0.0000535 | 0.2294771 | 0.2972415 | 0.0049873 | 0.0002756 | 0.3755160 | 0.0479166 | 0.0008425 |
807 | Negative regulation of NOTCH4 signaling | 52 | 0.0000395 | 0.1876349 | 0.3216853 | 0.0192219 | 0.0000596 | 0.3724087 | 0.0947879 | 0.0006853 |
1111 | Regulation of activated PAK-2p34 by proteasome mediated degradation | 48 | 0.0000982 | 0.2238812 | 0.2960106 | 0.0072720 | 0.0003866 | 0.3711402 | 0.0510032 | 0.0013682 |
829 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 89 | 0.0000000 | 0.1218834 | 0.3489385 | 0.0467983 | 0.0000000 | 0.3696128 | 0.1605522 | 0.0000012 |
1100 | Regulation of RUNX3 expression and activity | 53 | 0.0000417 | 0.1822718 | 0.3193960 | 0.0216761 | 0.0000573 | 0.3677456 | 0.0969615 | 0.0007158 |
565 | Hh mutants are degraded by ERAD | 54 | 0.0000393 | 0.1809947 | 0.3174026 | 0.0213762 | 0.0000544 | 0.3653813 | 0.0964550 | 0.0006853 |
1215 | Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | 0.0004275 | -0.1566609 | -0.3300458 | 0.0825578 | 0.0002544 | 0.3653394 | 0.1226017 | 0.0042369 |
21 | AUF1 (hnRNP D0) binds and destabilizes mRNA | 53 | 0.0000493 | 0.1878036 | 0.3129629 | 0.0179967 | 0.0000806 | 0.3649876 | 0.0885010 | 0.0008109 |
455 | Formation of the ternary complex, and subsequently, the 43S complex | 46 | 0.0002327 | 0.1586415 | 0.3214715 | 0.0626144 | 0.0001610 | 0.3584844 | 0.1151382 | 0.0026261 |
1214 | Sensory Perception | 567 | 0.0000000 | -0.0117263 | -0.3570162 | 0.6324748 | 0.0000000 | 0.3572087 | 0.2441568 | 0.0000000 |
Camera is designed to control for correlation bias. Using the median value gives better results than the directional significance or the combined metric. Blood dataset. Need to filter by FDR then sort by effect size.
stat <- res_bl_df$median
names(stat) <- rownames(res_bl_df)
stat[is.na(stat)] <- 0
tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
mystats <- stat[names(stat) %in% myset]
median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.068 sec elapsed
sig <- subset(cres,FDR<0.05)
head(sig[order(-abs(sig$median)),],20) %>%
kbl(caption = "BLOOD: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
kable_paper("hover", full_width = F)
NGenes | Direction | PValue | FDR | median | |
---|---|---|---|---|---|
Eicosanoids | 12 | Up | 5.1e-06 | 0.0097707 | 0.3805494 |
nrow(subset(cres,FDR<0.05 & Direction=="Up"))
## [1] 1
nrow(subset(cres,FDR<0.05 & Direction=="Down"))
## [1] 0
cres_bl <- cres
Now drilling down to the top set.
myset <- rownames(head(sig[order(-abs(sig$median)),],1))
myset_members <- genesets[[which(names(genesets) == myset)]]
mystats <- stat[which(names(stat) %in% myset_members)]
myl <- list("allgenes"=stat,myset=mystats)
boxplot(myl,cex=0)
library(vioplot)
vioplot(myl,main=myset)
mygenes <- head(mystats[order(mystats)],5)
mygenes
## CYP4F3 TBXAS1 CYP4F11 CYP4F22 PTGIS
## -0.17552925 -0.10223924 0.07868081 0.09246797 0.21430455
myprobes <- unique(unlist(sets[which(names(sets) %in% names(mygenes))]))
myprobedat <- res_bl_top[which(rownames(res_bl_top) %in% myprobes),]
myprobedat %>%
kbl(caption = "Probe data for top 5 genes") %>%
kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
cg11308603 | 0.2797110 | 3.1414633 | 3.3865577 | 0.0055931 | 0.9999989 | -3.125503 |
cg01308787 | 0.2526655 | 2.0520599 | 2.4019598 | 0.0338951 | 0.9999989 | -3.790609 |
cg15950251 | 0.2152586 | 2.6525472 | 2.1974632 | 0.0489233 | 0.9999989 | -3.935044 |
cg24005500 | 0.1620676 | 3.6887463 | 2.0631852 | 0.0620321 | 0.9999989 | -4.029439 |
cg01773376 | -0.2438173 | -3.7337450 | -1.9933416 | 0.0700881 | 0.9999989 | -4.078187 |
cg12313593 | -0.2015108 | -4.4355438 | -1.9658922 | 0.0735123 | 0.9999989 | -4.097254 |
cg17253358 | -0.1484943 | 2.1848574 | -1.8605096 | 0.0881412 | 0.9999989 | -4.169866 |
cg00996623 | 0.1415276 | 1.5600174 | 1.7757879 | 0.1017771 | 0.9999989 | -4.227407 |
cg19565163 | 0.2222606 | 3.1926352 | 1.6488573 | 0.1257670 | 0.9999989 | -4.311832 |
cg17548913 | 0.0975868 | 1.4775621 | 1.6050255 | 0.1351441 | 0.9999989 | -4.340397 |
cg04329112 | -0.2211894 | 1.1669053 | -1.5880843 | 0.1389292 | 0.9999989 | -4.351348 |
cg12484231 | 0.1598165 | 3.0321165 | 1.5729243 | 0.1423943 | 0.9999989 | -4.361102 |
cg20272421 | -0.1348258 | -1.2030556 | -1.4631453 | 0.1697886 | 0.9999989 | -4.430383 |
cg14851685 | -0.1168127 | 2.1828624 | -1.4542525 | 0.1721925 | 0.9999989 | -4.435884 |
cg24431161 | 0.1649660 | 3.3373646 | 1.4366710 | 0.1770297 | 0.9999989 | -4.446706 |
cg05782669 | -0.1212879 | -5.5298051 | -1.4158122 | 0.1829168 | 0.9999989 | -4.459454 |
cg19267340 | -0.1691334 | -0.9218152 | -1.3857156 | 0.1916996 | 0.9999989 | -4.477665 |
cg23653454 | -0.1912711 | -0.1839711 | -1.3790549 | 0.1936901 | 0.9999989 | -4.481666 |
cg16608318 | 0.0904688 | 1.1501185 | 1.3457346 | 0.2039068 | 0.9999989 | -4.501511 |
cg03163448 | 0.0950792 | 3.2884062 | 1.3290445 | 0.2091889 | 0.9999989 | -4.511344 |
cg14352601 | -0.2119045 | 0.1186366 | -1.3202128 | 0.2120291 | 0.9999989 | -4.516518 |
cg25689525 | -0.1285879 | 4.2567971 | -1.3004536 | 0.2184977 | 0.9999989 | -4.528017 |
cg27138194 | -0.1112997 | -3.2549980 | -1.2740045 | 0.2274065 | 0.9999989 | -4.543242 |
cg01584377 | -0.1285927 | 1.2720004 | -1.2287636 | 0.2433214 | 0.9999989 | -4.568825 |
cg03047724 | -0.1036862 | -3.4703416 | -1.2283355 | 0.2434762 | 0.9999989 | -4.569064 |
cg26851661 | 0.1111919 | 0.3325197 | 1.2265594 | 0.2441190 | 0.9999989 | -4.570056 |
cg15543919 | -0.0919929 | -5.0230975 | -1.2210742 | 0.2461127 | 0.9999989 | -4.573114 |
cg00445997 | 0.1193956 | -4.8480204 | 1.1630702 | 0.2679920 | 0.9999989 | -4.604887 |
cg08535317 | -0.1304933 | 3.7105394 | -1.1625735 | 0.2681857 | 0.9999989 | -4.605155 |
cg14116584 | 0.0964946 | 2.6573768 | 1.1583354 | 0.2698429 | 0.9999989 | -4.607435 |
cg08670281 | -0.1778836 | 1.1309615 | -1.1464236 | 0.2745435 | 0.9999989 | -4.613812 |
cg08840230 | 0.1074632 | 4.0100983 | 1.1454136 | 0.2749449 | 0.9999989 | -4.614350 |
cg25287523 | -0.1018451 | 1.2504657 | -1.1172561 | 0.2863202 | 0.9999989 | -4.629233 |
cg20303886 | 0.0858866 | 2.4347199 | 1.1047881 | 0.2914707 | 0.9999989 | -4.635739 |
cg04271991 | 0.0894817 | 2.9040673 | 1.0962048 | 0.2950573 | 0.9999989 | -4.640188 |
cg16576150 | 0.1459993 | 3.8204738 | 1.0820327 | 0.3010522 | 0.9999989 | -4.647479 |
cg13557545 | 0.1007997 | 2.4570712 | 1.0378433 | 0.3203333 | 0.9999989 | -4.669768 |
cg17455968 | 0.1152060 | 2.8164870 | 1.0260609 | 0.3256257 | 0.9999989 | -4.675595 |
cg24456663 | -0.0806875 | -5.1634616 | -0.9954972 | 0.3396535 | 0.9999989 | -4.690476 |
cg09658576 | -0.1014826 | -2.6722711 | -0.9674011 | 0.3529313 | 0.9999989 | -4.703851 |
cg06365890 | -0.1213374 | -1.2564164 | -0.9630320 | 0.3550291 | 0.9999989 | -4.705905 |
cg06462921 | -0.0726347 | 4.2412657 | -0.9629060 | 0.3550897 | 0.9999989 | -4.705964 |
cg25855693 | -0.0697886 | 2.5387076 | -0.9573395 | 0.3577756 | 0.9999989 | -4.708569 |
cg04743066 | 0.0968089 | 1.5783491 | 0.9445588 | 0.3639971 | 0.9999989 | -4.714506 |
cg16175013 | 0.0723662 | 1.2204433 | 0.9424239 | 0.3650438 | 0.9999989 | -4.715492 |
cg18317554 | 0.0564827 | 0.9624209 | 0.9325107 | 0.3699318 | 0.9999989 | -4.720046 |
cg21646520 | -0.0659059 | 3.9900967 | -0.9187448 | 0.3767956 | 0.9999989 | -4.726306 |
cg23460202 | -0.1235528 | -4.7804428 | -0.9095658 | 0.3814215 | 0.9999989 | -4.730440 |
cg01014432 | 0.0848832 | -5.2934659 | 0.8974326 | 0.3875965 | 0.9999989 | -4.735852 |
cg02520746 | -0.0842014 | 3.8915911 | -0.8966767 | 0.3879835 | 0.9999989 | -4.736187 |
cg15750300 | -0.0689150 | 4.2038320 | -0.8911233 | 0.3908347 | 0.9999989 | -4.738644 |
cg04765497 | 0.0703687 | 3.4419048 | 0.8683549 | 0.4026745 | 0.9999989 | -4.748584 |
cg24710631 | -0.1285103 | 3.7240433 | -0.8624678 | 0.4057751 | 0.9999989 | -4.751120 |
cg21758587 | -0.0973856 | 3.1907369 | -0.8457846 | 0.4146493 | 0.9999989 | -4.758230 |
cg24445796 | 0.1035472 | 3.6203791 | 0.8423556 | 0.4164893 | 0.9999989 | -4.759677 |
cg06750564 | -0.0567768 | -0.6599998 | -0.8045893 | 0.4371138 | 0.9999989 | -4.775291 |
cg22233141 | 0.0853595 | 2.9712218 | 0.8001428 | 0.4395852 | 0.9999989 | -4.777090 |
cg19577548 | 0.0718136 | 2.0845651 | 0.7674962 | 0.4580079 | 0.9999989 | -4.790039 |
cg07590004 | -0.0785028 | 2.4966816 | -0.7591692 | 0.4627845 | 0.9999989 | -4.793268 |
cg07612655 | -0.0527658 | -1.2574644 | -0.7513483 | 0.4672992 | 0.9999989 | -4.796273 |
cg06904057 | 0.0467301 | -0.0147848 | 0.7277357 | 0.4810970 | 0.9999989 | -4.805183 |
cg00943809 | 0.0884551 | 2.2408377 | 0.7268889 | 0.4815964 | 0.9999989 | -4.805498 |
cg10592734 | 0.0895544 | 3.7563991 | 0.7095854 | 0.4918719 | 0.9999989 | -4.811864 |
cg26686366 | 0.0833297 | 4.0211642 | 0.6958155 | 0.5001435 | 0.9999989 | -4.816833 |
cg08202392 | 0.0882225 | 1.9446568 | 0.6924209 | 0.5021954 | 0.9999989 | -4.818045 |
cg07916812 | 0.0639713 | 4.0517783 | 0.6666855 | 0.5179146 | 0.9999989 | -4.827063 |
cg12046362 | 0.0798280 | 3.8847931 | 0.6578100 | 0.5234020 | 0.9999989 | -4.830102 |
cg01466035 | 0.0755545 | -4.9703944 | 0.6499689 | 0.5282779 | 0.9999989 | -4.832757 |
cg04202783 | 0.0926454 | 2.9261070 | 0.6439405 | 0.5320443 | 0.9999989 | -4.834779 |
cg23759823 | -0.0722186 | 4.2713637 | -0.6433996 | 0.5323830 | 0.9999989 | -4.834960 |
cg03102482 | 0.0597676 | 2.7734792 | 0.6198722 | 0.5472340 | 0.9999989 | -4.842683 |
cg27638815 | 0.0384727 | -1.1861265 | 0.6156521 | 0.5499223 | 0.9999989 | -4.844040 |
cg14867138 | -0.0632525 | -1.4260500 | -0.6118629 | 0.5523422 | 0.9999989 | -4.845252 |
cg23022819 | 0.0562655 | 1.5865924 | 0.6108907 | 0.5529641 | 0.9999989 | -4.845562 |
cg02967505 | -0.1039275 | 3.9572877 | -0.6049557 | 0.5567688 | 0.9999989 | -4.847444 |
cg01466163 | 0.0563533 | 3.4615419 | 0.6037943 | 0.5575150 | 0.9999989 | -4.847810 |
cg14468688 | -0.0631649 | 3.3490357 | -0.5999753 | 0.5599726 | 0.9999989 | -4.849010 |
cg11307643 | -0.0927374 | -3.0868132 | -0.5820212 | 0.5716058 | 0.9999989 | -4.854559 |
cg14630601 | 0.0547169 | 1.4485897 | 0.5747566 | 0.5763496 | 0.9999989 | -4.856760 |
cg24286200 | 0.0536935 | 4.3240001 | 0.5730511 | 0.5774663 | 0.9999989 | -4.857273 |
cg00571805 | 0.0589336 | 2.5883459 | 0.5710280 | 0.5787925 | 0.9999989 | -4.857880 |
cg20324600 | 0.0451694 | 2.4752749 | 0.5595436 | 0.5863514 | 0.9999989 | -4.861287 |
cg15630995 | 0.0546373 | 3.3467708 | 0.5559469 | 0.5887293 | 0.9999989 | -4.862341 |
cg19001086 | -0.0809608 | 2.5136725 | -0.5558609 | 0.5887862 | 0.9999989 | -4.862367 |
cg09372051 | 0.0686813 | 1.9392556 | 0.5533866 | 0.5904251 | 0.9999989 | -4.863088 |
cg16762802 | 0.1120525 | -2.2107049 | 0.5402326 | 0.5991775 | 0.9999989 | -4.866873 |
cg19062489 | -0.0522313 | -3.9402333 | -0.5312968 | 0.6051609 | 0.9999989 | -4.869396 |
cg10048169 | -0.0582506 | 3.8337663 | -0.5174839 | 0.6144694 | 0.9999989 | -4.873219 |
cg12521813 | -0.0371200 | -4.3106146 | -0.5154543 | 0.6158431 | 0.9999989 | -4.873773 |
cg04396421 | -0.0459126 | 3.8398007 | -0.5143105 | 0.6166179 | 0.9999989 | -4.874084 |
cg20746880 | -0.0529490 | 3.3082864 | -0.5058742 | 0.6223479 | 0.9999989 | -4.876359 |
cg24453014 | 0.0629568 | 3.1882913 | 0.5036020 | 0.6238957 | 0.9999989 | -4.876966 |
cg18391099 | 0.0606248 | 3.6360575 | 0.5027876 | 0.6244509 | 0.9999989 | -4.877183 |
cg04351959 | 0.0579958 | 3.9113835 | 0.5001873 | 0.6262252 | 0.9999989 | -4.877873 |
cg12104698 | -0.0596927 | -0.7850800 | -0.4818795 | 0.6387866 | 0.9999989 | -4.882638 |
cg08437662 | -0.0513031 | 4.4684866 | -0.4623960 | 0.6522848 | 0.9999989 | -4.887525 |
cg15224326 | -0.0688026 | 4.4361290 | -0.4514867 | 0.6598999 | 0.9999989 | -4.890178 |
cg09972052 | 0.0481224 | 2.4083853 | 0.4450352 | 0.6644222 | 0.9999989 | -4.891719 |
cg18812944 | -0.0389394 | 0.1178321 | -0.4364089 | 0.6704907 | 0.9999989 | -4.893746 |
cg27393487 | -0.0416902 | 3.1419400 | -0.4323662 | 0.6733431 | 0.9999989 | -4.894683 |
cg09768458 | 0.0333557 | 0.5566298 | 0.4260797 | 0.6777892 | 0.9999989 | -4.896124 |
cg21433668 | -0.0287627 | -5.2193463 | -0.4248625 | 0.6786516 | 0.9999989 | -4.896400 |
cg21271071 | 0.0464873 | 1.4027134 | 0.4206965 | 0.6816067 | 0.9999989 | -4.897341 |
cg13823415 | -0.1172670 | 3.1457146 | -0.4103897 | 0.6889413 | 0.9999989 | -4.899631 |
cg22068747 | -0.0392912 | 2.0505435 | -0.3966625 | 0.6987616 | 0.9999989 | -4.902596 |
cg24602869 | -0.0537143 | 3.6753227 | -0.3955719 | 0.6995443 | 0.9999989 | -4.902828 |
cg10003048 | 0.0334648 | -0.2773045 | 0.3931402 | 0.7012907 | 0.9999989 | -4.903341 |
cg18568570 | 0.0355641 | 1.0193651 | 0.3928112 | 0.7015272 | 0.9999989 | -4.903411 |
cg20667964 | -0.0520996 | 3.3431256 | -0.3824262 | 0.7090068 | 0.9999989 | -4.905569 |
cg27059136 | 0.0314459 | 1.1152397 | 0.3807576 | 0.7102115 | 0.9999989 | -4.905911 |
cg20230545 | 0.0274588 | 3.2905872 | 0.3571922 | 0.7273126 | 0.9999989 | -4.910581 |
cg06550951 | 0.0255613 | -4.8775763 | 0.3499421 | 0.7326056 | 0.9999989 | -4.911960 |
cg03604364 | -0.0321763 | 3.0983191 | -0.3480784 | 0.7339685 | 0.9999989 | -4.912310 |
cg23625084 | 0.0520031 | 4.8936580 | 0.3464442 | 0.7351644 | 0.9999989 | -4.912615 |
cg16377880 | 0.0285551 | 1.4845868 | 0.3445440 | 0.7365558 | 0.9999989 | -4.912968 |
cg18204208 | 0.0207800 | 1.2759386 | 0.3388178 | 0.7407549 | 0.9999989 | -4.914022 |
cg23755480 | 0.0227622 | -2.5628896 | 0.3362889 | 0.7426120 | 0.9999989 | -4.914482 |
cg23121975 | 0.0303183 | 4.1137863 | 0.3356939 | 0.7430493 | 0.9999989 | -4.914589 |
cg20358286 | 0.0309147 | 4.0904245 | 0.3269682 | 0.7494718 | 0.9999989 | -4.916148 |
cg27067618 | -0.0356103 | -0.7466721 | -0.3233188 | 0.7521639 | 0.9999989 | -4.916787 |
cg09899516 | 0.0448033 | 3.5074480 | 0.3230319 | 0.7523756 | 0.9999989 | -4.916837 |
cg03375880 | -0.0270599 | 4.0508214 | -0.3222142 | 0.7529793 | 0.9999989 | -4.916980 |
cg14116596 | -0.0365376 | 2.4941708 | -0.3172274 | 0.7566648 | 0.9999989 | -4.917840 |
cg05711445 | -0.0375254 | -1.5127244 | -0.3087096 | 0.7629744 | 0.9999989 | -4.919278 |
cg09062977 | 0.0280764 | -4.5074513 | 0.3055221 | 0.7653401 | 0.9999989 | -4.919807 |
cg08164447 | -0.0263223 | 2.4148352 | -0.2991896 | 0.7700474 | 0.9999989 | -4.920841 |
cg13806637 | -0.0288397 | 3.2459460 | -0.2816107 | 0.7831646 | 0.9999989 | -4.923601 |
cg07247925 | 0.0270325 | 4.2826917 | 0.2779431 | 0.7859104 | 0.9999989 | -4.924157 |
cg26021011 | 0.0311813 | 4.4568166 | 0.2766385 | 0.7868878 | 0.9999989 | -4.924352 |
cg09535567 | -0.0270849 | 4.5052524 | -0.2731331 | 0.7895159 | 0.9999989 | -4.924874 |
cg21865946 | -0.0188027 | -3.9062763 | -0.2566188 | 0.8019333 | 0.9999989 | -4.927243 |
cg20798469 | 0.0264172 | 3.9735276 | 0.2543486 | 0.8036449 | 0.9999989 | -4.927558 |
cg19677733 | -0.0299395 | 3.3552964 | -0.2435099 | 0.8118309 | 0.9999989 | -4.929021 |
cg11746287 | 0.0146272 | -3.5091577 | 0.2403826 | 0.8141972 | 0.9999989 | -4.929431 |
cg16537756 | 0.0148709 | -4.6105849 | 0.2396434 | 0.8147567 | 0.9999989 | -4.929527 |
cg04593076 | 0.0308443 | 4.4367907 | 0.2325742 | 0.8201138 | 0.9999989 | -4.930433 |
cg01346423 | 0.0152802 | 1.7054370 | 0.2160418 | 0.8326787 | 0.9999989 | -4.932447 |
cg08440266 | 0.0173508 | 0.3445338 | 0.2143046 | 0.8340019 | 0.9999989 | -4.932650 |
cg25530124 | -0.0255673 | 3.6907498 | -0.2108123 | 0.8366635 | 0.9999989 | -4.933053 |
cg08788055 | -0.0176089 | 2.8535246 | -0.2062944 | 0.8401099 | 0.9999989 | -4.933565 |
cg09397295 | -0.0266744 | 3.8565568 | -0.2037560 | 0.8420479 | 0.9999989 | -4.933848 |
cg03346908 | 0.0163949 | 3.8382524 | 0.2018766 | 0.8434833 | 0.9999989 | -4.934055 |
cg26407309 | -0.0212205 | 2.9033345 | -0.2001663 | 0.8447902 | 0.9999989 | -4.934242 |
cg13487431 | -0.0287729 | 2.6008357 | -0.1964499 | 0.8476316 | 0.9999989 | -4.934643 |
cg19175143 | -0.0221132 | 2.4988616 | -0.1928643 | 0.8503751 | 0.9999989 | -4.935023 |
cg02718199 | -0.0298752 | 4.4786572 | -0.1917335 | 0.8512408 | 0.9999989 | -4.935141 |
cg24831798 | 0.0223390 | 3.9421902 | 0.1884347 | 0.8537673 | 0.9999989 | -4.935482 |
cg01236728 | -0.0356798 | 4.1640955 | -0.1858447 | 0.8557521 | 0.9999989 | -4.935746 |
cg09412503 | -0.0177832 | 3.4543147 | -0.1807525 | 0.8596574 | 0.9999989 | -4.936254 |
cg09697084 | 0.0309537 | 3.7116395 | 0.1798198 | 0.8603731 | 0.9999989 | -4.936345 |
cg17709148 | -0.0131230 | 2.6219043 | -0.1755293 | 0.8636673 | 0.9999989 | -4.936760 |
cg05792851 | -0.0288875 | 2.3934996 | -0.1754173 | 0.8637533 | 0.9999989 | -4.936771 |
cg10835179 | -0.0200170 | 4.1232584 | -0.1605874 | 0.8751598 | 0.9999989 | -4.938128 |
cg10772290 | 0.0161444 | -3.6623149 | 0.1509129 | 0.8826171 | 0.9999989 | -4.938949 |
cg18992113 | -0.0134208 | 2.7168583 | -0.1448875 | 0.8872675 | 0.9999989 | -4.939434 |
cg10181968 | 0.0123162 | -4.7532045 | 0.1216340 | 0.9052546 | 0.9999989 | -4.941124 |
cg02212921 | -0.0403946 | 2.6800084 | -0.1192943 | 0.9070676 | 0.9999989 | -4.941278 |
cg04738224 | 0.0106918 | -1.4245282 | 0.1190889 | 0.9072268 | 0.9999989 | -4.941291 |
cg03764092 | 0.0192605 | 4.5154394 | 0.1177149 | 0.9082918 | 0.9999989 | -4.941380 |
cg02329994 | -0.0097769 | 4.0614295 | -0.1163685 | 0.9093356 | 0.9999989 | -4.941466 |
cg05272727 | -0.0200150 | 4.0175307 | -0.1146183 | 0.9106927 | 0.9999989 | -4.941576 |
cg03506657 | -0.0079962 | 3.8211287 | -0.1139009 | 0.9112490 | 0.9999989 | -4.941621 |
cg26617938 | 0.0109522 | 5.4318371 | 0.1053110 | 0.9179144 | 0.9999989 | -4.942134 |
cg13525067 | 0.0084873 | -2.7842177 | 0.0924680 | 0.9278919 | 0.9999989 | -4.942828 |
cg08802144 | -0.0076370 | 0.8248056 | -0.0923900 | 0.9279525 | 0.9999989 | -4.942832 |
cg06239618 | -0.0093594 | -2.2670613 | -0.0905776 | 0.9293617 | 0.9999989 | -4.942922 |
cg18449879 | 0.0067680 | 1.9105105 | 0.0786808 | 0.9386172 | 0.9999989 | -4.943472 |
cg01198539 | -0.0058647 | 0.1507738 | -0.0764107 | 0.9403844 | 0.9999989 | -4.943568 |
cg24803063 | -0.0056745 | 3.5621243 | -0.0660311 | 0.9484688 | 0.9999989 | -4.943972 |
cg06357305 | -0.0141681 | 3.3601422 | -0.0626766 | 0.9510828 | 0.9999989 | -4.944090 |
cg24656834 | -0.0054539 | -0.4798541 | -0.0619084 | 0.9516816 | 0.9999989 | -4.944116 |
cg05932408 | 0.0038682 | -3.1829356 | 0.0593377 | 0.9536853 | 0.9999989 | -4.944201 |
cg16201795 | -0.0068913 | 4.0101479 | -0.0559230 | 0.9563475 | 0.9999989 | -4.944308 |
cg13404849 | -0.0054485 | 1.0859551 | -0.0552621 | 0.9568628 | 0.9999989 | -4.944329 |
cg08270832 | 0.0070092 | 3.7598937 | 0.0549924 | 0.9570731 | 0.9999989 | -4.944337 |
cg17716453 | 0.0036673 | -5.7036088 | 0.0444635 | 0.9652854 | 0.9999989 | -4.944623 |
cg13729231 | 0.0035916 | 4.1138786 | 0.0404236 | 0.9684376 | 0.9999989 | -4.944716 |
cg00846869 | -0.0057046 | 4.3883958 | -0.0396403 | 0.9690488 | 0.9999989 | -4.944733 |
cg25962137 | 0.0015826 | 2.7743313 | 0.0171353 | 0.9866176 | 0.9999989 | -4.945083 |
cg19543844 | -0.0004770 | 3.8973384 | -0.0038868 | 0.9969643 | 0.9999989 | -4.945159 |
Guthrie dataset.
stat <- res_gu_df$median
names(stat) <- rownames(res_gu_df)
stat[is.na(stat)] <- 0
tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
mystats <- stat[names(stat) %in% myset]
median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.032 sec elapsed
sig <- subset(cres,FDR<0.05)
head(sig[order(-abs(sig$median)),],20) %>%
kbl(caption = "GUTHRIE: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
kable_paper("hover", full_width = F)
NGenes | Direction | PValue | FDR | median | |
---|---|---|---|---|---|
Expression and translocation of olfactory receptors | 356 | Down | 0.0000000 | 0.0000000 | -0.6962418 |
Olfactory Signaling Pathway | 363 | Down | 0.0000000 | 0.0000000 | -0.6884692 |
Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | Down | 0.0000141 | 0.0068276 | -0.5819672 |
Sensory Perception | 567 | Down | 0.0000000 | 0.0000070 | -0.5740685 |
Keratinization | 213 | Down | 0.0004266 | 0.0481472 | -0.4957728 |
Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | Down | 0.0007424 | 0.0481472 | -0.4895506 |
rRNA processing | 186 | Up | 0.0007276 | 0.0481472 | -0.1085795 |
SARS-CoV-1 modulates host translation machinery | 34 | Up | 0.0001203 | 0.0283602 | 0.1008188 |
Major pathway of rRNA processing in the nucleolus and cytosol | 171 | Up | 0.0003668 | 0.0442930 | -0.0987677 |
Translation | 262 | Up | 0.0006199 | 0.0481472 | -0.0958243 |
rRNA processing in the nucleus and cytosol | 180 | Up | 0.0003384 | 0.0442930 | -0.0919213 |
DNA Replication Pre-Initiation | 97 | Up | 0.0007875 | 0.0490771 | -0.0835862 |
Regulation of expression of SLITs and ROBOs | 159 | Up | 0.0006058 | 0.0481472 | -0.0775098 |
SARS-CoV-1-host interactions | 90 | Up | 0.0001475 | 0.0284884 | -0.0746554 |
Autodegradation of Cdh1 by Cdh1:APC/C | 62 | Up | 0.0005459 | 0.0481472 | -0.0670968 |
Influenza Infection | 148 | Up | 0.0005051 | 0.0481472 | -0.0636216 |
Influenza Viral RNA Transcription and Replication | 129 | Up | 0.0005878 | 0.0481472 | -0.0607625 |
Cap-dependent Translation Initiation | 111 | Up | 0.0007453 | 0.0481472 | -0.0598293 |
Eukaryotic Translation Initiation | 111 | Up | 0.0007453 | 0.0481472 | -0.0598293 |
L13a-mediated translational silencing of Ceruloplasmin expression | 103 | Up | 0.0007476 | 0.0481472 | -0.0520576 |
cres_gu <- cres
Compare median set score for both contrasts. In the graph below, the larger points represent larger gene sets. Red fill means significant in blood contrast. Blue ring means significant in guthrie contrast.
m4 <- merge(cres_bl,cres_gu,by=0)
sigx <- subset(m4,FDR.x<0.05)
sigy <- subset(m4,FDR.y<0.05)
plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)
par(mfrow=c(1,1))
pdf("camera_pw_diagnosis.pdf")
plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)
dev.off()
## png
## 2
subset(m4,FDR.x<0.05 & FDR.y<0.05 ) %>%
kbl(caption = "Pathways significant in blood (x) and guthrie cards (y) at 5%FDR") %>%
kable_paper("hover", full_width = F)
Row.names | NGenes.x | Direction.x | PValue.x | FDR.x | median.x | NGenes.y | Direction.y | PValue.y | FDR.y | median.y |
---|---|---|---|---|---|---|---|---|---|---|
make scatterplots of the top sets
XMAX=max(res$ranked_profile[,1])
XMIN=min(res$ranked_profile[,1])
YMAX=max(res$ranked_profile[,2])
YMIN=min(res$ranked_profile[,2])
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)
pdf("c3_metabotropic_diagnosis.pdf")
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)
dev.off()
## png
## 2
Here I will use MDS plot to cluster samples.
First I will use all probes, then I will focus on a subset of candidate probes.
plot(cmdscale(dist(t(bl_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
mtext("Numbers indicate diagnosis (severity)")
text(cmdscale(dist(t(bl_mvals))), labels=bl_design[,ncol(bl_design)], )
plot(cmdscale(dist(t(gu_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie card")
mtext("Numbers indicate diagnosis (severity)")
text(cmdscale(dist(t(gu_mvals))), labels=gu_design[,ncol(gu_design)], )
There is no apparent clustering by diagnosis, indicating a subtle effect.
Now I will examine the probes associated with the pathway of interest.
myprobes
## [1] "cg26617938" "cg01198539" "cg26851661" "cg08670281" "cg17548913"
## [6] "cg24656834" "cg12624051" "cg24803063" "cg18812944" "cg18449879"
## [11] "cg24655310" "cg09768458" "cg10003048" "cg04738224" "cg24456663"
## [16] "cg13525067" "cg17716453" "cg07247925" "cg16608318" "cg08840230"
## [21] "cg03346908" "cg19001086" "cg09372051" "cg03375880" "cg27393487"
## [26] "cg16175013" "cg27638815" "cg16762802" "cg01773376" "cg18992113"
## [31] "cg00571805" "cg19062489" "cg22068747" "cg12313593" "cg14116584"
## [36] "cg13557545" "cg08766627" "cg14851685" "cg21758587" "cg04743066"
## [41] "cg05696779" "cg08520349" "cg08535317" "cg09535567" "cg00996623"
## [46] "cg09412503" "cg24445796" "cg04396421" "cg08802144" "cg27067618"
## [51] "cg08164447" "cg16377880" "cg20303886" "cg20272421" "cg03163448"
## [56] "cg21745599" "cg17709148" "cg05315112" "cg06357305" "cg12484231"
## [61] "cg16537756" "cg15543919" "cg10772290" "cg07612655" "cg19577548"
## [66] "cg24453014" "cg25962137" "cg25530124" "cg27059136" "cg23755480"
## [71] "cg13806637" "cg01346423" "cg08788055" "cg04202783" "cg18204208"
## [76] "cg04271991" "cg08440266" "cg17253358" "cg03102482" "cg21865946"
## [81] "cg11746287" "cg19175143" "cg05792851" "cg11308603" "cg05932408"
## [86] "cg14630601" "cg09062977" "cg13729231" "cg15750300" "cg03764092"
## [91] "cg09658576" "cg03047724" "cg06550951" "cg10592734" "cg24710631"
## [96] "cg23625084" "cg18317554" "cg12521813" "cg27138194" "cg21433668"
## [101] "cg23460202" "cg08270832" "cg04329112" "cg15950251" "cg03506657"
## [106] "cg23759823" "cg00300322" "cg13404849" "cg20667964" "cg06904057"
## [111] "cg19565163" "cg06750564" "cg04765497" "cg01466035" "cg02329994"
## [116] "cg16201795" "cg12104698" "cg04351959" "cg02212921" "cg14867138"
## [121] "cg20746880" "cg26407309" "cg19267340" "cg25689525" "cg20324600"
## [126] "cg00846869" "cg25287523" "cg11307643" "cg09899516" "cg00445997"
## [131] "cg09697084" "cg13823415" "cg01014432" "cg06365890" "cg24831798"
## [136] "cg06462921" "cg23121975" "cg09972052" "cg10181968" "cg08437662"
## [141] "cg01466163" "cg09397295" "cg14468688" "cg21646520" "cg26021011"
## [146] "cg20230545" "cg21271071" "cg14116596" "cg15224326" "cg17455968"
## [151] "cg02967505" "cg10835179" "cg20798469" "cg02718199" "cg14352601"
## [156] "cg24431161" "cg13487431" "cg20358286" "cg05711445" "cg07590004"
## [161] "cg19677733" "cg24286200" "cg05782669" "cg01236728" "cg06239618"
## [166] "cg22233141" "cg08202392" "cg24602869" "cg04593076" "cg02520746"
## [171] "cg01308787" "cg00943809" "cg26686366" "cg15630995" "cg24005500"
## [176] "cg25855693" "cg07916812" "cg12046362" "cg23022819" "cg16576150"
## [181] "cg03604364" "cg01584377" "cg23653454" "cg18568570" "cg10048169"
## [186] "cg19543844" "cg18391099" "cg05272727"
myprobedat <- bl_mvals[which(rownames(bl_mvals) %in% myprobes),]
plot(cmdscale(dist(t(myprobedat))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat))), labels=bl_design[,ncol(bl_design)], )
myprobedat <- gu_mvals[which(rownames(gu_mvals) %in% myprobes),]
plot(cmdscale(dist(t(myprobedat))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie card")
text(cmdscale(dist(t(myprobedat))), labels=gu_design[,ncol(gu_design)], )
Again, there is no apparent clustering by diagnosis. This indicates that the top probes cannot be used diagnostically.
myprobes2 <- head(bl_lim$Name,50)
myprobedat2 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat2))), labels=bl_design[,ncol(bl_design)], )
myprobes2 <- head(gu_lim$Name,50)
myprobedat2 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n", main="Guthrie")
text(cmdscale(dist(t(myprobedat2))), labels=gu_design[,ncol(gu_design)], )
MDS analysis of top probes identified without correction for covariates.
bl_design2 <- bl_design[,c(1,10)]
fit <- lmFit(bl_mvals, bl_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
## (Intercept) diagnosis
## Down 249498 0
## NotSig 35960 802647
## Up 517189 0
top <- topTable(fit,coef="diagnosis",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg10764907 0.4211502 2.8988197 6.263755 4.446016e-06 0.999998 -0.7994062
## cg11078128 -0.3965064 2.9411553 -4.671087 1.538531e-04 0.999998 -1.8176384
## cg18599069 0.2363330 -3.7342528 4.566909 1.955321e-04 0.999998 -1.8952502
## cg23146534 0.3023905 -3.7440973 4.437609 2.634952e-04 0.999998 -1.9933808
## cg02733698 0.2542140 -3.7135497 4.417057 2.763092e-04 0.999998 -2.0091596
## cg05670004 -0.3239825 0.2098527 -4.415222 2.774836e-04 0.999998 -2.0105711
myprobes3 <- rownames(top)[1:20]
myprobedat3 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat3))), labels=bl_design[,ncol(bl_design)], )
gu_design2 <- gu_design[,c(1,12)]
fit <- lmFit(gu_mvals, gu_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
## (Intercept) diagnosis
## Down 254813 0
## NotSig 39023 790658
## Up 496822 0
top <- topTable(fit,coef="diagnosis",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg01692639 -0.3836653 -3.872032 -4.741398 0.0000825367 0.9364721 -1.451374
## cg12157673 -0.6579572 -3.458262 -4.662800 0.0001006777 0.9364721 -1.520499
## cg21808045 0.4409276 2.970191 4.660429 0.0001012831 0.9364721 -1.522595
## cg14215256 0.3581916 3.194629 4.652836 0.0001032467 0.9364721 -1.529312
## cg21484573 0.3122056 -2.788167 4.495290 0.0001537948 0.9364721 -1.670063
## cg03272508 -0.4719007 -4.976444 -4.486493 0.0001572556 0.9364721 -1.677999
myprobes3 <- rownames(top)[1:20]
myprobedat3 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie")
text(cmdscale(dist(t(myprobedat3))), labels=gu_design[,ncol(gu_design)], )
Again it was unsuccessful.
genesets <- gmt_import("ReactomePathways.gmt")
cores = 8
Aggregate mval to genes and examine pathway enrichment in blood and guthries.
tic()
bl_gsmea <- gsmea(mval=bl_mvals,design=bl_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 203.086 sec elapsed
bl_gsmea$res <- subset(bl_gsmea$res,nprobes>=5)
bl_gsmea$res$FDR <- p.adjust(bl_gsmea$res$PValue,method="fdr")
head(bl_gsmea$res,20) %>% kbl(caption="Top DM pathways in blood identified with GSMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
LDL remodeling | 6 | -1.4349303 | 0.0005749 | 0.9672367 |
POLB-Dependent Long Patch Base Excision Repair | 7 | -0.9498138 | 0.0010533 | 0.9672367 |
tRNA modification in the mitochondrion | 7 | -0.6799636 | 0.0027601 | 0.9672367 |
2-LTR circle formation | 7 | -1.0699798 | 0.0044181 | 0.9672367 |
Integration of provirus | 9 | -1.0699798 | 0.0067956 | 0.9672367 |
Early Phase of HIV Life Cycle | 14 | -0.9980008 | 0.0068881 | 0.9672367 |
Packaging Of Telomere Ends | 7 | -1.2169905 | 0.0154906 | 0.9672367 |
APOBEC3G mediated resistance to HIV-1 infection | 5 | -1.0699798 | 0.0226318 | 0.9672367 |
FLT3 signaling through SRC family kinases | 6 | -0.7434924 | 0.0280008 | 0.9672367 |
Interleukin-9 signaling | 7 | 0.7896122 | 0.0392477 | 0.9672367 |
Signaling by MAPK mutants | 6 | -0.7027397 | 0.0401679 | 0.9672367 |
RAS processing | 19 | -0.2759125 | 0.0560138 | 0.9672367 |
APEX1-Independent Resolution of AP Sites via the Single Nucleotide Replacement Pathway | 7 | -0.7845207 | 0.0613673 | 0.9672367 |
Telomere C-strand synthesis initiation | 11 | -0.6821620 | 0.0624257 | 0.9672367 |
Release of apoptotic factors from the mitochondria | 5 | -1.1518408 | 0.0685467 | 0.9672367 |
Initiation of Nuclear Envelope (NE) Reformation | 18 | -0.3133773 | 0.0705022 | 0.9672367 |
Inhibition of DNA recombination at telomere | 20 | -0.5183856 | 0.0721034 | 0.9672367 |
Resolution of AP sites via the multiple-nucleotide patch replacement pathway | 24 | -0.8162263 | 0.0733667 | 0.9672367 |
PCNA-Dependent Long Patch Base Excision Repair | 21 | -0.9019585 | 0.0749295 | 0.9672367 |
Synthesis of diphthamide-EEF2 | 8 | -0.4729171 | 0.0759643 | 0.9672367 |
tic()
gu_gsmea <- gsmea(mval=gu_mvals,design=gu_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 200.647 sec elapsed
gu_gsmea$res <- subset(gu_gsmea$res,nprobes>=5)
gu_gsmea$res$FDR <- p.adjust(gu_gsmea$res$PValue,method="fdr")
head(gu_gsmea$res,20) %>% kbl(caption="Top DM pathways in Guthrie card identified with GSMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
APC-Cdc20 mediated degradation of Nek2A | 26 | 0.5949456 | 0.0068203 | 0.9910316 |
RUNX3 regulates WNT signaling | 8 | -1.1881280 | 0.0112575 | 0.9910316 |
POLB-Dependent Long Patch Base Excision Repair | 7 | 0.5375720 | 0.0144110 | 0.9910316 |
Binding of TCF/LEF:CTNNB1 to target gene promoters | 8 | -1.3511196 | 0.0204142 | 0.9910316 |
TFAP2 (AP-2) family regulates transcription of cell cycle factors | 5 | -1.0275520 | 0.0244011 | 0.9910316 |
Lipid particle organization | 6 | -1.3378866 | 0.0339756 | 0.9910316 |
Transport and synthesis of PAPS | 6 | -1.0192347 | 0.0344609 | 0.9910316 |
HDR through MMEJ (alt-NHEJ) | 11 | 0.8184801 | 0.0364169 | 0.9910316 |
Formation of apoptosome | 10 | -0.6110426 | 0.0451739 | 0.9910316 |
Regulation of the apoptosome activity | 10 | -0.6110426 | 0.0451739 | 0.9910316 |
Inactivation of APC/C via direct inhibition of the APC/C complex | 21 | 0.5255530 | 0.0452061 | 0.9910316 |
Inhibition of the proteolytic activity of APC/C required for the onset of anaphase by mitotic spindle checkpoint components | 21 | 0.5255530 | 0.0452061 | 0.9910316 |
p75NTR recruits signalling complexes | 12 | 0.4562329 | 0.0501792 | 0.9910316 |
APC/C:Cdc20 mediated degradation of Cyclin B | 24 | 0.5278404 | 0.0569799 | 0.9910316 |
Signaling by NTRK3 (TRKC) | 17 | -0.6398299 | 0.0591936 | 0.9910316 |
Activated NTRK3 signals through PI3K | 6 | -1.0074299 | 0.0600548 | 0.9910316 |
Transcription of E2F targets under negative control by DREAM complex | 19 | 0.2507162 | 0.0606410 | 0.9910316 |
Cytochrome c-mediated apoptotic response | 12 | -0.4692336 | 0.0643461 | 0.9910316 |
Modulation by Mtb of host immune system | 6 | 1.0489500 | 0.0832534 | 0.9910316 |
VLDL assembly | 5 | 0.9010527 | 0.0857704 | 0.9910316 |
For reproducibility
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] vioplot_0.4.0
## [2] zoo_1.8-11
## [3] sm_2.2-5.7.1
## [4] pkgload_1.3.2
## [5] GGally_2.1.2
## [6] ggplot2_3.4.1
## [7] gtools_3.9.4
## [8] tibble_3.2.0
## [9] dplyr_1.1.0
## [10] echarts4r_0.4.4
## [11] ebGSEA_0.1.0
## [12] globaltest_5.52.0
## [13] survival_3.5-5
## [14] tictoc_1.1
## [15] RIdeogram_0.2.2
## [16] kableExtra_1.3.4
## [17] data.table_1.14.8
## [18] ENmix_1.34.0
## [19] doParallel_1.0.17
## [20] qqman_0.1.8
## [21] RCircos_1.2.2
## [22] beeswarm_0.4.0
## [23] forestplot_3.1.1
## [24] abind_1.4-5
## [25] checkmate_2.1.0
## [26] reshape2_1.4.4
## [27] gplots_3.1.3
## [28] eulerr_7.0.0
## [29] GEOquery_2.66.0
## [30] RColorBrewer_1.1-3
## [31] IlluminaHumanMethylation450kmanifest_0.4.0
## [32] topconfects_1.14.0
## [33] DMRcatedata_2.16.0
## [34] ExperimentHub_2.6.0
## [35] AnnotationHub_3.6.0
## [36] BiocFileCache_2.6.0
## [37] dbplyr_2.3.1
## [38] DMRcate_2.12.0
## [39] limma_3.54.0
## [40] missMethyl_1.32.0
## [41] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [42] R.utils_2.12.2
## [43] R.oo_1.25.0
## [44] R.methodsS3_1.8.2
## [45] plyr_1.8.8
## [46] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [47] minfi_1.44.0
## [48] bumphunter_1.40.0
## [49] locfit_1.5-9.7
## [50] iterators_1.0.14
## [51] foreach_1.5.2
## [52] Biostrings_2.66.0
## [53] XVector_0.38.0
## [54] SummarizedExperiment_1.28.0
## [55] Biobase_2.58.0
## [56] MatrixGenerics_1.10.0
## [57] matrixStats_0.63.0
## [58] GenomicRanges_1.50.2
## [59] GenomeInfoDb_1.34.6
## [60] IRanges_2.32.0
## [61] S4Vectors_0.36.1
## [62] BiocGenerics_0.44.0
## [63] mitch_1.10.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.58.0
## [3] tidyr_1.3.0 bit64_4.0.5
## [5] knitr_1.42 DelayedArray_0.24.0
## [7] rpart_4.1.19 KEGGREST_1.38.0
## [9] RCurl_1.98-1.10 AnnotationFilter_1.22.0
## [11] generics_0.1.3 GenomicFeatures_1.50.3
## [13] preprocessCore_1.60.2 RSQLite_2.3.0
## [15] bit_4.0.5 tzdb_0.3.0
## [17] webshot_0.5.4 xml2_1.3.3
## [19] httpuv_1.6.9 assertthat_0.2.1
## [21] xfun_0.37 hms_1.1.2
## [23] jquerylib_0.1.4 evaluate_0.20
## [25] promises_1.2.0.1 fansi_1.0.4
## [27] restfulr_0.0.15 scrime_1.3.5
## [29] progress_1.2.2 caTools_1.18.2
## [31] readxl_1.4.2 DBI_1.1.3
## [33] geneplotter_1.76.0 htmlwidgets_1.6.2
## [35] reshape_0.8.9 purrr_1.0.1
## [37] ellipsis_0.3.2 backports_1.4.1
## [39] permute_0.9-7 calibrate_1.7.7
## [41] grImport2_0.2-0 annotate_1.76.0
## [43] biomaRt_2.54.0 deldir_1.0-6
## [45] sparseMatrixStats_1.10.0 vctrs_0.6.0
## [47] ensembldb_2.22.0 withr_2.5.0
## [49] cachem_1.0.7 Gviz_1.42.0
## [51] BSgenome_1.66.2 GenomicAlignments_1.34.0
## [53] prettyunits_1.1.1 mclust_6.0.0
## [55] svglite_2.1.1 cluster_2.1.4
## [57] RPMM_1.25 lazyeval_0.2.2
## [59] crayon_1.5.2 genefilter_1.80.3
## [61] labeling_0.4.2 edgeR_3.40.2
## [63] pkgconfig_2.0.3 nlme_3.1-162
## [65] ProtGenerics_1.30.0 nnet_7.3-18
## [67] rlang_1.1.0 lifecycle_1.0.3
## [69] filelock_1.0.2 dichromat_2.0-0.1
## [71] rsvg_2.4.0 cellranger_1.1.0
## [73] rngtools_1.5.2 base64_2.0.1
## [75] Matrix_1.5-3 Rhdf5lib_1.20.0
## [77] base64enc_0.1-3 viridisLite_0.4.1
## [79] png_0.1-8 rjson_0.2.21
## [81] bitops_1.0-7 KernSmooth_2.23-20
## [83] rhdf5filters_1.10.0 blob_1.2.4
## [85] DelayedMatrixStats_1.20.0 doRNG_1.8.6
## [87] stringr_1.5.0 nor1mix_1.3-0
## [89] readr_2.1.4 jpeg_0.1-10
## [91] scales_1.2.1 memoise_2.0.1
## [93] magrittr_2.0.3 zlibbioc_1.44.0
## [95] compiler_4.2.2 BiocIO_1.8.0
## [97] illuminaio_0.40.0 Rsamtools_2.14.0
## [99] cli_3.6.0 DSS_2.46.0
## [101] htmlTable_2.4.1 Formula_1.2-5
## [103] MASS_7.3-58.3 tidyselect_1.2.0
## [105] stringi_1.7.12 highr_0.10
## [107] yaml_2.3.7 askpass_1.1
## [109] latticeExtra_0.6-30 sass_0.4.5
## [111] VariantAnnotation_1.44.0 tools_4.2.2
## [113] rstudioapi_0.14 foreign_0.8-84
## [115] bsseq_1.34.0 gridExtra_2.3
## [117] farver_2.1.1 digest_0.6.31
## [119] BiocManager_1.30.20 shiny_1.7.4
## [121] quadprog_1.5-8 Rcpp_1.0.10
## [123] siggenes_1.72.0 BiocVersion_3.16.0
## [125] later_1.3.0 org.Hs.eg.db_3.16.0
## [127] httr_1.4.5 AnnotationDbi_1.60.0
## [129] biovizBase_1.46.0 colorspace_2.1-0
## [131] rvest_1.0.3 XML_3.99-0.13
## [133] splines_4.2.2 statmod_1.5.0
## [135] kpmt_0.1.0 multtest_2.54.0
## [137] systemfonts_1.0.4 xtable_1.8-4
## [139] jsonlite_1.8.4 dynamicTreeCut_1.63-1
## [141] R6_2.5.1 Hmisc_5.0-1
## [143] pillar_1.8.1 htmltools_0.5.4
## [145] mime_0.12 glue_1.6.2
## [147] fastmap_1.1.1 BiocParallel_1.32.5
## [149] interactiveDisplayBase_1.36.0 beanplot_1.3.1
## [151] codetools_0.2-19 utf8_1.2.3
## [153] lattice_0.20-45 bslib_0.4.2
## [155] curl_5.0.0 openssl_2.0.6
## [157] interp_1.1-3 rmarkdown_2.20
## [159] munsell_0.5.0 rhdf5_2.42.0
## [161] GenomeInfoDbData_1.2.9 HDF5Array_1.26.0
## [163] impute_1.72.3 gtable_0.3.2