Source: https://github.com/markziemann/asd_meth
Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.
Here are the files that I’m using:
limma_blood_iIQ.csv
limma_guthrie_iIQ.csv
suppressPackageStartupMessages({
library("parallel")
library("mitch")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
source("https://raw.githubusercontent.com/markziemann/gmea/main/meth_functions.R")
library("data.table")
library("kableExtra")
library("eulerr")
library("RIdeogram")
library("GenomicRanges")
library("tictoc")
library("globaltest")
library("ebGSEA")
data("dualmap850kEID")
})
source("meth_functions.R")
bl_mvals <- readRDS(file="bl_mvals.rds")
gu_mvals <- readRDS(file="gu_mvals.rds")
bl_design <- readRDS(file="bl_design_iiq.rds")
gu_design <- readRDS(file="gu_design_iiq.rds")
bl_lim <- read.csv("limma_blood_iIQ.csv")
gu_lim <- read.csv("limma_guthrie_iIQ.csv")
For each gene, extract out the probes.
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
promoters <- myann[promoters,]
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
Effect size is estimated with the median t-statistic for each gene. Significance is estimated using the fry test. Results are merged for the user.
PMEA step 196.063 sec elapsed on 8 cores
Fry step 293.569 sec elapsed on 8 cores
Fry single core 1205.519 sec
tic()
res_bl <- main(mval=bl_mvals,design=bl_design,sets,cores=detectCores()/2)
res_bl[[1]] <- subset(res_bl[[1]],nprobes!=0) # remove genes with no probes obvs
res_bl_df <- res_bl[[1]] # enrichment results
res_bl_top <- res_bl[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 510.605 sec elapsed
head(res_bl_df,30) %>%
kbl(caption="Top DM genes in blood identified with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
CDHR1 | 12 | -1.2893037 | 4.70e-06 | 0.0386147 |
ORC5L | 10 | -1.8432257 | 9.70e-06 | 0.0386147 |
TDP1 | 33 | -0.8714396 | 1.07e-05 | 0.0386147 |
BCKDK | 18 | -1.1777631 | 1.25e-05 | 0.0386147 |
C17orf67 | 19 | -0.9682225 | 1.44e-05 | 0.0386147 |
LRRC50 | 18 | -0.6963501 | 1.82e-05 | 0.0386147 |
LOC101929709 | 5 | -2.5747640 | 1.93e-05 | 0.0386147 |
APOA2 | 6 | -1.9544414 | 2.03e-05 | 0.0386147 |
DCLK2 | 62 | -1.3522095 | 2.35e-05 | 0.0386147 |
GOLPH3 | 23 | -0.9145051 | 2.37e-05 | 0.0386147 |
MFN2 | 30 | -0.4312529 | 2.67e-05 | 0.0386147 |
ADAM11 | 30 | -0.4602215 | 2.75e-05 | 0.0386147 |
LOC100507506 | 6 | 1.3828742 | 3.08e-05 | 0.0386147 |
MIR371B | 1 | -5.4124537 | 3.36e-05 | 0.0386147 |
KCNV1 | 24 | -1.4369207 | 3.61e-05 | 0.0386147 |
TMEM215 | 10 | -1.3935383 | 3.94e-05 | 0.0386147 |
KPNA1 | 28 | -1.0290261 | 3.98e-05 | 0.0386147 |
MIR5583-1 | 1 | -6.0976340 | 4.16e-05 | 0.0386147 |
LINC00701 | 1 | 5.1947674 | 4.17e-05 | 0.0386147 |
UCK1 | 13 | -1.6930381 | 4.61e-05 | 0.0386147 |
LOC101929084 | 3 | -1.5563252 | 4.72e-05 | 0.0386147 |
MRPL55 | 26 | -0.5952886 | 5.21e-05 | 0.0386147 |
RSBN1L | 27 | -0.6826001 | 5.27e-05 | 0.0386147 |
STX11 | 27 | -0.7391797 | 6.18e-05 | 0.0386147 |
PDE3B | 29 | -0.4068087 | 6.51e-05 | 0.0386147 |
COX19 | 36 | -1.1633004 | 7.01e-05 | 0.0386147 |
TRANK1 | 58 | -0.7371511 | 7.94e-05 | 0.0386147 |
NRIR | 3 | -3.0097321 | 7.94e-05 | 0.0386147 |
BSN-AS2 | 3 | -1.6909403 | 7.97e-05 | 0.0386147 |
RAB1A | 29 | 0.2763217 | 8.01e-05 | 0.0386147 |
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
pdf("hist_bl_iiq.pdf")
par(mfrow=c(2,1))
hist(res_bl_top$t,xlab="probe t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_bl_df$median,xlab="gene median t-stat",main="blood at assessment") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png
## 2
par(mfrow=c(1,1))
Now some visualisations.
volcano_plot(res_bl_df)
probe_bias(res_bl_df)
gmea_boxplot(res_bl,sets)
tic()
res_gu <- main(mval=gu_mvals,design=gu_design,sets,cores=detectCores()/2)
res_gu[[1]] <- subset(res_gu[[1]],nprobes!=0) # remove genes with no probes obvs
res_gu_df <- res_gu[[1]] # enrichment results
res_gu_top <- res_gu[[2]] # limma results
toc() ## 502-522 sec elapsed on 8 cores (8.3-8.7 mins)
## 453.144 sec elapsed
head(res_gu_df,30) %>%
kbl(caption="Top DM genes in Guthrie card identified with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
C14orf132 | 50 | -0.7849783 | 0.0000068 | 0.1853123 |
TRPV3 | 42 | -0.8788321 | 0.0000280 | 0.2251003 |
ASPG | 41 | -0.5785985 | 0.0000762 | 0.2251003 |
KCNK1 | 31 | -1.0688965 | 0.0000884 | 0.2251003 |
EMID2 | 29 | -0.4193157 | 0.0001003 | 0.2251003 |
TINAGL1 | 34 | -0.8482414 | 0.0001188 | 0.2251003 |
GZMA | 7 | 1.5564016 | 0.0001221 | 0.2251003 |
MIR654 | 14 | -1.4192157 | 0.0001367 | 0.2251003 |
LOC100506384 | 11 | -0.6778815 | 0.0001369 | 0.2251003 |
SLC5A5 | 25 | -0.4726111 | 0.0001393 | 0.2251003 |
LOC283177 | 27 | -0.6547406 | 0.0001396 | 0.2251003 |
KY | 37 | -0.3401560 | 0.0001441 | 0.2251003 |
S1PR2 | 19 | -0.5772395 | 0.0001472 | 0.2251003 |
DRGX | 26 | -0.8981276 | 0.0002181 | 0.2251003 |
KRTAP13-3 | 7 | -1.6766201 | 0.0002421 | 0.2251003 |
MIR376A2 | 12 | -1.4192157 | 0.0002526 | 0.2251003 |
NR4A1 | 40 | -0.4634990 | 0.0002879 | 0.2251003 |
PAX7 | 103 | -0.2642930 | 0.0002995 | 0.2251003 |
AXDND1 | 7 | -0.5484729 | 0.0003349 | 0.2251003 |
KCNK3 | 36 | -0.5928314 | 0.0003566 | 0.2251003 |
LOC285370 | 11 | -1.2920307 | 0.0004091 | 0.2251003 |
ZKSCAN8 | 7 | -1.4399276 | 0.0004417 | 0.2251003 |
HLA-C | 19 | -1.1478727 | 0.0004461 | 0.2251003 |
KCNIP3 | 60 | -0.7177175 | 0.0004596 | 0.2251003 |
GPT | 27 | -1.0148933 | 0.0005312 | 0.2251003 |
DVL1 | 28 | -0.6839862 | 0.0005510 | 0.2251003 |
SNORD64 | 5 | -1.9849933 | 0.0005707 | 0.2251003 |
SYT7 | 64 | -0.5042050 | 0.0005900 | 0.2251003 |
WFDC13 | 12 | -1.4284514 | 0.0006022 | 0.2251003 |
B9D1 | 43 | -0.2502560 | 0.0006437 | 0.2251003 |
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
pdf("hist_gu_iiq.pdf")
par(mfrow=c(2,1))
hist(res_gu_top$t,xlab="probe t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
hist(res_gu_df$median,xlab="gene median t-stat",main="neonatal Guthrie card") ; grid() ; abline(v=0,lty=1,lwd=3)
plot(1)
dev.off()
## png
## 2
par(mfrow=c(1,1))
Now some visualisations.
volcano_plot(res_gu_df)
probe_bias(res_gu_df)
gmea_boxplot(res_gu,sets)
Now compare blood and guthrie card
res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
m1 <- merge(res_bl_df,res_gu_df,by=0)
m2 <- m1[,c("Row.names","bl","gu")]
rownames(m2) <- m2$Row.names
m2$Row.names=NULL
plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
pch=19,cex=0.9)
grid()
abline(v=0,lty=1,lwd=3,col="black") ; abline(h=0,lty=1,lwd=3,col="black")
mtext("aggregated t-statistic for each gene (median)")
#plot(rank(m1$bl),rank(m1$gu),xlab="bl rank",ylab="gu rank",cex=0.6,pch=19) ; grid()
rm2 <- apply(m2,2,rank)
mydiff <- apply(m2,2,function(x) { length(which(x<0)) } )
rm3 <- rm2
rm3[,1] <- rm3[,1] - mydiff[1]
rm3[,2] <- rm3[,2] - mydiff[2]
rnk <- rm3
palette <- colorRampPalette(c("white", "yellow", "orange", "red",
"darkred", "black"))
xmin = min(rnk[, 1])
xmax = max(rnk[, 1])
ymin = min(rnk[, 2])
ymax = max(rnk[, 2])
k <- MASS::kde2d(rnk[, 1], rnk[, 2])
X_AXIS = paste("Rank in contrast", colnames(rnk)[1])
Y_AXIS = paste("Rank in contrast", colnames(rnk)[2])
filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
color.palette = palette, plot.title = {
abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
title(main = "Rank-rank plot of all genes", xlab = X_AXIS,
ylab = Y_AXIS)
})
pdf("genedens_iiq.pdf")
par(mfrow=c(2,2))
plot(1)
plot(1)
plot(m2$bl,m2$gu,xlab="bl",ylab="gu",
col = rgb(red = 0.6, green = 0.6, blue = 0.6, alpha = 0.5) ,
pch=19,cex=0.9)
grid()
abline(v=0, h=0, lty=2, lwd=2, col="blue")
mtext("aggregated t-statistic for each gene (median)")
filled.contour(k, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
color.palette = palette, plot.title = {
abline(v = 0, h = 0, lty = 2, lwd = 2, col = "blue")
title(main = "Rank-rank plot of all genes", xlab = X_AXIS,
ylab = Y_AXIS)
})
dev.off()
## png
## 2
Now we need a table of genes with top scores.
m3 <- m2
m3$med <- apply(rnk,1,median)
mg <- merge(res_gu_df,res_bl_df,by=0)
mg$gu = mg$bl = NULL
mg$med <- m3$med
mg2 <- data.frame(mg,rnk)
mg2$med <- apply(rnk,1,median)
mg2 <- mg2[order(mg2$med),]
mg2 <-subset(mg2,nprobes.x>=5)
head(mg2,20) %>%
kbl(caption="Genes with coordinated hypomethylation in neonatal Guthrie card and blood at assessment") %>%
kable_paper("hover", full_width = F)
Row.names | nprobes.x | median.x | PValue.x | FDR.x | nprobes.y | median.y | PValue.y | FDR.y | med | bl | gu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
MIR1251 | MIR1251 | 6 | -2.708799 | 0.0093764 | 0.2251003 | 6 | -3.811523 | 0.0041277 | 0.0387949 | -21374.5 | -20679 | -22070 |
PTH | PTH | 5 | -2.653128 | 0.0273584 | 0.2251003 | 6 | -3.819182 | 0.0017876 | 0.0386147 | -21373.0 | -20686 | -22060 |
SNORD114-10 | SNORD114-10 | 6 | -2.372010 | 0.0249565 | 0.2251003 | 6 | -4.206907 | 0.0036569 | 0.0386147 | -21358.0 | -20820 | -21896 |
HISPPD1 | HISPPD1 | 5 | -2.543844 | 0.0270099 | 0.2251003 | 5 | -3.791495 | 0.0082544 | 0.0424019 | -21345.0 | -20669 | -22021 |
AGR3 | AGR3 | 5 | -2.353020 | 0.0316379 | 0.2251003 | 5 | -3.829801 | 0.0093282 | 0.0437684 | -21285.5 | -20690 | -21881 |
KRTAP22-2 | KRTAP22-2 | 5 | -2.244567 | 0.0786717 | 0.2332784 | 5 | -4.257516 | 0.0043495 | 0.0389538 | -21285.5 | -20828 | -21743 |
LRRC39 | LRRC39 | 9 | -2.485974 | 0.0410888 | 0.2251003 | 9 | -3.625742 | 0.0147400 | 0.0511017 | -21277.5 | -20572 | -21983 |
LOC102467212 | LOC102467212 | 5 | -2.223251 | 0.0417104 | 0.2251003 | 5 | -4.044245 | 0.0230449 | 0.0635751 | -21242.5 | -20780 | -21705 |
STX19 | STX19 | 9 | -2.198645 | 0.0623026 | 0.2265522 | 10 | -4.071991 | 0.0056619 | 0.0396875 | -21227.0 | -20788 | -21666 |
TMPRSS15 | TMPRSS15 | 10 | -2.272424 | 0.0241010 | 0.2251003 | 11 | -3.704654 | 0.0157415 | 0.0527364 | -21198.5 | -20616 | -21781 |
MLIP-IT1 | MLIP-IT1 | 5 | -2.639254 | 0.0159890 | 0.2251003 | 5 | -3.367004 | 0.0176765 | 0.0557014 | -21197.5 | -20340 | -22055 |
NAALADL2-AS2 | NAALADL2-AS2 | 7 | -2.205300 | 0.0369862 | 0.2251003 | 7 | -3.857002 | 0.0077735 | 0.0418303 | -21189.0 | -20704 | -21674 |
GNAT3 | GNAT3 | 12 | -2.312735 | 0.0357453 | 0.2251003 | 13 | -3.576574 | 0.0137554 | 0.0495461 | -21183.0 | -20535 | -21831 |
LINC00383 | LINC00383 | 6 | -2.164785 | 0.0990034 | 0.2482090 | 6 | -3.951895 | 0.0071911 | 0.0410138 | -21176.5 | -20744 | -21609 |
LIPF | LIPF | 9 | -2.268095 | 0.0369876 | 0.2251003 | 10 | -3.624858 | 0.0134972 | 0.0491466 | -21173.5 | -20569 | -21778 |
LOC105370306 | LOC105370306 | 6 | -2.390973 | 0.0137543 | 0.2251003 | 8 | -3.454370 | 0.0174572 | 0.0553341 | -21171.0 | -20436 | -21906 |
GBP7 | GBP7 | 11 | -2.283573 | 0.0584804 | 0.2264090 | 11 | -3.564638 | 0.0116450 | 0.0464260 | -21162.0 | -20526 | -21798 |
BZW1L1 | BZW1L1 | 5 | -2.423755 | 0.0211247 | 0.2251003 | 5 | -3.411473 | 0.0061911 | 0.0399287 | -21158.5 | -20387 | -21930 |
TCHHL1 | TCHHL1 | 5 | -2.182425 | 0.0318038 | 0.2251003 | 6 | -3.800116 | 0.0057303 | 0.0396875 | -21156.0 | -20674 | -21638 |
KLHL41 | KLHL41 | 6 | -2.308245 | 0.0396231 | 0.2251003 | 6 | -3.521570 | 0.0008613 | 0.0386147 | -21153.5 | -20481 | -21826 |
mg2 <- mg2[order(-mg2$med),]
head(mg2,20) %>%
kbl(caption="Genes with coordinated hypermethylation in neonatal Guthrie card and blood at assessment") %>%
kable_paper("hover", full_width = F)
Row.names | nprobes.x | median.x | PValue.x | FDR.x | nprobes.y | median.y | PValue.y | FDR.y | med | bl | gu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
POLR2K | POLR2K | 12 | 1.2896792 | 0.0772185 | 0.2322521 | 12 | 3.382169 | 0.0071846 | 0.0410138 | 4155.5 | 4829 | 3482 |
LOC401397 | LOC401397 | 5 | 1.4157235 | 0.0188351 | 0.2251003 | 5 | 2.713042 | 0.0151343 | 0.0517240 | 4153.5 | 4798 | 3509 |
BPNT1 | BPNT1 | 15 | 1.2750570 | 0.0665828 | 0.2278142 | 15 | 2.587692 | 0.0102501 | 0.0446408 | 4130.0 | 4787 | 3473 |
CLEC2B | CLEC2B | 15 | 1.7635390 | 0.0196090 | 0.2251003 | 20 | 2.088876 | 0.0507351 | 0.1028822 | 4128.5 | 4713 | 3544 |
RGS1 | RGS1 | 5 | 1.7159301 | 0.1888758 | 0.3361097 | 6 | 1.922179 | 0.1992182 | 0.2863887 | 4110.0 | 4679 | 3541 |
CARD6 | CARD6 | 5 | 1.6908134 | 0.4411074 | 0.5894969 | 7 | 1.919592 | 0.7726969 | 0.8672359 | 4108.0 | 4677 | 3539 |
FLJ35776 | FLJ35776 | 9 | 1.5257584 | 0.0213668 | 0.2251003 | 9 | 1.881870 | 0.0032388 | 0.0386147 | 4092.5 | 4663 | 3522 |
SNORD50A | SNORD50A | 13 | 1.2658077 | 0.0213440 | 0.2251003 | 13 | 1.841989 | 0.0019256 | 0.0386147 | 4061.0 | 4653 | 3469 |
ILF2 | ILF2 | 18 | 1.0924610 | 0.3115016 | 0.4617763 | 18 | 1.912322 | 0.0302241 | 0.0744355 | 4042.0 | 4674 | 3410 |
OR4X1 | OR4X1 | 5 | 0.8219411 | 0.1548504 | 0.3013102 | 5 | 2.865521 | 0.0036867 | 0.0386323 | 4036.5 | 4807 | 3266 |
CD69 | CD69 | 7 | 0.8294168 | 0.1103197 | 0.2582147 | 7 | 2.525912 | 0.0041559 | 0.0388093 | 4027.5 | 4779 | 3276 |
LOC101928489 | LOC101928489 | 7 | 0.9051322 | 0.0490803 | 0.2251003 | 9 | 2.112376 | 0.0206702 | 0.0600899 | 4016.5 | 4717 | 3316 |
LOC100506860 | LOC100506860 | 6 | 1.0043909 | 0.5632179 | 0.7049304 | 7 | 1.845092 | 0.0579947 | 0.1126646 | 4013.5 | 4656 | 3371 |
LRRIQ1 | LRRIQ1 | 18 | 0.9115820 | 0.0634345 | 0.2267949 | 19 | 1.998052 | 0.0224248 | 0.0626284 | 4010.0 | 4699 | 3321 |
MMRN1 | MMRN1 | 9 | 0.8521665 | 0.1907457 | 0.3380766 | 11 | 1.783157 | 0.0610473 | 0.1165329 | 3963.0 | 4636 | 3290 |
KIAA0776 | KIAA0776 | 10 | 0.7945295 | 0.1336794 | 0.2801779 | 10 | 1.961527 | 0.0182595 | 0.0564772 | 3959.5 | 4690 | 3229 |
LOC101928068 | LOC101928068 | 12 | 1.0086886 | 0.0951977 | 0.2453603 | 12 | 1.526012 | 0.0175222 | 0.0554692 | 3956.0 | 4539 | 3373 |
HIST1H2BD | HIST1H2BD | 19 | 0.8011073 | 0.0363658 | 0.2251003 | 20 | 1.842045 | 0.0228610 | 0.0632845 | 3947.5 | 4654 | 3241 |
FBXL12 | FBXL12 | 18 | 0.9750305 | 0.1845853 | 0.3315783 | 18 | 1.514718 | 0.0219121 | 0.0619874 | 3944.5 | 4533 | 3356 |
CTSV | CTSV | 6 | 1.1663051 | 0.0178527 | 0.2251003 | 6 | 1.297518 | 0.0017692 | 0.0386147 | 3922.5 | 4405 | 3440 |
genesets <- gmt_import("ReactomePathways.gmt")
cores = 8
Here I’m using the median t-statistic for downstream enrichment. Pathways are from REACTOME and analysis using mitch.
res_bl_df$bl <- res_bl_df$median
res_gu_df$gu <- res_gu_df$median
bl <- res_bl_df[,"bl",drop=FALSE]
mitch_bl <- mitch_calc(bl,genesets,priority="effect")
head( mitch_bl$enrichment_result,30) %>%
kbl(caption="BLOOD: Top effect size pathways found with mitch") %>%
kable_paper("hover", full_width = F)
sig <- subset(mitch_bl$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="BLOOD: Top effect size pathways found with mitch after 1% FDR filtering") %>%
kable_paper("hover", full_width = F)
gu <- res_gu_df[,"gu",drop=FALSE]
mitch_gu <- mitch_calc(gu,genesets,priority="effect")
head( mitch_gu$enrichment_result,30) %>%
kbl(caption="GUTHRIE: Top effect size pathways found with mitch") %>%
kable_paper("hover", full_width = F)
sig <- subset(mitch_gu$enrichment_result,`p.adjustANOVA`<0.05)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="GUTHRIE: Top effect size pathways found with mitch after 1% FDR filtering") %>%
kable_paper("hover", full_width = F)
Mitch joint analysis of blood and guthrie card. As using the median value is apparently the most sensitive, we’ll go with that.
m <- merge(res_bl_df,res_gu_df,by=0)
rownames(m) <- m$Row.names
m2 <- m[,c("bl","gu")]
m2[is.na(m2)] <- 0
res <- mitch_calc(m2, genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mitch_report(res,"blgu_mitch.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmpQdgvaM/blgu_mitch.rds ".
##
##
## processing file: mitch.Rmd
##
|
| | 0%
|
|. | 3%
|
|.. | 6% (checklibraries)
|
|.... | 9%
|
|..... | 12% (peek)
|
|...... | 15%
|
|....... | 18% (metrics)
|
|........ | 21%
|
|.......... | 24% (scatterplot)
|
|........... | 26%
|
|............ | 29% (contourplot)
|
|............. | 32%
|
|.............. | 35% (input_geneset_metrics1)
|
|................ | 38%
|
|................. | 41% (input_geneset_metrics2)
|
|.................. | 44%
|
|................... | 47% (input_geneset_metrics3)
|
|.................... | 50%
|
|...................... | 53% (echart1d)
|
|....................... | 56% (echart2d)
|
|........................ | 59%
|
|......................... | 62% (heatmap)
|
|........................... | 65%
|
|............................ | 68% (effectsize)
|
|............................. | 71%
|
|.............................. | 74% (results_table)
|
|............................... | 76%
|
|................................. | 79% (results_table_complete)
|
|.................................. | 82%
|
|................................... | 85% (detailed_geneset_reports1d)
|
|.................................... | 88%
|
|..................................... | 91% (detailed_geneset_reports2d)
|
|....................................... | 94%
|
|........................................ | 97% (session_info)
|
|.........................................| 100%
## output file: /home/mdz/projects/asd_meth/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mdz/projects/asd_meth/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpQdgvaM/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpQdgvaM/rmarkdown-str3a5ba4797eb09.html
##
## Output created: /tmp/RtmpQdgvaM/mitch_report.html
## [1] TRUE
mitch_plots(res=res,outfile="blgu_mitch_iiq.pdf")
## png
## 2
sig <- subset(res$enrichment_result,p.adjustMANOVA<0.01)
head(sig[order(-abs(sig$s.dist)),],30) %>%
kbl(caption="Top pathways found with mitch (effect size after 1% FDR filtering)") %>%
kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.bl | s.gu | p.bl | p.gu | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
1163 | SARS-CoV-1 modulates host translation machinery | 34 | 0.0000000 | 0.6171111 | 0.6153784 | 0.0000000 | 0.0000000 | 0.8715025 | 0.0012252 | 0.0000000 |
212 | Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | 0.0000000 | -0.5458771 | -0.5909819 | 0.0000000 | 0.0000000 | 0.8045131 | 0.0318939 | 0.0000000 |
903 | Peptide chain elongation | 84 | 0.0000000 | 0.5513161 | 0.5698309 | 0.0000000 | 0.0000000 | 0.7928787 | 0.0130919 | 0.0000000 |
397 | Eukaryotic Translation Termination | 87 | 0.0000000 | 0.5466818 | 0.5582196 | 0.0000000 | 0.0000000 | 0.7813258 | 0.0081584 | 0.0000000 |
395 | Eukaryotic Translation Elongation | 88 | 0.0000000 | 0.5290112 | 0.5604745 | 0.0000000 | 0.0000000 | 0.7707039 | 0.0222479 | 0.0000000 |
1496 | Viral mRNA Translation | 84 | 0.0000000 | 0.5319653 | 0.5552583 | 0.0000000 | 0.0000000 | 0.7689596 | 0.0164707 | 0.0000000 |
910 | Pexophagy | 11 | 0.0013427 | 0.5535959 | 0.5301303 | 0.0014746 | 0.0023280 | 0.7664898 | 0.0165926 | 0.0078579 |
1208 | Selenocysteine synthesis | 87 | 0.0000000 | 0.5203790 | 0.5540953 | 0.0000000 | 0.0000000 | 0.7601420 | 0.0238410 | 0.0000000 |
829 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 89 | 0.0000000 | 0.5194955 | 0.5386221 | 0.0000000 | 0.0000000 | 0.7483244 | 0.0135245 | 0.0000000 |
1145 | Response of EIF2AK4 (GCN2) to amino acid deficiency | 95 | 0.0000000 | 0.5143112 | 0.5307287 | 0.0000000 | 0.0000000 | 0.7390460 | 0.0116089 | 0.0000000 |
1169 | SARS-CoV-2 modulates host translation machinery | 46 | 0.0000000 | 0.4962560 | 0.5356786 | 0.0000000 | 0.0000000 | 0.7302202 | 0.0278760 | 0.0000000 |
882 | PINK1-PRKN Mediated Mitophagy | 21 | 0.0000271 | 0.4805864 | 0.5081978 | 0.0001371 | 0.0000552 | 0.6994485 | 0.0195242 | 0.0002301 |
449 | Formation of a pool of free 40S subunits | 94 | 0.0000000 | 0.4900509 | 0.4985651 | 0.0000000 | 0.0000000 | 0.6990830 | 0.0060204 | 0.0000000 |
1187 | SRP-dependent cotranslational protein targeting to membrane | 105 | 0.0000000 | 0.4866611 | 0.5001541 | 0.0000000 | 0.0000000 | 0.6978489 | 0.0095411 | 0.0000000 |
493 | GTP hydrolysis and joining of the 60S ribosomal subunit | 104 | 0.0000000 | 0.4817311 | 0.4680431 | 0.0000000 | 0.0000000 | 0.6716615 | 0.0096789 | 0.0000000 |
828 | Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) | 108 | 0.0000000 | 0.4607581 | 0.4724254 | 0.0000000 | 0.0000000 | 0.6599120 | 0.0082501 | 0.0000000 |
830 | Nonsense-Mediated Decay (NMD) | 108 | 0.0000000 | 0.4607581 | 0.4724254 | 0.0000000 | 0.0000000 | 0.6599120 | 0.0082501 | 0.0000000 |
1207 | Selenoamino acid metabolism | 103 | 0.0000000 | 0.4514971 | 0.4782845 | 0.0000000 | 0.0000000 | 0.6577276 | 0.0189416 | 0.0000000 |
663 | L13a-mediated translational silencing of Ceruloplasmin expression | 103 | 0.0000000 | 0.4656036 | 0.4596198 | 0.0000000 | 0.0000000 | 0.6542454 | 0.0042312 | 0.0000000 |
165 | Cap-dependent Translation Initiation | 111 | 0.0000000 | 0.4744206 | 0.4456284 | 0.0000000 | 0.0000000 | 0.6508914 | 0.0203592 | 0.0000000 |
396 | Eukaryotic Translation Initiation | 111 | 0.0000000 | 0.4744206 | 0.4456284 | 0.0000000 | 0.0000000 | 0.6508914 | 0.0203592 | 0.0000000 |
455 | Formation of the ternary complex, and subsequently, the 43S complex | 46 | 0.0000000 | 0.4686591 | 0.4508367 | 0.0000000 | 0.0000001 | 0.6503038 | 0.0126023 | 0.0000000 |
399 | Expression and translocation of olfactory receptors | 356 | 0.0000000 | -0.2312500 | -0.5867456 | 0.0000000 | 0.0000000 | 0.6306718 | 0.2513733 | 0.0000000 |
516 | Glucuronidation | 25 | 0.0000430 | -0.4184360 | -0.4652789 | 0.0002920 | 0.0000563 | 0.6257581 | 0.0331229 | 0.0003634 |
703 | Major pathway of rRNA processing in the nucleolus and cytosol | 171 | 0.0000000 | 0.4461367 | 0.4341333 | 0.0000000 | 0.0000000 | 0.6225028 | 0.0084877 | 0.0000000 |
853 | Olfactory Signaling Pathway | 363 | 0.0000000 | -0.2261979 | -0.5778318 | 0.0000000 | 0.0000000 | 0.6205280 | 0.2486427 | 0.0000000 |
1540 | rRNA processing in the nucleus and cytosol | 180 | 0.0000000 | 0.4337822 | 0.4348889 | 0.0000000 | 0.0000000 | 0.6142437 | 0.0007826 | 0.0000000 |
597 | Influenza Viral RNA Transcription and Replication | 129 | 0.0000000 | 0.4216643 | 0.4327537 | 0.0000000 | 0.0000000 | 0.6042156 | 0.0078414 | 0.0000000 |
1115 | Regulation of expression of SLITs and ROBOs | 159 | 0.0000000 | 0.4155850 | 0.4341264 | 0.0000000 | 0.0000000 | 0.6009798 | 0.0131107 | 0.0000000 |
1172 | SCF(Skp2)-mediated degradation of p27/p21 | 58 | 0.0000000 | 0.4387715 | 0.4080545 | 0.0000000 | 0.0000001 | 0.5991902 | 0.0217202 | 0.0000000 |
Camera is designed to control for correlation bias. Using the median value gives better results than the directional significance or the combined metric. Blood dataset. Need to filter by FDR then sort by effect size.
stat <- res_bl_df$median
names(stat) <- rownames(res_bl_df)
stat[is.na(stat)] <- 0
tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
mystats <- stat[names(stat) %in% myset]
median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.12 sec elapsed
sig <- subset(cres,FDR<0.05)
head(sig[order(-abs(sig$median)),],20) %>%
kbl(caption = "BLOOD: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
kable_paper("hover", full_width = F)
NGenes | Direction | PValue | FDR | median | |
---|---|---|---|---|---|
Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | Down | 0.0000000 | 0.0000083 | -1.7895967 |
Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | Down | 0.0000131 | 0.0008738 | -1.4758806 |
Expression and translocation of olfactory receptors | 359 | Down | 0.0000172 | 0.0011046 | -1.3981359 |
Olfactory Signaling Pathway | 366 | Down | 0.0000250 | 0.0015116 | -1.3911868 |
Sensory perception of taste | 47 | Down | 0.0002544 | 0.0074474 | -1.2136578 |
Aspirin ADME | 44 | Down | 0.0004114 | 0.0101587 | -1.1624630 |
Glucuronidation | 25 | Down | 0.0000364 | 0.0018482 | -1.1579103 |
Sensory Perception | 570 | Down | 0.0001808 | 0.0063503 | -1.1423309 |
Cell Cycle Checkpoints | 248 | Up | 0.0029608 | 0.0402835 | -0.4630513 |
Regulation of mRNA stability by proteins that bind AU-rich elements | 87 | Up | 0.0009354 | 0.0188215 | -0.4603099 |
Mitotic Anaphase | 221 | Up | 0.0011543 | 0.0210383 | -0.4423622 |
G2/M Checkpoints | 129 | Up | 0.0039329 | 0.0499889 | -0.4404655 |
Mitotic Metaphase and Anaphase | 222 | Up | 0.0009796 | 0.0192048 | -0.4394142 |
Separation of Sister Chromatids | 161 | Up | 0.0012853 | 0.0219760 | -0.4316783 |
TNFR2 non-canonical NF-kB pathway | 94 | Up | 0.0030465 | 0.0411604 | -0.4301446 |
RNA Polymerase III Transcription Initiation From Type 1 Promoter | 28 | Up | 0.0026733 | 0.0375839 | -0.4266033 |
HIV Life Cycle | 139 | Up | 0.0020467 | 0.0318885 | -0.4224742 |
Regulation of TP53 Activity through Phosphorylation | 88 | Up | 0.0020723 | 0.0320288 | -0.4083851 |
HIV Infection | 216 | Up | 0.0006193 | 0.0135969 | -0.4069087 |
RNA Polymerase III Transcription Initiation From Type 2 Promoter | 27 | Up | 0.0026846 | 0.0375839 | -0.4068808 |
nrow(subset(cres,FDR<0.05 & Direction=="Up"))
## [1] 144
nrow(subset(cres,FDR<0.05 & Direction=="Down"))
## [1] 8
cres_bl <- cres
Now drilling down to the top set.
myset <- rownames(head(sig[order(-abs(sig$median)),],1))
myset_members <- genesets[[which(names(genesets) == myset)]]
mystats <- stat[which(names(stat) %in% myset_members)]
myl <- list("allgenes"=stat,myset=mystats)
boxplot(myl,cex=0)
library(vioplot)
vioplot(myl,main=myset)
mygenes <- head(mystats[order(mystats)],5)
mygenes
## TAS2R42 TAS2R43 TAS2R10 TAS2R50 TAS2R8
## -3.942369 -3.919834 -3.828529 -3.742545 -3.716158
myprobes <- unique(unlist(sets[which(names(sets) %in% names(mygenes))]))
myprobedat <- res_bl_top[which(rownames(res_bl_top) %in% myprobes),]
myprobedat %>%
kbl(caption = "Probe data for top 5 genes") %>%
kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
cg11820113 | -0.8863896 | 3.078575 | -4.343904 | 0.0008033 | 0.0369615 | -0.3133902 |
cg12252888 | -0.7626417 | 3.125413 | -4.204968 | 0.0010389 | 0.0369615 | -0.5486824 |
cg19534172 | -1.0625056 | 2.569175 | -4.048605 | 0.0013910 | 0.0369615 | -0.8161181 |
cg04365308 | -0.4903002 | 1.694837 | -3.960372 | 0.0016418 | 0.0369615 | -0.9681369 |
cg19120125 | -0.5545737 | 3.503665 | -3.929286 | 0.0017408 | 0.0369615 | -1.0218678 |
cg25023291 | -0.4798319 | 3.523094 | -3.924366 | 0.0017570 | 0.0369615 | -1.0303812 |
cg25106783 | -0.2690797 | 1.725882 | -3.919834 | 0.0017721 | 0.0369615 | -1.0382239 |
cg07342723 | -0.4101521 | 3.909610 | -3.901186 | 0.0018355 | 0.0369615 | -1.0705134 |
cg23740316 | -0.8800022 | 2.641863 | -3.858671 | 0.0019890 | 0.0369615 | -1.1442387 |
cg24250315 | -0.4394500 | 2.781691 | -3.831953 | 0.0020921 | 0.0369922 | -1.1906467 |
cg21156620 | -0.5535477 | 3.794757 | -3.782503 | 0.0022976 | 0.0371821 | -1.2766807 |
cg03138863 | -0.5590461 | 3.697740 | -3.727771 | 0.0025493 | 0.0374639 | -1.3721092 |
cg25565179 | -1.0195587 | 2.067779 | -3.653138 | 0.0029383 | 0.0381417 | -1.5025424 |
cg01629530 | -0.6783977 | 3.193472 | -3.649812 | 0.0029570 | 0.0381703 | -1.5083621 |
cg10857884 | -0.4079745 | 3.720112 | -3.512812 | 0.0038412 | 0.0402640 | -1.7485566 |
cg09600247 | -0.5326815 | 3.823606 | -3.405753 | 0.0047154 | 0.0428773 | -1.9367148 |
cg23342872 | -0.8582921 | 2.801241 | -2.947730 | 0.0113637 | 0.0634392 | -2.7409286 |
cg08507270 | -0.1852411 | 3.927859 | -1.990941 | 0.0680227 | 0.1827126 | -4.3344282 |
cg20769111 | -0.0239190 | 3.899344 | -0.328058 | 0.7481128 | 0.8423542 | -6.0357095 |
Guthrie dataset.
stat <- res_gu_df$median
names(stat) <- rownames(res_gu_df)
stat[is.na(stat)] <- 0
tic()
cres <- cameraPR(statistic=stat, index=genesets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
cres <- subset(cres,NGenes>4)
cres$FDR <- p.adjust(cres$PValue,method="fdr")
mymedians <- sapply(1:nrow(cres),function(i) {
myset <- genesets[[which(names(genesets) == rownames(cres)[i])]]
mystats <- stat[names(stat) %in% myset]
median(mystats)
})
cres$median <- mymedians
toc() # 1.0 sec
## 3.09 sec elapsed
sig <- subset(cres,FDR<0.05)
head(sig[order(-abs(sig$median)),],20) %>%
kbl(caption = "GUTHRIE: Top effect size pathways found with CAMERA after 5% FDR filtering") %>%
kable_paper("hover", full_width = F)
NGenes | Direction | PValue | FDR | median | |
---|---|---|---|---|---|
Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | Down | 0.0000000 | 0.0000005 | -1.5895209 |
Expression and translocation of olfactory receptors | 356 | Down | 0.0000000 | 0.0000000 | -1.5586837 |
Olfactory Signaling Pathway | 363 | Down | 0.0000000 | 0.0000000 | -1.5475361 |
Synthesis of bile acids and bile salts via 27-hydroxycholesterol | 15 | Down | 0.0040131 | 0.0497007 | -1.3538406 |
Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | Down | 0.0000054 | 0.0003238 | -1.3069652 |
Glucuronidation | 25 | Down | 0.0000320 | 0.0015226 | -1.2954794 |
Aspirin ADME | 44 | Down | 0.0000331 | 0.0015226 | -1.2750297 |
Sensory Perception | 567 | Down | 0.0000000 | 0.0000000 | -1.2706886 |
Digestion | 18 | Down | 0.0034443 | 0.0449616 | -1.2105062 |
Beta defensins | 28 | Down | 0.0014720 | 0.0225710 | -1.2047429 |
Sensory perception of taste | 47 | Down | 0.0000665 | 0.0027951 | -1.1680533 |
Drug ADME | 104 | Down | 0.0038884 | 0.0494235 | -1.0629162 |
Keratinization | 213 | Down | 0.0000206 | 0.0010559 | -0.9826272 |
Bile acid and bile salt metabolism | 45 | Down | 0.0034264 | 0.0449616 | -0.9712772 |
Transcriptional Regulation by TP53 | 346 | Up | 0.0008744 | 0.0150826 | -0.4240193 |
Cellular response to chemical stress | 187 | Up | 0.0027470 | 0.0376399 | -0.4180161 |
Cellular response to heat stress | 95 | Up | 0.0022382 | 0.0322708 | -0.4177024 |
Regulation of TP53 Activity through Phosphorylation | 88 | Up | 0.0040603 | 0.0499646 | -0.3814699 |
Cellular responses to stimuli | 696 | Up | 0.0002862 | 0.0078778 | -0.3767445 |
Cellular responses to stress | 682 | Up | 0.0003281 | 0.0082484 | -0.3767445 |
cres_gu <- cres
Compare median set score for both contrasts. In the graph below, the larger points represent larger gene sets. Red fill means significant in blood contrast. Blue ring means significant in guthrie contrast.
m4 <- merge(cres_bl,cres_gu,by=0)
sigx <- subset(m4,FDR.x<0.05)
sigy <- subset(m4,FDR.y<0.05)
plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)
par(mfrow=c(1,1))
pdf("camera_pw_iiq.pdf")
plot(m4$median.x,m4$median.y,col="gray",pch=19,cex=log10(m4$NGenes.x),
xlab="blood",ylab="Guthrie",main="median t-stat")
grid()
points(sigx$median.x,sigx$median.y,col="red",pch=19,cex=log10(sigx$NGenes.x))
points(sigy$median.x,sigy$median.y,col="blue",pch=1,cex=log10(sigy$NGenes.x))
abline(v=0,h=0,lty=2,lwd=3)
dev.off()
## png
## 2
subset(m4,FDR.x<0.05 & FDR.y<0.05 ) %>%
kbl(caption = "Pathways significant in blood (x) and guthrie cards (y) at 5%FDR") %>%
kable_paper("hover", full_width = F)
Row.names | NGenes.x | Direction.x | PValue.x | FDR.x | median.x | NGenes.y | Direction.y | PValue.y | FDR.y | median.y | |
---|---|---|---|---|---|---|---|---|---|---|---|
30 | Activation of APC/C and APC/C:Cdc20 mediated degradation of mitotic proteins | 75 | Up | 0.0001686 | 0.0061462 | -0.1507241 | 75 | Up | 0.0001140 | 0.0040122 | -0.2308938 |
44 | Activation of NF-kappaB in B cells | 64 | Up | 0.0005045 | 0.0118618 | -0.1367042 | 64 | Up | 0.0016202 | 0.0244544 | -0.2723566 |
55 | Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S | 54 | Up | 0.0000912 | 0.0037482 | -0.0680689 | 54 | Up | 0.0005798 | 0.0109822 | -0.1964158 |
107 | APC:Cdc20 mediated degradation of cell cycle proteins prior to satisfation of the cell cycle checkpoint | 72 | Up | 0.0002696 | 0.0075496 | -0.1384490 | 72 | Up | 0.0001412 | 0.0046228 | -0.2181010 |
108 | APC/C-mediated degradation of cell cycle proteins | 86 | Up | 0.0001632 | 0.0060639 | -0.2200318 | 86 | Up | 0.0001675 | 0.0051356 | -0.2621814 |
110 | APC/C:Cdc20 mediated degradation of mitotic proteins | 74 | Up | 0.0001722 | 0.0061620 | -0.1384490 | 74 | Up | 0.0000968 | 0.0037496 | -0.2181010 |
111 | APC/C:Cdc20 mediated degradation of Securin | 66 | Up | 0.0002530 | 0.0074474 | -0.1244291 | 66 | Up | 0.0001151 | 0.0040122 | -0.2181010 |
112 | APC/C:Cdh1 mediated degradation of Cdc20 and other APC/C:Cdh1 targeted proteins in late mitosis/early G1 | 72 | Up | 0.0002118 | 0.0068213 | -0.1384490 | 72 | Up | 0.0001759 | 0.0052281 | -0.2455224 |
128 | Aspirin ADME | 44 | Down | 0.0004114 | 0.0101587 | -1.1624630 | 44 | Down | 0.0000331 | 0.0015226 | -1.2750297 |
134 | Assembly of the pre-replicative complex | 82 | Up | 0.0025015 | 0.0359536 | -0.2078673 | 82 | Up | 0.0005558 | 0.0107374 | -0.2695150 |
136 | Asymmetric localization of PCP proteins | 62 | Up | 0.0016285 | 0.0264385 | -0.1900570 | 62 | Up | 0.0005419 | 0.0106249 | -0.2621814 |
144 | AUF1 (hnRNP D0) binds and destabilizes mRNA | 53 | Up | 0.0001277 | 0.0049362 | -0.0958328 | 53 | Up | 0.0004153 | 0.0100414 | -0.2308938 |
146 | Autodegradation of Cdh1 by Cdh1:APC/C | 62 | Up | 0.0005165 | 0.0118618 | -0.1244291 | 62 | Up | 0.0000970 | 0.0037496 | -0.2145125 |
147 | Autodegradation of the E3 ubiquitin ligase COP1 | 50 | Up | 0.0002439 | 0.0074474 | -0.1138818 | 50 | Up | 0.0006698 | 0.0120937 | -0.1956145 |
198 | Cap-dependent Translation Initiation | 111 | Up | 0.0000001 | 0.0000209 | 0.0035706 | 111 | Up | 0.0000004 | 0.0000292 | -0.1183549 |
217 | Cdc20:Phospho-APC/C mediated degradation of Cyclin A | 71 | Up | 0.0002178 | 0.0068970 | -0.1261739 | 71 | Up | 0.0001079 | 0.0040097 | -0.2053083 |
220 | CDK-mediated phosphorylation and removal of Cdc6 | 71 | Up | 0.0005219 | 0.0118618 | -0.2050610 | 71 | Up | 0.0000894 | 0.0035986 | -0.2237168 |
234 | Cellular response to hypoxia | 71 | Up | 0.0016780 | 0.0269356 | -0.3211467 | 71 | Up | 0.0005777 | 0.0109822 | -0.2805014 |
235 | Cellular response to starvation | 147 | Up | 0.0000503 | 0.0023158 | -0.1300314 | 147 | Up | 0.0000006 | 0.0000404 | -0.1732266 |
262 | Class C/3 (Metabotropic glutamate/pheromone receptors) | 39 | Down | 0.0000000 | 0.0000083 | -1.7895967 | 39 | Down | 0.0000000 | 0.0000005 | -1.5895209 |
324 | Cyclin A:Cdk2-associated events at S phase entry | 83 | Up | 0.0009841 | 0.0192048 | -0.2522611 | 83 | Up | 0.0002749 | 0.0076986 | -0.2308938 |
327 | Cyclin E associated events during G1/S transition | 81 | Up | 0.0008413 | 0.0174451 | -0.2293899 | 81 | Up | 0.0003287 | 0.0082484 | -0.2525185 |
347 | Dectin-1 mediated noncanonical NF-kB signaling | 60 | Up | 0.0006065 | 0.0134684 | -0.2752683 | 60 | Up | 0.0004346 | 0.0100587 | -0.2621814 |
356 | Defective CFTR causes cystic fibrosis | 59 | Up | 0.0000796 | 0.0034174 | -0.0958328 | 59 | Up | 0.0004879 | 0.0103866 | -0.2308938 |
387 | Degradation of AXIN | 53 | Up | 0.0019804 | 0.0311070 | -0.2293899 | 53 | Up | 0.0022131 | 0.0322708 | -0.2642118 |
390 | Degradation of DVL | 55 | Up | 0.0008482 | 0.0174451 | -0.2293899 | 55 | Up | 0.0013539 | 0.0212659 | -0.2626361 |
391 | Degradation of GLI1 by the proteasome | 58 | Up | 0.0016870 | 0.0269356 | -0.1367042 | 58 | Up | 0.0027814 | 0.0378433 | -0.2649248 |
392 | Degradation of GLI2 by the proteasome | 58 | Up | 0.0013088 | 0.0221813 | -0.1900570 | 58 | Up | 0.0022660 | 0.0324292 | -0.2730696 |
445 | DNA Replication | 125 | Up | 0.0006993 | 0.0148476 | -0.2393406 | 125 | Up | 0.0005091 | 0.0106249 | -0.3070651 |
447 | DNA Replication Pre-Initiation | 97 | Up | 0.0012618 | 0.0217656 | -0.2218278 | 97 | Up | 0.0006012 | 0.0110837 | -0.2748181 |
457 | Downstream signaling events of B Cell Receptor (BCR) | 78 | Up | 0.0012451 | 0.0216717 | -0.3728188 | 78 | Up | 0.0038113 | 0.0487648 | -0.3146123 |
499 | ER-Phagosome pathway | 87 | Up | 0.0039191 | 0.0499889 | -0.3756232 | 87 | Up | 0.0031426 | 0.0418719 | -0.2812087 |
518 | Eukaryotic Translation Elongation | 88 | Up | 0.0000000 | 0.0000124 | 0.0613625 | 88 | Up | 0.0000000 | 0.0000011 | -0.0845332 |
519 | Eukaryotic Translation Initiation | 111 | Up | 0.0000001 | 0.0000209 | 0.0035706 | 111 | Up | 0.0000004 | 0.0000292 | -0.1183549 |
520 | Eukaryotic Translation Termination | 87 | Up | 0.0000000 | 0.0000083 | 0.0717554 | 87 | Up | 0.0000000 | 0.0000011 | -0.0838276 |
522 | Expression and translocation of olfactory receptors | 359 | Down | 0.0000172 | 0.0011046 | -1.3981359 | 356 | Down | 0.0000000 | 0.0000000 | -1.5586837 |
533 | FBXL7 down-regulates AURKA during mitotic entry and in early mitosis | 53 | Up | 0.0002667 | 0.0075496 | -0.1050792 | 53 | Up | 0.0006024 | 0.0110837 | -0.2308938 |
538 | FCERI mediated NF-kB activation | 74 | Up | 0.0021074 | 0.0323140 | -0.3922026 | 74 | Up | 0.0039710 | 0.0497007 | -0.3355458 |
566 | Formation of a pool of free 40S subunits | 94 | Up | 0.0000002 | 0.0000211 | 0.0280826 | 94 | Up | 0.0000001 | 0.0000095 | -0.1039186 |
581 | Formation of the ternary complex, and subsequently, the 43S complex | 46 | Up | 0.0000363 | 0.0018482 | 0.0392328 | 46 | Up | 0.0000861 | 0.0035377 | -0.1274391 |
613 | G1/S DNA Damage Checkpoints | 66 | Up | 0.0002597 | 0.0074894 | -0.2408255 | 66 | Up | 0.0001809 | 0.0052967 | -0.2140558 |
614 | G1/S Transition | 126 | Up | 0.0010197 | 0.0195063 | -0.2656847 | 126 | Up | 0.0004718 | 0.0102427 | -0.3066651 |
644 | GLI3 is processed to GLI3R by the proteasome | 58 | Up | 0.0012070 | 0.0216333 | -0.1900570 | 58 | Up | 0.0020902 | 0.0308263 | -0.2730696 |
652 | Glucuronidation | 25 | Down | 0.0000364 | 0.0018482 | -1.1579103 | 25 | Down | 0.0000320 | 0.0015226 | -1.2954794 |
682 | GSK3B and BTRC:CUL1-mediated-degradation of NFE2L2 | 50 | Up | 0.0002267 | 0.0070658 | -0.0478918 | 50 | Up | 0.0004530 | 0.0100878 | -0.1956145 |
683 | GTP hydrolysis and joining of the 60S ribosomal subunit | 104 | Up | 0.0000001 | 0.0000209 | -0.0002830 | 104 | Up | 0.0000002 | 0.0000179 | -0.1149267 |
699 | Hedgehog ligand biogenesis | 63 | Up | 0.0008488 | 0.0174451 | -0.3182771 | 63 | Up | 0.0004585 | 0.0100878 | -0.2008274 |
705 | Hh mutants abrogate ligand secretion | 57 | Up | 0.0002029 | 0.0068213 | -0.1226843 | 57 | Up | 0.0001074 | 0.0040097 | -0.1515061 |
706 | Hh mutants are degraded by ERAD | 54 | Up | 0.0001153 | 0.0045476 | -0.1004560 | 54 | Up | 0.0001394 | 0.0046228 | -0.1559207 |
712 | HIV Infection | 216 | Up | 0.0006193 | 0.0135969 | -0.4069087 | 216 | Up | 0.0003017 | 0.0078778 | -0.3361044 |
713 | HIV Life Cycle | 139 | Up | 0.0020467 | 0.0318885 | -0.4224742 | 139 | Up | 0.0011965 | 0.0193536 | -0.3430391 |
747 | Influenza Infection | 148 | Up | 0.0000012 | 0.0000939 | -0.1281808 | 148 | Up | 0.0000003 | 0.0000261 | -0.1772241 |
748 | Influenza Viral RNA Transcription and Replication | 129 | Up | 0.0000012 | 0.0000929 | -0.1230421 | 129 | Up | 0.0000002 | 0.0000179 | -0.1631145 |
838 | L13a-mediated translational silencing of Ceruloplasmin expression | 103 | Up | 0.0000003 | 0.0000373 | -0.0041365 | 103 | Up | 0.0000003 | 0.0000261 | -0.1171894 |
843 | Late Phase of HIV Life Cycle | 126 | Up | 0.0034179 | 0.0452785 | -0.4030267 | 126 | Up | 0.0015456 | 0.0235125 | -0.3611796 |
874 | Major pathway of rRNA processing in the nucleolus and cytosol | 171 | Up | 0.0000001 | 0.0000154 | -0.0249127 | 171 | Up | 0.0000000 | 0.0000066 | -0.1533395 |
925 | Metabolism of RNA | 639 | Up | 0.0000060 | 0.0004464 | -0.3208619 | 639 | Up | 0.0000043 | 0.0002800 | -0.3275401 |
952 | Mitochondrial translation | 93 | Up | 0.0000249 | 0.0015116 | -0.1917864 | 93 | Up | 0.0000456 | 0.0020510 | -0.2365114 |
953 | Mitochondrial translation elongation | 87 | Up | 0.0000499 | 0.0023158 | -0.2592075 | 87 | Up | 0.0000584 | 0.0025056 | -0.2365114 |
954 | Mitochondrial translation initiation | 87 | Up | 0.0000293 | 0.0016651 | -0.2592075 | 87 | Up | 0.0000323 | 0.0015226 | -0.2315887 |
955 | Mitochondrial translation termination | 87 | Up | 0.0000263 | 0.0015418 | -0.2377331 | 87 | Up | 0.0000247 | 0.0012239 | -0.2226806 |
960 | Mitotic G1 phase and G1/S transition | 144 | Up | 0.0012209 | 0.0216395 | -0.3001275 | 144 | Up | 0.0004595 | 0.0100878 | -0.3171454 |
976 | mRNA Splicing | 184 | Up | 0.0000126 | 0.0008721 | -0.3285524 | 184 | Up | 0.0000077 | 0.0004386 | -0.2977187 |
977 | mRNA Splicing - Major Pathway | 174 | Up | 0.0000316 | 0.0016957 | -0.3709640 | 174 | Up | 0.0000208 | 0.0010559 | -0.3014558 |
978 | mRNA Splicing - Minor Pathway | 51 | Up | 0.0000399 | 0.0019788 | -0.2452373 | 51 | Up | 0.0003016 | 0.0078778 | -0.3096140 |
1017 | Negative regulation of NOTCH4 signaling | 52 | Up | 0.0004840 | 0.0116894 | -0.1004560 | 52 | Up | 0.0004373 | 0.0100587 | -0.2455224 |
1042 | NIK–>noncanonical NF-kB signaling | 57 | Up | 0.0003604 | 0.0092835 | -0.1507241 | 57 | Up | 0.0004364 | 0.0100587 | -0.2601510 |
1048 | Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) | 108 | Up | 0.0000004 | 0.0000387 | -0.0002830 | 108 | Up | 0.0000001 | 0.0000103 | -0.1100576 |
1049 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 89 | Up | 0.0000001 | 0.0000144 | 0.0434370 | 89 | Up | 0.0000000 | 0.0000023 | -0.0935453 |
1050 | Nonsense-Mediated Decay (NMD) | 108 | Up | 0.0000004 | 0.0000387 | -0.0002830 | 108 | Up | 0.0000001 | 0.0000103 | -0.1100576 |
1096 | Olfactory Signaling Pathway | 366 | Down | 0.0000250 | 0.0015116 | -1.3911868 | 363 | Down | 0.0000000 | 0.0000000 | -1.5475361 |
1101 | Orc1 removal from chromatin | 69 | Up | 0.0037111 | 0.0481195 | -0.2050610 | 69 | Up | 0.0026980 | 0.0372321 | -0.3731306 |
1112 | Oxygen-dependent proline hydroxylation of Hypoxia-inducible Factor Alpha | 62 | Up | 0.0010830 | 0.0201686 | -0.2381320 | 62 | Up | 0.0014665 | 0.0225710 | -0.2937832 |
1116 | p53-Dependent G1 DNA Damage Response | 64 | Up | 0.0002108 | 0.0068213 | -0.1900570 | 64 | Up | 0.0001655 | 0.0051356 | -0.1807401 |
1117 | p53-Dependent G1/S DNA damage checkpoint | 64 | Up | 0.0002108 | 0.0068213 | -0.1900570 | 64 | Up | 0.0001655 | 0.0051356 | -0.1807401 |
1118 | p53-Independent DNA Damage Response | 50 | Up | 0.0004154 | 0.0101587 | -0.1367042 | 50 | Up | 0.0005444 | 0.0106249 | -0.2305129 |
1119 | p53-Independent G1/S DNA damage checkpoint | 50 | Up | 0.0004154 | 0.0101587 | -0.1367042 | 50 | Up | 0.0005444 | 0.0106249 | -0.2305129 |
1137 | Peptide chain elongation | 84 | Up | 0.0000000 | 0.0000083 | 0.0894856 | 84 | Up | 0.0000000 | 0.0000011 | -0.0738674 |
1178 | PINK1-PRKN Mediated Mitophagy | 21 | Up | 0.0028297 | 0.0390502 | -0.0043523 | 21 | Up | 0.0008402 | 0.0146243 | -0.1252274 |
1230 | Processing of Capped Intron-Containing Pre-mRNA | 232 | Up | 0.0000313 | 0.0016957 | -0.3335896 | 232 | Up | 0.0000151 | 0.0008312 | -0.3147570 |
1296 | Regulation of activated PAK-2p34 by proteasome mediated degradation | 48 | Up | 0.0005821 | 0.0130775 | -0.1138818 | 48 | Up | 0.0009165 | 0.0155326 | -0.2455224 |
1297 | Regulation of APC/C activators between G1/S and early anaphase | 79 | Up | 0.0003137 | 0.0083010 | -0.2106736 | 79 | Up | 0.0001163 | 0.0040122 | -0.2308938 |
1298 | Regulation of Apoptosis | 51 | Up | 0.0015212 | 0.0253360 | -0.1507241 | 51 | Up | 0.0029211 | 0.0394649 | -0.2601510 |
1305 | Regulation of expression of SLITs and ROBOs | 159 | Up | 0.0000008 | 0.0000693 | -0.1071797 | 159 | Up | 0.0000001 | 0.0000103 | -0.1463581 |
1326 | Regulation of mitotic cell cycle | 86 | Up | 0.0001632 | 0.0060639 | -0.2200318 | 86 | Up | 0.0001675 | 0.0051356 | -0.2621814 |
1327 | Regulation of mRNA stability by proteins that bind AU-rich elements | 87 | Up | 0.0009354 | 0.0188215 | -0.4603099 | 87 | Up | 0.0002979 | 0.0078778 | -0.2849237 |
1333 | Regulation of ornithine decarboxylase (ODC) | 49 | Up | 0.0009976 | 0.0192743 | -0.1507241 | 49 | Up | 0.0039955 | 0.0497007 | -0.2805014 |
1343 | Regulation of RUNX3 expression and activity | 53 | Up | 0.0006485 | 0.0140767 | -0.1226843 | 53 | Up | 0.0004047 | 0.0100247 | -0.1605096 |
1353 | Regulation of TP53 Activity through Phosphorylation | 88 | Up | 0.0020723 | 0.0320288 | -0.4083851 | 88 | Up | 0.0040603 | 0.0499646 | -0.3814699 |
1370 | Respiratory electron transport | 90 | Up | 0.0012375 | 0.0216717 | -0.2120186 | 90 | Up | 0.0002952 | 0.0078778 | -0.2816802 |
1371 | Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins. | 95 | Up | 0.0014034 | 0.0235773 | -0.2271289 | 95 | Up | 0.0002526 | 0.0071761 | -0.2817041 |
1373 | Response of EIF2AK4 (GCN2) to amino acid deficiency | 95 | Up | 0.0000000 | 0.0000135 | 0.0434370 | 95 | Up | 0.0000000 | 0.0000023 | -0.0935453 |
1415 | Ribosomal scanning and start codon recognition | 53 | Up | 0.0000450 | 0.0021733 | -0.1320014 | 53 | Up | 0.0003147 | 0.0081074 | -0.1917897 |
1455 | rRNA processing | 186 | Up | 0.0000003 | 0.0000299 | -0.0522210 | 186 | Up | 0.0000001 | 0.0000132 | -0.1647427 |
1457 | rRNA processing in the nucleus and cytosol | 180 | Up | 0.0000001 | 0.0000209 | -0.0379091 | 180 | Up | 0.0000000 | 0.0000053 | -0.1503964 |
1481 | S Phase | 157 | Up | 0.0023528 | 0.0347720 | -0.3429145 | 157 | Up | 0.0011519 | 0.0188595 | -0.3252247 |
1484 | SARS-CoV-1 Infection | 134 | Up | 0.0012093 | 0.0216333 | -0.3636173 | 134 | Up | 0.0001125 | 0.0040122 | -0.2879411 |
1485 | SARS-CoV-1 modulates host translation machinery | 34 | Up | 0.0000009 | 0.0000748 | 0.2704098 | 34 | Up | 0.0000012 | 0.0000826 | -0.0373158 |
1487 | SARS-CoV-1-host interactions | 90 | Up | 0.0000819 | 0.0034415 | -0.1563956 | 90 | Up | 0.0000076 | 0.0004386 | -0.1964158 |
1491 | SARS-CoV-2 modulates host translation machinery | 46 | Up | 0.0000112 | 0.0008019 | 0.0572894 | 46 | Up | 0.0000051 | 0.0003172 | -0.1031047 |
1498 | SCF-beta-TrCP mediated degradation of Emi1 | 53 | Up | 0.0003577 | 0.0092835 | -0.1050792 | 53 | Up | 0.0004158 | 0.0100414 | -0.2237168 |
1499 | SCF(Skp2)-mediated degradation of p27/p21 | 58 | Up | 0.0000586 | 0.0025741 | -0.0847179 | 58 | Up | 0.0000496 | 0.0021776 | -0.1619179 |
1502 | Selenoamino acid metabolism | 103 | Up | 0.0000008 | 0.0000695 | -0.0173032 | 103 | Up | 0.0000001 | 0.0000123 | -0.1187921 |
1503 | Selenocysteine synthesis | 87 | Up | 0.0000001 | 0.0000144 | 0.0434370 | 87 | Up | 0.0000000 | 0.0000015 | -0.0852388 |
1510 | Senescence-Associated Secretory Phenotype (SASP) | 53 | Up | 0.0032324 | 0.0433677 | -0.2059696 | 53 | Up | 0.0001189 | 0.0040300 | -0.1994702 |
1512 | Sensory Perception | 570 | Down | 0.0001808 | 0.0063503 | -1.1423309 | 567 | Down | 0.0000000 | 0.0000000 | -1.2706886 |
1514 | Sensory perception of sweet, bitter, and umami (glutamate) taste | 41 | Down | 0.0000131 | 0.0008738 | -1.4758806 | 41 | Down | 0.0000054 | 0.0003238 | -1.3069652 |
1515 | Sensory perception of taste | 47 | Down | 0.0002544 | 0.0074474 | -1.2136578 | 47 | Down | 0.0000665 | 0.0027951 | -1.1680533 |
1603 | Signaling by NOTCH4 | 79 | Up | 0.0006931 | 0.0148476 | -0.1875735 | 79 | Up | 0.0005352 | 0.0106249 | -0.3050715 |
1623 | Signaling by ROBO receptors | 203 | Up | 0.0000534 | 0.0024007 | -0.3055900 | 203 | Up | 0.0000189 | 0.0010163 | -0.2308938 |
1659 | SRP-dependent cotranslational protein targeting to membrane | 105 | Up | 0.0000002 | 0.0000211 | -0.0173032 | 105 | Up | 0.0000000 | 0.0000053 | -0.1027870 |
1660 | Stabilization of p53 | 55 | Up | 0.0001973 | 0.0068058 | -0.1226843 | 55 | Up | 0.0004892 | 0.0103866 | -0.1603352 |
1688 | Switching of origins to a post-replicative state | 90 | Up | 0.0026006 | 0.0369434 | -0.2162507 | 90 | Up | 0.0004347 | 0.0100587 | -0.2776598 |
1702 | Synthesis of DNA | 118 | Up | 0.0018206 | 0.0288306 | -0.3050778 | 118 | Up | 0.0006652 | 0.0120937 | -0.3125184 |
1758 | The role of GTSE1 in G2/M progression after G2 checkpoint | 58 | Up | 0.0002824 | 0.0077945 | -0.1900570 | 58 | Up | 0.0001735 | 0.0052281 | -0.2245964 |
1774 | TNFR2 non-canonical NF-kB pathway | 94 | Up | 0.0030465 | 0.0411604 | -0.4301446 | 94 | Up | 0.0005894 | 0.0110552 | -0.3071230 |
1831 | Translation | 262 | Up | 0.0000002 | 0.0000211 | -0.1335679 | 262 | Up | 0.0000003 | 0.0000261 | -0.2179891 |
1832 | Translation initiation complex formation | 53 | Up | 0.0001092 | 0.0043969 | -0.1320014 | 53 | Up | 0.0004579 | 0.0100878 | -0.1917897 |
1884 | Ubiquitin Mediated Degradation of Phosphorylated Cdc25A | 50 | Up | 0.0004154 | 0.0101587 | -0.1367042 | 50 | Up | 0.0005444 | 0.0106249 | -0.2305129 |
1885 | Ubiquitin-dependent degradation of Cyclin D | 50 | Up | 0.0002872 | 0.0078152 | -0.1138818 | 50 | Up | 0.0007691 | 0.0136323 | -0.2455224 |
1886 | UCH proteinases | 81 | Up | 0.0023577 | 0.0347720 | -0.3768372 | 81 | Up | 0.0023369 | 0.0331971 | -0.3070651 |
1903 | Vif-mediated degradation of APOBEC3G | 50 | Up | 0.0010729 | 0.0201686 | -0.1138818 | 50 | Up | 0.0012021 | 0.0193536 | -0.2455224 |
1905 | Viral mRNA Translation | 84 | Up | 0.0000000 | 0.0000135 | 0.0613625 | 84 | Up | 0.0000000 | 0.0000015 | -0.0845332 |
1918 | Vpu mediated degradation of CD4 | 50 | Up | 0.0005213 | 0.0118618 | -0.1138818 | 50 | Up | 0.0005283 | 0.0106249 | -0.1956145 |
make scatterplots of the top sets
XMAX=max(res$ranked_profile[,1])
XMIN=min(res$ranked_profile[,1])
YMAX=max(res$ranked_profile[,2])
YMIN=min(res$ranked_profile[,2])
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)
pdf("c3_metabotropic_iiq.pdf")
plot(res$detailed_sets[[1]], pch=19,cex=2,col="lightgray",
xlim=c(XMIN,XMAX),ylim=c(YMIN,YMAX),
xlab="blood",ylab="Guthrie")
text(res$detailed_sets[[1]],labels=rownames(res$detailed_sets[[1]]))
mtext(names(res$detailed_sets)[[1]])
grid() ; abline(v=0,h=0,lty=2,lwd=3)
dev.off()
## png
## 2
Here I will use MDS plot to cluster samples.
First I will use all probes, then I will focus on a subset of candidate probes.
plot(cmdscale(dist(t(bl_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
mtext("Numbers indicate iIQ score")
text(cmdscale(dist(t(bl_mvals))), labels=bl_design[,ncol(bl_design)], )
plot(cmdscale(dist(t(gu_mvals))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie card")
mtext("Numbers indicate iIQ score")
text(cmdscale(dist(t(gu_mvals))), labels=gu_design[,ncol(gu_design)], )
There is no apparent clustering by iIQ score, indicating a subtle effect.
Now I will examine the probes associated with the pathway of interest.
myprobes
## [1] "cg03138863" "cg19120125" "cg25023291" "cg20769111" "cg11820113"
## [6] "cg04365308" "cg03989876" "cg20418818" "cg17773562" "cg25106783"
## [11] "cg25565179" "cg11032157" "cg24250315" "cg09600247" "cg19534172"
## [16] "cg07342723" "cg10857884" "cg23342872" "cg21156620" "cg12252888"
## [21] "cg23740316" "cg01629530" "cg08507270"
myprobedat <- bl_mvals[which(rownames(bl_mvals) %in% myprobes),]
plot(cmdscale(dist(t(myprobedat))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat))), labels=bl_design[,ncol(bl_design)], )
myprobedat <- gu_mvals[which(rownames(gu_mvals) %in% myprobes),]
plot(cmdscale(dist(t(myprobedat))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie card")
text(cmdscale(dist(t(myprobedat))), labels=gu_design[,ncol(gu_design)], )
Again, there is no apparent clustering by iIQ score. This indicates that the top probes cannot be used diagnostically.
myprobes2 <- head(bl_lim$Name,50)
myprobedat2 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat2))), labels=bl_design[,ncol(bl_design)], )
myprobes2 <- head(gu_lim$Name,50)
myprobedat2 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes2),]
plot(cmdscale(dist(t(myprobedat2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n", main="Guthrie")
text(cmdscale(dist(t(myprobedat2))), labels=gu_design[,ncol(gu_design)], )
MDS analysis of top probes identified without correction for covariates.
bl_design2 <- bl_design[,c(1,10)]
fit <- lmFit(bl_mvals, bl_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
## (Intercept) iiq
## Down 256203 0
## NotSig 16923 802647
## Up 529521 0
top <- topTable(fit,coef="iiq",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg12959820 -0.4669369 -0.3527632 -6.372542 3.454596e-06 0.9502867 0.5541144
## cg03553358 -0.5309352 -2.3186818 -6.246456 4.521950e-06 0.9502867 0.4560435
## cg14659662 -0.8294573 2.5089879 -5.498778 2.323396e-05 0.9502867 -0.1749449
## cg00510320 -0.6404064 2.0755496 -5.350277 3.240270e-05 0.9502867 -0.3105061
## cg08681904 -0.6094845 -2.9652179 -5.168746 4.880622e-05 0.9502867 -0.4808272
## cg04367545 0.4540847 0.8087474 5.074153 6.049272e-05 0.9502867 -0.5715707
myprobes3 <- rownames(top)[1:20]
myprobedat3 <- bl_mvals[which(rownames(bl_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Blood")
text(cmdscale(dist(t(myprobedat3))), labels=bl_design[,ncol(bl_design)], )
gu_design2 <- gu_design[,c(1,12)]
fit <- lmFit(gu_mvals, gu_design2)
fit <- eBayes(fit)
summary(decideTests(fit))
## (Intercept) iiq
## Down 262467 0
## NotSig 22560 790658
## Up 505631 0
top <- topTable(fit,coef="iiq",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg04465078 0.9053692 -2.3163770 6.978065 3.497134e-07 0.2765037 1.3606781
## cg14396619 0.4822666 1.3192719 6.260740 1.912995e-06 0.7562624 0.7574465
## cg23266258 0.5110707 0.8423866 5.889953 4.711786e-06 0.8576060 0.4159170
## cg12040280 0.4243534 1.9013148 5.867519 4.978148e-06 0.8576060 0.3945960
## cg20782117 -0.2823256 2.7999698 -5.729042 6.997585e-06 0.8576060 0.2613321
## cg05272476 -0.7925929 2.8652814 -5.697045 7.572286e-06 0.8576060 0.2301360
myprobes3 <- rownames(top)[1:20]
myprobedat3 <- gu_mvals[which(rownames(gu_mvals) %in% myprobes3),]
plot(cmdscale(dist(t(myprobedat3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "n",main="Guthrie")
text(cmdscale(dist(t(myprobedat3))), labels=gu_design[,ncol(gu_design)], )
Again it was unsuccessful.
ebGSEA uses globaltest to identify DMGs from infinium data. Some points to note:
only 1 sided test - does not distinguish between up and down methylated genes.
cannot use complex experiment designs.
Therefore I modify how the ebGSEA code uses globaltest.
item mapEIDto850k.lv : A list mapping Entrez Gene ID to 850k probes
#bl_gt <- doGT(bl_design,bl_mvals,array="850k",ncores=16)
data.m <- bl_mvals
subsets <- mclapply(mapEIDto850k.lv,intersect,rownames(data.m),mc.cores = 16)
nrep.v <- unlist(lapply(subsets,length))
selG.idx <- which(nrep.v>0)
gt.o <- gt(response=bl_design[,ncol(bl_design)], alternative=t(data.m), model="linear",
directional = TRUE, standardize = FALSE, permutations = 0,
subsets=subsets[selG.idx],trace=F)
gt_res <- result(gt.o)
gt_res <- gt_res[order(gt_res$`p-value`),]
genesets <- gmt_import("ReactomePathways.gmt")
cores = 8
Aggregate mval to genes and examine pathway enrichment in blood and guthries.
tic()
bl_gsmea <- gsmea(mval=bl_mvals,design=bl_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 199.254 sec elapsed
bl_gsmea$res <- subset(bl_gsmea$res,nprobes>=5)
bl_gsmea$res$FDR <- p.adjust(bl_gsmea$res$PValue,method="fdr")
head(bl_gsmea$res,20) %>% kbl(caption="Top DM pathways in blood identified with GSMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
Sealing of the nuclear envelope (NE) by ESCRT-III | 31 | -1.1299532 | 0.0000018 | 0.0035010 |
VLDL assembly | 5 | -2.0270901 | 0.0000094 | 0.0090804 |
Blood group systems biosynthesis | 22 | -1.8926720 | 0.0000728 | 0.0170308 |
Caspase activation via extrinsic apoptotic signalling pathway | 25 | -1.4298865 | 0.0000851 | 0.0170308 |
Methylation | 14 | -1.3284281 | 0.0001138 | 0.0170308 |
RHOH GTPase cycle | 36 | -1.0233603 | 0.0001456 | 0.0170308 |
Muscarinic acetylcholine receptors | 5 | -1.8562137 | 0.0001643 | 0.0170308 |
Mitochondrial calcium ion transport | 22 | -2.3944875 | 0.0001777 | 0.0170308 |
SUMOylation of DNA methylation proteins | 16 | -2.0067835 | 0.0001918 | 0.0170308 |
Peptide hormone biosynthesis | 11 | -2.6234254 | 0.0001941 | 0.0170308 |
RHO GTPases Activate ROCKs | 19 | -1.4558282 | 0.0002166 | 0.0170308 |
HIV Transcription Initiation | 43 | -0.6840918 | 0.0002174 | 0.0170308 |
RNA Polymerase II HIV Promoter Escape | 43 | -0.6840918 | 0.0002174 | 0.0170308 |
RNA Polymerase II Promoter Escape | 43 | -0.6840918 | 0.0002174 | 0.0170308 |
RNA Polymerase II Transcription Initiation | 43 | -0.6840918 | 0.0002174 | 0.0170308 |
RNA Polymerase II Transcription Initiation And Promoter Clearance | 43 | -0.6840918 | 0.0002174 | 0.0170308 |
RNA Polymerase II Transcription Pre-Initiation And Promoter Opening | 43 | -0.6840918 | 0.0002174 | 0.0170308 |
RHO GTPases activate PKNs | 34 | -1.4490298 | 0.0002330 | 0.0170308 |
Ubiquinol biosynthesis | 8 | -1.7506721 | 0.0002412 | 0.0170308 |
Assembly of the ORC complex at the origin of replication | 10 | -3.4791597 | 0.0002454 | 0.0170308 |
tic()
gu_gsmea <- gsmea(mval=gu_mvals,design=gu_design,probesets=sets,genesets=genesets,cores=8)
toc()
## 198.183 sec elapsed
gu_gsmea$res <- subset(gu_gsmea$res,nprobes>=5)
gu_gsmea$res$FDR <- p.adjust(gu_gsmea$res$PValue,method="fdr")
head(gu_gsmea$res,20) %>% kbl(caption="Top DM pathways in Guthrie card identified with GSMEA") %>%
kable_paper("hover", full_width = F)
nprobes | median | PValue | FDR | |
---|---|---|---|---|
Assembly of active LPL and LIPC lipase complexes | 17 | -1.5085282 | 0.0014523 | 0.1805158 |
Plasma lipoprotein remodeling | 32 | -1.5690869 | 0.0021292 | 0.1805158 |
Anchoring fibril formation | 7 | -1.8094790 | 0.0022137 | 0.1805158 |
KSRP (KHSRP) binds and destabilizes mRNA | 17 | -0.8109465 | 0.0037503 | 0.1805158 |
Chylomicron remodeling | 10 | -1.1906161 | 0.0039553 | 0.1805158 |
Ficolins bind to repetitive carbohydrate structures on the target cell surface | 5 | -0.9841517 | 0.0040253 | 0.1805158 |
Receptor-type tyrosine-protein phosphatases | 16 | -2.0742953 | 0.0041857 | 0.1805158 |
Sulfur amino acid metabolism | 27 | -1.0021767 | 0.0047531 | 0.1805158 |
Reversible hydration of carbon dioxide | 11 | -0.5205952 | 0.0051762 | 0.1805158 |
RUNX2 regulates chondrocyte maturation | 5 | -0.7145035 | 0.0052970 | 0.1805158 |
Sensory perception of salty taste | 6 | -1.7388228 | 0.0054472 | 0.1805158 |
Lectin pathway of complement activation | 8 | -1.5933528 | 0.0054844 | 0.1805158 |
Binding of TCF/LEF:CTNNB1 to target gene promoters | 8 | -2.0152584 | 0.0056854 | 0.1805158 |
TFAP2 (AP-2) family regulates transcription of cell cycle factors | 5 | -1.2351815 | 0.0058492 | 0.1805158 |
AKT phosphorylates targets in the nucleus | 9 | -1.7242678 | 0.0063814 | 0.1805158 |
Rap1 signalling | 16 | -1.6015848 | 0.0066954 | 0.1805158 |
Lipid particle organization | 6 | -1.9960508 | 0.0067117 | 0.1805158 |
Cation-coupled Chloride cotransporters | 7 | -1.4504456 | 0.0069327 | 0.1805158 |
Na+/Cl- dependent neurotransmitter transporters | 18 | -1.1755644 | 0.0083434 | 0.1805158 |
Transport of vitamins, nucleosides, and related molecules | 38 | -1.0051110 | 0.0089909 | 0.1805158 |
For reproducibility
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] vioplot_0.4.0
## [2] zoo_1.8-11
## [3] sm_2.2-5.7.1
## [4] pkgload_1.3.2
## [5] GGally_2.1.2
## [6] ggplot2_3.4.1
## [7] gtools_3.9.4
## [8] tibble_3.2.0
## [9] dplyr_1.1.0
## [10] echarts4r_0.4.4
## [11] ebGSEA_0.1.0
## [12] globaltest_5.52.0
## [13] survival_3.5-5
## [14] tictoc_1.1
## [15] RIdeogram_0.2.2
## [16] kableExtra_1.3.4
## [17] data.table_1.14.8
## [18] ENmix_1.34.0
## [19] doParallel_1.0.17
## [20] qqman_0.1.8
## [21] RCircos_1.2.2
## [22] beeswarm_0.4.0
## [23] forestplot_3.1.1
## [24] abind_1.4-5
## [25] checkmate_2.1.0
## [26] reshape2_1.4.4
## [27] gplots_3.1.3
## [28] eulerr_7.0.0
## [29] GEOquery_2.66.0
## [30] RColorBrewer_1.1-3
## [31] IlluminaHumanMethylation450kmanifest_0.4.0
## [32] topconfects_1.14.0
## [33] DMRcatedata_2.16.0
## [34] ExperimentHub_2.6.0
## [35] AnnotationHub_3.6.0
## [36] BiocFileCache_2.6.0
## [37] dbplyr_2.3.1
## [38] DMRcate_2.12.0
## [39] limma_3.54.0
## [40] missMethyl_1.32.0
## [41] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [42] R.utils_2.12.2
## [43] R.oo_1.25.0
## [44] R.methodsS3_1.8.2
## [45] plyr_1.8.8
## [46] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [47] minfi_1.44.0
## [48] bumphunter_1.40.0
## [49] locfit_1.5-9.7
## [50] iterators_1.0.14
## [51] foreach_1.5.2
## [52] Biostrings_2.66.0
## [53] XVector_0.38.0
## [54] SummarizedExperiment_1.28.0
## [55] Biobase_2.58.0
## [56] MatrixGenerics_1.10.0
## [57] matrixStats_0.63.0
## [58] GenomicRanges_1.50.2
## [59] GenomeInfoDb_1.34.6
## [60] IRanges_2.32.0
## [61] S4Vectors_0.36.1
## [62] BiocGenerics_0.44.0
## [63] mitch_1.10.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.58.0
## [3] tidyr_1.3.0 bit64_4.0.5
## [5] knitr_1.42 DelayedArray_0.24.0
## [7] rpart_4.1.19 KEGGREST_1.38.0
## [9] RCurl_1.98-1.10 AnnotationFilter_1.22.0
## [11] generics_0.1.3 GenomicFeatures_1.50.3
## [13] preprocessCore_1.60.2 RSQLite_2.3.0
## [15] bit_4.0.5 tzdb_0.3.0
## [17] webshot_0.5.4 xml2_1.3.3
## [19] httpuv_1.6.9 assertthat_0.2.1
## [21] xfun_0.37 hms_1.1.2
## [23] jquerylib_0.1.4 evaluate_0.20
## [25] promises_1.2.0.1 fansi_1.0.4
## [27] restfulr_0.0.15 scrime_1.3.5
## [29] progress_1.2.2 caTools_1.18.2
## [31] readxl_1.4.2 DBI_1.1.3
## [33] geneplotter_1.76.0 htmlwidgets_1.6.2
## [35] reshape_0.8.9 purrr_1.0.1
## [37] ellipsis_0.3.2 backports_1.4.1
## [39] permute_0.9-7 calibrate_1.7.7
## [41] grImport2_0.2-0 annotate_1.76.0
## [43] biomaRt_2.54.0 deldir_1.0-6
## [45] sparseMatrixStats_1.10.0 vctrs_0.6.0
## [47] ensembldb_2.22.0 withr_2.5.0
## [49] cachem_1.0.7 Gviz_1.42.0
## [51] BSgenome_1.66.2 GenomicAlignments_1.34.0
## [53] prettyunits_1.1.1 mclust_6.0.0
## [55] svglite_2.1.1 cluster_2.1.4
## [57] RPMM_1.25 lazyeval_0.2.2
## [59] crayon_1.5.2 genefilter_1.80.3
## [61] labeling_0.4.2 edgeR_3.40.2
## [63] pkgconfig_2.0.3 nlme_3.1-162
## [65] ProtGenerics_1.30.0 nnet_7.3-18
## [67] rlang_1.1.0 lifecycle_1.0.3
## [69] filelock_1.0.2 dichromat_2.0-0.1
## [71] rsvg_2.4.0 cellranger_1.1.0
## [73] rngtools_1.5.2 base64_2.0.1
## [75] Matrix_1.5-3 Rhdf5lib_1.20.0
## [77] base64enc_0.1-3 viridisLite_0.4.1
## [79] png_0.1-8 rjson_0.2.21
## [81] bitops_1.0-7 KernSmooth_2.23-20
## [83] rhdf5filters_1.10.0 blob_1.2.4
## [85] DelayedMatrixStats_1.20.0 doRNG_1.8.6
## [87] stringr_1.5.0 nor1mix_1.3-0
## [89] readr_2.1.4 jpeg_0.1-10
## [91] scales_1.2.1 memoise_2.0.1
## [93] magrittr_2.0.3 zlibbioc_1.44.0
## [95] compiler_4.2.2 BiocIO_1.8.0
## [97] illuminaio_0.40.0 Rsamtools_2.14.0
## [99] cli_3.6.0 DSS_2.46.0
## [101] htmlTable_2.4.1 Formula_1.2-5
## [103] MASS_7.3-58.3 tidyselect_1.2.0
## [105] stringi_1.7.12 highr_0.10
## [107] yaml_2.3.7 askpass_1.1
## [109] latticeExtra_0.6-30 sass_0.4.5
## [111] VariantAnnotation_1.44.0 tools_4.2.2
## [113] rstudioapi_0.14 foreign_0.8-84
## [115] bsseq_1.34.0 gridExtra_2.3
## [117] farver_2.1.1 digest_0.6.31
## [119] BiocManager_1.30.20 shiny_1.7.4
## [121] quadprog_1.5-8 Rcpp_1.0.10
## [123] siggenes_1.72.0 BiocVersion_3.16.0
## [125] later_1.3.0 org.Hs.eg.db_3.16.0
## [127] httr_1.4.5 AnnotationDbi_1.60.0
## [129] biovizBase_1.46.0 colorspace_2.1-0
## [131] rvest_1.0.3 XML_3.99-0.13
## [133] splines_4.2.2 statmod_1.5.0
## [135] kpmt_0.1.0 multtest_2.54.0
## [137] systemfonts_1.0.4 xtable_1.8-4
## [139] jsonlite_1.8.4 dynamicTreeCut_1.63-1
## [141] R6_2.5.1 Hmisc_5.0-1
## [143] pillar_1.8.1 htmltools_0.5.4
## [145] mime_0.12 glue_1.6.2
## [147] fastmap_1.1.1 BiocParallel_1.32.5
## [149] interactiveDisplayBase_1.36.0 beanplot_1.3.1
## [151] codetools_0.2-19 utf8_1.2.3
## [153] lattice_0.20-45 bslib_0.4.2
## [155] curl_5.0.0 openssl_2.0.6
## [157] interp_1.1-3 rmarkdown_2.20
## [159] munsell_0.5.0 rhdf5_2.42.0
## [161] GenomeInfoDbData_1.2.9 HDF5Array_1.26.0
## [163] impute_1.72.3 gtable_0.3.2