Source: https://github.com/markziemann/asd_meth
Here we are analysing blood methylation in 9 twin pairs.
baseDir=getwd()
dataDir=paste(baseDir,"/ASD_EPIC_DATA",sep="")
library("missMethyl")
library("limma")
library("minfi")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("IlluminaHumanMethylationEPICanno.ilm10b2.hg19")
library("ruv")
library("RColorBrewer")
library("matrixStats")
library("gplots")
library("WGCNA")
library("FlowSorted.Blood.450k")
library("reshape2")
library("ggplot2")
library("missMethyl")
library("DMRcate")
Load the annotation data and the Epic methylation data.
This analysis is to be conducted on Ubuntu with R4.
ann = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
targets_gen = read.metharray.sheet(dataDir, pattern = "ASD_blood_summarysheet.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "/mnt/data/mdz/projects/asd_meth/ASD_EPIC_DATA/ASD_blood_summarysheet.csv"
#targets$ID = paste(targets$Sample_Group,targets_gen$Sample_Name,sep=".")
rgSet = read.metharray.exp(targets = targets_gen)
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
sampleNames(rgSet) = targets_gen$SampleID
snpBetas = getSnpBeta(rgSet)
d = dist(t(snpBetas))
hr = hclust(d, method = "complete", members=NULL)
plot(hr)
#removepair1 = !targets\(Twin_Group %in% c("Q020","Q026", "Q034", "Q038", "Q043", "Q044") #removepairtest = !targets\)Sample_Group %in% “0” #targets = targets[removepairtest,] #rgSet = read.metharray.exp(targets = targets) #sampleNames(rgSet) = targets$ID
detP = detectionP(rgSet)
qcReport(rgSet, sampNames = targets_gen$Sample_Name, pdf="qc-report_ASD_blood_nov27.pdf")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## X11cairo
## 2
cols=brewer.pal(4,"Set1")
barplot(apply(detP,2,mean),
col=as.numeric(factor(targets_gen$Sample_Name)),
las=2,cex.names= 0.8, cex.axis=0.75,
main="Mean detection p-values of probe signals",
ylab="Mean detection p-value")
barplot(apply(detP,2,mean),
col=as.numeric(factor(targets_gen$Sample_Name)),
las=2,cex.names= 0.8, cex.axis=0.75,ylim=c(0,0.010),
main="Mean detection p-values of probe signals",
ylab="Mean detection p-value")
mset.raw = preprocessRaw(rgSet)
Using Multi-dimensional scaling (MDS) plots before filtering.
mdsPlot(mset.raw, sampGroups = targets_gen$Sample_Name,
sampNames=targets_gen$Social_interaction_on_ADOS,legendPos="bottom")
mdsPlot(mset.raw, sampGroups = targets_gen$Sex,
sampNames=targets_gen$SampleID,legendPos="bottom")
SQN is used as it gave a more stable result.
mSetSw = preprocessSWAN(rgSet)
densityPlot(getBeta(mSetSw),main="SWAN")
#SQN method
mSetSq = preprocessQuantile(rgSet)
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
densityPlot(rgSet, sampGroups = targets_gen$Social_interaction_on_ADOS,
main="Raw", legend = FALSE)
densityPlot(getBeta(mSetSw),
sampGroups = targets_gen$Social_interaction_on_ADOS,main="SWAN",
legend = FALSE)
densityPlot(getBeta(mSetSq),
sampGroups = targets_gen$Social_interaction_on_ADOS,main="SQN",
legend = FALSE)
par(mfrow=c(1,1))
Need to consider using estimatecellcounts2.
rgSet$Slide <- as.numeric(rgSet$Slide)
rgSet$Sex<- as.character(rgSet$Sex)
rgSet$Sample_Name<- as.character(rgSet$Sample_Name)
cellCounts_new <- estimateCellCounts(rgSet, compositeCellType = "Blood",
processMethod = "auto", probeSelect = "auto",
cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono","Gran"),
referencePlatform = c("IlluminaHumanMethylation450k"),
returnAll = FALSE, meanPlot = TRUE)
## [estimateCellCounts] Combining user data with reference (flow sorted) data.
## Warning in DataFrame(sampleNames = c(colnames(rgSet),
## colnames(referenceRGset)), : 'stringsAsFactors' is ignored
## [estimateCellCounts] Processing user and reference data together.
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
## [estimateCellCounts] Picking probes for composition estimation.
## [estimateCellCounts] Estimating composition.
#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")
cellCounts_long <- melt(cellCounts_new, id = "celltype")
cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1, targets_gen$SampleID),"Diagnosis"]
colnames(cellCounts_long) <- c("diagnosis","celltype","value")
ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
geom_boxplot()
wx <- lapply(1:ncol(cellCounts_new) , function(i) {
wilcox.test(a[,i],c[,i], paired=FALSE)
} )
## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties
names(wx) <- colnames(cellCounts_new)
wx
## $CD8T
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 12, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
##
##
## $CD4T
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 13, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
##
##
## $NK
##
## Wilcoxon rank sum test with continuity correction
##
## data: a[, i] and c[, i]
## W = 16, p-value = 0.9317
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Bcell
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 18, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Mono
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 23, p-value = 0.2168
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Gran
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 15, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
None of the p-values are less than 0.05.
Now filter for high detection p-value and overlap with SNP.
Need to get a copy of the Xreact probes from Namitha. In the mean time I have downloaded from github - link below.
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]
gmSetSw <- mapToGenome(mSetSw)
#remove SNPs
gmSetSqFlt = dropLociWithSnps(gmSetSw, snps = c("CpG", "SBE"))
#Removing cross-reactive probes
Xreact <- read.csv("https://raw.githubusercontent.com/sirselim/illumina450k_filtering/master/EPIC/13059_2016_1066_MOESM1_ESM.csv")
#Xreact = read.csv(file="/group/canc2/puumba/Data/InfiniumData/NamithaData/Rprojects/Autism/Analysis_Sept11/EPIC_850k_crossreactiveProbes.csv", stringsAsFactors=FALSE)
#Xreact = read.csv(file="~/48639-non-specific-probes-Illumina450k.csv", stringsAsFactors=FALSE)
noXreact <- !(featureNames(gmSetSw) %in% Xreact$X)
gmSetSw <- gmSetSw[noXreact,]
#Removing probes on X and Y chromosomes
autosomes <- !(featureNames(gmSetSw) %in% ann$Name[ann$chr %in% c("chrX","chrY")])
gmSetSqFlt <- gmSetSq[autosomes,]
#Relative log expression (RLE plot)
mValsSq = getM(gmSetSqFlt)
medSq = apply(mValsSq, 1, median)
YSq = mValsSq - medSq
boxplot(YSq,outline=FALSE,ylim=c(-1.5,1.5),
ylab="Relative Log Methylation Value",
cols=as.character(factor(targets_gen$Social_interaction_on_ADOS,)) )
pal = brewer.pal(8, "Dark2")
mds1Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(1,2))
mds2Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(1,3))
mds3Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(2,3))
mds4Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(3,4))
plotMDS(mds1Sq, xlab="Dimension 1", ylab="Dimension 2",
col=pal[as.factor(targets_gen$Diagnosis)],
dim=c(1,2), labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
plotMDS(mds2Sq, xlab="Dimension 1", ylab="Dimension 3",
col=pal[as.factor(targets_gen$Diagnosis)],dim=c(1,3),
labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
plotMDS(mds3Sq, xlab="Dimension 2", ylab="Dimension 3",
col=pal[as.factor(targets_gen$Diagnosis)],dim=c(2,3),
labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
plotMDS(mds4Sq, xlab="Dimension 3", ylab="Dimension 4",
col=pal[as.factor(targets_gen$Diagnosis)],dim=c(3,4),
labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
fit <- prcomp(t(mValsSq),center = TRUE, scale = TRUE,retx=TRUE)
loadings = fit$x
plot(fit,type="lines")
nGenes = nrow(mValsSq)
nSamples = ncol(mValsSq)
datTraits = targets_gen[,7:15]
moduleTraitCor = cor(loadings[,1:6], datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
par(cex=0.75, mar = c(6, 8.5, 3, 3))
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = t(moduleTraitCor),
xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
cex.lab.y = 0.6, zlim = c(-1,1),
main = paste("PCA-trait relationships: Top principal components"))
ADOS is the outcome variable (continuous)
mDat = mValsSq
Twin_Group <- factor(targets_gen$Family_ID)
ADOS <- targets_gen$Social_interaction_on_ADOS
design_tw <- model.matrix(~ Twin_Group + ADOS )
fit_tw <- lmFit(mDat, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))
## (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down 253400 9674 4684 5694 3585 4023
## NotSig 47732 779803 796131 793604 797008 797082
## Up 503691 15346 4008 5525 4230 3718
## Twin_Group8 Twin_Group12 Twin_Group13 ADOS
## Down 4132 3818 5197 0
## NotSig 797348 796882 793056 804823
## Up 3343 4123 6570 0
top <- topTable(fit2_tw,coef="ADOS",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg11027325 -0.09472534 -0.471978 -8.360574 2.053380e-06 0.9850646 5.350689
## cg00203621 -0.12362793 2.505613 -6.711278 1.935023e-05 0.9850646 3.204353
## cg23127183 -0.08490578 -2.347118 -6.691836 1.990912e-05 0.9850646 3.176724
## cg05008070 -0.10134930 3.848472 -6.403793 3.053732e-05 0.9850646 2.760720
## cg02380270 -0.13202012 2.736014 -6.398554 3.077904e-05 0.9850646 2.753037
## cg16581085 -0.08611543 4.918151 -6.320836 3.461293e-05 0.9850646 2.638578
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)
## [1] 27604
ann_sub =
ann[,c("chr","pos","strand","Name","Islands_Name",
"Relation_to_Island","UCSC_RefGene_Name","UCSC_RefGene_Group")]
output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
write.csv(output[order(output$P.Value),],
file="ASD_blood_top_dmps_onADOS_withintw_Nov27_limma.csv",row.names=FALSE)
top1000 <- topTable(fit2_tw,coef="ADOS",num=1000, sort.by = "P")
output_1000 <- merge(ann_sub,top1000,by.x="Name",by.y="row.names")
write.csv(output_1000[order(output_1000$P.Value),],
file="ASD_blood_top_dmps_onADOS_Nov27_withintw_limma_top1k.csv",row.names=FALSE)
bDat = ilogit2(mDat)
bDat_new = getBeta(gmSetSqFlt)
#View(bDat)
write.csv(bDat,file="ASD_blood_beta_onADOS_withintw_Nov27.csv",row.names=TRUE)
Extract illumina negative control
#Extract illumina negative control
INCs <- getINCs(rgSet)
head(INCs)
## S1 S2 S29 S30 S9 S10
## 10609447 -1.5065914 -1.24236084 -0.59909678 -0.5831328 -1.5189237 -0.76782656
## 10627500 -1.3285460 -1.07038933 -1.72855335 -0.9713172 -1.0464356 -0.44029029
## 10676356 0.1764266 0.08092000 -0.15869775 0.1255309 -1.1634987 -0.51986747
## 10731326 0.4106323 0.06307868 0.03016197 -0.5590839 0.1761570 -0.05690239
## 10732387 -0.4745385 -0.23967041 -0.53923523 -0.3638468 -0.6445865 -0.76411174
## 10740401 -0.9763874 -0.99001591 -0.47974319 -0.4807351 -0.8479969 -1.15094190
## S15 S16 S19 S20 S25
## 10609447 -0.64566471 -0.21626868 -0.86891672 0.05188674 -0.58686203
## 10627500 -0.81072965 -0.71095527 -0.92926549 -0.64519761 -0.44130018
## 10676356 0.04907599 0.18111268 -0.13445020 -0.23606736 0.65802596
## 10731326 0.58814375 0.59560974 0.01849634 0.20945337 0.14201900
## 10732387 -0.16520259 -0.18442457 -0.69187770 -0.07635089 -0.47175189
## 10740401 -0.68352634 -0.03151097 -0.72662365 -0.19513034 -0.00698643
## S26 S5 S6 S37 S38 S33
## 10609447 -0.75754196 -0.5065013 -0.2752577 -0.73515544 -0.6072612 -1.0923936
## 10627500 -1.17338056 -1.0027298 -0.3681837 -0.67384378 -1.5506926 -0.9083254
## 10676356 -0.46284898 0.2925535 -0.1317057 -0.34421616 0.1687877 -0.2043585
## 10731326 -0.02008425 0.1387641 0.4030221 -0.31633263 -0.0489096 0.7004397
## 10732387 -0.52299675 0.3569345 -0.1934058 -0.46588664 -0.3987969 -0.5360529
## 10740401 0.19826963 -0.3289001 -0.7438520 -0.06737544 -0.3243508 -0.9018196
## S34
## 10609447 -1.232429944
## 10627500 -1.019628807
## 10676356 0.006455022
## 10731326 0.198226110
## 10732387 -0.068386975
## 10740401 -1.190016419
#add negative control data to M-values
Mc <- rbind(mValsSq,INCs)
ctl <- rownames(Mc) %in% rownames(INCs)
table(ctl)
## ctl
## FALSE TRUE
## 804823 411
#old RUV syntax is updated
#rfit1 <- RUVfit(data=Mc, design=design_tw, coef=2, ctl=ctl) # Stage 1 analysis
# this is the updatedcommand - not sure if correct
rfit1 <- RUVfit(Y = Mc, X = design_tw[,"ADOS"] , ctl = ctl) # Stage 1 analysis
rfit2 <- RUVadj(Y = Mc, fit = rfit1)
top1 <- topRUV(rfit2, num=Inf)
head(top1)
## F.p F.p.BH p_X1 p.BH_X1 b_X1 sigma2
## cg24645532 4.032485e-06 0.999984 4.032485e-06 0.999984 -0.06816282 0.04653511
## cg14454440 7.107518e-06 0.999984 7.107518e-06 0.999984 -0.07142256 0.05616710
## cg27066092 1.461790e-05 0.999984 1.461790e-05 0.999984 -0.05999985 0.04486539
## cg16245467 1.792348e-05 0.999984 1.792348e-05 0.999984 0.11659146 0.17558499
## cg07799895 2.314009e-05 0.999984 2.314009e-05 0.999984 -0.08825072 0.10526683
## cg03013727 2.992034e-05 0.999984 2.992034e-05 0.999984 -0.07025319 0.06986695
## var.b_X1 fit.ctl mean
## cg24645532 9.918595e-05 FALSE 2.834280
## cg14454440 1.197158e-04 FALSE 5.001569
## cg27066092 9.562708e-05 FALSE 3.116925
## cg16245467 3.742457e-04 FALSE 1.967496
## cg07799895 2.243680e-04 FALSE 3.320452
## cg03013727 1.489160e-04 FALSE 2.511936
#ctl <- rownames(mValsSq) %in% rownames(top1[top1$p.ebayes > 0.5,])
#bottom 80% used as negative controls
ctl <- rownames(mValsSq) %in% rownames(top1[ceiling(0.2*nrow(top1)):nrow(top1),])
table(ctl)
## ctl
## FALSE TRUE
## 160921 643902
# Perform RUV adjustment and fit
rfit1 <- RUVfit(Y = mValsSq, X=design_tw[,"ADOS"], ctl=ctl) # Stage 2 analysis
rfit2 <- RUVadj(Y = mValsSq, fit=rfit1)
top_ruv <- topRUV(rfit2, number = Inf)
top_ruv_1000 <- topRUV(rfit2, number = 1000)
head(top_ruv_1000)
## F.p F.p.BH p_X1 p.BH_X1 b_X1 sigma2
## cg14454440 3.664309e-06 0.4796448 3.664309e-06 0.4796448 -0.07059431 0.04652333
## cg19944119 4.101964e-06 0.4796448 4.101964e-06 0.4796448 -0.07590162 0.05487436
## cg00656820 4.515240e-06 0.4796448 4.515240e-06 0.4796448 -0.04070223 0.01605325
## cg16245467 1.084968e-05 0.4796448 1.084968e-05 0.4796448 0.10023945 0.11422192
## cg27066092 1.323336e-05 0.4796448 1.323336e-05 0.4796448 -0.05643253 0.03756417
## cg16702666 1.331355e-05 0.4796448 1.331355e-05 0.4796448 -0.04696979 0.02605206
## var.b_X1 fit.ctl mean
## cg14454440 9.173082e-05 FALSE 5.001569
## cg19944119 1.081967e-04 FALSE 3.050748
## cg00656820 3.165246e-05 FALSE 3.014805
## cg16245467 2.252133e-04 FALSE 1.967496
## cg27066092 7.406589e-05 FALSE 3.116925
## cg16702666 5.136729e-05 FALSE 2.449335
#dat_subset_ruv = as.matrix(subset(mValsSq, rownames(mValsSq) %in% rownames(top_ruv_1000)))
#heatmap.2(dat_subset_ruv, cexRow=0.8, cexCol=0.8, col=colorRampPalette(c("Blue","Yellow")), density.info="none", trace="none", scale="row", main="diff meth probes due to epilepsy")
output_ruv = merge(ann,top_ruv,by.x="Name",by.y="row.names")
output_ruv_1000 = merge(ann,top_ruv_1000,by.x="Name",by.y="row.names")
#output_full_epicanno = merge(ann_epic,top,by.x="Name",by.y="row.names")
write.csv(output_ruv[order(output_ruv$F.p),],
file="ASD_blood_topruv80%_dmps_onADOS_withintw_Nov29.csv",
row.names=FALSE)
write.csv(output_ruv_1000[order(output_ruv_1000$F.p),],
file="ASD_blood_topruv80%_1kdmps_onADOS_withintw_Nov29.csv",
row.names=FALSE)
#Madj <- getAdjusted(mValsSw, rfit2)
# cant get this part to work
mValsSq <- mValsSq[which(rownames(mValsSq) %in% colnames(rfit2$Y) ),]
#Madj <- missMethyl::getAdj(Y = mValsSq, fit = rfit2)
plotMDS(mValsSq, labels=targets_gen$Sample_Name,
col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
main="Unadjusted")
#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)
# i had to comment the below stuff out because Madj couldn't be generated.
#plotMDS(Madj, labels=targets_gen$Sample_Name,
# col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
# main="Adjusted: RUV-inverse")
#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)
#Trying different k values
# mark fixed a major bug here
rfit_k1 <- RUVfit(Y=mValsSq, X=design_tw[,"ADOS"], ctl=ctl,
method = "ruv4", k=1) # Stage 2 analysis
rfit_k1 <- RUVadj(Y=mValsSq, fit=rfit_k1)
rfit_k2 <- RUVfit(Y=mValsSq, X=design_tw[,"ADOS"], ctl=ctl,
method = "ruv4", k=2) # Stage 2 analysis
rfit_k2 <- RUVadj(Y=mValsSq, fit=rfit_k2)
rfit_k8 <- RUVfit(Y=mValsSq, X=design_tw[,"ADOS"], ctl=ctl,
method = "ruv4", k=8) # Stage 2 analysis
rfit_k8 <- RUVadj(Y=mValsSq, fit=rfit_k8)
#get adjusted values - doesn't work as at Apr 2022.
#Madj1 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k1)
#Madj2 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k2)
#Madj3 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k8)
#Visualise plots
par(mfrow=c(1,1))
plotMDS(mValsSq, labels=targets_gen$Sample_Name,
col=as.integer(factor(targets_gen$Diagnosis)),
dim=c(14,15),
main="Unadjusted")
#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-inverse")
#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj1, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-4, k=1")
#legend("bottomleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj2, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-4, k=2")
#legend("bottomright",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj3, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-4, k=8")
#legend
Gene Ontology analysis (gometh): top 1000 probes
res <- read.csv("ASD_blood_topruv80%_dmps_onADOS_withintw_Nov29.csv", header = TRUE)
sigCpGs_1k = res$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)
## [1] 804823
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
head( gometh_kegg[order(gometh_kegg$P.DE),] , 20)
## Description N DE
## path:hsa00730 Thiamine metabolism 15 3
## path:hsa04670 Leukocyte transendothelial migration 105 6
## path:hsa00513 Various types of N-glycan biosynthesis 36 3
## path:hsa00780 Biotin metabolism 3 1
## path:hsa04392 Hippo signaling pathway - multiple species 28 3
## path:hsa00280 Valine, leucine and isoleucine degradation 47 3
## path:hsa01232 Nucleotide metabolism 78 4
## path:hsa04714 Thermogenesis 209 8
## path:hsa04977 Vitamin digestion and absorption 23 2
## path:hsa04213 Longevity regulating pathway - multiple species 61 4
## path:hsa04530 Tight junction 161 7
## path:hsa00650 Butanoate metabolism 27 2
## path:hsa04211 Longevity regulating pathway 88 5
## path:hsa01240 Biosynthesis of cofactors 147 5
## path:hsa04360 Axon guidance 177 9
## path:hsa04726 Serotonergic synapse 108 5
## path:hsa05130 Pathogenic Escherichia coli infection 187 7
## path:hsa04921 Oxytocin signaling pathway 151 7
## path:hsa00920 Sulfur metabolism 10 1
## path:hsa00071 Fatty acid degradation 41 2
## P.DE FDR
## path:hsa00730 0.004796095 1
## path:hsa04670 0.038727306 1
## path:hsa00513 0.045978414 1
## path:hsa00780 0.058735766 1
## path:hsa04392 0.058738197 1
## path:hsa00280 0.061602773 1
## path:hsa01232 0.064266960 1
## path:hsa04714 0.067722817 1
## path:hsa04977 0.074304530 1
## path:hsa04213 0.085670541 1
## path:hsa04530 0.087493572 1
## path:hsa00650 0.089462339 1
## path:hsa04211 0.092624247 1
## path:hsa01240 0.100150558 1
## path:hsa04360 0.105557111 1
## path:hsa04726 0.111717778 1
## path:hsa05130 0.116953903 1
## path:hsa04921 0.122967997 1
## path:hsa00920 0.138142049 1
## path:hsa00071 0.145792842 1
write.csv(gometh_kegg, file = "GOmeth_kegg.csv", row.names = TRUE)
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
head( gometh_go[order(gometh_go$P.DE),] , 20)
## ONTOLOGY
## GO:0002092 BP
## GO:0004017 MF
## GO:0030860 BP
## GO:0050145 MF
## GO:0016339 BP
## GO:0005845 CC
## GO:0098927 BP
## GO:0034518 CC
## GO:0046940 BP
## GO:0005547 MF
## GO:0140552 CC
## GO:0000340 MF
## GO:0004550 MF
## GO:0060319 BP
## GO:0006172 BP
## GO:0009180 BP
## GO:0061502 BP
## GO:0140326 MF
## GO:0048260 BP
## GO:0009188 BP
## TERM
## GO:0002092 positive regulation of receptor internalization
## GO:0004017 adenylate kinase activity
## GO:0030860 regulation of polarized epithelial cell differentiation
## GO:0050145 nucleoside monophosphate kinase activity
## GO:0016339 calcium-dependent cell-cell adhesion via plasma membrane cell adhesion molecules
## GO:0005845 mRNA cap binding complex
## GO:0098927 vesicle-mediated transport between endosomal compartments
## GO:0034518 RNA cap binding complex
## GO:0046940 nucleoside monophosphate phosphorylation
## GO:0005547 phosphatidylinositol-3,4,5-trisphosphate binding
## GO:0140552 TEAD-YAP complex
## GO:0000340 RNA 7-methylguanosine cap binding
## GO:0004550 nucleoside diphosphate kinase activity
## GO:0060319 primitive erythrocyte differentiation
## GO:0006172 ADP biosynthetic process
## GO:0009180 purine ribonucleoside diphosphate biosynthetic process
## GO:0061502 early endosome to recycling endosome transport
## GO:0140326 ATPase-coupled intramembrane lipid transporter activity
## GO:0048260 positive regulation of receptor-mediated endocytosis
## GO:0009188 ribonucleoside diphosphate biosynthetic process
## N DE P.DE FDR
## GO:0002092 22 5 0.0002580819 1
## GO:0004017 7 3 0.0005837699 1
## GO:0030860 2 2 0.0010532279 1
## GO:0050145 18 4 0.0012740494 1
## GO:0016339 42 5 0.0016337224 1
## GO:0005845 10 3 0.0017904495 1
## GO:0098927 41 5 0.0022232794 1
## GO:0034518 11 3 0.0022264662 1
## GO:0046940 12 3 0.0022372312 1
## GO:0005547 36 5 0.0026812922 1
## GO:0140552 3 2 0.0031990802 1
## GO:0000340 12 3 0.0043298628 1
## GO:0004550 18 3 0.0043575537 1
## GO:0060319 3 2 0.0046016985 1
## GO:0006172 5 2 0.0050089050 1
## GO:0009180 5 2 0.0050089050 1
## GO:0061502 4 2 0.0052101818 1
## GO:0140326 24 4 0.0063256563 1
## GO:0048260 47 5 0.0064264754 1
## GO:0009188 6 2 0.0064602730 1
write.csv(gometh_go, file = "GOmeth_go.csv", row.names = TRUE)
#Plotting effect sizes of top DMPs
plot(ilogit2(mDat[rownames(mDat) == "cg11027325",]), ADOS,
main = "HES6: cg11027325", ylab = "ADOS z-score", xlab = "Beta Value")
regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg11027325",]))
abline(regl5)
cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg11027325",]), method = "pearson")
##
## Pearson's product-moment correlation
##
## data: ADOS and ilogit2(mDat[rownames(mDat) == "cg11027325", ])
## t = -0.2059, df = 16, p-value = 0.8395
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5061304 0.4256788
## sample estimates:
## cor
## -0.05140787
plot(ilogit2(mDat[rownames(mDat) == "cg20507028",]), ADOS,
main = "ZNF713: cg20507028", ylab = "ADOS z-score", xlab = "Beta Value")
regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg20507028",]))
abline(regl5)
cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg20507028",]), method = "pearson")
##
## Pearson's product-moment correlation
##
## data: ADOS and ilogit2(mDat[rownames(mDat) == "cg20507028", ])
## t = 0.081723, df = 16, p-value = 0.9359
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4507421 0.4826934
## sample estimates:
## cor
## 0.02042652
plot(ilogit2(mDat[rownames(mDat) == "cg21244116",]), ADOS,
main = "EHMT1: cg21244116", ylab = "ADOS z-score", xlab = "Beta Value")
regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg21244116",]))
abline(regl5)
cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg21244116",]), method = "pearson")
##
## Pearson's product-moment correlation
##
## data: ADOS and ilogit2(mDat[rownames(mDat) == "cg21244116", ])
## t = 2.2889, df = 16, p-value = 0.03602
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03877897 0.78216357
## sample estimates:
## cor
## 0.4966572
plot(ilogit2(mDat[rownames(mDat) == "cg21241156",]), ADOS,
main = "TLE4: cg21241156", ylab = "ADOS z-score", xlab = "Beta Value")
regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg21241156",]))
abline(regl5)
cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg21241156",]), method = "pearson")
##
## Pearson's product-moment correlation
##
## data: ADOS and ilogit2(mDat[rownames(mDat) == "cg21241156", ])
## t = -1.6205, df = 16, p-value = 0.1247
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7167153 0.1108063
## sample estimates:
## cor
## -0.3754886
#Effect sizes of differences in ADOS score versus difference in beta value wthin twin pairs
diff_ADOS_1 = abs(ADOS[1] - ADOS[2])
diff_ADOS_2 = abs(ADOS[3] - ADOS[4])
diff_ADOS_3 = abs(ADOS[5] - ADOS[6])
diff_ADOS_4 = abs(ADOS[7] - ADOS[8])
diff_ADOS_5 = abs(ADOS[9] - ADOS[10])
diff_ADOS_6 = abs(ADOS[11] - ADOS[12])
diff_ADOS_7 = abs(ADOS[13] - ADOS[14])
diff_ADOS_8 = abs(ADOS[15] - ADOS[16])
diff_ADOS_9 = abs(ADOS[17] - ADOS[18])
diff_ADOS_score = c(9,4,3,7,5,8,2,5,12)
#diff_ADOS_score = c(diff_ADOS_1+diff_ADOS_2+diff_ADOS_3+diff_ADOS_4+diff_ADOS_5+diff_ADOS_6+diff_ADOS_7+diff_ADOS_8+diff_ADOS_9)
#for (i in 1:length(ADOS))
#{
# diff_ADOS_score<-abs(ADOS[i] - ADOS[i+1])
# i = i +1
#diff_ADOS_score <- c(diff_ADOS_score, diff_ADOS_score[i])
#}
#print(diff_ADOS_score)
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg21241156"][1]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg21241156"][3]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg21241156"][5]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg21241156"][7]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg21241156"][9]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg21241156"][11]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg21241156"][13]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg21241156"][15]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg21241156"][17]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][18])
diff_effectsize_tot = c(0.02383603, 0.01913442, -0.0003221395, 0.07068035, -0.05522228,
0.05076427, 0.002702124, -0.03403429, 0.06949949)
diff_effectsize_tot
## [1] 0.0238360300 0.0191344200 -0.0003221395 0.0706803500 -0.0552222800
## [6] 0.0507642700 0.0027021240 -0.0340342900 0.0694994900
#TLE4: cg21241156
plot(diff_effectsize_tot, diff_ADOS_score, main = "TLE4: cg21241156",
ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot)
abline(regl5)
cor.test(diff_ADOS_score, diff_effectsize_tot, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: diff_ADOS_score and diff_effectsize_tot
## t = 2.0866, df = 7, p-value = 0.07535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0762115 0.9093826
## sample estimates:
## cor
## 0.6192529
#EHMT1: cg21244116
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg21244116"][1]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg21244116"][3]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg21244116"][5]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg21244116"][7]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg21244116"][9]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg21244116"][11]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg21244116"][13]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg21244116"][15]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg21244116"][17]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][18])
diff_effectsize_tot_cg21244116 = c(-0.0340176, -0.001041366, 0.02669383, -0.02422778, 0.03996136,
-0.03206927, -0.004060284, 0.019002, -0.04277449)
diff_effectsize_tot_cg21244116
## [1] -0.034017600 -0.001041366 0.026693830 -0.024227780 0.039961360
## [6] -0.032069270 -0.004060284 0.019002000 -0.042774490
plot(diff_effectsize_tot_cg21244116, diff_ADOS_score, main = "EHMT1: cg21244116",
ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot_cg21244116)
abline(regl5)
cor.test(diff_ADOS_score, diff_effectsize_tot_cg21244116, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: diff_ADOS_score and diff_effectsize_tot_cg21244116
## t = -2.9788, df = 7, p-value = 0.02055
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9433676 -0.1659451
## sample estimates:
## cor
## -0.747668
#ZNF713: cg20507028
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg20507028"][1]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg20507028"][3]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg20507028"][5]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg20507028"][7]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg20507028"][9]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg20507028"][11]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg20507028"][13]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg20507028"][15]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg20507028"][17]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][18])
diff_effectsize_tot_cg20507028 = c(-0.005014283, -0.04933726, 0.0541018, -0.06395128, 0.07563989,
-0.1120907, 0.004739958, 0.01840117, -0.06681291)
diff_effectsize_tot_cg20507028
## [1] -0.005014283 -0.049337260 0.054101800 -0.063951280 0.075639890
## [6] -0.112090700 0.004739958 0.018401170 -0.066812910
plot(diff_effectsize_tot_cg20507028, diff_ADOS_score, main = "ZNF713: cg20507028",
ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot_cg20507028)
abline(regl5)
cor.test(diff_ADOS_score, diff_effectsize_tot_cg20507028, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: diff_ADOS_score and diff_effectsize_tot_cg20507028
## t = -1.754, df = 7, p-value = 0.1229
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8900562 0.1762479
## sample estimates:
## cor
## -0.5525498
#HES6: cg11027325
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg11027325"][1]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg11027325"][3]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg11027325"][5]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg11027325"][7]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg11027325"][9]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg11027325"][11]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg11027325"][13]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg11027325"][15]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg11027325"][17]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][18])
diff_effectsize_tot_cg11027325 = c(0.1334995, 0.02969043, -0.04247427, 0.1107637, -0.03854689,
0.1779549, 0.007191577, -0.03854834, 0.1683442)
diff_effectsize_tot_cg11027325
## [1] 0.133499500 0.029690430 -0.042474270 0.110763700 -0.038546890
## [6] 0.177954900 0.007191577 -0.038548340 0.168344200
plot(diff_effectsize_tot_cg11027325, diff_ADOS_score, main = "HES6: cg11027325",
ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot_cg11027325)
abline(regl5)
cor.test(diff_ADOS_score, diff_effectsize_tot_cg11027325, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: diff_ADOS_score and diff_effectsize_tot_cg11027325
## t = 3.9556, df = 7, p-value = 0.005493
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3729744 0.9634707
## sample estimates:
## cor
## 0.831206
#design matrix in regression
design_tw <- model.matrix(~ADOS+Twin_Group)
design_tw_dmrc <- model.matrix(~ADOS)
#myannotation <- cpg.annotate("array", mDat, analysis.type="differential", design=design_tw, coef=2, fdr=1)
myannotation <- cpg.annotate("array", mDat, what = "M",
arraytype = "EPIC", analysis.type="differential", design_tw, coef=2)
## Your contrast returned no individually significant probes. Try increasing the fdr. Alternatively, set pcutoff manually in dmrcate() to return DMRs, but be warned there is an increased risk of Type I errors.
dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2, pcutoff=0.001)
## Fitting chr1...
## Fitting chr2...
## Fitting chr3...
## Fitting chr4...
## Fitting chr5...
## Fitting chr6...
## Fitting chr7...
## Fitting chr8...
## Fitting chr9...
## Fitting chr10...
## Fitting chr11...
## Fitting chr12...
## Fitting chr13...
## Fitting chr14...
## Fitting chr15...
## Fitting chr16...
## Fitting chr17...
## Fitting chr18...
## Fitting chr19...
## Fitting chr20...
## Fitting chr21...
## Fitting chr22...
## Fitting chrX...
## Fitting chrY...
## Demarcating regions...
## Done!
results.ranges <- data.frame(extractRanges(dmrcoutput, genome = "hg19") )
## snapshotDate(): 2021-05-18
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache
head(results.ranges, 20)
## seqnames start end width strand no.cpgs min_smoothed_fdr Stouffer
## 1 chr1 4794912 4795012 101 * 2 5.548416e-04 0.9989347
## 2 chr1 191843335 191843399 65 * 2 2.567107e-04 0.9989347
## 3 chr3 54122032 54122146 115 * 2 5.222949e-04 0.9989347
## 4 chr4 22445207 22445349 143 * 2 3.838120e-05 0.9989347
## 5 chr8 110693461 110693859 399 * 2 5.445921e-09 0.9989347
## 6 chr9 98236338 98236406 69 * 2 2.612195e-04 0.9989347
## 7 chr11 62106739 62106872 134 * 2 2.079725e-04 0.9989347
## 8 chr11 125479352 125479362 11 * 2 2.803063e-06 0.9989347
## 9 chr14 107253273 107253290 18 * 2 5.522862e-04 0.9989347
## 10 chr15 43309399 43309540 142 * 2 4.946098e-04 0.9989347
## 11 chr3 3213603 3213634 32 * 2 9.786746e-05 0.9992349
## 12 chr7 157162115 157162202 88 * 2 2.254582e-04 0.9998091
## 13 chr3 113249814 113250232 419 * 3 1.561141e-04 0.9999156
## 14 chr7 55954891 55954957 67 * 3 5.053488e-04 0.9999156
## 15 chr9 140729241 140729712 472 * 3 6.616449e-06 0.9999156
## 16 chr16 85465589 85465657 69 * 3 1.327067e-04 0.9999478
## 17 chr8 141046436 141046852 417 * 5 3.728981e-04 0.9999994
## 18 chr6 28945189 28945507 319 * 7 1.327067e-04 1.0000000
## 19 chr11 57508449 57508856 408 * 8 1.327067e-04 1.0000000
## 20 chr11 5617367 5618023 657 * 8 9.786746e-05 1.0000000
## HMFDR Fisher maxdiff meandiff
## 1 0.9850646 0.9995561 -0.006211006 -0.0048567106
## 2 0.9850646 0.9995561 -0.009731567 -0.0088596958
## 3 0.9850646 0.9995561 -0.008676342 -0.0036259846
## 4 0.9850646 0.9995561 -0.005127505 -0.0009542798
## 5 0.9850646 0.9995561 -0.011266044 -0.0050649373
## 6 0.9850646 0.9995561 -0.007524800 -0.0032620071
## 7 0.9850646 0.9995561 -0.007182332 -0.0049541347
## 8 0.9850646 0.9995561 0.009127710 0.0029452176
## 9 0.9850646 0.9995561 -0.012064556 -0.0098097668
## 10 0.9850646 0.9995561 -0.004550075 -0.0031806926
## 11 0.9873008 0.9996790 -0.003951027 -0.0018703955
## 12 0.9914051 0.9998534 -0.003801339 -0.0019083301
## 13 0.9850646 0.9999852 -0.005270309 -0.0033645183
## 14 0.9850646 0.9999852 0.007225873 0.0037528467
## 15 0.9850646 0.9999852 0.003916152 0.0004668367
## 16 0.9871267 0.9999905 -0.001981089 -0.0014041778
## 17 0.9850646 1.0000000 -0.009159352 -0.0048570805
## 18 0.9859176 1.0000000 -0.006709371 -0.0044028435
## 19 0.9850646 1.0000000 -0.007194960 0.0002647476
## 20 0.9851186 1.0000000 -0.004561438 0.0002026570
## overlapping.genes
## 1 AJAP1
## 2 RP11-541F9.1
## 3 <NA>
## 4 GPR125
## 5 SYBU
## 6 PTCH1
## 7 ASRGL1
## 8 STT3A
## 9 <NA>
## 10 UBR1
## 11 CRBN
## 12 DNAJB6
## 13 <NA>
## 14 <NA>
## 15 EHMT1
## 16 <NA>
## 17 TRAPPC9
## 18 RN7SL471P
## 19 TMX2-CTNND1, C11orf31
## 20 TRIM6, TRIM6-TRIM34, HBG2, AC015691.13
write.csv(results.ranges, file = "dmrcoutput.csv", row.names = TRUE)
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] DMRcatedata_2.10.0
## [2] ExperimentHub_2.0.0
## [3] AnnotationHub_3.0.2
## [4] BiocFileCache_2.0.0
## [5] dbplyr_2.1.1
## [6] DMRcate_2.6.0
## [7] ggplot2_3.3.5
## [8] reshape2_1.4.4
## [9] dplyr_1.0.8
## [10] IlluminaHumanMethylation450kmanifest_0.4.0
## [11] IlluminaHumanMethylationEPICmanifest_0.3.0
## [12] FlowSorted.Blood.450k_1.30.0
## [13] WGCNA_1.70-3
## [14] fastcluster_1.2.3
## [15] dynamicTreeCut_1.63-1
## [16] gplots_3.1.1
## [17] RColorBrewer_1.1-2
## [18] ruv_0.9.7.1
## [19] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [20] limma_3.48.3
## [21] missMethyl_1.26.1
## [22] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [23] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [24] minfi_1.38.0
## [25] bumphunter_1.34.0
## [26] locfit_1.5-9.5
## [27] iterators_1.0.14
## [28] foreach_1.5.2
## [29] Biostrings_2.60.2
## [30] XVector_0.32.0
## [31] SummarizedExperiment_1.22.0
## [32] Biobase_2.52.0
## [33] MatrixGenerics_1.4.3
## [34] matrixStats_0.61.0
## [35] GenomicRanges_1.44.0
## [36] GenomeInfoDb_1.28.4
## [37] IRanges_2.26.0
## [38] S4Vectors_0.30.2
## [39] BiocGenerics_0.38.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.52.1
## [3] R.methodsS3_1.8.1 tidyr_1.2.0
## [5] bit64_4.0.5 knitr_1.37
## [7] DelayedArray_0.18.0 R.utils_2.11.0
## [9] data.table_1.14.2 rpart_4.1.16
## [11] KEGGREST_1.32.0 RCurl_1.98-1.6
## [13] GEOquery_2.60.0 AnnotationFilter_1.16.0
## [15] doParallel_1.0.17 generics_0.1.2
## [17] GenomicFeatures_1.44.2 preprocessCore_1.54.0
## [19] RSQLite_2.2.11 bit_4.0.4
## [21] tzdb_0.2.0 xml2_1.3.3
## [23] httpuv_1.6.5 assertthat_0.2.1
## [25] xfun_0.30 hms_1.1.1
## [27] jquerylib_0.1.4 evaluate_0.15
## [29] promises_1.2.0.1 fansi_1.0.2
## [31] restfulr_0.0.13 scrime_1.3.5
## [33] progress_1.2.2 caTools_1.18.2
## [35] readxl_1.3.1 DBI_1.1.2
## [37] htmlwidgets_1.5.4 reshape_0.8.8
## [39] purrr_0.3.4 ellipsis_0.3.2
## [41] backports_1.4.1 permute_0.9-7
## [43] annotate_1.70.0 biomaRt_2.48.3
## [45] sparseMatrixStats_1.4.2 vctrs_0.3.8
## [47] ensembldb_2.16.4 cachem_1.0.6
## [49] withr_2.5.0 Gviz_1.36.2
## [51] BSgenome_1.60.0 checkmate_2.0.0
## [53] GenomicAlignments_1.28.0 prettyunits_1.1.1
## [55] mclust_5.4.9 cluster_2.1.2
## [57] lazyeval_0.2.2 crayon_1.5.0
## [59] genefilter_1.74.1 edgeR_3.34.1
## [61] pkgconfig_2.0.3 labeling_0.4.2
## [63] nlme_3.1-155 ProtGenerics_1.24.0
## [65] nnet_7.3-17 rlang_1.0.2
## [67] lifecycle_1.0.1 filelock_1.0.2
## [69] dichromat_2.0-0 cellranger_1.1.0
## [71] rngtools_1.5.2 base64_2.0
## [73] Matrix_1.4-1 Rhdf5lib_1.14.2
## [75] base64enc_0.1-3 png_0.1-7
## [77] rjson_0.2.21 bitops_1.0-7
## [79] R.oo_1.24.0 KernSmooth_2.23-20
## [81] rhdf5filters_1.4.0 blob_1.2.2
## [83] DelayedMatrixStats_1.14.3 doRNG_1.8.2
## [85] stringr_1.4.0 nor1mix_1.3-0
## [87] readr_2.1.2 jpeg_0.1-9
## [89] scales_1.1.1 memoise_2.0.1
## [91] magrittr_2.0.2 plyr_1.8.6
## [93] zlibbioc_1.38.0 compiler_4.1.3
## [95] BiocIO_1.2.0 illuminaio_0.34.0
## [97] Rsamtools_2.8.0 cli_3.2.0
## [99] DSS_2.40.0 htmlTable_2.4.0
## [101] Formula_1.2-4 MASS_7.3-56
## [103] tidyselect_1.1.2 stringi_1.7.6
## [105] highr_0.9 yaml_2.3.5
## [107] askpass_1.1 latticeExtra_0.6-29
## [109] grid_4.1.3 sass_0.4.1
## [111] VariantAnnotation_1.38.0 tools_4.1.3
## [113] rstudioapi_0.13 foreign_0.8-82
## [115] bsseq_1.28.0 gridExtra_2.3
## [117] farver_2.1.0 digest_0.6.29
## [119] BiocManager_1.30.16 shiny_1.7.1
## [121] quadprog_1.5-8 Rcpp_1.0.8.3
## [123] siggenes_1.66.0 BiocVersion_3.13.1
## [125] later_1.3.0 org.Hs.eg.db_3.13.0
## [127] httr_1.4.2 AnnotationDbi_1.54.1
## [129] biovizBase_1.40.0 colorspace_2.0-3
## [131] XML_3.99-0.9 splines_4.1.3
## [133] statmod_1.4.36 multtest_2.48.0
## [135] xtable_1.8-4 jsonlite_1.8.0
## [137] R6_2.5.1 Hmisc_4.6-0
## [139] pillar_1.7.0 htmltools_0.5.2
## [141] mime_0.12 glue_1.6.2
## [143] fastmap_1.1.0 BiocParallel_1.26.2
## [145] interactiveDisplayBase_1.30.0 beanplot_1.2
## [147] codetools_0.2-18 utf8_1.2.2
## [149] lattice_0.20-45 bslib_0.3.1
## [151] tibble_3.1.6 curl_4.3.2
## [153] BiasedUrn_1.07 gtools_3.9.2
## [155] GO.db_3.13.0 openssl_2.0.0
## [157] survival_3.3-1 rmarkdown_2.13
## [159] munsell_0.5.0 rhdf5_2.36.0
## [161] GenomeInfoDbData_1.2.6 HDF5Array_1.20.0
## [163] impute_1.66.0 gtable_0.3.0