Source: https://github.com/markziemann/asd_meth

Introduction

Here we are analysing blood methylation in 9 twin pairs.

baseDir=getwd()
dataDir=paste(baseDir,"/ASD_EPIC_DATA",sep="")
library("missMethyl")
library("limma")
library("minfi")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("IlluminaHumanMethylationEPICanno.ilm10b2.hg19")
library("ruv")
library("RColorBrewer")
library("matrixStats")
library("gplots")
library("WGCNA")
library("FlowSorted.Blood.450k")
library("reshape2")
library("ggplot2")
library("missMethyl")
library("DMRcate")

Load data

Load the annotation data and the Epic methylation data.

This analysis is to be conducted on Ubuntu with R4.

ann = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
targets_gen = read.metharray.sheet(dataDir, pattern = "ASD_blood_summarysheet.csv")

## [read.metharray.sheet] Found the following CSV files:

## [1] "/mnt/data/mdz/projects/asd_meth/ASD_EPIC_DATA/ASD_blood_summarysheet.csv"

#targets$ID = paste(targets$Sample_Group,targets_gen$Sample_Name,sep=".")
rgSet = read.metharray.exp(targets = targets_gen)

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

sampleNames(rgSet) = targets_gen$SampleID

Testing MZ status

snpBetas = getSnpBeta(rgSet)
d = dist(t(snpBetas))
hr = hclust(d, method = "complete", members=NULL)
plot(hr)

Build new target removing controls

#removepair1 = !targets$Twin_Group %in% c("Q020","Q026", "Q034", "Q038", "Q043", "Q044") #removepairtest = !targets$Sample_Group %in% “0” #targets = targets[removepairtest,] #rgSet = read.metharray.exp(targets = targets) #sampleNames(rgSet) = targets$ID

Quality control

detP = detectionP(rgSet)
qcReport(rgSet, sampNames = targets_gen$Sample_Name, pdf="qc-report_ASD_blood_nov27.pdf")

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## X11cairo 
##        2

cols=brewer.pal(4,"Set1")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,ylim=c(0,0.010),
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

Preprocessing

mset.raw = preprocessRaw(rgSet)

Data exploration

Using Multi-dimensional scaling (MDS) plots before filtering.

mdsPlot(mset.raw, sampGroups = targets_gen$Sample_Name, 
  sampNames=targets_gen$Social_interaction_on_ADOS,legendPos="bottom")

mdsPlot(mset.raw, sampGroups = targets_gen$Sex, 
  sampNames=targets_gen$SampleID,legendPos="bottom")

Normalisation

SQN is used as it gave a more stable result.

mSetSw = preprocessSWAN(rgSet)

densityPlot(getBeta(mSetSw),main="SWAN")

#SQN method
mSetSq = preprocessQuantile(rgSet)

## [preprocessQuantile] Mapping to genome.

## [preprocessQuantile] Fixing outliers.

## [preprocessQuantile] Quantile normalizing.

densityPlot(rgSet, sampGroups = targets_gen$Social_interaction_on_ADOS,
  main="Raw", legend = FALSE)

densityPlot(getBeta(mSetSw), 
  sampGroups = targets_gen$Social_interaction_on_ADOS,main="SWAN", 
  legend = FALSE)

densityPlot(getBeta(mSetSq), 
  sampGroups = targets_gen$Social_interaction_on_ADOS,main="SQN", 
  legend = FALSE)

par(mfrow=c(1,1))

Cell type composition analysis

Need to consider using estimatecellcounts2.

rgSet$Slide <- as.numeric(rgSet$Slide)
rgSet$Sex<- as.character(rgSet$Sex)
rgSet$Sample_Name<- as.character(rgSet$Sample_Name)

cellCounts_new <- estimateCellCounts(rgSet, compositeCellType = "Blood", 
  processMethod = "auto", probeSelect = "auto", 
  cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono","Gran"),
  referencePlatform = c("IlluminaHumanMethylation450k"),
  returnAll = FALSE, meanPlot = TRUE)

## [estimateCellCounts] Combining user data with reference (flow sorted) data.

## Warning in DataFrame(sampleNames = c(colnames(rgSet),
## colnames(referenceRGset)), : 'stringsAsFactors' is ignored

## [estimateCellCounts] Processing user and reference data together.

## [preprocessQuantile] Mapping to genome.

## [preprocessQuantile] Fixing outliers.

## [preprocessQuantile] Quantile normalizing.

## [estimateCellCounts] Picking probes for composition estimation.

## [estimateCellCounts] Estimating composition.

#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1, targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) + 
  geom_boxplot()

wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )

## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties

names(wx) <- colnames(cellCounts_new)
wx

## $CD8T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 12, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 13, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  a[, i] and c[, i]
## W = 16, p-value = 0.9317
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 18, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 23, p-value = 0.2168
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Gran
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 15, p-value = 1
## alternative hypothesis: true location shift is not equal to 0

Filtering

None of the p-values are less than 0.05.

Now filter for high detection p-value and overlap with SNP.

Need to get a copy of the Xreact probes from Namitha. In the mean time I have downloaded from github - link below.

detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]
gmSetSw <- mapToGenome(mSetSw)

#remove SNPs
gmSetSqFlt = dropLociWithSnps(gmSetSw, snps = c("CpG", "SBE"))

#Removing cross-reactive probes
Xreact <- read.csv("https://raw.githubusercontent.com/sirselim/illumina450k_filtering/master/EPIC/13059_2016_1066_MOESM1_ESM.csv")

#Xreact = read.csv(file="/group/canc2/puumba/Data/InfiniumData/NamithaData/Rprojects/Autism/Analysis_Sept11/EPIC_850k_crossreactiveProbes.csv", stringsAsFactors=FALSE)
#Xreact = read.csv(file="~/48639-non-specific-probes-Illumina450k.csv", stringsAsFactors=FALSE)
noXreact <-  !(featureNames(gmSetSw) %in% Xreact$X)

gmSetSw <- gmSetSw[noXreact,]

#Removing probes on X and Y chromosomes
autosomes <- !(featureNames(gmSetSw) %in% ann$Name[ann$chr %in% c("chrX","chrY")])
gmSetSqFlt <- gmSetSq[autosomes,]

#Relative log expression (RLE plot)
mValsSq = getM(gmSetSqFlt)
medSq = apply(mValsSq, 1, median)
YSq = mValsSq - medSq

boxplot(YSq,outline=FALSE,ylim=c(-1.5,1.5), 
  ylab="Relative Log Methylation Value", 
  cols=as.character(factor(targets_gen$Social_interaction_on_ADOS,)) )

MDS plots generation after filtering

pal = brewer.pal(8, "Dark2")
mds1Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(1,2))

mds2Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(1,3))

mds3Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(2,3))

mds4Sq = plotMDS(mValsSq, top=1000, gene.selection="common",dim.plot=c(3,4))

plotMDS(mds1Sq, xlab="Dimension 1", ylab="Dimension 2",
  col=pal[as.factor(targets_gen$Diagnosis)],
  dim=c(1,2), labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds2Sq, xlab="Dimension 1", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(1,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds3Sq, xlab="Dimension 2", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(2,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds4Sq, xlab="Dimension 3", ylab="Dimension 4",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(3,4),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

Principal Component Analysis (PCA)

fit <- prcomp(t(mValsSq),center = TRUE, scale = TRUE,retx=TRUE)
loadings = fit$x
plot(fit,type="lines")

nGenes = nrow(mValsSq)
nSamples = ncol(mValsSq)
datTraits = targets_gen[,7:15]
moduleTraitCor = cor(loadings[,1:6], datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
par(cex=0.75, mar = c(6, 8.5, 3, 3))
textMatrix = paste(signif(moduleTraitCor, 2), "\n(", 
  signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

Regression analysis - within twin pair on ADOS

ADOS is the outcome variable (continuous)

mDat = mValsSq
Twin_Group <- factor(targets_gen$Family_ID)
ADOS <- targets_gen$Social_interaction_on_ADOS

design_tw <- model.matrix(~ Twin_Group + ADOS )

fit_tw <- lmFit(mDat, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        253400        9674        4684        5694        3585        4023
## NotSig       47732      779803      796131      793604      797008      797082
## Up          503691       15346        4008        5525        4230        3718
##        Twin_Group8 Twin_Group12 Twin_Group13   ADOS
## Down          4132         3818         5197      0
## NotSig      797348       796882       793056 804823
## Up            3343         4123         6570      0

top <- topTable(fit2_tw,coef="ADOS",num=Inf, sort.by = "P")
head(top)

##                  logFC   AveExpr         t      P.Value adj.P.Val        B
## cg11027325 -0.09472534 -0.471978 -8.360574 2.053380e-06 0.9850646 5.350689
## cg00203621 -0.12362793  2.505613 -6.711278 1.935023e-05 0.9850646 3.204353
## cg23127183 -0.08490578 -2.347118 -6.691836 1.990912e-05 0.9850646 3.176724
## cg05008070 -0.10134930  3.848472 -6.403793 3.053732e-05 0.9850646 2.760720
## cg02380270 -0.13202012  2.736014 -6.398554 3.077904e-05 0.9850646 2.753037
## cg16581085 -0.08611543  4.918151 -6.320836 3.461293e-05 0.9850646 2.638578

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 27604

ann_sub = 
  ann[,c("chr","pos","strand","Name","Islands_Name",
    "Relation_to_Island","UCSC_RefGene_Name","UCSC_RefGene_Group")]

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")

write.csv(output[order(output$P.Value),],
  file="ASD_blood_top_dmps_onADOS_withintw_Nov27_limma.csv",row.names=FALSE)

top1000 <- topTable(fit2_tw,coef="ADOS",num=1000, sort.by = "P")
output_1000 <- merge(ann_sub,top1000,by.x="Name",by.y="row.names")

write.csv(output_1000[order(output_1000$P.Value),],
  file="ASD_blood_top_dmps_onADOS_Nov27_withintw_limma_top1k.csv",row.names=FALSE)

Converting to beta from M values

bDat = ilogit2(mDat)
bDat_new = getBeta(gmSetSqFlt)
#View(bDat)
write.csv(bDat,file="ASD_blood_beta_onADOS_withintw_Nov27.csv",row.names=TRUE)

RUV

Extract illumina negative control

#Extract illumina negative control
INCs <- getINCs(rgSet)
head(INCs)

##                  S1          S2         S29        S30         S9         S10
## 10609447 -1.5065914 -1.24236084 -0.59909678 -0.5831328 -1.5189237 -0.76782656
## 10627500 -1.3285460 -1.07038933 -1.72855335 -0.9713172 -1.0464356 -0.44029029
## 10676356  0.1764266  0.08092000 -0.15869775  0.1255309 -1.1634987 -0.51986747
## 10731326  0.4106323  0.06307868  0.03016197 -0.5590839  0.1761570 -0.05690239
## 10732387 -0.4745385 -0.23967041 -0.53923523 -0.3638468 -0.6445865 -0.76411174
## 10740401 -0.9763874 -0.99001591 -0.47974319 -0.4807351 -0.8479969 -1.15094190
##                  S15         S16         S19         S20         S25
## 10609447 -0.64566471 -0.21626868 -0.86891672  0.05188674 -0.58686203
## 10627500 -0.81072965 -0.71095527 -0.92926549 -0.64519761 -0.44130018
## 10676356  0.04907599  0.18111268 -0.13445020 -0.23606736  0.65802596
## 10731326  0.58814375  0.59560974  0.01849634  0.20945337  0.14201900
## 10732387 -0.16520259 -0.18442457 -0.69187770 -0.07635089 -0.47175189
## 10740401 -0.68352634 -0.03151097 -0.72662365 -0.19513034 -0.00698643
##                  S26         S5         S6         S37        S38        S33
## 10609447 -0.75754196 -0.5065013 -0.2752577 -0.73515544 -0.6072612 -1.0923936
## 10627500 -1.17338056 -1.0027298 -0.3681837 -0.67384378 -1.5506926 -0.9083254
## 10676356 -0.46284898  0.2925535 -0.1317057 -0.34421616  0.1687877 -0.2043585
## 10731326 -0.02008425  0.1387641  0.4030221 -0.31633263 -0.0489096  0.7004397
## 10732387 -0.52299675  0.3569345 -0.1934058 -0.46588664 -0.3987969 -0.5360529
## 10740401  0.19826963 -0.3289001 -0.7438520 -0.06737544 -0.3243508 -0.9018196
##                   S34
## 10609447 -1.232429944
## 10627500 -1.019628807
## 10676356  0.006455022
## 10731326  0.198226110
## 10732387 -0.068386975
## 10740401 -1.190016419

#add negative control data to M-values
Mc <- rbind(mValsSq,INCs)
ctl <- rownames(Mc) %in% rownames(INCs)
table(ctl)

## ctl
##  FALSE   TRUE 
## 804823    411

#old RUV syntax is updated
#rfit1 <- RUVfit(data=Mc, design=design_tw, coef=2, ctl=ctl) # Stage 1 analysis

# this is the updatedcommand - not sure if correct
rfit1 <- RUVfit(Y = Mc, X = design_tw[,"ADOS"] , ctl = ctl) # Stage 1 analysis

rfit2 <- RUVadj(Y = Mc, fit = rfit1)

top1 <- topRUV(rfit2, num=Inf)

head(top1)

##                     F.p   F.p.BH         p_X1  p.BH_X1        b_X1     sigma2
## cg24645532 4.032485e-06 0.999984 4.032485e-06 0.999984 -0.06816282 0.04653511
## cg14454440 7.107518e-06 0.999984 7.107518e-06 0.999984 -0.07142256 0.05616710
## cg27066092 1.461790e-05 0.999984 1.461790e-05 0.999984 -0.05999985 0.04486539
## cg16245467 1.792348e-05 0.999984 1.792348e-05 0.999984  0.11659146 0.17558499
## cg07799895 2.314009e-05 0.999984 2.314009e-05 0.999984 -0.08825072 0.10526683
## cg03013727 2.992034e-05 0.999984 2.992034e-05 0.999984 -0.07025319 0.06986695
##                var.b_X1 fit.ctl     mean
## cg24645532 9.918595e-05   FALSE 2.834280
## cg14454440 1.197158e-04   FALSE 5.001569
## cg27066092 9.562708e-05   FALSE 3.116925
## cg16245467 3.742457e-04   FALSE 1.967496
## cg07799895 2.243680e-04   FALSE 3.320452
## cg03013727 1.489160e-04   FALSE 2.511936

#ctl <- rownames(mValsSq) %in% rownames(top1[top1$p.ebayes > 0.5,])
#bottom 80% used as negative controls
ctl <- rownames(mValsSq) %in% rownames(top1[ceiling(0.2*nrow(top1)):nrow(top1),])
table(ctl)

## ctl
##  FALSE   TRUE 
## 160921 643902

# Perform RUV adjustment and fit
rfit1 <- RUVfit(Y = mValsSq, X=design_tw[,"ADOS"], ctl=ctl) # Stage 2 analysis 
rfit2 <- RUVadj(Y = mValsSq, fit=rfit1)
top_ruv <- topRUV(rfit2, number = Inf)
top_ruv_1000 <- topRUV(rfit2, number = 1000)
head(top_ruv_1000)

##                     F.p    F.p.BH         p_X1   p.BH_X1        b_X1     sigma2
## cg14454440 3.664309e-06 0.4796448 3.664309e-06 0.4796448 -0.07059431 0.04652333
## cg19944119 4.101964e-06 0.4796448 4.101964e-06 0.4796448 -0.07590162 0.05487436
## cg00656820 4.515240e-06 0.4796448 4.515240e-06 0.4796448 -0.04070223 0.01605325
## cg16245467 1.084968e-05 0.4796448 1.084968e-05 0.4796448  0.10023945 0.11422192
## cg27066092 1.323336e-05 0.4796448 1.323336e-05 0.4796448 -0.05643253 0.03756417
## cg16702666 1.331355e-05 0.4796448 1.331355e-05 0.4796448 -0.04696979 0.02605206
##                var.b_X1 fit.ctl     mean
## cg14454440 9.173082e-05   FALSE 5.001569
## cg19944119 1.081967e-04   FALSE 3.050748
## cg00656820 3.165246e-05   FALSE 3.014805
## cg16245467 2.252133e-04   FALSE 1.967496
## cg27066092 7.406589e-05   FALSE 3.116925
## cg16702666 5.136729e-05   FALSE 2.449335

#dat_subset_ruv = as.matrix(subset(mValsSq, rownames(mValsSq) %in% rownames(top_ruv_1000)))
#heatmap.2(dat_subset_ruv, cexRow=0.8, cexCol=0.8, col=colorRampPalette(c("Blue","Yellow")), density.info="none", trace="none", scale="row", main="diff meth probes due to epilepsy")
output_ruv = merge(ann,top_ruv,by.x="Name",by.y="row.names")
output_ruv_1000 = merge(ann,top_ruv_1000,by.x="Name",by.y="row.names")
#output_full_epicanno = merge(ann_epic,top,by.x="Name",by.y="row.names")

write.csv(output_ruv[order(output_ruv$F.p),],
  file="ASD_blood_topruv80%_dmps_onADOS_withintw_Nov29.csv",
  row.names=FALSE)

write.csv(output_ruv_1000[order(output_ruv_1000$F.p),],
  file="ASD_blood_topruv80%_1kdmps_onADOS_withintw_Nov29.csv",
  row.names=FALSE)

Visualise the effect of RUV adjustment

#Madj <- getAdjusted(mValsSw, rfit2)
# cant get this part to work
mValsSq <- mValsSq[which(rownames(mValsSq) %in% colnames(rfit2$Y) ),]
#Madj <- missMethyl::getAdj(Y = mValsSq, fit = rfit2)

plotMDS(mValsSq, labels=targets_gen$Sample_Name,
  col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
  main="Unadjusted")

#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)

# i had to comment the below stuff out because Madj couldn't be generated.
#plotMDS(Madj, labels=targets_gen$Sample_Name, 
#  col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
#  main="Adjusted: RUV-inverse")
#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)

#Trying different k values
# mark fixed a major bug here
rfit_k1 <- RUVfit(Y=mValsSq, X=design_tw[,"ADOS"], ctl=ctl, 
  method = "ruv4", k=1) # Stage 2 analysis 

rfit_k1 <- RUVadj(Y=mValsSq, fit=rfit_k1)

rfit_k2 <- RUVfit(Y=mValsSq, X=design_tw[,"ADOS"], ctl=ctl,
  method = "ruv4", k=2) # Stage 2 analysis 

rfit_k2 <- RUVadj(Y=mValsSq, fit=rfit_k2)

rfit_k8 <- RUVfit(Y=mValsSq, X=design_tw[,"ADOS"], ctl=ctl,
  method = "ruv4", k=8) # Stage 2 analysis 

rfit_k8 <- RUVadj(Y=mValsSq, fit=rfit_k8)

#get adjusted values - doesn't work as at Apr 2022.
#Madj1 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k1)
#Madj2 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k2)
#Madj3 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k8)

#Visualise plots
par(mfrow=c(1,1))
plotMDS(mValsSq, labels=targets_gen$Sample_Name, 
  col=as.integer(factor(targets_gen$Diagnosis)),
  dim=c(14,15),
  main="Unadjusted")

#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-inverse")
#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj1, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-4, k=1")
#legend("bottomleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj2, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-4, k=2")
#legend("bottomright",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj3, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-4, k=8")
#legend

Gene ontology of top DMPs

Gene Ontology analysis (gometh): top 1000 probes

res <- read.csv("ASD_blood_topruv80%_dmps_onADOS_withintw_Nov29.csv", header = TRUE)
sigCpGs_1k = res$Name[1:1000]

sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)

## [1] 804823

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)

## All input CpGs are used for testing.

head( gometh_kegg[order(gometh_kegg$P.DE),] , 20)

##                                                   Description   N DE
## path:hsa00730                             Thiamine metabolism  15  3
## path:hsa04670            Leukocyte transendothelial migration 105  6
## path:hsa00513          Various types of N-glycan biosynthesis  36  3
## path:hsa00780                               Biotin metabolism   3  1
## path:hsa04392      Hippo signaling pathway - multiple species  28  3
## path:hsa00280      Valine, leucine and isoleucine degradation  47  3
## path:hsa01232                           Nucleotide metabolism  78  4
## path:hsa04714                                   Thermogenesis 209  8
## path:hsa04977                Vitamin digestion and absorption  23  2
## path:hsa04213 Longevity regulating pathway - multiple species  61  4
## path:hsa04530                                  Tight junction 161  7
## path:hsa00650                            Butanoate metabolism  27  2
## path:hsa04211                    Longevity regulating pathway  88  5
## path:hsa01240                       Biosynthesis of cofactors 147  5
## path:hsa04360                                   Axon guidance 177  9
## path:hsa04726                            Serotonergic synapse 108  5
## path:hsa05130           Pathogenic Escherichia coli infection 187  7
## path:hsa04921                      Oxytocin signaling pathway 151  7
## path:hsa00920                               Sulfur metabolism  10  1
## path:hsa00071                          Fatty acid degradation  41  2
##                      P.DE FDR
## path:hsa00730 0.004796095   1
## path:hsa04670 0.038727306   1
## path:hsa00513 0.045978414   1
## path:hsa00780 0.058735766   1
## path:hsa04392 0.058738197   1
## path:hsa00280 0.061602773   1
## path:hsa01232 0.064266960   1
## path:hsa04714 0.067722817   1
## path:hsa04977 0.074304530   1
## path:hsa04213 0.085670541   1
## path:hsa04530 0.087493572   1
## path:hsa00650 0.089462339   1
## path:hsa04211 0.092624247   1
## path:hsa01240 0.100150558   1
## path:hsa04360 0.105557111   1
## path:hsa04726 0.111717778   1
## path:hsa05130 0.116953903   1
## path:hsa04921 0.122967997   1
## path:hsa00920 0.138142049   1
## path:hsa00071 0.145792842   1

write.csv(gometh_kegg, file = "GOmeth_kegg.csv", row.names = TRUE)

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)

## All input CpGs are used for testing.

head( gometh_go[order(gometh_go$P.DE),] , 20)

##            ONTOLOGY
## GO:0002092       BP
## GO:0004017       MF
## GO:0030860       BP
## GO:0050145       MF
## GO:0016339       BP
## GO:0005845       CC
## GO:0098927       BP
## GO:0034518       CC
## GO:0046940       BP
## GO:0005547       MF
## GO:0140552       CC
## GO:0000340       MF
## GO:0004550       MF
## GO:0060319       BP
## GO:0006172       BP
## GO:0009180       BP
## GO:0061502       BP
## GO:0140326       MF
## GO:0048260       BP
## GO:0009188       BP
##                                                                                        TERM
## GO:0002092                                  positive regulation of receptor internalization
## GO:0004017                                                        adenylate kinase activity
## GO:0030860                          regulation of polarized epithelial cell differentiation
## GO:0050145                                         nucleoside monophosphate kinase activity
## GO:0016339 calcium-dependent cell-cell adhesion via plasma membrane cell adhesion molecules
## GO:0005845                                                         mRNA cap binding complex
## GO:0098927                        vesicle-mediated transport between endosomal compartments
## GO:0034518                                                          RNA cap binding complex
## GO:0046940                                         nucleoside monophosphate phosphorylation
## GO:0005547                                 phosphatidylinositol-3,4,5-trisphosphate binding
## GO:0140552                                                                 TEAD-YAP complex
## GO:0000340                                                RNA 7-methylguanosine cap binding
## GO:0004550                                           nucleoside diphosphate kinase activity
## GO:0060319                                            primitive erythrocyte differentiation
## GO:0006172                                                         ADP biosynthetic process
## GO:0009180                           purine ribonucleoside diphosphate biosynthetic process
## GO:0061502                                   early endosome to recycling endosome transport
## GO:0140326                          ATPase-coupled intramembrane lipid transporter activity
## GO:0048260                             positive regulation of receptor-mediated endocytosis
## GO:0009188                                  ribonucleoside diphosphate biosynthetic process
##             N DE         P.DE FDR
## GO:0002092 22  5 0.0002580819   1
## GO:0004017  7  3 0.0005837699   1
## GO:0030860  2  2 0.0010532279   1
## GO:0050145 18  4 0.0012740494   1
## GO:0016339 42  5 0.0016337224   1
## GO:0005845 10  3 0.0017904495   1
## GO:0098927 41  5 0.0022232794   1
## GO:0034518 11  3 0.0022264662   1
## GO:0046940 12  3 0.0022372312   1
## GO:0005547 36  5 0.0026812922   1
## GO:0140552  3  2 0.0031990802   1
## GO:0000340 12  3 0.0043298628   1
## GO:0004550 18  3 0.0043575537   1
## GO:0060319  3  2 0.0046016985   1
## GO:0006172  5  2 0.0050089050   1
## GO:0009180  5  2 0.0050089050   1
## GO:0061502  4  2 0.0052101818   1
## GO:0140326 24  4 0.0063256563   1
## GO:0048260 47  5 0.0064264754   1
## GO:0009188  6  2 0.0064602730   1

write.csv(gometh_go, file = "GOmeth_go.csv", row.names = TRUE)

#Plotting effect sizes of top DMPs
plot(ilogit2(mDat[rownames(mDat) == "cg11027325",]), ADOS,
  main = "HES6: cg11027325", ylab = "ADOS z-score", xlab = "Beta Value")

regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg11027325",]))
abline(regl5)

cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg11027325",]), method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  ADOS and ilogit2(mDat[rownames(mDat) == "cg11027325", ])
## t = -0.2059, df = 16, p-value = 0.8395
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5061304  0.4256788
## sample estimates:
##         cor 
## -0.05140787

plot(ilogit2(mDat[rownames(mDat) == "cg20507028",]), ADOS,
  main = "ZNF713: cg20507028", ylab = "ADOS z-score", xlab = "Beta Value")

regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg20507028",]))
abline(regl5)

cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg20507028",]), method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  ADOS and ilogit2(mDat[rownames(mDat) == "cg20507028", ])
## t = 0.081723, df = 16, p-value = 0.9359
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4507421  0.4826934
## sample estimates:
##        cor 
## 0.02042652

plot(ilogit2(mDat[rownames(mDat) == "cg21244116",]), ADOS, 
  main = "EHMT1: cg21244116", ylab = "ADOS z-score", xlab = "Beta Value")

regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg21244116",]))
abline(regl5)

cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg21244116",]), method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  ADOS and ilogit2(mDat[rownames(mDat) == "cg21244116", ])
## t = 2.2889, df = 16, p-value = 0.03602
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03877897 0.78216357
## sample estimates:
##       cor 
## 0.4966572

plot(ilogit2(mDat[rownames(mDat) == "cg21241156",]), ADOS, 
  main = "TLE4: cg21241156", ylab = "ADOS z-score", xlab = "Beta Value")
regl5<-lm(ADOS ~ ilogit2(mDat[rownames(mDat) == "cg21241156",]))
abline(regl5)

cor.test(ADOS, ilogit2(mDat[rownames(mDat) == "cg21241156",]), method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  ADOS and ilogit2(mDat[rownames(mDat) == "cg21241156", ])
## t = -1.6205, df = 16, p-value = 0.1247
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7167153  0.1108063
## sample estimates:
##        cor 
## -0.3754886

#Effect sizes of differences in ADOS score versus difference in beta value wthin twin pairs

diff_ADOS_1 = abs(ADOS[1] - ADOS[2])
diff_ADOS_2 = abs(ADOS[3] - ADOS[4])
diff_ADOS_3 = abs(ADOS[5] - ADOS[6])
diff_ADOS_4 = abs(ADOS[7] - ADOS[8])
diff_ADOS_5 = abs(ADOS[9] - ADOS[10])
diff_ADOS_6 = abs(ADOS[11] - ADOS[12])
diff_ADOS_7 = abs(ADOS[13] - ADOS[14])
diff_ADOS_8 = abs(ADOS[15] - ADOS[16])
diff_ADOS_9 = abs(ADOS[17] - ADOS[18])
diff_ADOS_score = c(9,4,3,7,5,8,2,5,12)

#diff_ADOS_score = c(diff_ADOS_1+diff_ADOS_2+diff_ADOS_3+diff_ADOS_4+diff_ADOS_5+diff_ADOS_6+diff_ADOS_7+diff_ADOS_8+diff_ADOS_9)

#for (i in 1:length(ADOS))
#{
#  diff_ADOS_score<-abs(ADOS[i] - ADOS[i+1])
#    i = i +1
#diff_ADOS_score <- c(diff_ADOS_score, diff_ADOS_score[i])
#}
#print(diff_ADOS_score)

diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg21241156"][1]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg21241156"][3]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg21241156"][5]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg21241156"][7]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg21241156"][9]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg21241156"][11]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg21241156"][13]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg21241156"][15]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg21241156"][17]) - ilogit2(mDat[rownames(mDat) == "cg21241156"][18])
diff_effectsize_tot = c(0.02383603, 0.01913442, -0.0003221395, 0.07068035, -0.05522228, 
  0.05076427, 0.002702124, -0.03403429, 0.06949949)
diff_effectsize_tot

## [1]  0.0238360300  0.0191344200 -0.0003221395  0.0706803500 -0.0552222800
## [6]  0.0507642700  0.0027021240 -0.0340342900  0.0694994900

#TLE4: cg21241156
plot(diff_effectsize_tot, diff_ADOS_score, main = "TLE4: cg21241156",
  ylab = "ADOS difference z-score", xlab = "Delta Beta Value")

regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot)
abline(regl5)

cor.test(diff_ADOS_score, diff_effectsize_tot, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  diff_ADOS_score and diff_effectsize_tot
## t = 2.0866, df = 7, p-value = 0.07535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0762115  0.9093826
## sample estimates:
##       cor 
## 0.6192529

#EHMT1: cg21244116
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg21244116"][1]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg21244116"][3]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg21244116"][5]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg21244116"][7]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg21244116"][9]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg21244116"][11]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg21244116"][13]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg21244116"][15]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg21244116"][17]) - ilogit2(mDat[rownames(mDat) == "cg21244116"][18])
diff_effectsize_tot_cg21244116 = c(-0.0340176, -0.001041366, 0.02669383, -0.02422778, 0.03996136, 
  -0.03206927, -0.004060284, 0.019002, -0.04277449)
diff_effectsize_tot_cg21244116

## [1] -0.034017600 -0.001041366  0.026693830 -0.024227780  0.039961360
## [6] -0.032069270 -0.004060284  0.019002000 -0.042774490

plot(diff_effectsize_tot_cg21244116, diff_ADOS_score, main = "EHMT1: cg21244116", 
  ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot_cg21244116)
abline(regl5)

cor.test(diff_ADOS_score, diff_effectsize_tot_cg21244116, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  diff_ADOS_score and diff_effectsize_tot_cg21244116
## t = -2.9788, df = 7, p-value = 0.02055
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9433676 -0.1659451
## sample estimates:
##       cor 
## -0.747668

#ZNF713: cg20507028
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg20507028"][1]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg20507028"][3]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg20507028"][5]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg20507028"][7]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg20507028"][9]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg20507028"][11]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg20507028"][13]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg20507028"][15]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg20507028"][17]) - ilogit2(mDat[rownames(mDat) == "cg20507028"][18])
diff_effectsize_tot_cg20507028 = c(-0.005014283, -0.04933726, 0.0541018, -0.06395128, 0.07563989, 
  -0.1120907, 0.004739958, 0.01840117, -0.06681291)
diff_effectsize_tot_cg20507028

## [1] -0.005014283 -0.049337260  0.054101800 -0.063951280  0.075639890
## [6] -0.112090700  0.004739958  0.018401170 -0.066812910

plot(diff_effectsize_tot_cg20507028, diff_ADOS_score, main = "ZNF713: cg20507028", 
  ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot_cg20507028)
abline(regl5)

cor.test(diff_ADOS_score, diff_effectsize_tot_cg20507028, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  diff_ADOS_score and diff_effectsize_tot_cg20507028
## t = -1.754, df = 7, p-value = 0.1229
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8900562  0.1762479
## sample estimates:
##        cor 
## -0.5525498

#HES6: cg11027325
diff_effectsize_1 = ilogit2(mDat[rownames(mDat) == "cg11027325"][1]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][2])
diff_effectsize_2 = ilogit2(mDat[rownames(mDat) == "cg11027325"][3]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][4])
diff_effectsize_3 = ilogit2(mDat[rownames(mDat) == "cg11027325"][5]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][6])
diff_effectsize_4 = ilogit2(mDat[rownames(mDat) == "cg11027325"][7]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][8])
diff_effectsize_5 = ilogit2(mDat[rownames(mDat) == "cg11027325"][9]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][10])
diff_effectsize_6 = ilogit2(mDat[rownames(mDat) == "cg11027325"][11]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][12])
diff_effectsize_7 = ilogit2(mDat[rownames(mDat) == "cg11027325"][13]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][14])
diff_effectsize_8 = ilogit2(mDat[rownames(mDat) == "cg11027325"][15]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][16])
diff_effectsize_9 = ilogit2(mDat[rownames(mDat) == "cg11027325"][17]) - ilogit2(mDat[rownames(mDat) == "cg11027325"][18])
diff_effectsize_tot_cg11027325 = c(0.1334995, 0.02969043, -0.04247427, 0.1107637, -0.03854689, 
  0.1779549, 0.007191577, -0.03854834, 0.1683442)
diff_effectsize_tot_cg11027325

## [1]  0.133499500  0.029690430 -0.042474270  0.110763700 -0.038546890
## [6]  0.177954900  0.007191577 -0.038548340  0.168344200

plot(diff_effectsize_tot_cg11027325, diff_ADOS_score, main = "HES6: cg11027325", 
  ylab = "ADOS difference z-score", xlab = "Delta Beta Value")
regl5<-lm(diff_ADOS_score ~ diff_effectsize_tot_cg11027325)
abline(regl5)

cor.test(diff_ADOS_score, diff_effectsize_tot_cg11027325, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  diff_ADOS_score and diff_effectsize_tot_cg11027325
## t = 3.9556, df = 7, p-value = 0.005493
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3729744 0.9634707
## sample estimates:
##      cor 
## 0.831206

DMRCate - differentially methylated region analysis

#design matrix in regression 
design_tw <- model.matrix(~ADOS+Twin_Group)
design_tw_dmrc <- model.matrix(~ADOS)
#myannotation <- cpg.annotate("array", mDat, analysis.type="differential", design=design_tw, coef=2, fdr=1)
myannotation <- cpg.annotate("array", mDat, what = "M", 
  arraytype = "EPIC", analysis.type="differential", design_tw, coef=2)

## Your contrast returned no individually significant probes. Try increasing the fdr. Alternatively, set pcutoff manually in dmrcate() to return DMRs, but be warned there is an increased risk of Type I errors.

dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2, pcutoff=0.001)

## Fitting chr1...

## Fitting chr2...

## Fitting chr3...

## Fitting chr4...

## Fitting chr5...

## Fitting chr6...

## Fitting chr7...

## Fitting chr8...

## Fitting chr9...

## Fitting chr10...

## Fitting chr11...

## Fitting chr12...

## Fitting chr13...

## Fitting chr14...

## Fitting chr15...

## Fitting chr16...

## Fitting chr17...

## Fitting chr18...

## Fitting chr19...

## Fitting chr20...

## Fitting chr21...

## Fitting chr22...

## Fitting chrX...

## Fitting chrY...

## Demarcating regions...

## Done!

results.ranges <- data.frame(extractRanges(dmrcoutput, genome = "hg19") )

## snapshotDate(): 2021-05-18

## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation

## loading from cache

head(results.ranges, 20)

##    seqnames     start       end width strand no.cpgs min_smoothed_fdr  Stouffer
## 1      chr1   4794912   4795012   101      *       2     5.548416e-04 0.9989347
## 2      chr1 191843335 191843399    65      *       2     2.567107e-04 0.9989347
## 3      chr3  54122032  54122146   115      *       2     5.222949e-04 0.9989347
## 4      chr4  22445207  22445349   143      *       2     3.838120e-05 0.9989347
## 5      chr8 110693461 110693859   399      *       2     5.445921e-09 0.9989347
## 6      chr9  98236338  98236406    69      *       2     2.612195e-04 0.9989347
## 7     chr11  62106739  62106872   134      *       2     2.079725e-04 0.9989347
## 8     chr11 125479352 125479362    11      *       2     2.803063e-06 0.9989347
## 9     chr14 107253273 107253290    18      *       2     5.522862e-04 0.9989347
## 10    chr15  43309399  43309540   142      *       2     4.946098e-04 0.9989347
## 11     chr3   3213603   3213634    32      *       2     9.786746e-05 0.9992349
## 12     chr7 157162115 157162202    88      *       2     2.254582e-04 0.9998091
## 13     chr3 113249814 113250232   419      *       3     1.561141e-04 0.9999156
## 14     chr7  55954891  55954957    67      *       3     5.053488e-04 0.9999156
## 15     chr9 140729241 140729712   472      *       3     6.616449e-06 0.9999156
## 16    chr16  85465589  85465657    69      *       3     1.327067e-04 0.9999478
## 17     chr8 141046436 141046852   417      *       5     3.728981e-04 0.9999994
## 18     chr6  28945189  28945507   319      *       7     1.327067e-04 1.0000000
## 19    chr11  57508449  57508856   408      *       8     1.327067e-04 1.0000000
## 20    chr11   5617367   5618023   657      *       8     9.786746e-05 1.0000000
##        HMFDR    Fisher      maxdiff      meandiff
## 1  0.9850646 0.9995561 -0.006211006 -0.0048567106
## 2  0.9850646 0.9995561 -0.009731567 -0.0088596958
## 3  0.9850646 0.9995561 -0.008676342 -0.0036259846
## 4  0.9850646 0.9995561 -0.005127505 -0.0009542798
## 5  0.9850646 0.9995561 -0.011266044 -0.0050649373
## 6  0.9850646 0.9995561 -0.007524800 -0.0032620071
## 7  0.9850646 0.9995561 -0.007182332 -0.0049541347
## 8  0.9850646 0.9995561  0.009127710  0.0029452176
## 9  0.9850646 0.9995561 -0.012064556 -0.0098097668
## 10 0.9850646 0.9995561 -0.004550075 -0.0031806926
## 11 0.9873008 0.9996790 -0.003951027 -0.0018703955
## 12 0.9914051 0.9998534 -0.003801339 -0.0019083301
## 13 0.9850646 0.9999852 -0.005270309 -0.0033645183
## 14 0.9850646 0.9999852  0.007225873  0.0037528467
## 15 0.9850646 0.9999852  0.003916152  0.0004668367
## 16 0.9871267 0.9999905 -0.001981089 -0.0014041778
## 17 0.9850646 1.0000000 -0.009159352 -0.0048570805
## 18 0.9859176 1.0000000 -0.006709371 -0.0044028435
## 19 0.9850646 1.0000000 -0.007194960  0.0002647476
## 20 0.9851186 1.0000000 -0.004561438  0.0002026570
##                         overlapping.genes
## 1                                   AJAP1
## 2                            RP11-541F9.1
## 3                                    <NA>
## 4                                  GPR125
## 5                                    SYBU
## 6                                   PTCH1
## 7                                  ASRGL1
## 8                                   STT3A
## 9                                    <NA>
## 10                                   UBR1
## 11                                   CRBN
## 12                                 DNAJB6
## 13                                   <NA>
## 14                                   <NA>
## 15                                  EHMT1
## 16                                   <NA>
## 17                                TRAPPC9
## 18                              RN7SL471P
## 19                  TMX2-CTNND1, C11orf31
## 20 TRIM6, TRIM6-TRIM34, HBG2, AC015691.13

write.csv(results.ranges, file = "dmrcoutput.csv", row.names = TRUE)

Session information

sessionInfo()

## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DMRcatedata_2.10.0                                 
##  [2] ExperimentHub_2.0.0                                
##  [3] AnnotationHub_3.0.2                                
##  [4] BiocFileCache_2.0.0                                
##  [5] dbplyr_2.1.1                                       
##  [6] DMRcate_2.6.0                                      
##  [7] ggplot2_3.3.5                                      
##  [8] reshape2_1.4.4                                     
##  [9] dplyr_1.0.8                                        
## [10] IlluminaHumanMethylation450kmanifest_0.4.0         
## [11] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [12] FlowSorted.Blood.450k_1.30.0                       
## [13] WGCNA_1.70-3                                       
## [14] fastcluster_1.2.3                                  
## [15] dynamicTreeCut_1.63-1                              
## [16] gplots_3.1.1                                       
## [17] RColorBrewer_1.1-2                                 
## [18] ruv_0.9.7.1                                        
## [19] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [20] limma_3.48.3                                       
## [21] missMethyl_1.26.1                                  
## [22] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [23] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0 
## [24] minfi_1.38.0                                       
## [25] bumphunter_1.34.0                                  
## [26] locfit_1.5-9.5                                     
## [27] iterators_1.0.14                                   
## [28] foreach_1.5.2                                      
## [29] Biostrings_2.60.2                                  
## [30] XVector_0.32.0                                     
## [31] SummarizedExperiment_1.22.0                        
## [32] Biobase_2.52.0                                     
## [33] MatrixGenerics_1.4.3                               
## [34] matrixStats_0.61.0                                 
## [35] GenomicRanges_1.44.0                               
## [36] GenomeInfoDb_1.28.4                                
## [37] IRanges_2.26.0                                     
## [38] S4Vectors_0.30.2                                   
## [39] BiocGenerics_0.38.0                                
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                rtracklayer_1.52.1           
##   [3] R.methodsS3_1.8.1             tidyr_1.2.0                  
##   [5] bit64_4.0.5                   knitr_1.37                   
##   [7] DelayedArray_0.18.0           R.utils_2.11.0               
##   [9] data.table_1.14.2             rpart_4.1.16                 
##  [11] KEGGREST_1.32.0               RCurl_1.98-1.6               
##  [13] GEOquery_2.60.0               AnnotationFilter_1.16.0      
##  [15] doParallel_1.0.17             generics_0.1.2               
##  [17] GenomicFeatures_1.44.2        preprocessCore_1.54.0        
##  [19] RSQLite_2.2.11                bit_4.0.4                    
##  [21] tzdb_0.2.0                    xml2_1.3.3                   
##  [23] httpuv_1.6.5                  assertthat_0.2.1             
##  [25] xfun_0.30                     hms_1.1.1                    
##  [27] jquerylib_0.1.4               evaluate_0.15                
##  [29] promises_1.2.0.1              fansi_1.0.2                  
##  [31] restfulr_0.0.13               scrime_1.3.5                 
##  [33] progress_1.2.2                caTools_1.18.2               
##  [35] readxl_1.3.1                  DBI_1.1.2                    
##  [37] htmlwidgets_1.5.4             reshape_0.8.8                
##  [39] purrr_0.3.4                   ellipsis_0.3.2               
##  [41] backports_1.4.1               permute_0.9-7                
##  [43] annotate_1.70.0               biomaRt_2.48.3               
##  [45] sparseMatrixStats_1.4.2       vctrs_0.3.8                  
##  [47] ensembldb_2.16.4              cachem_1.0.6                 
##  [49] withr_2.5.0                   Gviz_1.36.2                  
##  [51] BSgenome_1.60.0               checkmate_2.0.0              
##  [53] GenomicAlignments_1.28.0      prettyunits_1.1.1            
##  [55] mclust_5.4.9                  cluster_2.1.2                
##  [57] lazyeval_0.2.2                crayon_1.5.0                 
##  [59] genefilter_1.74.1             edgeR_3.34.1                 
##  [61] pkgconfig_2.0.3               labeling_0.4.2               
##  [63] nlme_3.1-155                  ProtGenerics_1.24.0          
##  [65] nnet_7.3-17                   rlang_1.0.2                  
##  [67] lifecycle_1.0.1               filelock_1.0.2               
##  [69] dichromat_2.0-0               cellranger_1.1.0             
##  [71] rngtools_1.5.2                base64_2.0                   
##  [73] Matrix_1.4-1                  Rhdf5lib_1.14.2              
##  [75] base64enc_0.1-3               png_0.1-7                    
##  [77] rjson_0.2.21                  bitops_1.0-7                 
##  [79] R.oo_1.24.0                   KernSmooth_2.23-20           
##  [81] rhdf5filters_1.4.0            blob_1.2.2                   
##  [83] DelayedMatrixStats_1.14.3     doRNG_1.8.2                  
##  [85] stringr_1.4.0                 nor1mix_1.3-0                
##  [87] readr_2.1.2                   jpeg_0.1-9                   
##  [89] scales_1.1.1                  memoise_2.0.1                
##  [91] magrittr_2.0.2                plyr_1.8.6                   
##  [93] zlibbioc_1.38.0               compiler_4.1.3               
##  [95] BiocIO_1.2.0                  illuminaio_0.34.0            
##  [97] Rsamtools_2.8.0               cli_3.2.0                    
##  [99] DSS_2.40.0                    htmlTable_2.4.0              
## [101] Formula_1.2-4                 MASS_7.3-56                  
## [103] tidyselect_1.1.2              stringi_1.7.6                
## [105] highr_0.9                     yaml_2.3.5                   
## [107] askpass_1.1                   latticeExtra_0.6-29          
## [109] grid_4.1.3                    sass_0.4.1                   
## [111] VariantAnnotation_1.38.0      tools_4.1.3                  
## [113] rstudioapi_0.13               foreign_0.8-82               
## [115] bsseq_1.28.0                  gridExtra_2.3                
## [117] farver_2.1.0                  digest_0.6.29                
## [119] BiocManager_1.30.16           shiny_1.7.1                  
## [121] quadprog_1.5-8                Rcpp_1.0.8.3                 
## [123] siggenes_1.66.0               BiocVersion_3.13.1           
## [125] later_1.3.0                   org.Hs.eg.db_3.13.0          
## [127] httr_1.4.2                    AnnotationDbi_1.54.1         
## [129] biovizBase_1.40.0             colorspace_2.0-3             
## [131] XML_3.99-0.9                  splines_4.1.3                
## [133] statmod_1.4.36                multtest_2.48.0              
## [135] xtable_1.8-4                  jsonlite_1.8.0               
## [137] R6_2.5.1                      Hmisc_4.6-0                  
## [139] pillar_1.7.0                  htmltools_0.5.2              
## [141] mime_0.12                     glue_1.6.2                   
## [143] fastmap_1.1.0                 BiocParallel_1.26.2          
## [145] interactiveDisplayBase_1.30.0 beanplot_1.2                 
## [147] codetools_0.2-18              utf8_1.2.2                   
## [149] lattice_0.20-45               bslib_0.3.1                  
## [151] tibble_3.1.6                  curl_4.3.2                   
## [153] BiasedUrn_1.07                gtools_3.9.2                 
## [155] GO.db_3.13.0                  openssl_2.0.0                
## [157] survival_3.3-1                rmarkdown_2.13               
## [159] munsell_0.5.0                 rhdf5_2.36.0                 
## [161] GenomeInfoDbData_1.2.6        HDF5Array_1.20.0             
## [163] impute_1.66.0                 gtable_0.3.0

Autism Spectrum Disorder Methylation analysis - blood analysis

Namitha Mohandras, Mark Ziemann

2022-04-08