Autism Spectrum Disorder Methylation analysis

Introduction

Here we are analysing blood methylation in 9 twin pairs.

baseDir=getwd()
dataDir=paste(baseDir,"/ASD_EPIC_DATA",sep="")

suppressPackageStartupMessages({
  library("missMethyl")
  library("limma")
  library("minfi")
  library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library("RColorBrewer")
  library("matrixStats")
  library("gplots")
  library("WGCNA")
  library("FlowSorted.Blood.450k")
  library("reshape2")
  library("ggplot2")
  library("missMethyl")
  library("DMRcate")
  library("FlowSorted.Blood.EPIC")
  library("mitch")
  library("kableExtra")
  library("vioplot")
})

source("meth_functions.R")

Load data

Load the annotation data and the Epic methylation data.

This analysis is to be conducted on Ubuntu with R4.

ann = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)

ann_sub = ann[,c("chr","pos","strand","Name","Islands_Name",
    "Relation_to_Island","UCSC_RefGene_Name","UCSC_RefGene_Group")]

targets_gen = read.metharray.sheet(dataDir, pattern = "ASD_blood_summarysheet.csv")

## [read.metharray.sheet] Found the following CSV files:

## [1] "/asd_meth/ASD_EPIC_DATA/ASD_blood_summarysheet.csv"

#targets$ID = paste(targets$Sample_Group,targets_gen$Sample_Name,sep=".")
rgSet = read.metharray.exp(targets = targets_gen)
sampleNames(rgSet) = targets_gen$SampleID

Testing MZ status

snpBetas = getSnpBeta(rgSet)

## Loading required package: IlluminaHumanMethylationEPICmanifest

d = dist(t(snpBetas))
hr = hclust(d, method = "complete", members=NULL)
plot(hr)

Quality control

detP = detectionP(rgSet)
qcReport(rgSet, sampNames = targets_gen$Sample_Name,
  pdf="qc-report_ASD_blood_nov27.pdf")

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## png 
##   2

cols=brewer.pal(4,"Set1")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,ylim=c(0,0.010),
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

Preprocessing

mset.raw = preprocessRaw(rgSet)

Data exploration

Using Multi-dimensional scaling (MDS) plots before filtering.

mdsPlot(mset.raw, sampGroups = targets_gen$Sample_Name, 
  sampNames=targets_gen$Social_interaction_on_ADOS,legendPos="bottom")

mdsPlot(mset.raw, sampGroups = targets_gen$Sex, 
  sampNames=targets_gen$SampleID,legendPos="bottom")

Cell type composition analysis

Need to consider using estimatecellcounts2.

rgSet$Slide <- as.numeric(rgSet$Slide)
rgSet$Sex<- as.character(rgSet$Sex)
rgSet$Sample_Name<- as.character(rgSet$Sample_Name)

cellCounts_new <- estimateCellCounts(rgSet, compositeCellType = "Blood", 
  processMethod = "auto", probeSelect = "auto", 
  cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono","Gran"),
  referencePlatform = c("IlluminaHumanMethylation450k"),
  returnAll = FALSE, meanPlot = TRUE)

## Loading required package: IlluminaHumanMethylation450kmanifest

## [estimateCellCounts] Combining user data with reference (flow sorted) data.

## Warning in DataFrame(sampleNames = c(colnames(rgSet),
## colnames(referenceRGset)), : 'stringsAsFactors' is ignored

## [estimateCellCounts] Processing user and reference data together.

## [preprocessQuantile] Mapping to genome.

## [preprocessQuantile] Fixing outliers.

## [preprocessQuantile] Quantile normalizing.

## [estimateCellCounts] Picking probes for composition estimation.

## [estimateCellCounts] Estimating composition.

#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1, 
  targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) + 
  geom_boxplot()

wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )

## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties

names(wx) <- colnames(cellCounts_new)
wx

## $CD8T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 12, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 13, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  a[, i] and c[, i]
## W = 15, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 18, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 23, p-value = 0.2168
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Gran
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 15, p-value = 1
## alternative hypothesis: true location shift is not equal to 0

Try estimatecellcounts2.

cells <- estimateCellCounts2(rgSet, referencePlatform= "IlluminaHumanMethylationEPIC", 
  returnAll = TRUE)

## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation

## downloading 1 resources

## retrieving 1 resource

## loading from cache

## [estimateCellCounts2] Combining user data with reference (flow sorted) data.

## Warning in asMethod(object): NAs introduced by coercion

## [estimateCellCounts2] Processing user and reference data together.

## [estimateCellCounts2] Using IDOL L-DMR probes for composition estimation.

## [estimateCellCounts2] Estimating proportion composition (prop), if you provide cellcounts those will be provided as counts in the composition estimation.

mset <- cells$normalizedData

cellCounts_new <- cells[[1]]
#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1,  targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) + 
  geom_boxplot()

pdf("cells_blood.pdf")

par(mar=c(3,8,2,2))

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
  geom_boxplot()

dev.off()

## png 
##   2

wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )
names(wx) <- colnames(cellCounts_new)
wx

## $CD8T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 14, p-value = 0.9371
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 13, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 17, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 18, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 26, p-value = 0.07692
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Neu
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 14, p-value = 0.9371
## alternative hypothesis: true location shift is not equal to 0

Filtering

None of the p-values are less than 0.05.

Now filter for high detection p-value and overlap with SNP.

Need to get a copy of the Xreact probes from Namitha. In the mean time I have downloaded from github - link below.

detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mset <- mset[keep,]
gmset <- mapToGenome(mset)

#remove SNPs
gmset_flt = dropLociWithSnps(gmset, snps = c("CpG", "SBE"))

#Removing cross-reactive probes
XURL="https://raw.githubusercontent.com/sirselim/illumina450k_filtering/master/EPIC/13059_2016_1066_MOESM1_ESM.csv"
Xreact <- read.csv(XURL)

#Xreact = read.csv(file="/group/canc2/puumba/Data/InfiniumData/NamithaData/Rprojects/Autism/Analysis_Sept11/EPIC_850k_crossreactiveProbes.csv", stringsAsFactors=FALSE)
#Xreact = read.csv(file="~/48639-non-specific-probes-Illumina450k.csv", stringsAsFactors=FALSE)
noXreact <-  !(featureNames(gmset) %in% Xreact$X)

gmset <- gmset[noXreact,]

#Removing probes on X and Y chromosomes
autosomes <- !(featureNames(gmset) %in% ann$Name[ann$chr %in% c("chrX","chrY")])
gmset_flt <- gmset[autosomes,]

#getBeta
beta = getM(gmset_flt)
saveRDS(beta,"bl_beta.rds")
df <- data.frame(t(t(colMeans(beta))))
colnames(df) = "gwam_bl"
write.table(df,file="bl_gwam.tsv")

#Relative log expression (RLE plot)
mvals = getM(gmset_flt)
medSq = apply(mvals, 1, median)
YSq = mvals - medSq

boxplot(YSq,outline=FALSE,ylim=c(-1.5,1.5), 
  ylab="Relative Log Methylation Value", 
  cols=as.character(factor(targets_gen$Social_interaction_on_ADOS,)) )

MDS plots generation after filtering

pal = brewer.pal(8, "Dark2")
mds1Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,2))

mds2Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,3))

mds3Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(2,3))

mds4Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(3,4))

plotMDS(mds1Sq, xlab="Dimension 1", ylab="Dimension 2",
  col=pal[as.factor(targets_gen$Diagnosis)],
  dim=c(1,2), labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds2Sq, xlab="Dimension 1", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(1,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds3Sq, xlab="Dimension 2", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(2,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds4Sq, xlab="Dimension 3", ylab="Dimension 4",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(3,4),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

Principal Component Analysis (PCA)

fit <- prcomp(t(mvals),center = TRUE, scale = TRUE,retx=TRUE)
loadings = fit$x
plot(fit,type="lines")

nGenes = nrow(mvals)
nSamples = ncol(mvals)
datTraits = targets_gen[,7:15]
moduleTraitCor = cor(loadings[,1:6], datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
par(cex=0.75, mar = c(6, 8.5, 3, 3))
textMatrix = paste(signif(moduleTraitCor, 2), "\n(", 
  signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

pdf("pca_blood.pdf")

par(mar=c(3,8,2,2))

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

dev.off()

## png 
##   2

Regression analysis - within twin pair on ADOS

ADOS is the outcome variable (continuous)

Twin_Group <- factor(targets_gen$Family_ID)
ADOS <- targets_gen$Social_interaction_on_ADOS
names(ADOS) <- targets_gen$SampleID
motor <- targets_gen$Motor_skills
diagnosis <- targets_gen$Diagnosis
SRS<- targets_gen$SRS_social_scores
iiq <- 1/targets_gen$IQ
# fix infinite
iiq[iiq==Inf] <- 0.025
iiq <- as.vector(scale(iiq))
ilanguage <- as.vector(scale(1/targets_gen$language))

#ADOS
design_tw <- model.matrix(~ Twin_Group + ADOS )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        245472        8632        4657        5582        3441        3896
## NotSig       42797      762396      793722      790564      793202      795941
## Up          514378       31619        4268        6501        6004        2810
##        Twin_Group8 Twin_Group12 Twin_Group13   ADOS
## Down          2567         3672         7139      0
## NotSig      796786       795931       791579 802647
## Up            3294         3044         3929      0

top <- topTable(fit2_tw,coef="ADOS",num=Inf, sort.by = "P")
nrow(subset(top,adj.P.Val < 0.05))

## [1] 0

nrow(subset(top,P.Value < 1e-4))

## [1] 4

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output, file="limma_blood_ADOS.csv",row.names=FALSE)
output <- subset(output,P.Value<1e-4)
head(output,30) %>% kbl() %>% kable_paper("hover", full_width = F)

Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg07655025	chr19	12835001	chr19:12833104-12833574	S_Shore	TNPO2	TSS200	-0.0854949	-4.3361600	-6.706650	2.28e-05	0.8764716	3.048127
cg18253799	chr17	78069191		OpenSea	CCDC40	Body	0.1005063	4.6851023	5.976848	6.70e-05	0.8764716	2.036801
cg25032595	chr13	96204978	chr13:96204691-96205496	Island	CLDN10;CLDN10;CLDN10;CLDN10	5’UTR;1stExon;Body;Body	-0.0705806	-4.5040270	-5.801969	8.77e-05	0.8764716	1.781885
cg16956665	chr11	125479362		OpenSea	STT3A	Body	0.1228970	0.6660224	5.770674	9.21e-05	0.8764716	1.735749

saveRDS(design_tw, "bl_design_ados.rds")
saveRDS(mvals, "bl_mvals.rds")

Other models:

motor skills impairment
diagnosis
SRS
inverse IQ
inverse langguage

Regression analysis - within twin pair on motor skills

#motor:820,1.053055e-12
design_tw <- model.matrix(~ Twin_Group + motor )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        229684        4434        3653        1755        1453        5029
## NotSig       83474      788381      796338      799389      799771      794382
## Up          489489        9832        2656        1503        1423        3236
##        Twin_Group8 Twin_Group12 Twin_Group13  motor
## Down          4104         1623         1628      3
## NotSig      793202       799776       799910 802643
## Up            5341         1248         1109      1

top <- topTable(fit2_tw,coef="motor",num=Inf, sort.by = "P")
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 13734

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_motor.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="motor") %>% kable_paper("hover", full_width = F)

motor
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg06210070	chr6	33085063	chr6:33084740-33084995	S_Shore	HLA-DPB2	Body	2.1349589	-1.243178	13.270789	0.0000000	0.0158546	-3.115049
cg16853842	chr12	56390939		OpenSea	SUOX;SUOX;SUOX	TSS200;TSS200;TSS200	-1.8492659	-3.316265	-12.453699	0.0000000	0.0159779	-3.130500
cg08638320	chr1	47900265	chr1:47899661-47900385	Island	FOXD2;MGC12982	TSS1500;Body	-2.3455882	-1.387550	-11.823209	0.0000001	0.0188119	-3.144416
cg26848940	chr6	37787530	chr6:37786707-37788114	Island	ZFAND3;ZFAND3	5’UTR;1stExon	-2.1854214	-5.339348	-10.553508	0.0000002	0.0481955	-3.179286
cg03896436	chr5	141082572	chr5:141082245-141082592	Island			-1.0547315	-3.970117	-8.670124	0.0000019	0.3041140	-3.256077
cg10013356	chr6	150244295	chr6:150243902-150244210	S_Shore	RAET1G	TSS200	2.1354039	-3.707090	7.593191	0.0000072	0.9670964	-3.321246
cg22401197	chr2	219261843	chr2:219263098-219265556	N_Shore	CTDSP1	TSS1500	1.0215084	-5.424140	7.108057	0.0000138	0.9999765	-3.357991
cg19820070	chr3	48595777	chr3:48594022-48594853	S_Shore			0.9966580	-4.814164	6.864147	0.0000193	0.9999765	-3.378599
cg08857348	chr17	40760642	chr17:40761059-40761761	N_Shore	TUBG1;FAM134C;FAM134C	TSS1500;Body;Body	-1.0571880	-4.778048	-6.861185	0.0000194	0.9999765	-3.378859
cg03122453	chr10	18940666	chr10:18940528-18940765	Island	NSUN6	TSS200	0.8626666	-4.730919	6.509986	0.0000319	0.9999765	-3.411414
cg05905482	chr16	22207553		OpenSea			-0.8705145	-4.380715	-6.361905	0.0000396	0.9999765	-3.426244
cg06753055	chr15	40228656	chr15:40226210-40226631	S_Shelf	EIF2AK4	Body	0.8532322	1.588910	6.287176	0.0000442	0.9999765	-3.433993
cg27179327	chr22	51066859	chr22:51066107-51067051	Island	ARSA;ARSA;ARSA;ARSA;ARSA	TSS1500;TSS1500;TSS1500;TSS1500;TSS1500	-0.8623289	-5.720339	-6.275171	0.0000450	0.9999765	-3.435255
cg15407916	chr18	5133225		OpenSea			-1.0635897	3.550668	-6.224045	0.0000485	0.9999765	-3.440683
cg22188917	chr6	43138180	chr6:43138711-43140292	N_Shore	SRF	TSS1500	-1.0633309	-3.701277	-6.179868	0.0000518	0.9999765	-3.445444
cg20850829	chr2	55646538	chr2:55646621-55647329	N_Shore	CCDC88A;CCDC88A;CCDC88A;CCDC88A;CCDC88A;CCDC88A	1stExon;1stExon;1stExon;5’UTR;5’UTR;5’UTR	0.9471775	-5.105669	6.059595	0.0000620	0.9999765	-3.458750
cg11858249	chr21	45079790	chr21:45077671-45079821	Island	HSF2BP;RRP1B	TSS1500;Body	0.7795903	-4.933768	6.039472	0.0000639	0.9999765	-3.461026
cg00786952	chr1	21763130	chr1:21763070-21763416	Island			1.1501735	-4.475003	6.021161	0.0000656	0.9999765	-3.463110
cg00140914	chr17	44859215		OpenSea	WNT3	Body	-1.0902420	1.783406	-6.020199	0.0000657	0.9999765	-3.463220
cg24326130	chr6	32811997	chr6:32811494-32811839	S_Shore	PSMB8;PSMB8	Body;TSS200	0.7980946	-4.711303	5.955631	0.0000725	0.9999765	-3.470667
cg24985071	chr1	185126361	chr1:185125850-185126705	Island	SWT1;TRMT1L;SWT1;SWT1;TRMT1L	5’UTR;TSS200;1stExon;5’UTR;TSS1500	-0.9618703	-5.273609	-5.943504	0.0000738	0.9999765	-3.472083
cg08344081	chr6	41339785	chr6:41339236-41340027	Island			-0.7355960	-5.824034	-5.894639	0.0000795	0.9999765	-3.477844
cg07751341	chr19	12848092	chr19:12848278-12848586	N_Shore	ASNA1	TSS1500	-1.1036569	-4.558074	-5.856010	0.0000844	0.9999765	-3.482463
cg23561766	chr14	50998480	chr14:50998577-50999711	N_Shore	MAP4K5;MAP4K5;ATL1	Body;Body;TSS1500	-0.8288611	-4.271157	-5.843770	0.0000860	0.9999765	-3.483938
cg05240976	chr2	54787135	chr2:54785026-54785969	S_Shore	SPTBN1;SPTBN1	Body;Body	-0.8814228	-5.412744	-5.696999	0.0001078	0.9999765	-3.502085
cg03546814	chr1	12498784		OpenSea	VPS13D;VPS13D	Body;Body	1.1694721	-3.098589	5.633903	0.0001189	0.9999765	-3.510152
cg13445358	chr12	53661749	chr12:53661928-53662429	N_Shore	ESPL1	TSS1500	-0.8170765	-4.198784	-5.609503	0.0001236	0.9999765	-3.513316
cg09666237	chr17	18128581	chr17:18128577-18128821	Island	LLGL1	TSS1500	-0.6334495	-4.535014	-5.591907	0.0001270	0.9999765	-3.515613
cg02085891	chr1	9256865	chr1:9258565-9258956	N_Shore			-0.7945559	-5.127125	-5.478465	0.0001519	0.9999765	-3.530736
cg18933904	chr3	187461439	chr3:187461974-187462197	N_Shore	BCL6	5’UTR	-0.7941410	-4.615164	-5.460913	0.0001562	0.9999765	-3.533125

saveRDS(design_tw, "bl_design_motor.rds")

Regression analysis - within twin pair on diagnosis

#diagnosis:0,3.080605e-05
design_tw <- model.matrix(~ Twin_Group + diagnosis )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        247391        9532        5404        5422        3987        4134
## NotSig       39652      759118      792733      790954      791606      795732
## Up          515604       33997        4510        6271        7054        2781
##        Twin_Group8 Twin_Group12 Twin_Group13 diagnosis
## Down          3593         4886         6461         0
## NotSig      794321       794189       792486    802647
## Up            4733         3572         3700         0

top <- topTable(fit2_tw,coef="diagnosis",num=Inf, sort.by = "P")
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 14778

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_diagnosis.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="diagnosis") %>% kable_paper("hover", full_width = F)

diagnosis
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg03643559	chr10	124757121		OpenSea	IKZF5	Body	0.4071168	0.4238158	5.399880	0.0001754	0.9999989	-2.119538
cg13620413	chr1	212235894		OpenSea	DTL;DTL;DTL	5’UTR;Body;Body	-0.3446894	3.5177546	-5.255890	0.0002207	0.9999989	-2.174991
cg09119495	chr5	90252465		OpenSea	ADGRV1;ADGRV1	Body;Body	0.4023156	-1.4703628	5.177953	0.0002503	0.9999989	-2.205961
cg09809932	chr1	6515597	chr1:6514108-6516325	Island	ESPN	Body	-0.4251736	-5.4865756	-5.168133	0.0002544	0.9999989	-2.209912
cg15980797	chr6	137813648	chr6:137814355-137815202	N_Shore	OLIG3;OLIG3	1stExon;3’UTR	0.4830796	-3.5461377	5.138311	0.0002670	0.9999989	-2.221977
cg05608508	chr16	15140886		OpenSea	NTAN1	Body	0.4911798	4.9277255	5.113885	0.0002778	0.9999989	-2.231934
cg23272193	chr13	77901449	chr13:77903398-77903872	N_Shore	MYCBP2	TSS1500	0.4929036	-5.2686441	5.111727	0.0002788	0.9999989	-2.232817
cg19088141	chr1	113056151	chr1:113050813-113052301	S_Shelf	WNT2B;WNT2B	Body;Body	0.3418715	3.0113422	5.098103	0.0002850	0.9999989	-2.238404
cg08481520	chr3	110789352	chr3:110790149-110791401	N_Shore	PVRL3-AS1;PVRL3;PVRL3	TSS1500;TSS1500;TSS1500	0.4070263	-4.7221702	5.032268	0.0003175	0.9999989	-2.265704
cg16956665	chr11	125479362		OpenSea	STT3A	Body	0.6696878	0.6660224	5.023038	0.0003223	0.9999989	-2.269572
cg11006267	chr5	139017424	chr5:139017133-139017668	Island			0.4351828	-5.9444345	5.014290	0.0003270	0.9999989	-2.273247
cg03062264	chr7	134354807		OpenSea	BPGM;BPGM;BPGM	Body;Body;Body	0.3103958	-4.0944066	5.005887	0.0003315	0.9999989	-2.276786
cg26240231	chr7	1148101	chr7:1149179-1150306	N_Shore	C7orf50;C7orf50;C7orf50	Body;Body;Body	-0.3485023	3.2237614	-4.968365	0.0003526	0.9999989	-2.292687
cg04406808	chr11	43702245	chr11:43702016-43702597	Island	HSD17B12;HSD17B12	5’UTR;1stExon	-0.4750830	-4.8290504	-4.863746	0.0004194	0.9999989	-2.337910
cg00830735	chr10	93393238	chr10:93392667-93393147	S_Shore	PPP1R3C	TSS1500	-0.3070775	-3.2560593	-4.837906	0.0004378	0.9999989	-2.349282
cg10764907	chr7	158886532	chr7:158886367-158886595	Island	VIPR2	Body	0.4184330	2.8988197	4.834167	0.0004406	0.9999989	-2.350934
cg02733698	chr1	208084824	chr1:208084098-208084513	S_Shore	CD34;CD34	TSS200;TSS200	0.2886961	-3.7135497	4.822465	0.0004492	0.9999989	-2.356116
cg25032595	chr13	96204978	chr13:96204691-96205496	Island	CLDN10;CLDN10;CLDN10;CLDN10	5’UTR;1stExon;Body;Body	-0.3770303	-4.5040270	-4.773131	0.0004879	0.9999989	-2.378147
cg04324167	chr11	129842470		OpenSea	PRDM10;PRDM10	5’UTR;5’UTR	-0.5488210	1.0338084	-4.757719	0.0005007	0.9999989	-2.385091
cg07655025	chr19	12835001	chr19:12833104-12833574	S_Shore	TNPO2	TSS200	-0.4410381	-4.3361600	-4.750070	0.0005071	0.9999989	-2.388548
cg19074669	chr2	8260068		OpenSea	LINC00299	Body	-0.4037790	1.8878291	-4.716698	0.0005364	0.9999989	-2.403715
cg13365972	chr11	1629275		OpenSea	HCCA2;KRTAP5-3	Body;1stExon	0.2935409	0.8693092	4.712810	0.0005399	0.9999989	-2.405491
cg23146534	chr7	99149191	chr7:99149508-99149807	N_Shore	C7orf38	5’UTR	0.3568616	-3.7440973	4.695541	0.0005559	0.9999989	-2.413402
cg05012902	chr17	8065517	chr17:8065909-8066494	N_Shore	VAMP2	Body	0.4978009	-3.3075049	4.675448	0.0005750	0.9999989	-2.422654
cg00905457	chr10	98015413		OpenSea	BLNK;BLNK;BLNK;BLNK;BLNK;BLNK;BLNK;BLNK;BLNK	Body;Body;Body;Body;Body;Body;Body;Body;Body	-0.5439586	1.1262740	-4.663317	0.0005870	0.9999989	-2.428264
cg18253799	chr17	78069191		OpenSea	CCDC40	Body	0.5289079	4.6851023	4.654237	0.0005960	0.9999989	-2.432475
cg06979656	chr7	116409579		OpenSea	MET;MET	Body;Body	-0.3182983	3.3518182	-4.645955	0.0006045	0.9999989	-2.436325
cg09115762	chr1	228591111	chr1:228593811-228594713	N_Shelf	TRIM11	Body	-0.3274110	4.0106091	-4.644123	0.0006063	0.9999989	-2.437178
cg21498965	chr19	12863540	chr19:12865419-12865825	N_Shore	BEST2	1stExon	-0.3621138	2.6455151	-4.643169	0.0006073	0.9999989	-2.437622
cg01919488	chr2	71558589	chr2:71558723-71559525	N_Shore	ZNF638;ZNF638	TSS1500;TSS1500	-0.3119749	-3.4363917	-4.594476	0.0006596	0.9999989	-2.460450

saveRDS(design_tw, "bl_design_diagnosis.rds")

Regression analysis - within twin pair on SRS

#SRS:0,1.895198e-07
design_tw <- model.matrix(~ Twin_Group + SRS )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        215912        4732        5776        2996        4794        5816
## NotSig      111628      787424      792699      796948      789217      793429
## Up          475107       10491        4172        2703        8636        3402
##        Twin_Group8 Twin_Group12 Twin_Group13    SRS
## Down          4594         3420         2438      0
## NotSig      791812       797045       799034 802647
## Up            6241         2182         1175      0

top <- topTable(fit2_tw,coef="SRS",num=Inf, sort.by = "P")
head(top)

##                  logFC   AveExpr         t      P.Value adj.P.Val        B
## cg21058391 -0.02879180  1.171893 -7.095354 1.116883e-05  0.381132 2.986096
## cg23996714 -0.02957017 -3.834259 -6.884670 1.508379e-05  0.381132 2.669287
## cg03715919 -0.02491278  4.010281 -6.494544 2.671957e-05  0.381132 2.067037
## cg21549639 -0.02858546 -2.903668 -6.480119 2.730112e-05  0.381132 2.044375
## cg12093819 -0.01999452  3.134778 -6.337027 3.385323e-05  0.381132 1.818043
## cg25724515 -0.02123326  3.783982 -6.295747 3.603923e-05  0.381132 1.752231

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 105055

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_SRS.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="SRS score") %>% kable_paper("hover", full_width = F)

SRS score
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg21058391	chr4	160027487	chr4:160023787-160025437	S_Shelf			-0.0287918	1.171893	-7.095354	1.12e-05	0.381132	2.9860959
cg23996714	chr8	103666229	chr8:103666157-103668861	Island	KLF10;KLF10;KLF10;KLF10	Body;TSS200;TSS200;TSS200	-0.0295702	-3.834259	-6.884670	1.51e-05	0.381132	2.6692871
cg03715919	chr22	41605644		OpenSea	L3MBTL2	Body	-0.0249128	4.010281	-6.494544	2.67e-05	0.381132	2.0670373
cg21549639	chr19	45394156	chr19:45393833-45394992	Island	TOMM40;TOMM40;TOMM40	TSS1500;TSS1500;TSS1500	-0.0285855	-2.903668	-6.480119	2.73e-05	0.381132	2.0443749
cg12093819	chr21	30118661		OpenSea			-0.0199945	3.134778	-6.337027	3.39e-05	0.381132	1.8180434
cg25724515	chr11	131926464		OpenSea	NTM;NTM;NTM;NTM	Body;Body;Body;Body	-0.0212333	3.783981	-6.295746	3.60e-05	0.381132	1.7522306
cg09331760	chr9	132926455		OpenSea			-0.0254694	3.079046	-6.220647	4.04e-05	0.381132	1.6319035
cg25859812	chr15	99441745		OpenSea	IGF1R	Body	-0.0214454	3.350382	-6.164398	4.40e-05	0.381132	1.5412739
cg14500206	chr7	131223393		OpenSea	PODXL;PODXL	Body;Body	-0.0245284	3.926922	-6.127991	4.66e-05	0.381132	1.4823827
cg03860992	chr5	161273013		OpenSea	GABRA1	TSS1500	-0.0371557	2.070269	-6.067546	5.11e-05	0.381132	1.3842081
cg03627896	chr16	30934334	chr16:30933637-30935774	Island	NCRNA00095	Body	-0.0318163	-3.501236	-6.026692	5.45e-05	0.381132	1.3175691
cg06833636	chr12	12659232		OpenSea	DUSP16	Body	-0.0365255	1.229402	-6.012294	5.57e-05	0.381132	1.2940293
cg25247582	chr2	22754866		OpenSea			-0.0213127	3.216373	-5.998832	5.69e-05	0.381132	1.2719947
cg11911679	chr2	182520919	chr2:182521221-182521927	N_Shore	CERKL;CERKL;CERKL;CERKL;CERKL;CERKL;CERKL	Body;Body;Body;Body;Body;Body;Body	-0.0192322	2.487683	-5.974525	5.91e-05	0.381132	1.2321443
cg25073705	chr11	1008290	chr11:1007921-1008343	Island	AP2A2	Body	-0.0287487	4.987648	-5.970697	5.95e-05	0.381132	1.2258627
cg10014923	chr9	131597992		OpenSea	CCBL1;CCBL1;CCBL1;CCBL1;CCBL1	Body;Body;Body;Body;Body	-0.0205366	3.575841	-5.946456	6.18e-05	0.381132	1.1860281
cg25349621	chr10	65185619		OpenSea	JMJD1C	Body	-0.0219793	2.924361	-5.926093	6.38e-05	0.381132	1.1525032
cg24796554	chr11	128565519	chr11:128562671-128565011	S_Shore	FLI1;FLI1	Body;5’UTR	-0.0258670	-4.685528	-5.901892	6.62e-05	0.381132	1.1125874
cg02609271	chr15	92398726	chr15:92396013-92397682	S_Shore	SLCO3A1;SLCO3A1	Body;Body	-0.0194659	2.966990	-5.866407	7.01e-05	0.381132	1.0539133
cg02576920	chr4	76996888		OpenSea	ART3;ART3;ART3	5’UTR;5’UTR;5’UTR	-0.0263779	2.264277	-5.840509	7.30e-05	0.381132	1.0109838
cg03034673	chr7	106670287		OpenSea			-0.0214507	2.281965	-5.811119	7.65e-05	0.381132	0.9621551
cg24793746	chr8	1252864		OpenSea			-0.0186354	4.729247	-5.792388	7.88e-05	0.381132	0.9309724
cg10125291	chr4	4139916		OpenSea			-0.0385878	2.929353	-5.780874	8.02e-05	0.381132	0.9117822
cg22011526	chr6	36857605	chr6:36853590-36853927	S_Shelf	C6orf89	Body	-0.0370114	2.015564	-5.749296	8.44e-05	0.381132	0.8590557
cg05303408	chr6	20534748		OpenSea	CDKAL1;CDKAL1	1stExon;5’UTR	0.0236652	-4.661956	5.731876	8.67e-05	0.381132	0.8299123
cg26817186	chr1	188340817		OpenSea			-0.0238569	2.750575	-5.720987	8.83e-05	0.381132	0.8116734
cg19963546	chr6	148449297		OpenSea			-0.0198382	3.362010	-5.710127	8.98e-05	0.381132	0.7934669
cg12066726	chr1	35297677		OpenSea			-0.0170382	3.595962	-5.681247	9.41e-05	0.381132	0.7449733
cg01424561	chr7	106481485		OpenSea			-0.0214755	4.455325	-5.675229	9.50e-05	0.381132	0.7348527
cg12173308	chr7	47390824		OpenSea	TNS3	Body	-0.0237873	3.260284	-5.669683	9.58e-05	0.381132	0.7255228

saveRDS(design_tw, "bl_design_srs.rds")

Regression analysis - within twin pair on inverse IQ

#iiq:0,1.326967e-05
design_tw <- model.matrix(~ Twin_Group + iiq )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        248598       10008       12858       11360        5968        8335
## NotSig       32816      771176      782303      777593      784202      789527
## Up          521233       21463        7486       13694       12477        4785
##        Twin_Group8 Twin_Group12 Twin_Group13    iiq
## Down          6657        41852       158158  97403
## NotSig      784214       747025       624450 690484
## Up           11776        13770        20039  14760

top <- topTable(fit2_tw,coef="iiq",num=Inf, sort.by = "P")
head(top)

##                 logFC   AveExpr         t      P.Value  adj.P.Val        B
## cg04468444 -0.9134611  1.823403 -9.471934 3.478678e-07 0.03696148 6.265245
## cg24575266 -0.7229819  1.090382 -8.890445 7.133129e-07 0.03696148 5.717139
## cg23438384 -0.6267848  0.159518 -8.656717 9.614420e-07 0.03696148 5.484546
## cg02894209 -0.6930069 -3.599365 -8.651787 9.675755e-07 0.03696148 5.479562
## cg22557012 -0.7919297  1.045933 -8.614486 1.015359e-06 0.03696148 5.441740
## cg17257934  0.5566920  2.853856  8.214053 1.720148e-06 0.03696148 5.023644

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 265932

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_iIQ.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="inverse IQ") %>% kable_paper("hover", full_width = F)

inverse IQ
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg04468444	chr3	124354566		OpenSea	KALRN;KALRN	Body;Body	-0.9134611	1.8234031	-9.471934	3.0e-07	0.0369615	6.265245
cg24575266	chr5	38559733	chr5:38556222-38557563	S_Shelf	LIFR-AS1;LIFR;LIFR-AS1	Body;5’UTR;Body	-0.7229819	1.0903824	-8.890446	7.0e-07	0.0369615	5.717139
cg23438384	chr6	12050022		OpenSea	HIVEP1	Body	-0.6267848	0.1595180	-8.656717	1.0e-06	0.0369615	5.484546
cg02894209	chr15	43028463	chr15:43028413-43029346	Island	CDAN1	Body	-0.6930069	-3.5993655	-8.651787	1.0e-06	0.0369615	5.479562
cg22557012	chr17	67138792		OpenSea	ABCA6	TSS1500	-0.7919297	1.0459327	-8.614486	1.0e-06	0.0369615	5.441740
cg17257934	chr12	131690459		OpenSea	LINC01257	Body	0.5566920	2.8538564	8.214053	1.7e-06	0.0369615	5.023644
cg14552672	chr2	111752564		OpenSea	ACOXL	Body	-0.7051687	0.9782751	-8.208317	1.7e-06	0.0369615	5.017492
cg21250898	chr8	12652814		OpenSea	LOC340357;LINC00681	Body;Body	-0.7957907	1.4895465	-8.152590	1.9e-06	0.0369615	4.957473
cg06250511	chr8	126231813		OpenSea	NSMCE2	Body	-0.7063101	0.6481511	-7.889909	2.7e-06	0.0369615	4.668501
cg22222965	chr14	91948818		OpenSea	PPP4R3A;PPP4R3A	Body;Body	-0.9378657	0.9602483	-7.883407	2.7e-06	0.0369615	4.661219
cg17312424	chr4	95517497		OpenSea	PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5	Body;5’UTR;Body;Body;Body;Body	-0.8858171	1.3814562	-7.856026	2.8e-06	0.0369615	4.630487
cg12443616	chr12	7959662		OpenSea			0.5282220	-4.9272362	7.848517	2.8e-06	0.0369615	4.622041
cg07250516	chr18	52275086		OpenSea			-0.5992332	2.8193183	-7.834423	2.9e-06	0.0369615	4.606163
cg26286934	chr3	181417051	chr3:181413014-181414022	S_Shelf	SOX2-OT;SOX2-OT;SOX2-OT;SOX2-OT;SOX2-OT;SOX2-OT	Body;Body;Body;Body;Body;Body	-0.6359582	1.6015191	-7.817897	3.0e-06	0.0369615	4.587507
cg08568736	chr20	1757780	chr20:1757779-1758134	Island			-0.5132923	-3.8137235	-7.686918	3.5e-06	0.0369615	4.438201
cg08341589	chr15	59971009	chr15:59968889-59969096	S_Shore	BNIP2	Body	-0.7603199	0.0753735	-7.681287	3.6e-06	0.0369615	4.431725
cg24837026	chr3	155607265		OpenSea	GMPS	Body	-0.5741611	4.3566518	-7.669545	3.6e-06	0.0369615	4.418204
cg27608402	chr3	152787564		OpenSea			-0.8036066	0.6518530	-7.649644	3.7e-06	0.0369615	4.395239
cg25104558	chr1	21872684		OpenSea	ALPL;ALPL;ALPL	5’UTR;5’UTR;5’UTR	-0.8786846	0.8985256	-7.575218	4.1e-06	0.0369615	4.308819
cg19889859	chr1	67672609		OpenSea	IL23R;IL23R	ExonBnd;Body	-0.5604408	2.9833920	-7.569380	4.2e-06	0.0369615	4.302005
cg19561774	chr6	160679690	chr6:160679377-160679701	Island	SLC22A2	1stExon	-0.4301370	4.0257152	-7.516633	4.5e-06	0.0369615	4.240197
cg01068931	chr17	70460238		OpenSea	LINC00673	Body	0.4933991	2.7541635	7.495424	4.6e-06	0.0369615	4.215223
cg19178585	chr12	109971145		OpenSea	UBE3B;UBE3B	Body;Body	-0.5313279	2.8843057	-7.475270	4.8e-06	0.0369615	4.191427
cg05434396	chr8	12652932		OpenSea	LOC340357;LINC00681	Body;Body	-0.7076171	1.8078986	-7.421156	5.2e-06	0.0369615	4.127224
cg16253870	chr12	68959172		OpenSea			-0.7887365	1.7549876	-7.383071	5.5e-06	0.0369615	4.081764
cg14551152	chr1	246271438		OpenSea	SMYD3;SMYD3	Body;Body	-0.8289026	0.8391214	-7.380123	5.5e-06	0.0369615	4.078236
cg21602651	chr1	220397618		OpenSea	RAB3GAP2	Body	-0.6727089	1.0222872	-7.349104	5.7e-06	0.0369615	4.041029
cg12058390	chr7	4885521		OpenSea	RADIL	Body	-0.4618408	-2.9685392	-7.327176	5.9e-06	0.0369615	4.014634
cg12667941	chr1	8450904		OpenSea	RERE;RERE;RERE	5’UTR;Body;Body	-0.7574051	0.2086911	-7.326099	5.9e-06	0.0369615	4.013336
cg02438517	chr1	167523547	chr1:167522469-167523070	S_Shore	CREG1	TSS1500	-0.4740406	-2.8724274	-7.310770	6.1e-06	0.0369615	3.994838

saveRDS(design_tw, "bl_design_iiq.rds")

Regression analysis - within twin pair on inverse language skills

#ilanguage:0,9.149182e-08
design_tw <- model.matrix(~ Twin_Group + ilanguage )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        249393       15379       20367       15499        6781        8973
## NotSig       31334      757149      770300      767557      781353      788514
## Up          521920       30119       11980       19591       14513        5160
##        Twin_Group8 Twin_Group12 Twin_Group13 ilanguage
## Down          8683       106282        94082    140108
## NotSig      776403       669094       694176    635625
## Up           17561        27271        14389     26914

top <- topTable(fit2_tw,coef="ilanguage",num=Inf, sort.by = "P")
head(top)

##                 logFC  AveExpr          t      P.Value  adj.P.Val        B
## cg16452467 -0.7352017 1.981872 -11.027956 5.632419e-08 0.01611082 8.073587
## cg19797280 -0.6465954 2.690812  -9.816791 2.200096e-07 0.01611082 6.996977
## cg23802117 -0.6488990 3.721327  -9.683595 2.576273e-07 0.01611082 6.868711
## cg04924454 -0.5897079 1.658075  -9.389367 3.673146e-07 0.01611082 6.577938
## cg00317081 -0.6541627 1.151801  -9.342842 3.888068e-07 0.01611082 6.531003
## cg08441285 -0.7674833 2.256906  -9.303528 4.080133e-07 0.01611082 6.491137

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 289985

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_ilanguage.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="inverse language") %>% kable_paper("hover", full_width = F)

inverse language
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg16452467	chr1	46509145		OpenSea	PIK3R3;PIK3R3;PIK3R3;PIK3R3;PIK3R3	3’UTR;3’UTR;3’UTR;3’UTR;3’UTR	-0.7352017	1.981872	-11.027956	1.0e-07	0.0161108	8.073587
cg19797280	chr5	132009731		OpenSea	IL4;IL4;IL4;IL4	5’UTR;5’UTR;1stExon;1stExon	-0.6465954	2.690812	-9.816791	2.0e-07	0.0161108	6.996977
cg23802117	chr1	26625392		OpenSea	UBXN11;UBXN11;UBXN11	Body;Body;Body	-0.6488990	3.721327	-9.683595	3.0e-07	0.0161108	6.868711
cg04924454	chr7	26327799	chr7:26331463-26331944	N_Shelf			-0.5897079	1.658075	-9.389367	4.0e-07	0.0161108	6.577938
cg00317081	chr7	38182566		OpenSea			-0.6541627	1.151801	-9.342842	4.0e-07	0.0161108	6.531003
cg08441285	chr7	37741388		OpenSea			-0.7674833	2.256906	-9.303528	4.0e-07	0.0161108	6.491137
cg25926652	chr2	201810588		OpenSea	ORC2L	Body	-0.7397377	2.450556	-9.259294	4.0e-07	0.0161108	6.446052
cg04765650	chr10	50717412		OpenSea	ERCC6	Body	-1.0424272	1.979610	-9.222028	5.0e-07	0.0161108	6.407882
cg22171706	chr15	91158378		OpenSea	CRTC3;CRTC3	Body;Body	-0.7518183	2.613924	-9.149102	5.0e-07	0.0161108	6.332685
cg21550248	chr12	123011757	chr12:123011261-123011765	Island	RSRC2;KNTC1;RSRC2;RSRC2	TSS1500;TSS200;TSS1500;TSS1500	-0.8617833	-4.224074	-9.140566	5.0e-07	0.0161108	6.323839
cg07958316	chr17	67853145		OpenSea	LINC01483;LINC01483	Body;Body	0.5905435	2.416168	9.093584	5.0e-07	0.0161108	6.274991
cg17312424	chr4	95517497		OpenSea	PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5	Body;5’UTR;Body;Body;Body;Body	-0.8755778	1.381456	-8.895919	7.0e-07	0.0161108	6.066393
cg12108564	chr4	104454891		OpenSea			-0.6682651	3.100187	-8.864030	7.0e-07	0.0161108	6.032269
cg04468444	chr3	124354566		OpenSea	KALRN;KALRN	Body;Body	-0.8759047	1.823403	-8.739686	8.0e-07	0.0161108	5.897941
cg22611504	chr10	52750754	chr10:52750956-52751765	N_Shore	PRKG1	TSS200	-0.5961863	-3.929255	-8.725293	8.0e-07	0.0161108	5.882262
cg23452498	chr5	144889607		OpenSea			-0.5558488	3.506532	-8.620379	1.0e-06	0.0161108	5.767137
cg02873783	chr11	110225729		OpenSea			-0.5898812	2.203426	-8.602360	1.0e-06	0.0161108	5.747217
cg09399225	chr9	123007911		OpenSea	MIR147	TSS1500	0.4759751	2.301524	8.594000	1.0e-06	0.0161108	5.737960
cg02595886	chr6	37270436		OpenSea	TBC1D22B;TBC1D22B	Body;Body	-0.8495480	2.512721	-8.579342	1.0e-06	0.0161108	5.721706
cg15027446	chr4	56484062		OpenSea	NMU;NMU;NMU;NMU	Body;Body;Body;Body	0.5092146	1.967602	8.511648	1.1e-06	0.0161108	5.646267
cg10409831	chr8	30469883		OpenSea	GTF2E2	Body	-0.6877905	4.251862	-8.510512	1.1e-06	0.0161108	5.644996
cg23412299	chr11	127813919		OpenSea			0.5737846	1.719428	8.498057	1.1e-06	0.0161108	5.631046
cg24485437	chr6	139265331		OpenSea	REPS1;REPS1;REPS1;REPS1	Body;Body;Body;Body	-0.9074396	1.984342	-8.477866	1.2e-06	0.0161108	5.608388
cg21639114	chr6	130687884	chr6:130686414-130687736	S_Shore			-0.7108508	-2.495998	-8.373044	1.3e-06	0.0161108	5.489862
cg21486250	chr6	147478688		OpenSea	STXBP5-AS1	Body	-0.5010170	2.716102	-8.330417	1.4e-06	0.0161108	5.441230
cg07453055	chr1	11786594		OpenSea			-0.5922369	2.107664	-8.327145	1.4e-06	0.0161108	5.437486
cg25684961	chr10	75761004	chr10:75757484-75758227	S_Shelf	VCL;VCL	Body;Body	-0.5727379	3.359333	-8.283647	1.5e-06	0.0161108	5.387580
cg07426802	chr8	60885377		OpenSea			0.5294140	1.173708	8.283313	1.5e-06	0.0161108	5.387196
cg25291613	chr6	145569051		OpenSea			0.5217365	1.685947	8.275394	1.5e-06	0.0161108	5.378082
cg19178585	chr12	109971145		OpenSea	UBE3B;UBE3B	Body;Body	-0.5236255	2.884306	-8.265678	1.5e-06	0.0161108	5.366888

saveRDS(design_tw, "bl_design_ilanguage.rds")

Converting to beta from M values

bDat = ilogit2(mvals)
bDat_new = getBeta(gmset_flt)
#View(bDat)
write.csv(bDat,file="ASD_blood_beta_onADOS_withintw_Nov27.csv",row.names=TRUE)

Genome wide average methylation

vioplot(bDat)

#gwam <- colMeans(bDat)
gwam <- apply(bDat,2,median)

message("GWAM assocation with ADOS")

## GWAM assocation with ADOS

mylm <- lm(gwam~ADOS)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ ADOS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.058946 -0.013289  0.006852  0.010805  0.025095 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.8250743  0.0126420   65.26   <2e-16 ***
## ADOS        -0.0004924  0.0009850   -0.50    0.624    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02285 on 16 degrees of freedom
## Multiple R-squared:  0.01538,    Adjusted R-squared:  -0.04616 
## F-statistic: 0.2499 on 1 and 16 DF,  p-value: 0.624

message("GWAM assocation with SRS")

## GWAM assocation with SRS

mylm <- lm(gwam~SRS)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ SRS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.053804 -0.010818  0.000743  0.009880  0.030569 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.8086684  0.0131917  61.301   <2e-16 ***
## SRS         0.0001430  0.0001617   0.885    0.389    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02249 on 16 degrees of freedom
## Multiple R-squared:  0.04665,    Adjusted R-squared:  -0.01293 
## F-statistic: 0.7829 on 1 and 16 DF,  p-value: 0.3894

message("GWAM assocation with motor impairment")

## GWAM assocation with motor impairment

mylm <- lm(gwam~motor)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ motor)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059606 -0.011604  0.005402  0.011541  0.026897 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.193e-01  7.485e-03 109.459   <2e-16 ***
## motor       2.696e-05  5.798e-03   0.005    0.996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02303 on 16 degrees of freedom
## Multiple R-squared:  1.352e-06,  Adjusted R-squared:  -0.0625 
## F-statistic: 2.162e-05 on 1 and 16 DF,  p-value: 0.9963

message("GWAM assocation with diagnosis")

## GWAM assocation with diagnosis

mylm <- lm(gwam~diagnosis)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ diagnosis)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057435 -0.013965  0.007486  0.010423  0.026534 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.824345   0.011270  73.147   <2e-16 ***
## diagnosis   -0.003591   0.007128  -0.504    0.621    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02285 on 16 degrees of freedom
## Multiple R-squared:  0.01562,    Adjusted R-squared:  -0.0459 
## F-statistic: 0.2539 on 1 and 16 DF,  p-value: 0.6212

message("GWAM assocation with iIQ")

## GWAM assocation with iIQ

mylm <- lm(gwam~iiq)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ iiq)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.051755 -0.013123  0.006734  0.015212  0.030774 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.819357   0.005148 159.153   <2e-16 ***
## iiq         -0.007085   0.005297  -1.337      0.2    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02184 on 16 degrees of freedom
## Multiple R-squared:  0.1006, Adjusted R-squared:  0.04434 
## F-statistic: 1.789 on 1 and 16 DF,  p-value: 0.1998

plot(gwam~iiq)
abline(mylm)

message("GWAM assocation with ilanguage")

## GWAM assocation with ilanguage

mylm <- lm(gwam~ilanguage)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ ilanguage)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.039199 -0.014987  0.004347  0.015767  0.028298 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.819357   0.004795 170.862   <2e-16 ***
## ilanguage   -0.010471   0.004934  -2.122   0.0498 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02035 on 16 degrees of freedom
## Multiple R-squared:  0.2196, Adjusted R-squared:  0.1708 
## F-statistic: 4.503 on 1 and 16 DF,  p-value: 0.04981

plot(gwam~ilanguage)
abline(mylm)

probes_body <- rownames(ann_sub[grep("Body",ann_sub$UCSC_RefGene_Group),])
bDat_body <- bDat[which(rownames(bDat) %in% probes_body),]
gwam <- apply(bDat_body,2,median)

probes_body <- rownames(ann_sub[grep("Island",ann_sub$Relation_to_Island),])
bDat_body <- bDat[which(rownames(bDat) %in% probes_body),]
gwam <- apply(bDat_body,2,median)

Plotting effect sizes of top DMPs

res <- read.csv("limma_blood_ADOS.csv")

TOPPROBES <- head(res$Name,9)
par(mfrow=c(3,3))
sapply(TOPPROBES,effect_plot)

## $cg07655025
## NULL
## 
## $cg18253799
## NULL
## 
## $cg25032595
## NULL
## 
## $cg16956665
## NULL
## 
## $cg06705930
## NULL
## 
## $cg18845598
## NULL
## 
## $cg13069346
## NULL
## 
## $cg12322146
## NULL
## 
## $cg11568374
## NULL

TOPPROBES <- head(res$Name,4)
par(mfrow=c(2,2))
sapply(TOPPROBES,effect_plot)

## $cg07655025
## NULL
## 
## $cg18253799
## NULL
## 
## $cg25032595
## NULL
## 
## $cg16956665
## NULL

pdf("effect_blood.pdf")
par(mfrow=c(2,2))
sapply(TOPPROBES,effect_plot)

## $cg07655025
## NULL
## 
## $cg18253799
## NULL
## 
## $cg25032595
## NULL
## 
## $cg16956665
## NULL

dev.off()

## png 
##   2

par(mfrow=c(1,1))

Gene ontology of top DMPs LIMMA data

Gene Ontology analysis (gometh): top 1000 probes (limma data).

sigCpGs_1k = res$Name[1:1000]

sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)

## [1] 802647

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)

## All input CpGs are used for testing.

gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)

##                                                        Description   N DE
## hsa04014                                     Ras signaling pathway 227 12
## hsa05032                                        Morphine addiction  88  6
## hsa04961 Endocrine and other factor-regulated calcium reabsorption  51  4
## hsa04972                                      Pancreatic secretion  93  5
## hsa04730                                      Long-term depression  58  4
## hsa04727                                         GABAergic synapse  84  5
## hsa04510                                            Focal adhesion 195  9
## hsa04211                              Longevity regulating pathway  88  5
## hsa04913                                   Ovarian steroidogenesis  50  3
## hsa04910                                 Insulin signaling pathway 131  6
## hsa05217                                      Basal cell carcinoma  63  4
## hsa03010                                                  Ribosome 125  4
## hsa04010                                    MAPK signaling pathway 289 11
## hsa04144                                               Endocytosis 241  9
## hsa05214                                                    Glioma  74  4
## hsa04146                                                Peroxisome  81  3
## hsa04928       Parathyroid hormone synthesis, secretion and action 105  5
## hsa05200                                        Pathways in cancer 507 16
## hsa04370                                    VEGF signaling pathway  59  3
## hsa01521                 EGFR tyrosine kinase inhibitor resistance  77  4
##                P.DE FDR
## hsa04014 0.01012547   1
## hsa05032 0.03804687   1
## hsa04961 0.03877257   1
## hsa04972 0.04526915   1
## hsa04730 0.06423433   1
## hsa04727 0.07270276   1
## hsa04510 0.07439893   1
## hsa04211 0.07551999   1
## hsa04913 0.08782501   1
## hsa04910 0.08917767   1
## hsa05217 0.09291131   1
## hsa03010 0.10771987   1
## hsa04010 0.11639308   1
## hsa04144 0.11693837   1
## hsa05214 0.13356094   1
## hsa04146 0.15575274   1
## hsa04928 0.16047009   1
## hsa05200 0.16940708   1
## hsa04370 0.17198143   1
## hsa01521 0.17468000   1

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)

## All input CpGs are used for testing.

gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)

##            ONTOLOGY                                                TERM   N DE
## GO:0021517       BP                     ventral spinal cord development  46  8
## GO:0060737       BP                 prostate gland morphogenetic growth   4  3
## GO:0048663       BP                              neuron fate commitment  68  8
## GO:0007442       BP                               hindgut morphogenesis   7  3
## GO:0061525       BP                                 hindgut development   8  3
## GO:0046886       BP positive regulation of hormone biosynthetic process  10  3
## GO:0060736       BP                               prostate gland growth  10  3
## GO:0048665       BP                           neuron fate specification  33  5
## GO:0030500       BP                   regulation of bone mineralization  75  7
## GO:0001708       BP                             cell fate specification 106  9
## GO:0032331       BP  negative regulation of chondrocyte differentiation  24  4
## GO:0021515       BP                 cell differentiation in spinal cord  50  6
## GO:0048873       BP      homeostasis of number of cells within a tissue  28  4
## GO:0098982       CC                                  GABA-ergic synapse  77  7
## GO:0021510       BP                             spinal cord development 100  8
## GO:0030501       BP          positive regulation of bone mineralization  42  5
## GO:0034707       CC                            chloride channel complex  50  5
## GO:0061037       BP        negative regulation of cartilage development  31  4
## GO:0032352       BP    positive regulation of hormone metabolic process  15  3
## GO:0045669       BP   positive regulation of osteoblast differentiation  72  6
##                    P.DE        FDR
## GO:0021517 3.856049e-05 0.08143975
## GO:0060737 2.378381e-04 0.50207623
## GO:0048663 8.512534e-04 1.00000000
## GO:0007442 1.151716e-03 1.00000000
## GO:0061525 1.508117e-03 1.00000000
## GO:0046886 2.015782e-03 1.00000000
## GO:0060736 2.134847e-03 1.00000000
## GO:0048665 2.325196e-03 1.00000000
## GO:0030500 2.704020e-03 1.00000000
## GO:0001708 2.815011e-03 1.00000000
## GO:0032331 2.941636e-03 1.00000000
## GO:0021515 3.019843e-03 1.00000000
## GO:0048873 3.500302e-03 1.00000000
## GO:0098982 3.879903e-03 1.00000000
## GO:0021510 5.405672e-03 1.00000000
## GO:0030501 5.590602e-03 1.00000000
## GO:0034707 5.709473e-03 1.00000000
## GO:0061037 6.181592e-03 1.00000000
## GO:0032352 6.671851e-03 1.00000000
## GO:0045669 6.785512e-03 1.00000000

Now specifically analyse hypermethylated probes

res2 <- subset(res,logFC>0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)

Now specifically analyse hypomethylated probes

res2 <- subset(res,logFC<0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)

DMRCate - differentially methylated region analysis

#design matrix in regression 
design_tw <- model.matrix(~ADOS+Twin_Group)
design_tw_dmrc <- model.matrix(~ADOS)
#myannotation <- cpg.annotate("array", mDat, analysis.type="differential", design=design_tw, coef=2, fdr=1)
myannotation <- cpg.annotate("array", mvals, what = "M", 
  arraytype = "EPIC", analysis.type="differential", design_tw, coef=2)

## Your contrast returned no individually significant probes. Try increasing the fdr. Alternatively, set pcutoff manually in dmrcate() to return DMRs, but be warned there is an increased risk of Type I errors.

dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2, pcutoff=0.001)

## Fitting chr1...

## Fitting chr2...

## Fitting chr3...

## Fitting chr4...

## Fitting chr5...

## Fitting chr6...

## Fitting chr7...

## Fitting chr8...

## Fitting chr9...

## Fitting chr10...

## Fitting chr11...

## Fitting chr12...

## Fitting chr13...

## Fitting chr14...

## Fitting chr15...

## Fitting chr16...

## Fitting chr17...

## Fitting chr18...

## Fitting chr19...

## Fitting chr20...

## Fitting chr21...

## Fitting chr22...

## Demarcating regions...

## Done!

results.ranges <- data.frame(extractRanges(dmrcoutput, genome = "hg19") )

## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation

## downloading 1 resources

## retrieving 1 resource

## loading from cache

head(results.ranges, 20)

##   seqnames     start       end width strand no.cpgs min_smoothed_fdr  Stouffer
## 1     chr7 143746413 143746594   182      *       2     0.0003779910 0.9491839
## 2    chr11 125479352 125479362    11      *       2     0.0003779910 0.9491839
## 3    chr13  66437490  66437506    17      *       2     0.0008880616 0.9491839
## 4     chr1   4794912   4795114   203      *       3     0.0003779910 0.9775130
## 5     chr6  28945189  28945507   319      *       7     0.0003779910 0.9994569
## 6    chr19  12834767  12835206   440      *       8     0.0003779910 0.9999513
##       HMFDR    Fisher      maxdiff     meandiff overlapping.genes
## 1 0.8764716 0.9707790 -0.005090061 -0.003658126              <NA>
## 2 0.8764716 0.9707790  0.008347445  0.001441863             STT3A
## 3 0.8764716 0.9707790 -0.006440248 -0.004821590              <NA>
## 4 0.8764716 0.9923100 -0.009478235 -0.005155469             AJAP1
## 5 0.8869317 0.9999725 -0.006123642 -0.003685261         RN7SL471P
## 6 0.9017439 0.9999975 -0.007879880 -0.001744387             TNPO2

write.csv(results.ranges, file = "dmrcoutput.csv", row.names = TRUE)

Session information

sessionInfo()

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DMRcatedata_2.18.0                                 
##  [2] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [3] IlluminaHumanMethylationEPICmanifest_0.3.0         
##  [4] vioplot_0.4.0                                      
##  [5] zoo_1.8-12                                         
##  [6] sm_2.2-5.7.1                                       
##  [7] mitch_1.12.0                                       
##  [8] FlowSorted.Blood.EPIC_2.4.2                        
##  [9] ExperimentHub_2.8.1                                
## [10] AnnotationHub_3.8.0                                
## [11] BiocFileCache_2.11.1                               
## [12] dbplyr_2.4.0                                       
## [13] DMRcate_2.14.1                                     
## [14] reshape2_1.4.4                                     
## [15] FlowSorted.Blood.450k_1.38.0                       
## [16] WGCNA_1.72-1                                       
## [17] fastcluster_1.2.3                                  
## [18] dynamicTreeCut_1.63-1                              
## [19] limma_3.56.2                                       
## [20] missMethyl_1.34.0                                  
## [21] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [22] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [23] minfi_1.46.0                                       
## [24] bumphunter_1.42.0                                  
## [25] locfit_1.5-9.8                                     
## [26] iterators_1.0.14                                   
## [27] foreach_1.5.2                                      
## [28] Biostrings_2.68.1                                  
## [29] XVector_0.40.0                                     
## [30] SummarizedExperiment_1.30.2                        
## [31] Biobase_2.60.0                                     
## [32] MatrixGenerics_1.12.3                              
## [33] matrixStats_1.1.0                                  
## [34] GenomicRanges_1.52.1                               
## [35] GenomeInfoDb_1.36.4                                
## [36] IRanges_2.34.1                                     
## [37] S4Vectors_0.38.2                                   
## [38] BiocGenerics_0.46.0                                
## [39] beeswarm_0.4.0                                     
## [40] ggplot2_3.4.4                                      
## [41] gplots_3.1.3                                       
## [42] RColorBrewer_1.1-3                                 
## [43] dplyr_1.1.3                                        
## [44] kableExtra_1.3.4                                   
## 
## loaded via a namespace (and not attached):
##   [1] DSS_2.48.0                    ProtGenerics_1.32.0          
##   [3] bitops_1.0-7                  httr_1.4.7                   
##   [5] webshot_0.5.5                 doParallel_1.0.17            
##   [7] tools_4.3.1                   doRNG_1.8.6                  
##   [9] backports_1.4.1               utf8_1.2.3                   
##  [11] R6_2.5.1                      HDF5Array_1.28.1             
##  [13] lazyeval_0.2.2                Gviz_1.44.2                  
##  [15] rhdf5filters_1.12.1           permute_0.9-7                
##  [17] withr_2.5.0                   GGally_2.1.2                 
##  [19] prettyunits_1.1.1             gridExtra_2.3                
##  [21] base64_2.0.1                  preprocessCore_1.61.0        
##  [23] cli_3.6.1                     labeling_0.4.3               
##  [25] sass_0.4.7                    readr_2.1.4                  
##  [27] genefilter_1.82.1             askpass_1.2.0                
##  [29] Rsamtools_2.16.0              systemfonts_1.0.4            
##  [31] foreign_0.8-85                siggenes_1.74.0              
##  [33] illuminaio_0.42.0             svglite_2.1.2                
##  [35] R.utils_2.12.2                dichromat_2.0-0.1            
##  [37] scrime_1.3.5                  BSgenome_1.68.0              
##  [39] readxl_1.4.3                  rstudioapi_0.15.0            
##  [41] impute_1.74.1                 RSQLite_2.3.3                
##  [43] generics_0.1.3                BiocIO_1.10.0                
##  [45] gtools_3.9.4                  GO.db_3.17.0                 
##  [47] Matrix_1.6-1                  interp_1.1-4                 
##  [49] fansi_1.0.4                   abind_1.4-5                  
##  [51] R.methodsS3_1.8.2             lifecycle_1.0.3              
##  [53] edgeR_3.42.4                  yaml_2.3.7                   
##  [55] rhdf5_2.44.0                  grid_4.3.1                   
##  [57] blob_1.2.4                    promises_1.2.1               
##  [59] crayon_1.5.2                  lattice_0.21-8               
##  [61] echarts4r_0.4.5               GenomicFeatures_1.52.2       
##  [63] annotate_1.78.0               KEGGREST_1.40.1              
##  [65] pillar_1.9.0                  knitr_1.43                   
##  [67] beanplot_1.3.1                rjson_0.2.21                 
##  [69] codetools_0.2-19              glue_1.6.2                   
##  [71] data.table_1.14.8             vctrs_0.6.3                  
##  [73] png_0.1-8                     cellranger_1.1.0             
##  [75] gtable_0.3.4                  cachem_1.0.8                 
##  [77] xfun_0.40                     mime_0.12                    
##  [79] S4Arrays_1.0.6                survival_3.5-7               
##  [81] statmod_1.5.0                 ellipsis_0.3.2               
##  [83] interactiveDisplayBase_1.38.0 nlme_3.1-163                 
##  [85] bit64_4.0.5                   bsseq_1.36.0                 
##  [87] progress_1.2.2                filelock_1.0.2               
##  [89] bslib_0.5.1                   nor1mix_1.3-0                
##  [91] KernSmooth_2.23-22            rpart_4.1.19                 
##  [93] colorspace_2.1-0              DBI_1.1.3                    
##  [95] Hmisc_5.1-1                   nnet_7.3-19                  
##  [97] tidyselect_1.2.0              bit_4.0.5                    
##  [99] compiler_4.3.1                curl_5.0.2                   
## [101] rvest_1.0.3                   htmlTable_2.4.2              
## [103] BiasedUrn_2.0.11              xml2_1.3.5                   
## [105] DelayedArray_0.26.7           rtracklayer_1.60.1           
## [107] checkmate_2.3.0               scales_1.2.1                 
## [109] caTools_1.18.2                quadprog_1.5-8               
## [111] rappdirs_0.3.3                stringr_1.5.0                
## [113] digest_0.6.33                 rmarkdown_2.24               
## [115] GEOquery_2.68.0               htmltools_0.5.6              
## [117] pkgconfig_2.0.3               jpeg_0.1-10                  
## [119] base64enc_0.1-3               sparseMatrixStats_1.12.2     
## [121] highr_0.10                    fastmap_1.1.1                
## [123] ensembldb_2.24.1              rlang_1.1.1                  
## [125] htmlwidgets_1.6.2             shiny_1.7.5                  
## [127] DelayedMatrixStats_1.22.6     farver_2.1.1                 
## [129] jquerylib_0.1.4               jsonlite_1.8.7               
## [131] BiocParallel_1.34.2           mclust_6.0.0                 
## [133] R.oo_1.25.0                   VariantAnnotation_1.46.0     
## [135] RCurl_1.98-1.13               magrittr_2.0.3               
## [137] Formula_1.2-5                 GenomeInfoDbData_1.2.10      
## [139] Rhdf5lib_1.22.1               munsell_0.5.0                
## [141] Rcpp_1.0.11                   stringi_1.7.12               
## [143] zlibbioc_1.46.0               MASS_7.3-60                  
## [145] plyr_1.8.9                    org.Hs.eg.db_3.17.0          
## [147] deldir_1.0-9                  splines_4.3.1                
## [149] multtest_2.56.0               hms_1.1.3                    
## [151] rngtools_1.5.2                biomaRt_2.56.1               
## [153] BiocVersion_3.17.1            XML_3.99-0.15                
## [155] evaluate_0.21                 latticeExtra_0.6-30          
## [157] biovizBase_1.48.0             BiocManager_1.30.22          
## [159] httpuv_1.6.11                 tzdb_0.4.0                   
## [161] tidyr_1.3.0                   openssl_2.1.0                
## [163] purrr_1.0.2                   reshape_0.8.9                
## [165] xtable_1.8-4                  restfulr_0.0.15              
## [167] AnnotationFilter_1.24.0       later_1.3.1                  
## [169] viridisLite_0.4.2             tibble_3.2.1                 
## [171] memoise_2.0.1                 AnnotationDbi_1.62.2         
## [173] GenomicAlignments_1.36.0      cluster_2.1.4

Autism Spectrum Disorder Methylation analysis - blood analysis

Namitha Mohandras, Mark Ziemann

2023-11-14