Autism Spectrum Disorder Methylation analysis

Introduction

Here we are analysing blood methylation in 9 twin pairs.

baseDir=getwd()
dataDir=paste(baseDir,"/ASD_EPIC_DATA",sep="")

suppressPackageStartupMessages({
  library("missMethyl")
  library("limma")
  library("minfi")
  library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library("RColorBrewer")
  library("matrixStats")
  library("gplots")
  library("WGCNA")
  library("FlowSorted.Blood.450k")
  library("reshape2")
  library("ggplot2")
  library("missMethyl")
  library("DMRcate")
  library("FlowSorted.Blood.EPIC")
  library("mitch")
  library("kableExtra")
  library("vioplot")
  library("RhpcBLASctl")
})

source("meth_functions.R")

RhpcBLASctl::blas_set_num_threads(1)

Load data

Load the annotation data and the Epic methylation data.

This analysis is to be conducted on Ubuntu with R4.

ann = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
anno <- ann
ann_sub = ann[,c("chr","pos","strand","Name","Islands_Name",
    "Relation_to_Island","UCSC_RefGene_Name","UCSC_RefGene_Group")]

targets_gen = read.metharray.sheet(dataDir, pattern = "ASD_blood_summarysheet.csv")

## [read.metharray.sheet] Found the following CSV files:

## [1] "/asd_meth/ASD_EPIC_DATA/ASD_blood_summarysheet.csv"

#targets$ID = paste(targets$Sample_Group,targets_gen$Sample_Name,sep=".")
rgSet = read.metharray.exp(targets = targets_gen)
sampleNames(rgSet) = targets_gen$SampleID

Testing MZ status

snpBetas = getSnpBeta(rgSet)

## Loading required package: IlluminaHumanMethylationEPICmanifest

d = dist(t(snpBetas))
hr = hclust(d, method = "complete", members=NULL)
plot(hr)

Quality control

detP = detectionP(rgSet)
qcReport(rgSet, sampNames = targets_gen$Sample_Name,
  pdf="qc-report_ASD_blood_nov27.pdf")

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## png 
##   2

cols=brewer.pal(4,"Set1")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,ylim=c(0,0.010),
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

Preprocessing

mset.raw = preprocessRaw(rgSet)

Data exploration

Using Multi-dimensional scaling (MDS) plots before filtering.

mdsPlot(mset.raw, sampGroups = targets_gen$Sample_Name, 
  sampNames=targets_gen$Social_interaction_on_ADOS,legendPos="bottom")

mdsPlot(mset.raw, sampGroups = targets_gen$Sex, 
  sampNames=targets_gen$SampleID,legendPos="bottom")

Cell type composition analysis

Need to consider using estimatecellcounts2.

rgSet$Slide <- as.numeric(rgSet$Slide)
rgSet$Sex<- as.character(rgSet$Sex)
rgSet$Sample_Name<- as.character(rgSet$Sample_Name)

cellCounts_new <- estimateCellCounts(rgSet, compositeCellType = "Blood", 
  processMethod = "auto", probeSelect = "auto", 
  cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono","Gran"),
  referencePlatform = c("IlluminaHumanMethylation450k"),
  returnAll = FALSE, meanPlot = TRUE)

## Loading required package: IlluminaHumanMethylation450kmanifest

## [estimateCellCounts] Combining user data with reference (flow sorted) data.

## Warning in DataFrame(sampleNames = c(colnames(rgSet),
## colnames(referenceRGset)), : 'stringsAsFactors' is ignored

## [estimateCellCounts] Processing user and reference data together.

## [preprocessQuantile] Mapping to genome.

## [preprocessQuantile] Fixing outliers.

## [preprocessQuantile] Quantile normalizing.

## [estimateCellCounts] Picking probes for composition estimation.

## [estimateCellCounts] Estimating composition.

#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1, 
  targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) + 
  geom_boxplot()

wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )

## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties

names(wx) <- colnames(cellCounts_new)
wx

## $CD8T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 12, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 13, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  a[, i] and c[, i]
## W = 15, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 18, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 23, p-value = 0.2168
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Gran
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 15, p-value = 1
## alternative hypothesis: true location shift is not equal to 0

Try estimatecellcounts2.

cells <- estimateCellCounts2(rgSet, referencePlatform= "IlluminaHumanMethylationEPIC",  returnAll = TRUE)

## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation

## loading from cache

## [estimateCellCounts2] Combining user data with reference (flow sorted) data.

## Warning in asMethod(object): NAs introduced by coercion

## [estimateCellCounts2] Processing user and reference data together.

## [estimateCellCounts2] Using IDOL L-DMR probes for composition estimation.

## [estimateCellCounts2] Estimating proportion composition (prop), if you provide cellcounts those will be provided as counts in the composition estimation.

mset <- cells$normalizedData

cellCounts_new <- cells[[1]]
#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1,  targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) + 
  geom_boxplot()

pdf("cells_blood.pdf")

par(mar=c(3,8,2,2))

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
  geom_boxplot()

dev.off()

## png 
##   2

wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )

## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties

names(wx) <- colnames(cellCounts_new)
wx

## $CD8T
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  a[, i] and c[, i]
## W = 14.5, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 13, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 17, p-value = 0.8112
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 18, p-value = 0.6923
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 26, p-value = 0.07692
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Neu
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 14, p-value = 0.9371
## alternative hypothesis: true location shift is not equal to 0

Filtering

None of the p-values are less than 0.05.

Now filter for high detection p-value and overlap with SNP.

Need to get a copy of the Xreact probes from Namitha. In the mean time I have downloaded from github - link below.

detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mset <- mset[keep,]
gmset <- mapToGenome(mset)

#remove SNPs
gmset_flt = dropLociWithSnps(gmset, snps = c("CpG", "SBE"))

#Removing cross-reactive probes
XURL="https://raw.githubusercontent.com/sirselim/illumina450k_filtering/master/EPIC/13059_2016_1066_MOESM1_ESM.csv"
Xreact <- read.csv(XURL)

#Xreact = read.csv(file="/group/canc2/puumba/Data/InfiniumData/NamithaData/Rprojects/Autism/Analysis_Sept11/EPIC_850k_crossreactiveProbes.csv", stringsAsFactors=FALSE)
#Xreact = read.csv(file="~/48639-non-specific-probes-Illumina450k.csv", stringsAsFactors=FALSE)
noXreact <-  !(featureNames(gmset) %in% Xreact$X)

gmset <- gmset[noXreact,]

#Removing probes on X and Y chromosomes
autosomes <- !(featureNames(gmset) %in% ann$Name[ann$chr %in% c("chrX","chrY")])
gmset_flt <- gmset[autosomes,]

#getBeta
beta = getM(gmset_flt)
saveRDS(beta,"bl_beta.rds")
df <- data.frame(t(t(colMeans(beta))))
colnames(df) = "gwam_bl"
write.table(df,file="bl_gwam.tsv")

#Relative log expression (RLE plot)
mvals = getM(gmset_flt)
medSq = apply(mvals, 1, median)
YSq = mvals - medSq

boxplot(YSq,outline=FALSE,ylim=c(-1.5,1.5), 
  ylab="Relative Log Methylation Value", 
  cols=as.character(factor(targets_gen$Social_interaction_on_ADOS,)) )

MDS plots generation after filtering

pal = brewer.pal(8, "Dark2")
mds1Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,2))

mds2Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,3))

mds3Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(2,3))

mds4Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(3,4))

plotMDS(mds1Sq, xlab="Dimension 1", ylab="Dimension 2",
  col=pal[as.factor(targets_gen$Diagnosis)],
  dim=c(1,2), labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds2Sq, xlab="Dimension 1", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(1,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds3Sq, xlab="Dimension 2", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(2,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds4Sq, xlab="Dimension 3", ylab="Dimension 4",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(3,4),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

Principal Component Analysis (PCA)

fit <- prcomp(t(mvals),center = TRUE, scale = TRUE,retx=TRUE)
loadings = fit$x
plot(fit,type="lines")

nGenes = nrow(mvals)
nSamples = ncol(mvals)
datTraits = targets_gen[,7:15]
moduleTraitCor = cor(loadings[,1:6], datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
par(cex=0.75, mar = c(6, 8.5, 3, 3))
textMatrix = paste(signif(moduleTraitCor, 2), "\n(", 
  signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

pdf("pca_blood.pdf")

par(mar=c(3,8,2,2))

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

dev.off()

## png 
##   2

Regression analysis - within twin pair on ADOS

ADOS is the outcome variable (continuous)

Twin_Group <- factor(targets_gen$Family_ID)
ADOS <- targets_gen$Social_interaction_on_ADOS
names(ADOS) <- targets_gen$SampleID
motor <- targets_gen$Motor_skills
diagnosis <- targets_gen$Diagnosis
SRS<- targets_gen$SRS_social_scores
iiq <- 1/targets_gen$IQ
# fix infinite
iiq[iiq==Inf] <- 0.025
iiq <- as.vector(scale(iiq))
ilanguage <- as.vector(scale(1/targets_gen$language))

#ADOS
design_tw <- model.matrix(~ Twin_Group + ADOS )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        245549        8681        4743        5446        3499        3969
## NotSig       42657      761993      793564      790621      792979      795809
## Up          514441       31973        4340        6580        6169        2869
##        Twin_Group8 Twin_Group12 Twin_Group13   ADOS
## Down          2588         3727         7402      0
## NotSig      796687       795828       791235 802647
## Up            3372         3092         4010      0

top <- topTable(fit2_tw,coef="ADOS",num=Inf, sort.by = "P")
nrow(subset(top,adj.P.Val < 0.05))

## [1] 0

nrow(subset(top,P.Value < 1e-4))

## [1] 4

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output, file="limma_blood_ADOS.csv",row.names=FALSE)
output <- subset(output,P.Value<1e-4)
head(output,30) %>% kbl() %>% kable_paper("hover", full_width = F)

Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg07655025	chr19	12835001	chr19:12833104-12833574	S_Shore	TNPO2	TSS200	-0.0854809	-4.3364981	-6.796061	2.13e-05	0.8744474	3.122211
cg11392858	chr17	17109651	chr17:17108771-17109701	Island	PLD6	TSS200	-0.0794041	-4.8008955	-6.341963	4.08e-05	0.8744474	2.512537
cg21843616	chr4	85424469	chr4:85422929-85423190	S_Shore			0.0962309	-2.9777034	5.794119	9.28e-05	0.8744474	1.733772
cg16956665	chr11	125479362		OpenSea	STT3A	Body	0.1227929	0.6645605	5.771321	9.61e-05	0.8744474	1.700329

saveRDS(design_tw, "bl_design_ados.rds")
saveRDS(mvals, "bl_mvals.rds")

Other models:

motor skills impairment
diagnosis
SRS
inverse IQ
inverse langguage

Regression analysis - within twin pair on motor skills

#motor:820,1.053055e-12
design_tw <- model.matrix(~ Twin_Group + motor )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        229734        4479        3710        1789        1478        5148
## NotSig       83307      788122      796238      799344      799737      794187
## Up          489606       10046        2699        1514        1432        3312
##        Twin_Group8 Twin_Group12 Twin_Group13  motor
## Down          4097         1642         1658      1
## NotSig      793108       799749       799861 802645
## Up            5442         1256         1128      1

top <- topTable(fit2_tw,coef="motor",num=Inf, sort.by = "P")
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 13989

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_motor.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="motor") %>% kable_paper("hover", full_width = F)

motor
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg06210070	chr6	33085063	chr6:33084740-33084995	S_Shore	HLA-DPB2	Body	2.1346930	-1.242011	13.380503	0.0000000	0.0165030	-3.089212
cg08638320	chr1	47900265	chr1:47899661-47900385	Island	FOXD2;MGC12982	TSS1500;Body	-2.3448818	-1.387035	-11.860527	0.0000001	0.0306472	-3.119885
cg15407916	chr18	5133225		OpenSea			-1.7391027	3.242286	-10.838398	0.0000002	0.0520882	-3.147155
cg26848940	chr6	37787530	chr6:37786707-37788114	Island	ZFAND3;ZFAND3	5’UTR;1stExon	-2.1855603	-5.339672	-10.579331	0.0000003	0.0520882	-3.155172
cg03896436	chr5	141082572	chr5:141082245-141082592	Island			-1.0543013	-3.968335	-8.823686	0.0000017	0.2760938	-3.225741
cg16853842	chr12	56390939		OpenSea	SUOX;SUOX;SUOX	TSS200;TSS200;TSS200	-1.1313878	-3.612767	-8.647193	0.0000021	0.2826972	-3.234818
cg10013356	chr6	150244295	chr6:150243902-150244210	S_Shore	RAET1G	TSS200	2.1355657	-3.708028	7.584719	0.0000078	0.8980786	-3.300126
cg22401197	chr2	219261843	chr2:219263098-219265556	N_Shore	CTDSP1	TSS1500	1.0215959	-5.424390	7.187242	0.0000132	0.9999945	-3.330255
cg14838039	chr11	85339468	chr11:85339343-85339629	Island	DLG2;TMEM126B;TMEM126B;TMEM126B;TMEM126B;TMEM126B	TSS1500;TSS200;TSS200;TSS200;TSS200;TSS200	1.0593723	-6.092858	7.114290	0.0000146	0.9999945	-3.336187
cg19820070	chr3	48595777	chr3:48594022-48594853	S_Shore			0.9967844	-4.814562	6.938100	0.0000185	0.9999945	-3.351068
cg08857348	chr17	40760642	chr17:40761059-40761761	N_Shore	TUBG1;FAM134C;FAM134C	TSS1500;Body;Body	-1.0572529	-4.778403	-6.920365	0.0000190	0.9999945	-3.352610
cg03122453	chr10	18940666	chr10:18940528-18940765	Island	NSUN6	TSS200	0.8628768	-4.731136	6.602630	0.0000296	0.9999945	-3.381729
cg05905482	chr16	22207553		OpenSea			-0.8705039	-4.381135	-6.443158	0.0000371	0.9999945	-3.397464
cg08872494	chr1	114447794	chr1:114447454-114448024	Island	DCLRE1B;AP4B1	TSS1500;TSS200	1.0081329	-5.676813	6.393956	0.0000399	0.9999945	-3.402480
cg22188917	chr6	43138180	chr6:43138711-43140292	N_Shore	SRF	TSS1500	-1.0634581	-3.701636	-6.215194	0.0000517	0.9999945	-3.421371
cg20850829	chr2	55646538	chr2:55646621-55647329	N_Shore	CCDC88A;CCDC88A;CCDC88A;CCDC88A;CCDC88A;CCDC88A	1stExon;1stExon;1stExon;5’UTR;5’UTR;5’UTR	0.9471784	-5.106019	6.109895	0.0000604	0.9999945	-3.433009
cg00140914	chr17	44859215		OpenSea	WNT3	Body	-1.0897001	1.782852	-6.048164	0.0000662	0.9999945	-3.440015
cg00786952	chr1	21763130	chr1:21763070-21763416	Island			1.1508278	-4.472752	6.042320	0.0000668	0.9999945	-3.440686
cg24326130	chr6	32811997	chr6:32811494-32811839	S_Shore	PSMB8;PSMB8	Body;TSS200	0.7983296	-4.711539	6.038650	0.0000672	0.9999945	-3.441107
cg23561766	chr14	50998480	chr14:50998577-50999711	N_Shore	MAP4K5;MAP4K5;ATL1	Body;Body;TSS1500	-0.8288758	-4.271728	-5.908606	0.0000816	0.9999945	-3.456372
cg07751341	chr19	12848092	chr19:12848278-12848586	N_Shore	ASNA1	TSS1500	-1.1037191	-4.558390	-5.880121	0.0000852	0.9999945	-3.459801
cg05240976	chr2	54787135	chr2:54785026-54785969	S_Shore	SPTBN1;SPTBN1	Body;Body	-0.8814379	-5.412952	-5.745508	0.0001046	0.9999945	-3.476435
cg09666237	chr17	18128581	chr17:18128577-18128821	Island	LLGL1	TSS1500	-0.6334484	-4.535416	-5.714021	0.0001098	0.9999945	-3.480431
cg13445358	chr12	53661749	chr12:53661928-53662429	N_Shore	ESPL1	TSS1500	-0.8171242	-4.199432	-5.670191	0.0001175	0.9999945	-3.486060
cg03546814	chr1	12498784		OpenSea	VPS13D;VPS13D	Body;Body	1.1695422	-3.099129	5.644907	0.0001221	0.9999945	-3.489344
cg07735844	chr16	4775267		OpenSea	ANKS3;ANKS3;ANKS3;ANKS3	Body;Body;Body;Body	0.6239971	3.301588	5.549502	0.0001417	0.9999945	-3.501978
cg02085891	chr1	9256865	chr1:9258565-9258956	N_Shore			-0.7945654	-5.127327	-5.536473	0.0001446	0.9999945	-3.503734
cg18933904	chr3	187461439	chr3:187461974-187462197	N_Shore	BCL6	5’UTR	-0.7942291	-4.615514	-5.520384	0.0001483	0.9999945	-3.505912
cg18555199	chr3	180630509	chr3:180630152-180630996	Island	FXR1;FXR1;FXR1;FXR1	1stExon;1stExon;5’UTR;1stExon	0.7430541	-4.377617	5.496406	0.0001539	0.9999945	-3.509180
cg10468862	chr9	35103350	chr9:35102695-35103463	Island	STOML2	TSS200	0.9481154	-4.951194	5.458066	0.0001635	0.9999945	-3.514457

saveRDS(design_tw, "bl_design_motor.rds")

Regression analysis - within twin pair on diagnosis

#diagnosis:0,3.080605e-05
design_tw <- model.matrix(~ Twin_Group + diagnosis )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        247489        9587        5520        5294        4058        4214
## NotSig       39513      758728      792549      791018      791329      795586
## Up          515645       34332        4578        6335        7260        2847
##        Twin_Group8 Twin_Group12 Twin_Group13 diagnosis
## Down          3622         4936         6663         0
## NotSig      794194       794099       792193    802647
## Up            4831         3612         3791         0

top <- topTable(fit2_tw,coef="diagnosis",num=Inf, sort.by = "P")
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 14748

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_diagnosis.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="diagnosis") %>% kable_paper("hover", full_width = F)

diagnosis
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg03643559	chr10	124757121		OpenSea	IKZF5	Body	0.4069443	0.4224622	5.464372	0.0001642	0.9999949	-2.068814
cg13620413	chr1	212235894		OpenSea	DTL;DTL;DTL	5’UTR;Body;Body	-0.3443567	3.5159374	-5.339265	0.0002000	0.9999949	-2.116448
cg09119495	chr5	90252465		OpenSea	ADGRV1;ADGRV1	Body;Body	0.4025992	-1.4719519	5.237453	0.0002353	0.9999949	-2.156481
cg19088141	chr1	113056151	chr1:113050813-113052301	S_Shelf	WNT2B;WNT2B	Body;Body	0.3417720	3.0095660	5.170284	0.0002620	0.9999949	-2.183530
cg15980797	chr6	137813648	chr6:137814355-137815202	N_Shore	OLIG3;OLIG3	1stExon;3’UTR	0.4831093	-3.5441833	5.166084	0.0002638	0.9999949	-2.185239
cg03062264	chr7	134354807		OpenSea	BPGM;BPGM;BPGM	Body;Body;Body	0.3104951	-4.0947995	5.105611	0.0002909	0.9999949	-2.210063
cg08481520	chr3	110789352	chr3:110790149-110791401	N_Shore	PVRL3-AS1;PVRL3;PVRL3	TSS1500;TSS1500;TSS1500	0.4071196	-4.7224305	5.080804	0.0003028	0.9999949	-2.220370
cg26240231	chr7	1148101	chr7:1149179-1150306	N_Shore	C7orf50;C7orf50;C7orf50	Body;Body;Body	-0.3486271	3.2240156	-5.040964	0.0003230	0.9999949	-2.237072
cg16956665	chr11	125479362		OpenSea	STT3A	Body	0.6693177	0.6645605	5.023908	0.0003321	0.9999949	-2.244279
cg02733698	chr1	208084824	chr1:208084098-208084513	S_Shore	CD34;CD34	TSS200;TSS200	0.2888156	-3.7138083	4.928567	0.0003882	0.9999949	-2.285207
cg00830735	chr10	93393238	chr10:93392667-93393147	S_Shore	PPP1R3C	TSS1500	-0.3070720	-3.2559925	-4.927618	0.0003888	0.9999949	-2.285620
cg04406808	chr11	43702245	chr11:43702016-43702597	Island	HSD17B12;HSD17B12	5’UTR;1stExon	-0.4750655	-4.8295237	-4.884386	0.0004175	0.9999949	-2.304543
cg10764907	chr7	158886532	chr7:158886367-158886595	Island	VIPR2	Body	0.4181137	2.8975070	4.861485	0.0004336	0.9999949	-2.314659
cg13365972	chr11	1629275		OpenSea	HCCA2;KRTAP5-3	Body;1stExon	0.2937900	0.8671237	4.806457	0.0004750	0.9999949	-2.339231
cg07655025	chr19	12835001	chr19:12833104-12833574	S_Shore	TNPO2	TSS200	-0.4409929	-4.3364981	-4.777612	0.0004983	0.9999949	-2.352261
cg04324167	chr11	129842470		OpenSea	PRDM10;PRDM10	5’UTR;5’UTR	-0.5486171	1.0319531	-4.762831	0.0005107	0.9999949	-2.358977
cg23146534	chr7	99149191	chr7:99149508-99149807	N_Shore	C7orf38	5’UTR	0.3566806	-3.7451369	4.752649	0.0005194	0.9999949	-2.363620
cg19074669	chr2	8260068		OpenSea	LINC00299	Body	-0.4033071	1.8867077	-4.747812	0.0005237	0.9999949	-2.365830
cg06979656	chr7	116409579		OpenSea	MET;MET	Body;Body	-0.3180532	3.3499745	-4.716590	0.0005517	0.9999949	-2.380166
cg09115762	chr1	228591111	chr1:228593811-228594713	N_Shelf	TRIM11	Body	-0.3272950	4.0086554	-4.711479	0.0005564	0.9999949	-2.382524
cg21498965	chr19	12863540	chr19:12865419-12865825	N_Shore	BEST2	1stExon	-0.3616512	2.6440094	-4.691243	0.0005756	0.9999949	-2.391894
cg05012902	chr17	8065517	chr17:8065909-8066494	N_Shore	VAMP2	Body	0.4979217	-3.3079168	4.689268	0.0005775	0.9999949	-2.392812
cg18253799	chr17	78069191		OpenSea	CCDC40	Body	0.3130236	4.8647714	4.688565	0.0005782	0.9999949	-2.393138
cg01919488	chr2	71558589	chr2:71558723-71559525	N_Shore	ZNF638;ZNF638	TSS1500;TSS1500	-0.3119692	-3.4366566	-4.663940	0.0006026	0.9999949	-2.404618
cg00905457	chr10	98015413		OpenSea	BLNK;BLNK;BLNK;BLNK;BLNK;BLNK;BLNK;BLNK;BLNK	Body;Body;Body;Body;Body;Body;Body;Body;Body	-0.5433528	1.1248718	-4.663582	0.0006029	0.9999949	-2.404786
cg03581262	chr2	106192732		OpenSea			0.3311815	-0.4544598	4.642715	0.0006245	0.9999949	-2.414576
cg16942213	chr20	49104270		OpenSea			0.3114097	3.0308669	4.619143	0.0006497	0.9999949	-2.425701
cg19118595	chr9	97786774		OpenSea	C9orf3;C9orf3	Body;Body	0.4128831	3.1820691	4.586944	0.0006860	0.9999949	-2.441012
cg06705930	chr11	31823191	chr11:31820060-31821416	S_Shore	PAX6;PAX6;PAX6	Body;Body;Body	-0.3233499	0.6527203	-4.586502	0.0006865	0.9999949	-2.441223
cg04546181	chr2	127933555		OpenSea			0.2818781	3.1972689	4.585718	0.0006874	0.9999949	-2.441598

saveRDS(design_tw, "bl_design_diagnosis.rds")

Regression analysis - within twin pair on SRS

#SRS:0,1.895198e-07
design_tw <- model.matrix(~ Twin_Group + SRS )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        215957        4783        5937        3022        4868        5910
## NotSig      111254      787234      792439      796884      788921      793278
## Up          475436       10630        4271        2741        8858        3459
##        Twin_Group8 Twin_Group12 Twin_Group13    SRS
## Down          4550         3427         2498      0
## NotSig      791747       797023       798952 802647
## Up            6350         2197         1197      0

top <- topTable(fit2_tw,coef="SRS",num=Inf, sort.by = "P")
head(top)

##                  logFC   AveExpr         t      P.Value adj.P.Val        B
## cg21058391 -0.02877136  1.171739 -7.132412 1.141893e-05 0.3811112 2.963468
## cg23996714 -0.02957109 -3.834427 -6.916413 1.547242e-05 0.3811112 2.642764
## cg21549639 -0.02858689 -2.903858 -6.504837 2.807040e-05 0.3811112 2.014550
## cg12093819 -0.01999308  3.133035 -6.428087 3.144642e-05 0.3811112 1.894879
## cg25724515 -0.02123747  3.782258 -6.368454 3.436634e-05 0.3811112 1.801344
## cg09331760 -0.02547794  3.077274 -6.249775 4.106784e-05 0.3811112 1.613754

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 105002

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_SRS.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="SRS score") %>% kable_paper("hover", full_width = F)

SRS score
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg21058391	chr4	160027487	chr4:160023787-160025437	S_Shelf			-0.0287714	1.171739	-7.132412	1.14e-05	0.3811112	2.9634681
cg23996714	chr8	103666229	chr8:103666157-103668861	Island	KLF10;KLF10;KLF10;KLF10	Body;TSS200;TSS200;TSS200	-0.0295711	-3.834427	-6.916413	1.55e-05	0.3811112	2.6427644
cg21549639	chr19	45394156	chr19:45393833-45394992	Island	TOMM40;TOMM40;TOMM40	TSS1500;TSS1500;TSS1500	-0.0285869	-2.903858	-6.504837	2.81e-05	0.3811112	2.0145496
cg12093819	chr21	30118661		OpenSea			-0.0199931	3.133036	-6.428087	3.14e-05	0.3811112	1.8948793
cg25724515	chr11	131926464		OpenSea	NTM;NTM;NTM;NTM	Body;Body;Body;Body	-0.0212375	3.782258	-6.368454	3.44e-05	0.3811112	1.8013437
cg09331760	chr9	132926455		OpenSea			-0.0254779	3.077274	-6.249775	4.11e-05	0.3811112	1.6137537
cg25859812	chr15	99441745		OpenSea	IGF1R	Body	-0.0214470	3.348640	-6.227289	4.25e-05	0.3811112	1.5779952
cg14500206	chr7	131223393		OpenSea	PODXL;PODXL	Body;Body	-0.0245393	3.924956	-6.171169	4.63e-05	0.3811112	1.4884450
cg03860992	chr5	161273013		OpenSea	GABRA1	TSS1500	-0.0371626	2.068419	-6.053556	5.54e-05	0.3811112	1.2993748
cg11911679	chr2	182520919	chr2:182521221-182521927	N_Shore	CERKL;CERKL;CERKL;CERKL;CERKL;CERKL;CERKL	Body;Body;Body;Body;Body;Body;Body	-0.0192358	2.485932	-6.052025	5.55e-05	0.3811112	1.2969003
cg25247582	chr2	22754866		OpenSea			-0.0213138	3.214618	-6.049763	5.57e-05	0.3811112	1.2932452
cg03627896	chr16	30934334	chr16:30933637-30935774	Island	NCRNA00095	Body	-0.0318174	-3.501211	-6.031366	5.73e-05	0.3811112	1.2634898
cg10014923	chr9	131597992		OpenSea	CCBL1;CCBL1;CCBL1;CCBL1;CCBL1	Body;Body;Body;Body;Body	-0.0205352	3.574124	-5.998943	6.02e-05	0.3811112	1.2109359
cg06833636	chr12	12659232		OpenSea	DUSP16	Body	-0.0365350	1.227414	-5.998047	6.03e-05	0.3811112	1.2094812
cg25349621	chr10	65185619		OpenSea	JMJD1C	Body	-0.0219811	2.922620	-5.972692	6.27e-05	0.3811112	1.1682802
cg02609271	chr15	92398726	chr15:92396013-92397682	S_Shore	SLCO3A1;SLCO3A1	Body;Body	-0.0194694	2.965249	-5.933388	6.67e-05	0.3811112	1.1042379
cg24796554	chr11	128565519	chr11:128562671-128565011	S_Shore	FLI1;FLI1	Body;5’UTR	-0.0258665	-4.685588	-5.924308	6.76e-05	0.3811112	1.0894121
cg03034673	chr7	106670287		OpenSea			-0.0214450	2.280351	-5.861603	7.45e-05	0.3811112	0.9867235
cg02576920	chr4	76996888		OpenSea	ART3;ART3;ART3	5’UTR;5’UTR;5’UTR	-0.0263772	2.262634	-5.861007	7.46e-05	0.3811112	0.9857451
cg01113339	chr7	1660895	chr7:1659040-1659765	S_Shore			-0.0150889	3.296692	-5.798263	8.23e-05	0.3811112	0.8824500
cg12066726	chr1	35297677		OpenSea			-0.0170473	3.594028	-5.773288	8.56e-05	0.3811112	0.8411824
cg19963546	chr6	148449297		OpenSea			-0.0198424	3.360258	-5.768903	8.62e-05	0.3811112	0.8339290
cg26817186	chr1	188340817		OpenSea			-0.0238585	2.748832	-5.748565	8.90e-05	0.3811112	0.8002488
cg22011526	chr6	36857605	chr6:36853590-36853927	S_Shelf	C6orf89	Body	-0.0370158	2.013753	-5.735987	9.08e-05	0.3811112	0.7793920
cg02944312	chr7	3207719		OpenSea			0.0240135	2.394027	5.698117	9.64e-05	0.3811112	0.7164645
cg12173308	chr7	47390824		OpenSea	TNS3	Body	-0.0237906	3.258543	-5.694012	9.71e-05	0.3811112	0.7096314
cg04795668	chr6	56179634		OpenSea	RNU6-71P	Body	-0.0233130	3.235396	-5.689933	9.77e-05	0.3811112	0.7028381
cg06466407	chr9	128312369		OpenSea	MAPKAP1;MAPKAP1;MAPKAP1;MAPKAP1;MAPKAP1;MAPKAP1	Body;Body;Body;Body;Body;Body	-0.0165688	3.112436	-5.677090	9.97e-05	0.3811112	0.6814395
cg17508905	chr4	38121914		OpenSea	TBC1D1	Body	-0.0189325	3.452685	-5.675187	1.00e-04	0.3811112	0.6782662
cg10687550	chr5	101813557		OpenSea	SLCO6A1;SLCO6A1;SLCO6A1;SLCO6A1;SLCO6A1;SLCO6A1	ExonBnd;ExonBnd;Body;Body;Body;Body	-0.0183974	4.115505	-5.650612	1.04e-04	0.3811112	0.6372494

saveRDS(design_tw, "bl_design_srs.rds")

Regression analysis - within twin pair on inverse IQ

#iiq:0,1.326967e-05
design_tw <- model.matrix(~ Twin_Group + iiq )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        248680        9775       13327       11175        6009        8507
## NotSig       32651      771597      781659      777669      783892      789297
## Up          521316       21275        7661       13803       12746        4843
##        Twin_Group8 Twin_Group12 Twin_Group13    iiq
## Down          6551        39385       161032  97952
## NotSig      784156       750385       620416 689806
## Up           11940        12877        21199  14889

top <- topTable(fit2_tw,coef="iiq",num=Inf, sort.by = "P")
head(top)

##                 logFC    AveExpr         t      P.Value  adj.P.Val        B
## cg04468444 -0.9138904  1.8215888 -9.500994 3.826866e-07 0.03778865 6.182375
## cg24575266 -0.7233595  1.0883824 -8.953668 7.434345e-07 0.03778865 5.677507
## cg23438384 -0.6272277  0.1575785 -8.775389 9.290578e-07 0.03778865 5.504867
## cg02894209 -0.6929756 -3.5994930 -8.722806 9.928205e-07 0.03778865 5.453153
## cg22557012 -0.7923623  1.0440056 -8.658175 1.077655e-06 0.03778865 5.389086
## cg04996388 -0.6174303  2.5728078 -8.491186 1.334721e-06 0.03778865 5.220952

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 268661

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_iIQ.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="inverse IQ") %>% kable_paper("hover", full_width = F)

inverse IQ
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg04468444	chr3	124354566		OpenSea	KALRN;KALRN	Body;Body	-0.9138904	1.8215888	-9.500994	4.0e-07	0.0377886	6.182375
cg24575266	chr5	38559733	chr5:38556222-38557563	S_Shelf	LIFR-AS1;LIFR;LIFR-AS1	Body;5’UTR;Body	-0.7233595	1.0883824	-8.953668	7.0e-07	0.0377886	5.677507
cg23438384	chr6	12050022		OpenSea	HIVEP1	Body	-0.6272277	0.1575785	-8.775389	9.0e-07	0.0377886	5.504867
cg02894209	chr15	43028463	chr15:43028413-43029346	Island	CDAN1	Body	-0.6929756	-3.5994930	-8.722806	1.0e-06	0.0377886	5.453153
cg22557012	chr17	67138792		OpenSea	ABCA6	TSS1500	-0.7923623	1.0440056	-8.658175	1.1e-06	0.0377886	5.389086
cg04996388	chr17	79905263		OpenSea	MYADML2	TSS200	-0.6174303	2.5728078	-8.491186	1.3e-06	0.0377886	5.220952
cg17257934	chr12	131690459		OpenSea	LINC01257	Body	0.5558943	2.8522779	8.338819	1.6e-06	0.0377886	5.064214
cg14552672	chr2	111752564		OpenSea	ACOXL	Body	-0.7055866	0.9762296	-8.263054	1.8e-06	0.0377886	4.985074
cg21250898	chr8	12652814		OpenSea	LOC340357;LINC00681	Body;Body	-0.7962841	1.4876373	-8.177219	2.0e-06	0.0377886	4.894438
cg12443616	chr12	7959662		OpenSea			0.5279187	-4.9277823	7.967108	2.7e-06	0.0377886	4.668128
cg06250511	chr8	126231813		OpenSea	NSMCE2	Body	-0.7066601	0.6461808	-7.935795	2.8e-06	0.0377886	4.633854
cg07250516	chr18	52275086		OpenSea			-0.5995211	2.8175839	-7.919583	2.8e-06	0.0377886	4.616051
cg26286934	chr3	181417051	chr3:181413014-181414022	S_Shelf	SOX2-OT;SOX2-OT;SOX2-OT;SOX2-OT;SOX2-OT;SOX2-OT	Body;Body;Body;Body;Body;Body	-0.6364130	1.5996848	-7.891239	3.0e-06	0.0377886	4.584835
cg22222965	chr14	91948818		OpenSea	PPP4R3A;PPP4R3A	Body;Body	-0.9382529	0.9582456	-7.874591	3.0e-06	0.0377886	4.566445
cg17312424	chr4	95517497		OpenSea	PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5	Body;5’UTR;Body;Body;Body;Body	-0.8862970	1.3795360	-7.860319	3.1e-06	0.0377886	4.550647
cg08568736	chr20	1757780	chr20:1757779-1758134	Island			-0.5132346	-3.8138195	-7.806946	3.3e-06	0.0377886	4.491299
cg19561774	chr6	160679690	chr6:160679377-160679701	Island	SLC22A2	1stExon	-0.4306526	4.0237220	-7.718333	3.7e-06	0.0377886	4.391830
cg08341589	chr15	59971009	chr15:59968889-59969096	S_Shore	BNIP2	Body	-0.7606328	0.0732912	-7.695101	3.9e-06	0.0377886	4.365556
cg27608402	chr3	152787564		OpenSea			-0.8040609	0.6498661	-7.659736	4.1e-06	0.0377886	4.325406
cg19889859	chr1	67672609		OpenSea	IL23R;IL23R	ExonBnd;Body	-0.5607531	2.9816355	-7.655155	4.1e-06	0.0377886	4.320192
cg03936663	chr1	218098422		OpenSea			-0.6347863	-3.2241164	-7.622544	4.3e-06	0.0377886	4.282977
cg01068931	chr17	70460238		OpenSea	LINC00673	Body	0.4926495	2.7524591	7.619185	4.3e-06	0.0377886	4.279135
cg25104558	chr1	21872684		OpenSea	ALPL;ALPL;ALPL	5’UTR;5’UTR;5’UTR	-0.8790755	0.8965151	-7.572340	4.6e-06	0.0377886	4.225371
cg19178585	chr12	109971145		OpenSea	UBE3B;UBE3B	Body;Body	-0.5312564	2.8831649	-7.570601	4.6e-06	0.0377886	4.223370
cg05434396	chr8	12652932		OpenSea	LOC340357;LINC00681	Body;Body	-0.7080311	1.8059987	-7.448589	5.5e-06	0.0377886	4.081728
cg02438517	chr1	167523547	chr1:167522469-167523070	S_Shore	CREG1	TSS1500	-0.4742908	-2.8730400	-7.442588	5.5e-06	0.0377886	4.074703
cg16253870	chr12	68959172		OpenSea			-0.7891398	1.7530872	-7.387926	5.9e-06	0.0377886	4.010451
cg21602651	chr1	220397618		OpenSea	RAB3GAP2	Body	-0.6731451	1.0202756	-7.385020	6.0e-06	0.0377886	4.007022
cg14551152	chr1	246271438		OpenSea	SMYD3;SMYD3	Body;Body	-0.8293254	0.8370691	-7.376987	6.0e-06	0.0377886	3.997537
cg26758209	chr2	50993812		OpenSea	NRXN1;NRXN1	Body;Body	-0.4490356	3.8365228	-7.370353	6.1e-06	0.0377886	3.989697

saveRDS(design_tw, "bl_design_iiq.rds")

Regression analysis - within twin pair on inverse language skills

#ilanguage:0,9.149182e-08
design_tw <- model.matrix(~ Twin_Group + ilanguage )
fit_tw <- lmFit(mvals, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))

##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group6 Twin_Group7
## Down        249473       14837       21081       15159        6865        9167
## NotSig       31189      758038      769325      767794      780923      788238
## Up          521985       29772       12241       19694       14859        5242
##        Twin_Group8 Twin_Group12 Twin_Group13 ilanguage
## Down          8526       106086        94660    142340
## NotSig      776261       669770       693283    632858
## Up           17860        26791        14704     27449

top <- topTable(fit2_tw,coef="ilanguage",num=Inf, sort.by = "P")
head(top)

##                 logFC  AveExpr          t      P.Value  adj.P.Val        B
## cg16452467 -0.7356732 1.979912 -11.175876 5.607487e-08 0.01648184 8.067813
## cg19797280 -0.6470760 2.688842  -9.959691 2.129344e-07 0.01648184 7.020451
## cg23802117 -0.6489984 3.719596  -9.802851 2.552925e-07 0.01648184 6.873838
## cg04924454 -0.5900556 1.656113  -9.546304 3.451986e-07 0.01648184 6.627916
## cg00317081 -0.6545305 1.149801  -9.469612 3.782402e-07 0.01648184 6.552898
## cg16200531 -0.6494251 1.516628  -9.421295 4.007800e-07 0.01648184 6.505275

nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)

## [1] 293059

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
output <- output[order(output$P.Value),]
write.csv(output,file="limma_blood_ilanguage.csv",row.names=FALSE)
head(output,30) %>% kbl(caption="inverse language") %>% kable_paper("hover", full_width = F)

inverse language
Name	chr	pos	Islands_Name	Relation_to_Island	UCSC_RefGene_Name	UCSC_RefGene_Group	logFC	AveExpr	t	P.Value	adj.P.Val	B
cg16452467	chr1	46509145		OpenSea	PIK3R3;PIK3R3;PIK3R3;PIK3R3;PIK3R3	3’UTR;3’UTR;3’UTR;3’UTR;3’UTR	-0.7356732	1.979912	-11.175876	1.0e-07	0.0164818	8.067813
cg19797280	chr5	132009731		OpenSea	IL4;IL4;IL4;IL4	5’UTR;5’UTR;1stExon;1stExon	-0.6470760	2.688842	-9.959691	2.0e-07	0.0164818	7.020451
cg23802117	chr1	26625392		OpenSea	UBXN11;UBXN11;UBXN11	Body;Body;Body	-0.6489984	3.719596	-9.802851	3.0e-07	0.0164818	6.873838
cg04924454	chr7	26327799	chr7:26331463-26331944	N_Shelf			-0.5900556	1.656113	-9.546304	3.0e-07	0.0164818	6.627916
cg00317081	chr7	38182566		OpenSea			-0.6545305	1.149801	-9.469613	4.0e-07	0.0164818	6.552899
cg16200531	chr2	135532566		OpenSea			-0.6494251	1.516628	-9.421295	4.0e-07	0.0164818	6.505274
cg08441285	chr7	37741388		OpenSea			-0.7678553	2.255130	-9.371122	4.0e-07	0.0164818	6.455525
cg25926652	chr2	201810588		OpenSea	ORC2L	Body	-0.7400754	2.448754	-9.322913	5.0e-07	0.0164818	6.407435
cg08873424	chr6	28945492		OpenSea			-0.7134944	-3.251139	-9.237228	5.0e-07	0.0164818	6.321262
cg07958316	chr17	67853145		OpenSea	LINC01483;LINC01483	Body;Body	0.5895823	2.414822	9.221843	5.0e-07	0.0164818	6.305693
cg04765650	chr10	50717412		OpenSea	ERCC6	Body	-1.0427950	1.977773	-9.212762	5.0e-07	0.0164818	6.296490
cg22171706	chr15	91158378		OpenSea	CRTC3;CRTC3	Body;Body	-0.7521724	2.612111	-9.205626	5.0e-07	0.0164818	6.289251
cg21550248	chr12	123011757	chr12:123011261-123011765	Island	RSRC2;KNTC1;RSRC2;RSRC2	TSS1500;TSS200;TSS1500;TSS1500	-0.8618933	-4.224489	-9.164803	5.0e-07	0.0164818	6.247719
cg12108564	chr4	104454891		OpenSea			-0.6685912	3.098433	-8.940178	7.0e-07	0.0164818	6.015449
cg17312424	chr4	95517497		OpenSea	PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5;PDLIM5	Body;5’UTR;Body;Body;Body;Body	-0.8760670	1.379536	-8.917438	7.0e-07	0.0164818	5.991579
cg22611504	chr10	52750754	chr10:52750956-52751765	N_Shore	PRKG1	TSS200	-0.5962134	-3.929514	-8.834635	8.0e-07	0.0164818	5.904097
cg09399225	chr9	123007911		OpenSea	MIR147	TSS1500	0.4751934	2.300075	8.791745	9.0e-07	0.0164818	5.858435
cg26472183	chr8	126445744	chr8:126444232-126444440	S_Shore	TRIB1;TRIB1	Body;Body	-0.8424745	4.386950	-8.779281	9.0e-07	0.0164818	5.845121
cg23452498	chr5	144889607		OpenSea			-0.5562380	3.504697	-8.751877	9.0e-07	0.0164818	5.815775
cg04468444	chr3	124354566		OpenSea	KALRN;KALRN	Body;Body	-0.8762999	1.821589	-8.750553	9.0e-07	0.0164818	5.814356
cg02873783	chr11	110225729		OpenSea			-0.5901543	2.201571	-8.703982	1.0e-06	0.0164818	5.764252
cg15027446	chr4	56484062		OpenSea	NMU;NMU;NMU;NMU	Body;Body;Body;Body	0.5082312	1.966385	8.659126	1.0e-06	0.0164818	5.715724
cg02595886	chr6	37270436		OpenSea	TBC1D22B;TBC1D22B	Body;Body	-0.8498946	2.510922	-8.595343	1.1e-06	0.0164818	5.646260
cg23412299	chr11	127813919		OpenSea			0.5727707	1.718453	8.594076	1.1e-06	0.0164818	5.644874
cg10409831	chr8	30469883		OpenSea	GTF2E2	Body	-0.6880764	4.250232	-8.565379	1.2e-06	0.0164818	5.613440
cg21486250	chr6	147478688		OpenSea	STXBP5-AS1	Body	-0.5014479	2.714166	-8.492190	1.3e-06	0.0164818	5.532769
cg24485437	chr6	139265331		OpenSea	REPS1;REPS1;REPS1;REPS1	Body;Body;Body;Body	-0.9077850	1.982502	-8.484601	1.3e-06	0.0164818	5.524363
cg21639114	chr6	130687884	chr6:130686414-130687736	S_Shore			-0.7110052	-2.496174	-8.417681	1.4e-06	0.0164818	5.449901
cg07453055	chr1	11786594		OpenSea			-0.5919097	2.107233	-8.413585	1.4e-06	0.0164818	5.445324
cg07426802	chr8	60885377		OpenSea			0.5284552	1.172720	8.399467	1.4e-06	0.0164818	5.429528

saveRDS(design_tw, "bl_design_ilanguage.rds")

Converting to beta from M values

bDat = ilogit2(mvals)
bDat_new = getBeta(gmset_flt)
#View(bDat)
write.csv(bDat,file="ASD_blood_beta_onADOS_withintw_Nov27.csv",row.names=TRUE)

Genome wide average methylation

vioplot(bDat)

#gwam <- colMeans(bDat)
gwam <- apply(bDat,2,median)

message("GWAM assocation with ADOS")

## GWAM assocation with ADOS

mylm <- lm(gwam~ADOS)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ ADOS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059149 -0.013275  0.006856  0.010783  0.025073 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.8250921  0.0126685  65.130   <2e-16 ***
## ADOS        -0.0004950  0.0009871  -0.501    0.623    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0229 on 16 degrees of freedom
## Multiple R-squared:  0.01547,    Adjusted R-squared:  -0.04606 
## F-statistic: 0.2515 on 1 and 16 DF,  p-value: 0.6229

message("GWAM assocation with SRS")

## GWAM assocation with SRS

mylm <- lm(gwam~SRS)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ SRS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.053991 -0.010833  0.000767  0.009857  0.030661 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.8086198  0.0132192  61.170   <2e-16 ***
## SRS         0.0001435  0.0001620   0.886    0.389    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02253 on 16 degrees of freedom
## Multiple R-squared:  0.04677,    Adjusted R-squared:  -0.01281 
## F-statistic: 0.785 on 1 and 16 DF,  p-value: 0.3888

message("GWAM assocation with motor impairment")

## GWAM assocation with motor impairment

mylm <- lm(gwam~motor)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ motor)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059794 -0.011589  0.005419  0.011506  0.026902 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.193e-01  7.501e-03 109.221   <2e-16 ***
## motor       4.736e-05  5.810e-03   0.008    0.994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02308 on 16 degrees of freedom
## Multiple R-squared:  4.152e-06,  Adjusted R-squared:  -0.0625 
## F-statistic: 6.644e-05 on 1 and 16 DF,  p-value: 0.9936

message("GWAM assocation with diagnosis")

## GWAM assocation with diagnosis

mylm <- lm(gwam~diagnosis)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ diagnosis)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057628 -0.013957  0.007538  0.010403  0.026535 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.824364   0.011293  72.998   <2e-16 ***
## diagnosis   -0.003614   0.007142  -0.506     0.62    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0229 on 16 degrees of freedom
## Multiple R-squared:  0.01575,    Adjusted R-squared:  -0.04577 
## F-statistic: 0.256 on 1 and 16 DF,  p-value: 0.6198

message("GWAM assocation with iIQ")

## GWAM assocation with iIQ

mylm <- lm(gwam~iiq)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ iiq)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.051937 -0.013121  0.006734  0.015198  0.030773 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.819345   0.005159 158.829   <2e-16 ***
## iiq         -0.007108   0.005308  -1.339    0.199    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02189 on 16 degrees of freedom
## Multiple R-squared:  0.1008, Adjusted R-squared:  0.04457 
## F-statistic: 1.793 on 1 and 16 DF,  p-value: 0.1993

plot(gwam~iiq)
abline(mylm)

message("GWAM assocation with ilanguage")

## GWAM assocation with ilanguage

mylm <- lm(gwam~ilanguage)
summary(mylm)

## 
## Call:
## lm(formula = gwam ~ ilanguage)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.039349 -0.015018  0.004343  0.015797  0.028290 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.819345   0.004805 170.526   <2e-16 ***
## ilanguage   -0.010500   0.004944  -2.124   0.0496 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02039 on 16 degrees of freedom
## Multiple R-squared:  0.2199, Adjusted R-squared:  0.1711 
## F-statistic:  4.51 on 1 and 16 DF,  p-value: 0.04963

plot(gwam~ilanguage)
abline(mylm)

probes_body <- rownames(ann_sub[grep("Body",ann_sub$UCSC_RefGene_Group),])
bDat_body <- bDat[which(rownames(bDat) %in% probes_body),]
gwam <- apply(bDat_body,2,median)

probes_body <- rownames(ann_sub[grep("Island",ann_sub$Relation_to_Island),])
bDat_body <- bDat[which(rownames(bDat) %in% probes_body),]
gwam <- apply(bDat_body,2,median)

Plotting effect sizes of top DMPs

res <- read.csv("limma_blood_ADOS.csv")

TOPPROBES <- head(res$Name,9)
par(mfrow=c(3,3))
sapply(TOPPROBES,effect_plot)

## $cg07655025
## NULL
## 
## $cg11392858
## NULL
## 
## $cg21843616
## NULL
## 
## $cg16956665
## NULL
## 
## $cg06705930
## NULL
## 
## $cg18845598
## NULL
## 
## $cg13069346
## NULL
## 
## $cg24481303
## NULL
## 
## $cg12322146
## NULL

TOPPROBES <- head(res$Name,4)
par(mfrow=c(2,2))
sapply(TOPPROBES,effect_plot)

## $cg07655025
## NULL
## 
## $cg11392858
## NULL
## 
## $cg21843616
## NULL
## 
## $cg16956665
## NULL

pdf("effect_blood.pdf")
par(mfrow=c(2,2))
sapply(TOPPROBES,effect_plot)

## $cg07655025
## NULL
## 
## $cg11392858
## NULL
## 
## $cg21843616
## NULL
## 
## $cg16956665
## NULL

dev.off()

## png 
##   2

par(mfrow=c(1,1))

Gene ontology of top DMPs LIMMA data

Gene Ontology analysis (gometh): top 1000 probes (limma data).

sigCpGs_1k = res$Name[1:1000]

sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)

## [1] 802647

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)

## All input CpGs are used for testing.

gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)

##                                                        Description   N DE
## hsa04014                                     Ras signaling pathway 227 12
## hsa04510                                            Focal adhesion 194 10
## hsa05032                                        Morphine addiction  88  6
## hsa04961 Endocrine and other factor-regulated calcium reabsorption  51  4
## hsa04972                                      Pancreatic secretion  93  5
## hsa04730                                      Long-term depression  58  4
## hsa04727                                         GABAergic synapse  84  5
## hsa04913                                   Ovarian steroidogenesis  50  3
## hsa05200                                        Pathways in cancer 508 17
## hsa00514                      Other types of O-glycan biosynthesis  43  3
## hsa05217                                      Basal cell carcinoma  63  4
## hsa04371                                  Apelin signaling pathway 136  6
## hsa04151                                PI3K-Akt signaling pathway 337 12
## hsa05214                                                    Glioma  74  4
## hsa04370                                    VEGF signaling pathway  59  3
## hsa04010                                    MAPK signaling pathway 287 10
## hsa01521                 EGFR tyrosine kinase inhibitor resistance  77  4
## hsa04270                        Vascular smooth muscle contraction 132  5
## hsa04926                                 Relaxin signaling pathway 124  5
## hsa04935            Growth hormone synthesis, secretion and action 118  5
##                 P.DE       FDR
## hsa04014 0.007478981 0.8675618
## hsa04510 0.026219719 1.0000000
## hsa05032 0.033658367 1.0000000
## hsa04961 0.034906276 1.0000000
## hsa04972 0.039583287 1.0000000
## hsa04730 0.057356085 1.0000000
## hsa04727 0.064586048 1.0000000
## hsa04913 0.081661071 1.0000000
## hsa05200 0.082489772 1.0000000
## hsa00514 0.082599839 1.0000000
## hsa05217 0.083167814 1.0000000
## hsa04371 0.085675884 1.0000000
## hsa04151 0.092410694 1.0000000
## hsa05214 0.121292320 1.0000000
## hsa04370 0.157104308 1.0000000
## hsa04010 0.157160238 1.0000000
## hsa01521 0.160686022 1.0000000
## hsa04270 0.161600434 1.0000000
## hsa04926 0.161754112 1.0000000
## hsa04935 0.164360971 1.0000000

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)

## All input CpGs are used for testing.

gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)

##            ONTOLOGY                                                        TERM
## GO:0007442       BP                                       hindgut morphogenesis
## GO:0021517       BP                             ventral spinal cord development
## GO:0046886       BP         positive regulation of hormone biosynthetic process
## GO:0030145       MF                                       manganese ion binding
## GO:0061525       BP                                         hindgut development
## GO:0098982       CC                                          GABA-ergic synapse
## GO:0045471       BP                                         response to ethanol
## GO:0034707       CC                                    chloride channel complex
## GO:0030501       BP                  positive regulation of bone mineralization
## GO:0051155       BP positive regulation of striated muscle cell differentiation
## GO:0021522       BP                    spinal cord motor neuron differentiation
## GO:0032352       BP            positive regulation of hormone metabolic process
## GO:0045669       BP           positive regulation of osteoblast differentiation
## GO:0098637       CC            protein complex involved in cell-matrix adhesion
## GO:0010656       BP        negative regulation of muscle cell apoptotic process
## GO:0021513       BP                       spinal cord dorsal/ventral patterning
## GO:0051149       BP          positive regulation of muscle cell differentiation
## GO:0008157       MF                               protein phosphatase 1 binding
## GO:0070169       BP        positive regulation of biomineral tissue development
## GO:0030500       BP                           regulation of bone mineralization
##              N DE        P.DE FDR
## GO:0007442   7  3 0.001074407   1
## GO:0021517  44  6 0.001081949   1
## GO:0046886   9  3 0.001108901   1
## GO:0030145  62  6 0.001120402   1
## GO:0061525   8  3 0.001385114   1
## GO:0098982  73  7 0.002860103   1
## GO:0045471 116  8 0.003536127   1
## GO:0034707  51  5 0.005023726   1
## GO:0030501  43  5 0.005491312   1
## GO:0051155  52  5 0.005580617   1
## GO:0021522  28  4 0.005643624   1
## GO:0032352  15  3 0.005717392   1
## GO:0045669  74  6 0.006051094   1
## GO:0098637  17  3 0.007104795   1
## GO:0010656  54  5 0.007686735   1
## GO:0021513  16  3 0.008377428   1
## GO:0051149  84  6 0.008683696   1
## GO:0008157  24  3 0.008995994   1
## GO:0070169  52  5 0.009485135   1
## GO:0030500  77  6 0.010586215   1

Now specifically analyse hypermethylated probes

res2 <- subset(res,logFC>0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)

Now specifically analyse hypomethylated probes

res2 <- subset(res,logFC<0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)

DMRCate - differentially methylated region analysis

#design matrix in regression 
design_tw <- model.matrix(~ADOS+Twin_Group)
design_tw_dmrc <- model.matrix(~ADOS)
#myannotation <- cpg.annotate("array", mDat, analysis.type="differential", design=design_tw, coef=2, fdr=1)

# funny anno bug?
anno <- anno[which(rownames(anno) %in% rownames(mvals)),]

myannotation <- cpg.annotate("array", mvals, what = "M",
  arraytype = "EPICv1", analysis.type="differential", design=design_tw, coef=2)

## Your contrast returned no individually significant probes. Try increasing the fdr. Alternatively, set pcutoff manually in dmrcate() to return DMRs, but be warned there is an increased risk of Type I errors.

dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2, pcutoff=0.001)

## Fitting chr1...

## Fitting chr2...

## Fitting chr3...

## Fitting chr4...

## Fitting chr5...

## Fitting chr6...

## Fitting chr7...

## Fitting chr8...

## Fitting chr9...

## Fitting chr10...

## Fitting chr11...

## Fitting chr12...

## Fitting chr13...

## Fitting chr14...

## Fitting chr15...

## Fitting chr16...

## Fitting chr17...

## Fitting chr18...

## Fitting chr19...

## Fitting chr20...

## Fitting chr21...

## Fitting chr22...

## Demarcating regions...

## Done!

results.ranges <- data.frame(extractRanges(dmrcoutput, genome = "hg19") )

## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation

## downloading 1 resources

## retrieving 1 resource

## loading from cache

head(results.ranges, 20)

##   seqnames     start       end width strand no.cpgs min_smoothed_fdr   Stouffer
## 1     chr1 149174971 149175127   157      *       2     0.0003027263 0.03073655
##          HMFDR       Fisher      maxdiff     meandiff overlapping.genes
## 1 0.0001652536 0.0004640786 -0.002616879 -0.001200458                NA

results.ranges$overlapping.genes=NULL
write.csv(results.ranges, file = "dmrcoutput.csv", row.names = TRUE)

Session information

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DMRcatedata_2.22.0                                 
##  [2] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [3] IlluminaHumanMethylationEPICmanifest_0.3.0         
##  [4] RhpcBLASctl_0.23-42                                
##  [5] vioplot_0.5.0                                      
##  [6] zoo_1.8-12                                         
##  [7] sm_2.2-6.0                                         
##  [8] kableExtra_1.4.0                                   
##  [9] mitch_1.16.0                                       
## [10] FlowSorted.Blood.EPIC_2.8.0                        
## [11] ExperimentHub_2.12.0                               
## [12] AnnotationHub_3.12.0                               
## [13] BiocFileCache_2.12.0                               
## [14] dbplyr_2.5.0                                       
## [15] DMRcate_3.0.8                                      
## [16] ggplot2_3.5.1                                      
## [17] reshape2_1.4.4                                     
## [18] FlowSorted.Blood.450k_1.42.0                       
## [19] WGCNA_1.72-5                                       
## [20] fastcluster_1.2.6                                  
## [21] dynamicTreeCut_1.63-1                              
## [22] gplots_3.1.3.1                                     
## [23] RColorBrewer_1.1-3                                 
## [24] limma_3.60.4                                       
## [25] missMethyl_1.38.0                                  
## [26] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [27] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [28] minfi_1.50.0                                       
## [29] bumphunter_1.46.0                                  
## [30] locfit_1.5-9.10                                    
## [31] iterators_1.0.14                                   
## [32] foreach_1.5.2                                      
## [33] Biostrings_2.72.1                                  
## [34] XVector_0.44.0                                     
## [35] SummarizedExperiment_1.34.0                        
## [36] Biobase_2.64.0                                     
## [37] MatrixGenerics_1.16.0                              
## [38] matrixStats_1.3.0                                  
## [39] GenomicRanges_1.56.1                               
## [40] GenomeInfoDb_1.40.1                                
## [41] IRanges_2.38.1                                     
## [42] S4Vectors_0.42.1                                   
## [43] BiocGenerics_0.50.0                                
## 
## loaded via a namespace (and not attached):
##   [1] ProtGenerics_1.36.0       bitops_1.0-8             
##   [3] httr_1.4.7                doParallel_1.0.17        
##   [5] tools_4.4.1               doRNG_1.8.6              
##   [7] backports_1.5.0           utf8_1.2.4               
##   [9] R6_2.5.1                  HDF5Array_1.32.1         
##  [11] lazyeval_0.2.2            Gviz_1.48.0              
##  [13] rhdf5filters_1.16.0       permute_0.9-7            
##  [15] withr_3.0.0               GGally_2.2.1             
##  [17] prettyunits_1.2.0         gridExtra_2.3            
##  [19] base64_2.0.1              preprocessCore_1.61.0    
##  [21] cli_3.6.3                 labeling_0.4.3           
##  [23] sass_0.4.9                readr_2.1.5              
##  [25] genefilter_1.86.0         askpass_1.2.0            
##  [27] systemfonts_1.1.0         Rsamtools_2.20.0         
##  [29] foreign_0.8-86            svglite_2.1.3            
##  [31] siggenes_1.78.0           illuminaio_0.46.0        
##  [33] R.utils_2.12.3            dichromat_2.0-0.1        
##  [35] scrime_1.3.5              BSgenome_1.72.0          
##  [37] readxl_1.4.3              rstudioapi_0.16.0        
##  [39] impute_1.78.0             RSQLite_2.3.7            
##  [41] generics_0.1.3            BiocIO_1.14.0            
##  [43] gtools_3.9.5              dplyr_1.1.4              
##  [45] GO.db_3.19.1              Matrix_1.7-0             
##  [47] interp_1.1-6              fansi_1.0.6              
##  [49] abind_1.4-5               R.methodsS3_1.8.2        
##  [51] lifecycle_1.0.4           edgeR_4.2.1              
##  [53] yaml_2.3.8                rhdf5_2.48.0             
##  [55] SparseArray_1.4.8         grid_4.4.1               
##  [57] blob_1.2.4                promises_1.3.0           
##  [59] crayon_1.5.3              lattice_0.22-6           
##  [61] echarts4r_0.4.5           GenomicFeatures_1.56.0   
##  [63] annotate_1.82.0           KEGGREST_1.44.1          
##  [65] pillar_1.9.0              knitr_1.47               
##  [67] beanplot_1.3.1            rjson_0.2.22             
##  [69] codetools_0.2-20          glue_1.7.0               
##  [71] data.table_1.15.4         vctrs_0.6.5              
##  [73] png_0.1-8                 cellranger_1.1.0         
##  [75] gtable_0.3.5              cachem_1.1.0             
##  [77] xfun_0.45                 mime_0.12                
##  [79] S4Arrays_1.4.1            survival_3.7-0           
##  [81] statmod_1.5.0             nlme_3.1-165             
##  [83] bit64_4.0.5               bsseq_1.40.0             
##  [85] progress_1.2.3            filelock_1.0.3           
##  [87] bslib_0.7.0               nor1mix_1.3-3            
##  [89] KernSmooth_2.23-24        rpart_4.1.23             
##  [91] colorspace_2.1-1          DBI_1.2.3                
##  [93] Hmisc_5.1-3               nnet_7.3-19              
##  [95] tidyselect_1.2.1          bit_4.0.5                
##  [97] compiler_4.4.1            curl_5.2.1               
##  [99] httr2_1.0.1               htmlTable_2.4.3          
## [101] BiasedUrn_2.0.12          xml2_1.3.6               
## [103] DelayedArray_0.30.1       rtracklayer_1.64.0       
## [105] checkmate_2.3.2           scales_1.3.0             
## [107] caTools_1.18.2            quadprog_1.5-8           
## [109] rappdirs_0.3.3            stringr_1.5.1            
## [111] digest_0.6.36             rmarkdown_2.27           
## [113] GEOquery_2.72.0           htmltools_0.5.8.1        
## [115] pkgconfig_2.0.3           jpeg_0.1-10              
## [117] base64enc_0.1-3           sparseMatrixStats_1.16.0 
## [119] highr_0.11                fastmap_1.2.0            
## [121] ensembldb_2.28.1          rlang_1.1.4              
## [123] htmlwidgets_1.6.4         UCSC.utils_1.0.0         
## [125] shiny_1.8.1.1             DelayedMatrixStats_1.26.0
## [127] farver_2.1.2              jquerylib_0.1.4          
## [129] jsonlite_1.8.8            BiocParallel_1.38.0      
## [131] mclust_6.1.1              R.oo_1.26.0              
## [133] VariantAnnotation_1.50.0  RCurl_1.98-1.16          
## [135] magrittr_2.0.3            Formula_1.2-5            
## [137] GenomeInfoDbData_1.2.12   Rhdf5lib_1.26.0          
## [139] munsell_0.5.1             Rcpp_1.0.12              
## [141] stringi_1.8.4             zlibbioc_1.50.0          
## [143] MASS_7.3-61               plyr_1.8.9               
## [145] org.Hs.eg.db_3.19.1       ggstats_0.6.0            
## [147] deldir_2.0-4              splines_4.4.1            
## [149] multtest_2.60.0           hms_1.1.3                
## [151] rngtools_1.5.2            biomaRt_2.60.1           
## [153] BiocVersion_3.19.1        XML_3.99-0.17            
## [155] evaluate_0.24.0           latticeExtra_0.6-30      
## [157] biovizBase_1.52.0         BiocManager_1.30.23      
## [159] httpuv_1.6.15             tzdb_0.4.0               
## [161] tidyr_1.3.1               openssl_2.2.0            
## [163] purrr_1.0.2               reshape_0.8.9            
## [165] xtable_1.8-4              restfulr_0.0.15          
## [167] AnnotationFilter_1.28.0   later_1.3.2              
## [169] viridisLite_0.4.2         tibble_3.2.1             
## [171] beeswarm_0.4.0            memoise_2.0.1            
## [173] AnnotationDbi_1.66.0      GenomicAlignments_1.40.0 
## [175] cluster_2.1.6

Autism Spectrum Disorder Methylation analysis - blood analysis

Namitha Mohandras, Mark Ziemann

2024-10-08