Source: https://github.com/markziemann/asd_meth

Introduction

Here we are analysing Guthrie card methylation in 9 twin pairs.

baseDir=getwd()
dataDir=paste(baseDir,"/ASD_EPIC_DATA",sep="")
library("missMethyl")
## Loading required package: IlluminaHumanMethylation450kanno.ilmn12.hg19
## Loading required package: minfi
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Loading required package: Biostrings
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## Loading required package: bumphunter
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: locfit
## locfit 1.5-9.7    2023-01-02
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## Loading required package: IlluminaHumanMethylationEPICanno.ilm10b4.hg19
## 
## Attaching package: 'IlluminaHumanMethylationEPICanno.ilm10b4.hg19'
## The following objects are masked from 'package:IlluminaHumanMethylation450kanno.ilmn12.hg19':
## 
##     Islands.UCSC, Locations, Manifest, Other, SNPs.132CommonSingle,
##     SNPs.135CommonSingle, SNPs.137CommonSingle, SNPs.138CommonSingle,
##     SNPs.141CommonSingle, SNPs.142CommonSingle, SNPs.144CommonSingle,
##     SNPs.146CommonSingle, SNPs.147CommonSingle, SNPs.Illumina
## 
library("limma")
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library("minfi")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("IlluminaHumanMethylationEPICanno.ilm10b2.hg19")
## 
## Attaching package: 'IlluminaHumanMethylationEPICanno.ilm10b2.hg19'
## The following objects are masked from 'package:IlluminaHumanMethylationEPICanno.ilm10b4.hg19':
## 
##     Islands.UCSC, Locations, Manifest, Other, SNPs.132CommonSingle,
##     SNPs.135CommonSingle, SNPs.137CommonSingle, SNPs.138CommonSingle,
##     SNPs.141CommonSingle, SNPs.142CommonSingle, SNPs.144CommonSingle,
##     SNPs.146CommonSingle, SNPs.147CommonSingle, SNPs.Illumina
## The following objects are masked from 'package:IlluminaHumanMethylation450kanno.ilmn12.hg19':
## 
##     Islands.UCSC, Locations, Manifest, Other, SNPs.132CommonSingle,
##     SNPs.135CommonSingle, SNPs.137CommonSingle, SNPs.138CommonSingle,
##     SNPs.141CommonSingle, SNPs.142CommonSingle, SNPs.144CommonSingle,
##     SNPs.146CommonSingle, SNPs.147CommonSingle, SNPs.Illumina
library("ruv")
library("RColorBrewer")
library("matrixStats")
library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
library("WGCNA")
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## Attaching package: 'WGCNA'
## The following object is masked from 'package:IRanges':
## 
##     cor
## The following object is masked from 'package:S4Vectors':
## 
##     cor
## The following object is masked from 'package:stats':
## 
##     cor
library("FlowSorted.Blood.450k")
library("reshape2")
library("ggplot2")
library("missMethyl")
library("DMRcate")
library("FlowSorted.Blood.EPIC")
## Loading required package: ExperimentHub
## Loading required package: AnnotationHub
## Loading required package: BiocFileCache
## Loading required package: dbplyr
## 
## Attaching package: 'AnnotationHub'
## The following object is masked from 'package:Biobase':
## 
##     cache
library("mitch")
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library("kableExtra")

Load data

Load the annotation data and the Epic methylation data.

This analysis is to be conducted on Ubuntu with R4.

ann = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
targets_gen = read.metharray.sheet(dataDir, pattern = "ASD_guthrie_summarysheet.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "/home/mdz/projects/asd_meth/ASD_EPIC_DATA/ASD_guthrie_summarysheet.csv"
#targets$ID = paste(targets$Sample_Group,targets_gen$Sample_Name,sep=".")
rgSet = read.metharray.exp(targets = targets_gen)
sampleNames(rgSet) = targets_gen$SampleID

Testing MZ status

snpBetas = getSnpBeta(rgSet)
## Loading required package: IlluminaHumanMethylationEPICmanifest
d = dist(t(snpBetas))
hr = hclust(d, method = "complete", members=NULL)
plot(hr)

Quality control

detP = detectionP(rgSet)
qcReport(rgSet, sampNames = targets_gen$Sample_Name,
  pdf="qc-report_ASD_guthrie_nov27.pdf")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## png 
##   2
cols=brewer.pal(4,"Set1")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

barplot(apply(detP,2,mean),
  col=as.numeric(factor(targets_gen$Sample_Name)),
  las=2,cex.names= 0.8, cex.axis=0.75,ylim=c(0,0.010),
  main="Mean detection p-values of probe signals",
  ylab="Mean detection p-value")

Preprocessing

mset.raw = preprocessRaw(rgSet)

Data exploration

Using Multi-dimensional scaling (MDS) plots before filtering.

mdsPlot(mset.raw, sampGroups = targets_gen$Sample_Name, 
  sampNames=targets_gen$Social_interaction_on_ADOS,legendPos="bottom")

mdsPlot(mset.raw, sampGroups = targets_gen$Sex, 
  sampNames=targets_gen$SampleID,legendPos="bottom")

Normalisation

SQN is used as it gave a more stable result.

#SQN method
mSetSq = preprocessQuantile(rgSet)
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
densityPlot(rgSet, sampGroups = targets_gen$Social_interaction_on_ADOS,
  main="Raw", legend = FALSE)

densityPlot(getBeta(mSetSq), 
  sampGroups = targets_gen$Social_interaction_on_ADOS,main="SQN", 
  legend = FALSE)

Cell type composition analysis

Using old method.

rgSet$Slide <- as.numeric(rgSet$Slide)
rgSet$Sex <- as.character(rgSet$Sex)
#rgSet$Sample_Name <- as.character(rgSet$Sample_Name)
#sampleNames(rgSet) = targets_gen$SampleID

cellCounts_new <- estimateCellCounts(rgSet, compositeCellType = "Blood", 
  processMethod = "auto", probeSelect = "auto", 
  cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono","Gran"),
  referencePlatform = c("IlluminaHumanMethylation450k"),
  returnAll = FALSE, meanPlot = TRUE)
## Loading required package: IlluminaHumanMethylation450kmanifest
## [estimateCellCounts] Combining user data with reference (flow sorted) data.
## Warning in DataFrame(sampleNames = c(colnames(rgSet),
## colnames(referenceRGset)), : 'stringsAsFactors' is ignored
## [estimateCellCounts] Processing user and reference data together.
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
## [estimateCellCounts] Picking probes for composition estimation.
## [estimateCellCounts] Estimating composition.

#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1,  targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) + 
  geom_boxplot() 

wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )
## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties
names(wx) <- colnames(cellCounts_new)
wx
## $CD8T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 17, p-value = 0.2674
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 37, p-value = 0.3196
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  a[, i] and c[, i]
## W = 22, p-value = 0.491
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 36, p-value = 0.3773
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 23, p-value = 0.6612
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Gran
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 26, p-value = 0.913
## alternative hypothesis: true location shift is not equal to 0

Try estimatecellcounts2.

cells <- estimateCellCounts2(rgSet, referencePlatform= "IlluminaHumanMethylationEPIC",
  returnAll = TRUE)
## snapshotDate(): 2022-10-31
## snapshotDate(): 2022-10-31
## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation
## loading from cache
## [estimateCellCounts2] Combining user data with reference (flow sorted) data.
## Warning in asMethod(object): NAs introduced by coercion
## [estimateCellCounts2] Processing user and reference data together.
## [estimateCellCounts2] Using IDOL L-DMR probes for composition estimation.
## [estimateCellCounts2] Estimating proportion composition (prop), if you provide cellcounts those will be provided as counts in the composition estimation.
mset <- cells$normalizedData

cellCounts_new <- cells[[1]]
#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")

cellCounts_long <- melt(cellCounts_new, id = "celltype")

cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1,  targets_gen$SampleID),"Diagnosis"]

colnames(cellCounts_long) <- c("diagnosis","celltype","value")

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
  geom_boxplot()

pdf("cells_guthrie.pdf")

par(mar=c(3,8,2,2))

ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
  geom_boxplot()

dev.off()
## png 
##   2
wx <- lapply(1:ncol(cellCounts_new) , function(i) {
  wilcox.test(a[,i],c[,i], paired=FALSE)
} )
names(wx) <- colnames(cellCounts_new)
wx
## $CD8T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 32, p-value = 0.6612
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $CD4T
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 36, p-value = 0.3773
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $NK
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 19, p-value = 0.3773
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Bcell
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 39, p-value = 0.2212
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Mono
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 20, p-value = 0.4409
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $Neu
## 
##  Wilcoxon rank sum exact test
## 
## data:  a[, i] and c[, i]
## W = 29, p-value = 0.913
## alternative hypothesis: true location shift is not equal to 0
#densityByProbeType(mSetSq, main = "Raw")
#densityByProbeType(mset[,1], main = "estimateCellCounts2")
#mset_reduced <- mset[which(rownames(mset) %in% names(keep[keep==TRUE])),]
#meth <- getMeth(mset_reduced)
#unmeth <- getUnmeth(mset_reduced)
#Mval <- log2((meth + 100)/(unmeth + 100))
#beta <- getBeta(mset_reduced)
#dim(Mval)

Filtering

None of the p-values are less than 0.05.

Now filter for high detection p-value and overlap with SNP.

Need to get a copy of the Xreact probes from Namitha. In the mean time I have downloaded from github - link below.

detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mset <- mset[keep,]
gmset <- mapToGenome(mset)

#remove SNPs
gmset_flt = dropLociWithSnps(gmset, snps = c("CpG", "SBE"))

#Removing cross-reactive probes
XURL="https://raw.githubusercontent.com/sirselim/illumina450k_filtering/master/EPIC/13059_2016_1066_MOESM1_ESM.csv"
Xreact <- read.csv(XURL)

#Xreact = read.csv(file="/group/canc2/puumba/Data/InfiniumData/NamithaData/Rprojects/Autism/Analysis_Sept11/EPIC_850k_crossreactiveProbes.csv", stringsAsFactors=FALSE)
#Xreact = read.csv(file="~/48639-non-specific-probes-Illumina450k.csv", stringsAsFactors=FALSE)
noXreact <-  !(featureNames(gmset) %in% Xreact$X)

gmset <- gmset[noXreact,]

#Removing probes on X and Y chromosomes
autosomes <- !(featureNames(gmset) %in% ann$Name[ann$chr %in% c("chrX","chrY")])
gmset_flt <- gmset[autosomes,]

#Relative log expression (RLE plot)
mvals = getM(gmset_flt)
medSq = apply(mvals, 1, median)
YSq = mvals - medSq

boxplot(YSq,outline=FALSE,ylim=c(-1.5,1.5), 
  ylab="Relative Log Methylation Value", 
  cols=as.character(factor(targets_gen$Social_interaction_on_ADOS,)) )

MDS plots generation after filtering

pal = brewer.pal(8, "Dark2")
mds1Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,2))

mds2Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,3))

mds3Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(2,3))

mds4Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(3,4))

plotMDS(mds1Sq, xlab="Dimension 1", ylab="Dimension 2",
  col=pal[as.factor(targets_gen$Diagnosis)],
  dim=c(1,2), labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds2Sq, xlab="Dimension 1", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(1,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds3Sq, xlab="Dimension 2", ylab="Dimension 3",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(2,3),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

plotMDS(mds4Sq, xlab="Dimension 3", ylab="Dimension 4",
  col=pal[as.factor(targets_gen$Diagnosis)],dim=c(3,4),
  labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)

Principal Component Analysis (PCA)

fit <- prcomp(t(mvals),center = TRUE, scale = TRUE,retx=TRUE)
loadings = fit$x
plot(fit,type="lines")

nGenes = nrow(mvals)
nSamples = ncol(mvals)
datTraits = targets_gen[,7:15]
moduleTraitCor = cor(loadings[,1:6], datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
par(cex=0.75, mar = c(6, 8.5, 3, 3))
textMatrix = paste(signif(moduleTraitCor, 2), "\n(", 
  signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

pdf("pca_guthrie.pdf")

par(mar=c(3,8,2,2))

labeledHeatmap(Matrix = t(moduleTraitCor), 
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-1,1), 
  main = paste("PCA-trait relationships: Top principal components"))

dev.off()
## png 
##   2
#blah()

Regression analysis - within twin pair on ADOS

ADOS is the outcome variable (continuous)

mDat = mvals
Twin_Group <- factor(targets_gen$Family_ID)
ADOS <- targets_gen$Social_interaction_on_ADOS
names(ADOS) <- targets_gen$SampleID
design_tw <- model.matrix(~ Twin_Group + ADOS )

fit_tw <- lmFit(mDat, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))
##        (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group5 Twin_Group6
## Down        245993        2344        2566        2439        1909        3492
## NotSig       59114      786047      786358      786292      786913      785103
## Up          485551        2267        1734        1927        1836        2063
##        Twin_Group7 Twin_Group8 Twin_Group9 Twin_Group12 Twin_Group13   ADOS
## Down         48678        2099        2939         1854         2557      0
## NotSig      728154      786567      785798       786943       786409 790658
## Up           13826        1992        1921         1861         1692      0
top <- topTable(fit2_tw,coef="ADOS",num=Inf, sort.by = "P")
head(top)
##                  logFC   AveExpr         t      P.Value adj.P.Val        B
## cg12565057  0.08266213  3.046014  7.387285 4.009335e-06 0.9999951 4.581264
## cg00546248  0.10347799  1.801484  6.865500 8.922285e-06 0.9999951 3.850448
## cg15740041  0.13176113  1.600846  6.823044 9.537411e-06 0.9999951 3.789115
## cg05116656  0.09157137 -3.375059  6.719336 1.123567e-05 0.9999951 3.638100
## cg11632273 -0.13325664  2.268115 -6.338476 2.076849e-05 0.9999951 3.068838
## cg01656620 -0.13309386  3.051199 -6.203748 2.593336e-05 0.9999951 2.861912
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)
## [1] 21151
ann_sub = 
  ann[,c("chr","pos","strand","Name","Islands_Name",
    "Relation_to_Island","UCSC_RefGene_Name","UCSC_RefGene_Group")]

output <-merge(ann_sub,top,by.x="Name",by.y="row.names")

write.csv(output[order(output$P.Value),],
  file="ASD_guthrie_top_dmps_onADOS_withintw_Nov27_limma.csv",row.names=FALSE)

top1000 <- topTable(fit2_tw,coef="ADOS",num=1000, sort.by = "P")
output_1000 <- merge(ann_sub,top1000,by.x="Name",by.y="row.names")

write.csv(output_1000[order(output_1000$P.Value),],
  file="ASD_guthrie_top_dmps_onADOS_Nov27_withintw_limma_top1k.csv",row.names=FALSE)

saveRDS(design_tw, "gu_design.rds")
saveRDS(mvals, "gu_mvals.rds")

output <- output[order(output$P.Value),]

output <- subset(output,P.Value<1e-4)

output %>% kbl() %>% kable_paper("hover", full_width = F)
Name chr pos strand Islands_Name Relation_to_Island UCSC_RefGene_Name UCSC_RefGene_Group logFC AveExpr t P.Value adj.P.Val B
cg12565057 chr7 100482617
chr7:100484425-100484651 N_Shore SRRT;SRRT;SRRT;SRRT Body;Body;Body;Body 0.0826621 3.0460140 7.387285 4.00e-06 0.9999951 4.581264
cg00546248 chr1 2412743
chr1:2411125-2411404 S_Shore PLCH2 Body 0.1034780 1.8014843 6.865500 8.90e-06 0.9999951 3.850448
cg15740041 chr16 87781336
OpenSea KLHDC4;KLHDC4;KLHDC4 Body;Body;Body 0.1317611 1.6008461 6.823044 9.50e-06 0.9999951 3.789115
cg05116656 chr5 94417892
OpenSea MCTP1;MCTP1;MCTP1 TSS1500;TSS1500;Body 0.0915714 -3.3750586 6.719336 1.12e-05 0.9999951 3.638100
cg11632273 chr22 20102580
chr22:20104374-20105729 N_Shore TRMT2A;TRMT2A;TRMT2A;RANBP1;MIR6816 Body;Body;Body;TSS1500;TSS1500 -0.1332566 2.2681145 -6.338476 2.08e-05 0.9999951 3.068838
cg01656620 chr12 124788698
chr12:124788559-124788884 Island FAM101A 5’UTR -0.1330939 3.0511986 -6.203748 2.59e-05 0.9999951 2.861912
cg10881749 chr1 234792390
OpenSea 0.0804887 0.9589997 6.114550 3.01e-05 0.9999951 2.723318
cg13699963 chr17 71548861
OpenSea SDK2 Body -0.0663828 2.9801617 -5.750106 5.58e-05 0.9999951 2.143861
cg22656126 chr17 1637206
chr17:1637053-1637492 Island WDR81;WDR81;WDR81;WDR81 Body;Body;Body;Body 0.1395930 5.7130309 5.698989 6.09e-05 0.9999951 2.060902
cg07162704 chr6 91113687
OpenSea 0.1009010 0.5704454 5.695702 6.13e-05 0.9999951 2.055553
cg13277464 chr2 43393040
OpenSea 0.1211560 2.3025832 5.606707 7.15e-05 0.9999951 1.910100
cg18731055 chr13 49108131
chr13:49106411-49107463 S_Shore RCBTB2 TSS1500 0.1016068 0.0882235 5.584670 7.43e-05 0.9999951 1.873891
cg03188919 chr10 31372663
OpenSea -0.1030369 2.0662356 -5.526631 8.23e-05 0.9999951 1.778167
cg01128603 chr11 65820391
chr11:65819367-65820224 S_Shore SF3B2 Body 0.0946490 -3.2440188 5.486222 8.83e-05 0.9999951 1.711213
cg03177876 chr4 71701931
chr4:71704664-71705736 N_Shelf GRSF1;GRSF1 5’UTR;Body 0.0982965 -1.2874739 5.481370 8.91e-05 0.9999951 1.703158
cg05791256 chr9 96077420
chr9:96080113-96080330 N_Shelf WNK2;WNK2 Body;Body -0.0749658 3.6607111 -5.477688 8.96e-05 0.9999951 1.697042

Converting to beta from M values

bDat = ilogit2(mDat)
bDat_new = getBeta(gmset_flt)
#View(bDat)
write.csv(bDat,file="ASD_guthrie_beta_onADOS_withintw_Nov27.csv",row.names=TRUE)

RUV

Extract illumina negative control

#Extract illumina negative control
INCs <- getINCs(rgSet)
head(INCs)
##                  S3         S4         S31        S32        S11        S12
## 10609447 -1.8776490 -1.1632518 -1.18519176 -0.7143278 -1.6703127 -0.9326960
## 10627500 -1.1555411 -0.9977650 -1.41063232 -0.6264921 -0.6773834 -0.7425038
## 10676356 -0.4866985 -0.1531999 -0.44704789 -0.9144243 -0.5461735 -0.5811102
## 10731326 -0.5239331 -0.2664330 -0.53287399 -0.3677318 -0.3942789 -0.3690454
## 10732387 -0.5808464 -0.3698891  0.02445304 -0.6246116 -0.8225041 -0.4441000
## 10740401 -0.6247424 -0.4382929 -0.12261929 -0.5911945 -0.3335561 -0.1339268
##                 S13         S14        S17        S18        S21         S22
## 10609447 -1.4266255 -1.65503374 -1.7325197 -2.0211992 -1.7301044 -1.70675098
## 10627500 -1.4321922 -1.11090931 -1.5051335 -1.4742964 -1.0479345 -1.97638744
## 10676356 -0.7391834 -0.71443903 -1.1014453 -0.9098022 -0.8210299 -0.09709869
## 10731326 -0.2289412 -0.71175503 -1.0727563 -0.1216786 -0.6648158  0.42950798
## 10732387 -1.0397263 -0.97893838 -0.9015374 -0.9911761 -0.7004397 -0.98943076
## 10740401 -0.2777810  0.07871198 -1.5360529 -0.1107035 -0.7868908 -0.74068388
##                 S27        S28         S7         S8        S23         S24
## 10609447 -1.5824091 -1.5864146 -1.3351474 -1.2598671 -1.0475135 -0.86742584
## 10627500 -0.6953229 -0.9076853 -1.1789701 -0.8329807 -1.0114048 -0.26615375
## 10676356 -0.7231598  0.1184415 -1.2825134 -0.6059221 -0.1861513 -0.37664526
## 10731326 -0.5672294 -0.1586977 -0.1512436 -0.3645157 -0.4609045 -0.27633123
## 10732387 -1.1776194 -0.6284205  0.1619675 -0.6341286 -0.7256376  0.07352904
## 10740401 -0.3617955 -0.5713134 -0.9259994 -0.7932338 -1.1415963 -0.14597931
##                 S39         S40        S35        S36
## 10609447 -1.2592127 -0.75375018 -1.5450238 -1.2677164
## 10627500 -1.1759877 -0.99288727 -1.1163866 -1.0860710
## 10676356 -0.1570212  0.27408959 -0.4942642 -0.9655116
## 10731326 -0.8314943  0.05578778  0.1271119 -0.5770573
## 10732387 -0.2847295 -0.21367520 -0.8371023 -0.1720994
## 10740401 -1.5156998 -0.65151369 -0.8073549 -0.8467537
#add negative control data to M-values
Mc <- rbind(mvals,INCs)
ctl <- rownames(Mc) %in% rownames(INCs)
table(ctl)
## ctl
##  FALSE   TRUE 
## 790658    411
#old RUV syntax is updated
#rfit1 <- RUVfit(data=Mc, design=design_tw, coef=2, ctl=ctl) # Stage 1 analysis

# this is the updatedcommand - not sure if correct
rfit1 <- RUVfit(Y = Mc, X = design_tw[,"ADOS"] , ctl = ctl) # Stage 1 analysis

rfit2 <- RUVadj(Y = Mc, fit = rfit1)

top1 <- topRUV(rfit2, num=Inf)

head(top1)
##                     F.p    F.p.BH         p_X1   p.BH_X1        b_X1     sigma2
## cg00738934 1.749734e-05 0.9999976 1.749734e-05 0.9999976  0.07012855 0.08075806
## cg23184556 2.012823e-05 0.9999976 2.012823e-05 0.9999976  0.08913465 0.13356894
## cg20718640 2.062933e-05 0.9999976 2.062933e-05 0.9999976  0.13884017 0.32542004
## cg00779672 3.786057e-05 0.9999976 3.786057e-05 0.9999976  0.06977061 0.09121294
## cg21941354 4.877250e-05 0.9999976 4.877250e-05 0.9999976 -0.21282376 0.88743705
## cg15828854 4.911974e-05 0.9999976 4.911974e-05 0.9999976  0.09896672 0.19214239
##                var.b_X1 fit.ctl       mean
## cg00738934 0.0001458951   FALSE  2.5400187
## cg23184556 0.0002413017   FALSE  0.4946566
## cg20718640 0.0005878942   FALSE  2.8093465
## cg00779672 0.0001647826   FALSE  2.1754652
## cg21941354 0.0016032174   FALSE -3.2212907
## cg15828854 0.0003471187   FALSE -0.4868451
#ctl <- rownames(mValsSq) %in% rownames(top1[top1$p.ebayes > 0.5,])
#bottom 80% used as negative controls
ctl <- rownames(mvals) %in% rownames(top1[ceiling(0.2*nrow(top1)):nrow(top1),])
table(ctl)
## ctl
##  FALSE   TRUE 
## 158105 632553
# Perform RUV adjustment and fit
rfit1 <- RUVfit(Y = mvals, X=design_tw[,"ADOS"], ctl=ctl) # Stage 2 analysis 
rfit2 <- RUVadj(Y = mvals, fit=rfit1)
top_ruv <- topRUV(rfit2, number = Inf)
top_ruv_1000 <- topRUV(rfit2, number = 1000)
head(top_ruv_1000)
##                     F.p    F.p.BH         p_X1   p.BH_X1        b_X1     sigma2
## cg07926098 4.006899e-06 0.8414528 4.006899e-06 0.8414528  0.07112171 0.05646704
## cg23088672 1.313169e-05 0.8414528 1.313169e-05 0.8414528  0.07896674 0.08605586
## cg17331950 1.565399e-05 0.8414528 1.565399e-05 0.8414528  0.05847637 0.04873836
## cg10593232 1.652526e-05 0.8414528 1.652526e-05 0.8414528  0.05969707 0.05130503
## cg19893067 1.769350e-05 0.8414528 1.769350e-05 0.8414528  0.08232386 0.09880945
## cg21431091 2.229518e-05 0.8414528 2.229518e-05 0.8414528 -0.07643017 0.08891945
##                var.b_X1 fit.ctl       mean
## cg07926098 9.820970e-05   FALSE  1.3940876
## cg23088672 1.496717e-04   FALSE  1.6058434
## cg17331950 8.476767e-05   FALSE  2.3723731
## cg10593232 8.923173e-05   FALSE  0.6769301
## cg19893067 1.718533e-04   FALSE  0.2863827
## cg21431091 1.546522e-04   FALSE -1.7599098
#dat_subset_ruv = as.matrix(subset(mValsSq, rownames(mValsSq) %in% rownames(top_ruv_1000)))
#heatmap.2(dat_subset_ruv, cexRow=0.8, cexCol=0.8, col=colorRampPalette(c("Blue","Yellow")), density.info="none", trace="none", scale="row", main="diff meth probes due to epilepsy")
output_ruv = merge(ann,top_ruv,by.x="Name",by.y="row.names")
output_ruv_1000 = merge(ann,top_ruv_1000,by.x="Name",by.y="row.names")
#output_full_epicanno = merge(ann_epic,top,by.x="Name",by.y="row.names")

write.csv(output_ruv[order(output_ruv$F.p),],
  file="ASD_guthrie_topruv80%_dmps_onADOS_withintw_Nov29.csv",
  row.names=FALSE)

write.csv(output_ruv_1000[order(output_ruv_1000$F.p),],
  file="ASD_guthrie_topruv80%_1kdmps_onADOS_withintw_Nov29.csv",
  row.names=FALSE)

output_ruv <- data.frame(output_ruv[order(output_ruv$F.p),])

output_ruv <- output_ruv[order(output_ruv$p_X1),]

output_ruv <- subset(output_ruv,p_X1<1e-4)

output_ruv %>% kbl() %>% kable_paper("hover", full_width = F)
Name chr pos strand AddressA AddressB ProbeSeqA ProbeSeqB Type NextBase Color Probe_rs Probe_maf CpG_rs CpG_maf SBE_rs SBE_maf Islands_Name Relation_to_Island Forward_Sequence SourceSeq UCSC_RefGene_Name UCSC_RefGene_Accession UCSC_RefGene_Group Phantom4_Enhancers Phantom5_Enhancers DMR X450k_Enhancer HMM_Island Regulatory_Feature_Name Regulatory_Feature_Group GencodeBasicV12_NAME GencodeBasicV12_Accession GencodeBasicV12_Group GencodeCompV12_NAME GencodeCompV12_Accession GencodeCompV12_Group DNase_Hypersensitivity_NAME DNase_Hypersensitivity_Evidence_Count OpenChromatin_NAME OpenChromatin_Evidence_Count TFBS_NAME TFBS_Evidence_Count Methyl27_Loci Methyl450_Loci Random_Loci F.p F.p.BH p_X1 p.BH_X1 b_X1 sigma2 var.b_X1 fit.ctl mean
cg07926098 chr21 31044189
55802984 TTCTCCCAAATTCAATCATCTAATCTTTTATTAATACTTCTATTTTAAAC II NA NA NA NA NA NA OpenSea ATATTTTTTCTTTCTCCCAAATTCAATCATCTGATCTTTTATTAATGCTTCTATTTTAAA[CG]TCTCACTAGCCTTTCCAGATGTGTATTACAAGATGTTTAGCAGTACCCACTAGTTGCCAG TCTCCCAAATTCAATCATCTGATCTTTTATTAATGCTTCTATTTTAAACG GRIK1;GRIK1 NM_175611;NM_000830 Body;Body GRIK1 ENST00000472429.1 3’UTR GRIK1 ENST00000472429.1 3’UTR chr21:31044146-31044323 5 chr21:31043811-31044512 6 4.00e-06 0.8414528 4.00e-06 0.8414528 0.0711217 0.0564670 0.0000982 FALSE 1.3940876
cg23088672 chr12 132403634
86647334 ATAAAATACAAAATATATAAAATATCRAACRTAACACATCCCAACTACCC II NA NA NA NA NA NA OpenSea GTGTACGGTGTGTGGGGTGCAGGGTGTGTGGGGTGTCGGGCGTGGCACATCCCAGCTGCC[CG]GGCTGCAGCAGGGGAGGGTACCCTGCCCTGCCTCCCCAAGGCGTCATCGGTGAGTCTGAA CGGGCAGCTGGGATGTGCCACGCCCGACACCCCACACACCCTGCACCCCA ULK1 NM_003565 Body ULK1 ENST00000542419.1 TSS1500 ULK1;ULK1;ULK1 ENST00000542419.1;ENST00000540568.1;ENST00000544718.1 TSS1500;TSS1500;TSS200 chr12:132403585-132403830 3 1.31e-05 0.8414528 1.31e-05 0.8414528 0.0789667 0.0860559 0.0001497 FALSE 1.6058434
cg17331950 chr1 181075666
56632211 ATCTCTCTCCCTCCATCTCTACTTAATTACTCTAATTTATTTCRAACACC II NA NA NA NA NA NA chr1:181073917-181075002 S_Shore GAGGACTAAACATCTCTCTCCCTCCATCTCTGCTTGGTTACTCTGGTTTGTTTCGAACAC[CG]GCTGCTTCTTCCCACTTCTTGACTCCCTGCACCCCTACCCGCCCCCCCAGCAATCACATT TCTCTCTCCCTCCATCTCTGCTTGGTTACTCTGGTTTGTTTCGAACACCG chr1:181075648-181075648 RDMR chr1:181055591-181075926 6 TRUE 1.57e-05 0.8414528 1.57e-05 0.8414528 0.0584764 0.0487384 0.0000848 FALSE 2.3723731
cg10593232 chr17 4378180
65747240 TCTTTCTAATACCCTCRCCACACATATTTTAATATTTTAAATAACTCCCC II NA NA NA NA NA NA OpenSea TATAGTCATCATCTTTCTGGTGCCCTCGCCACACATGTTTTGGTATTTTGAATGGCTCCC[CG]CATATGTACGTAGTTGTGGCGTGTGAGTTCTTCGTGTGTGTCTCAGATTGTGGGTGTCCG CGGGGAGCCATTCAAAATACCAAAACATGTGTGGCGAGGGCACCAGAAAG SPNS3 NM_182538 Body 17:4324799-4325035 SPNS3 ENST00000575194.1 3’UTR chr17:4377291-4381173 6 TRUE 1.65e-05 0.8414528 1.65e-05 0.8414528 0.0596971 0.0513050 0.0000892 FALSE 0.6769301
cg19893067 chr17 80732956
40769910 AAATACRATACCTATAAACCAAAAAACACCATCTAAATACTTCTCCCTCC II NA NA NA NA NA NA OpenSea TTGGCTCTGTGGAGTGCGGTGCCTGTGGGCCAGGAGGCACCATCTGAGTACTTCTCCCTC[CG]TGGGGTCCTCCGGGAAAAGAGCGCGCTGGCTCGGGATCCCGGGCGGACTCCCGGCGAGAC CGGAGGGAGAAGTACTCAGATGGTGCCTCCTGGCCCACAGGCACCGCACT TBCD NM_005993 Body 17:78326180-78326457 17:80732631-80734340 Unclassified TBCD;TBCD ENST00000334614.8;ENST00000397466.2 5’UTR;5’UTR TBCD;TBCD ENST00000334614.8;ENST00000397466.2 5’UTR;5’UTR chr17:80732665-80733595 3 TRUE 1.77e-05 0.8414528 1.77e-05 0.8414528 0.0823239 0.0988094 0.0001719 FALSE 0.2863827
cg21431091 chr1 201124025
84650942 ATTATTTAAAAATCCACRTCTTTACTACAAAAAACTTTCTACRCRACAAC II NA NA NA NA NA NA chr1:201123245-201123746 S_Shore AATCTGGAAATGTTATTTAGGGATCCACGTCTTTGCTGCAGAAGACTTTCTGCGCGGCAA[CG]TCTCTACCCCTACACACACCTCTGAGAAGGAGAAATCTAGGGTTGGCGATGGGTGGTGTG TTATTTAGGGATCCACGTCTTTGCTGCAGAAGACTTTCTGCGCGGCAACG TMEM9 NM_016456 TSS1500 1:201122822-201124261 Promoter_Associated TMEM9;TMEM9;TMEM9;TMEM9;TMEM9 ENST00000367329.1;ENST00000367330.1;ENST00000367334.5;ENST00000367332.1;ENST00000367333.1 TSS1500;TSS1500;TSS1500;TSS1500;5’UTR TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9 ENST00000455367.1;ENST00000495205.1;ENST00000435310.1;ENST00000367329.1;ENST00000497582.1;ENST00000367330.1;ENST00000414605.1;ENST00000367334.5;ENST00000367332.1;ENST00000367333.1 TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;5’UTR chr1:201123845-201124275 3 TRUE TRUE 2.23e-05 0.8414528 2.23e-05 0.8414528 -0.0764302 0.0889195 0.0001547 FALSE -1.7599098
cg00546248 chr1 2412743
1768293 ATATACRTAAATTAAAAACTAAATACTTACAAACACTCTAAAAACTACCC II NA NA NA NA NA NA chr1:2411125-2411404 S_Shore CAGGACAGGGTGTGTGCGTGAGTTAGGGGCTGGGTGCTTGCAGACACTCTGGGGACTGCC[CG]GGGCTTGTGAAGGCCCCAGTGGGGACACAGTCTGCCTGGGCTGTGGGGGCTGGCAGGTGG TGTGCGTGAGTTAGGGGCTGGGTGCTTGCAGACACTCTGGGGACTGCCCG PLCH2 NM_014638 Body 1:2412565-2413038 Unclassified_Cell_type_specific PLCH2 ENST00000464895.1 5’UTR chr1:2391098-2419210 6 chr1:2411589-2415067 6 TRUE 2.56e-05 0.8414528 2.56e-05 0.8414528 0.0813031 0.1032822 0.0001796 FALSE 1.8014843
cg15310518 chr14 106330520
96658329 ATCACATTATAAAAAACCCCATTAAAAAATACACAAAAACCTAACTCTCC II NA NA NA NA NA NA chr14:106333492-106333920 N_Shelf TAGTCAAAGTAGTCACATTGTGGGAGGCCCCATTAAGGGGTGCACAAAAACCTGACTCTC[CG]ACTGTCCCGGGCCGGCCGTGGCAGCCAGCCCCGTGTCCCAAGGTCATTTTGTCCCCAGCA CGGAGAGTCAGGTTTTTGTGCACCCCTTAATGGGGCCTCCCACAATGTGA IGHJ6;IGHJ5;IGHJ4 ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 TSS1500;TSS1500;TSS200 IGHJ6;IGHJ5;IGHJ4 ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 TSS1500;TSS1500;TSS200 chr14:106330520-106331295 3 2.79e-05 0.8414528 2.79e-05 0.8414528 -0.0436969 0.0303180 0.0000527 FALSE -0.3222181
cg25295510 chr19 38631842
52702188 CCAAAAATAATACTTTCCTACTCCRTCTTAAACTCCRAAATATTCATATC II NA NA NA NA NA NA OpenSea GTGGGCTGGGGCCTTTCTGAGTAATGCAGTTTTTCTCCCCAGGGGTTGGCCGGAGACCTA[CG]ACATGAATACCTCGGAGCCCAAGACGGAGCAGGAAAGCATCACTCCTGGGGGCCGGCCCC CGACATGAATACCTCGGAGCCCAAGACGGAGCAGGAAAGCATCACTCCTG SIPA1L3;SIPA1L3 NM_015073;NM_015073 ExonBnd;Body SIPA1L3 ENST00000222345.5 ExonBnd SIPA1L3 ENST00000222345.5 ExonBnd chr19:38631744-38632303 3 3.04e-05 0.8414528 3.04e-05 0.8414528 0.0800266 0.1033673 0.0001798 FALSE 3.6819004
cg00779672 chr18 47155847
55628264 AAATAAAAACATATTCAACATACRACTACCAAATACAAAAACRAACACCC II NA NA NA NA NA NA OpenSea GCATTTTAGGATTCCAAGTGCAGGGTGCCCTGGCAGGGCTGTACTCAGCCGGCGGTCCAA[CG]GGTGCTCGCCCTTGCACCTGGCAGTCGCATGTTGAATATGCTCCTATTTTCCCTACTTCT CGGGTGCTCGCCCTTGCACCTGGCAGTCGCATGTTGAATATGCTCCTATT chr18:47155300-47155855 3 3.34e-05 0.8414528 3.34e-05 0.8414528 0.0622395 0.0636649 0.0001107 FALSE 2.1754652
cg07696757 chr7 44229001
88769850 ACAAATAACTATATCTCTCACATCCTAACCTACTTCCCTAAAACTCAAAC II NA NA NA NA NA NA OpenSea GTAATTAGGCTGCAGGTGACTGTGTCTCTCACATCCTAGCCTGCTTCCCTGGGGCTCAGG[CG]CCGCTCGGCATTTCCTGCTCCAGCCAGGTGTGGAGTGGGGGATTCTCCTGCCAGGGCTTA CAGGTGACTGTGTCTCTCACATCCTAGCCTGCTTCCCTGGGGCTCAGGCG GCK;GCK NM_000162;NM_000162 1stExon;5’UTR GCK;GCK;GCK;GCK ENST00000403799.3;ENST00000403799.3;ENST00000437084.1;ENST00000476008.1 1stExon;5’UTR;TSS1500;3’UTR GCK;GCK;GCK;GCK ENST00000403799.3;ENST00000403799.3;ENST00000437084.1;ENST00000476008.1 1stExon;5’UTR;TSS1500;3’UTR chr7:44228925-44229250 3 TRUE 3.62e-05 0.8414528 3.62e-05 0.8414528 0.0568647 0.0539742 0.0000939 FALSE 2.9591324
cg21941354 chr8 143751709
14670238 3669838 AACCCCAACACCTTCAACACAAAATTCACTATCATAAAAAAAATATAACA AACCCCAACGCCTTCAACGCAAAATTCACTATCATAAAAAAAATATAACG I C Grn NA NA NA NA NA NA chr8:143750759-143751448 S_Shore GCGCGCGGCCTGGGCCCCGGGCTCCGCCCTGCTCTCTCCATCTGTAGAATGGGCGCAGGG[CG]TCACATTCCTTTCATGACAGTGAACCCTGCGCTGAAGGCGTTGGGGCTCCTGCAGTTCTG AGCCCCAACGCCTTCAGCGCAGGGTTCACTGTCATGAAAGGAATGTGACG PSCA;JRK;JRK;JRK NR_033343;NM_003724;NM_001279352;NM_001077527 TSS200;TSS1500;TSS1500;TSS1500 8:143747991-143748827 8:143750099-143751838 Promoter_Associated PSCA;JRK ENST00000505305.1;ENST00000507178.1 TSS200;TSS1500 PSCA;PSCA;JRK;JRK;JRK;JRK ENST00000505305.1;ENST00000510969.1;ENST00000507178.1;ENST00000422119.2;ENST00000503272.1;ENST00000512113.1 TSS200;TSS200;TSS1500;TSS1500;TSS1500;3’UTR chr8:143751305-143751935 3 4.08e-05 0.8414528 4.08e-05 0.8414528 -0.1751678 0.5240854 0.0009115 FALSE -3.2212907
cg05611414 chr19 50643613
25655843 47675873 ACAATAACTCCAAATATCAAAACTATCCTTTCTTCCCTTTCTCCCTCTCA ACAATAACTCCGAATATCGAAACTATCCTTTCTTCCCTTTCTCCCTCTCG I T Red NA NA NA NA NA NA OpenSea CCAGCCGGAGCCACAGTGGCTCCGGGTGTCGGGGCTGTCCTTTCTTCCCTTTCTCCCTCT[CG]TCTCCTGTCTCCCGTCTCCCTCTCCCCACGGTCTCCCTCTCCCTCTCCCTCTCCTGTCTC CGAGAGGGAGAAAGGGAAGAAAGGACAGCCCCGACACCCGGAGCCACTGT SNAR-D;SNAR-B1;SNAR-B2 NR_024243;NR_024231;NR_024230 TSS200;TSS1500;TSS1500 19:55335318-55336361 TRUE 4.34e-05 0.8414528 4.34e-05 0.8414528 0.0690128 0.0823146 0.0001432 FALSE 3.3701183
cg08648042 chr17 40086195
41750536 RATACCCTAAATAACTTACTTAACTACCTTTACTACRTTACTTCCAAAAC II NA NA NA NA NA NA chr17:40086751-40087098 N_Shore GTGTGCATTTGTAGTTAAAGACATCTTTTTACTATTTAAACTTTAACAATGACATTAGGA[CG]TTTTGGAAGTAACGTAGTAAAGGCAGTTAAGCAAGCTACTTAGGGTATCGTTGGAGCCCA ATACCCTAAGTAGCTTGCTTAACTGCCTTTACTACGTTACTTCCAAAACG TTC25;TTC25 NM_031421;NR_110662 TSS1500;TSS1500 17:40086180-40087180 Promoter_Associated AC091172.1 ENST00000377540.1 TSS1500 AC091172.1 ENST00000377540.1 TSS1500 chr17:40086140-40086410 3 4.54e-05 0.8414528 4.54e-05 0.8414528 0.0551645 0.0530580 0.0000923 FALSE -0.7160085
cg18036623 chr1 5300056
69658162 CCCATTTATATCAAAATAAATAACTCAAATATCCACCTTCAACCAAAAAC II NA NA NA NA NA NA OpenSea GCTCCTGTGTCCCCATTTGTATCAGAGTGGGTGACTCAAGTGTCCACCTTCAGCCAAGAG[CG]TGGCCAGGGCTGGGGCACGCCACATGACCTCACTTGTGTGTCTACCAGCTAAGGCACCTC CGCTCTTGGCTGAAGGTGGACACTTGAGTCACCCACTCTGATACAAATGG chr1:5299945-5300270 3 4.94e-05 0.8414528 4.94e-05 0.8414528 -0.0518544 0.0476559 0.0000829 FALSE 2.1850403
cg10258214 chr14 106330534
85771960 TTATACTAAAAACAAAATAACCTTAAAACACRAAACTAACTACCACRACC II rs141509289 0.011000 NA NA NA NA chr14:106333492-106333920 N_Shelf ACATTGTGGGAGGCCCCATTAAGGGGTGCACAAAAACCTGACTCTCCGACTGTCCCGGGC[CG]GCCGTGGCAGCCAGCCCCGTGTCCCAAGGTCATTTTGTCCCCAGCACAAGCATGACTCTG TGTGCTGGGGACAAAATGACCTTGGGACACGGGGCTGGCTGCCACGGCCG IGHJ6;IGHJ5;IGHJ4 ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 TSS1500;TSS1500;TSS200 IGHJ6;IGHJ5;IGHJ4 ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 TSS1500;TSS1500;TSS200 chr14:106330520-106331295 3 5.36e-05 0.8414528 5.36e-05 0.8414528 -0.0851995 0.1307293 0.0002274 FALSE 0.2716556
cg11879620 chr1 211499932
89702346 17768264 CATCTAATTTCCCTCACATACATCTAAATTATAACCATCTTACTCTTCCA CATCTAATTTCCCTCACGTACGTCTAAATTATAACCGTCTTACTCTTCCG I T Red NA NA NA NA NA NA chr1:211500151-211500636 N_Shore CAAGGCCTTGAGCAAATGGGTCAACACAGGAGAGGACGGGCTGAGCCTTGTTGTGCCTGA[CG]GAAGAGCAAGACGGCCACAATTCAGACGCACGTGAGGGAAATCAGATGACTGGACTTGTA CGGAAGAGCAAGACGGCCACAATTCAGACGCACGTGAGGGAAATCAGATG TRAF5;TRAF5 NM_001033910;NM_004619 TSS1500;TSS200 1:211499503-211501341 Promoter_Associated TRAF5;TRAF5;TRAF5 ENST00000427925.2;ENST00000261464.5;ENST00000462410.1 TSS1500;TSS1500;TSS200 TRAF5;TRAF5;TRAF5;TRAF5;TRAF5 ENST00000427925.2;ENST00000494355.1;ENST00000261464.5;ENST00000462410.1;ENST00000488428.1 TSS1500;TSS1500;TSS1500;TSS200;TSS200 chr1:211499485-211501315 3 5.49e-05 0.8414528 5.49e-05 0.8414528 -0.0833373 0.1256712 0.0002186 FALSE -4.6193643
cg20695515 chr20 61605688
4796962 TTACAATTAAACACAATAAAATAAATTCCCRTCAAAAATATATTCCAACC II NA NA NA NA NA NA OpenSea CTGCCCAGCCCCTCCCCTCCCCTGGGTCCTGCCAAGTCGGTCAGAAGGAAAGATGGGCAG[CG]GTTGGAATATACTTTTGACGGGAATTTACTTTATTGTGTTTAATTGTAAAGCGTGGAAGT TACAATTAAACACAATAAAGTAAATTCCCGTCAAAAGTATATTCCAACCG TRUE 20:61605334-61607081 Unclassified chr20:61605300-61605955 3 5.72e-05 0.8414528 5.72e-05 0.8414528 0.0513965 0.0481834 0.0000838 FALSE -3.1116354
cg11150807 chr10 43354902
70613211 CCAATCCAATAACCTAACCTACRTTTTCTACATAACTACTAATTCCAACC II rs2796561 0.460346 NA NA NA NA OpenSea GGAGCAGATGAGCGGGTAAGACCGGCAAATCCGTCACATACCCTCAGGACGCAGGGAGCG[CG]GCTGGAATCAGCAGTTATGCAGAAAACGCAGGCCAGGTTACTGGACTGGAACATGCGGGC CAGTCCAGTAACCTGGCCTGCGTTTTCTGCATAACTGCTGATTCCAGCCG 10:42674870-42674970 TRUE 5.74e-05 0.8414528 5.74e-05 0.8414528 0.0683731 0.0853268 0.0001484 FALSE 1.8086412
cg15447535 chr1 182808025
77671883 CTTTCAAAAAACACTAACRTTCAAACTATCTAAATCAATCCRATAACTAC II NA NA NA NA NA NA chr1:182808312-182809122 N_Shore CCAGTTTATTTCTTTCAAGAGACACTGGCGTTCAGACTATCTGAATCAATCCGGTAACTA[CG]CCCACTGAACACCTACTACATGCCAGGTATTGAACTAAGCCGCCCTTCATATGCCATCTC TTTCAAGAGACACTGGCGTTCAGACTATCTGAATCAATCCGGTAACTACG DHX9;DHX9 NM_001357;NR_033302 TSS1500;TSS1500 1:182807913-182809448 Promoter_Associated DHX9;DHX9 ENST00000399175.2;ENST00000367549.3 TSS1500;TSS1500 DHX9;DHX9;DHX9 ENST00000399175.2;ENST00000367549.3;ENST00000483416.1 TSS1500;TSS1500;TSS1500 chr1:182808000-182808570 3 5.98e-05 0.8414528 5.98e-05 0.8414528 0.0400614 0.0295342 0.0000514 FALSE -0.8898368
cg20430773 chr1 16534157
13655147 80758121 CTAATAACTCACAAAAAAACAATTAATCCAAACCCAAATACCACCTCCCA CTAATAACTCACGAAAAAACGATTAATCCGAACCCAAATACCACCTCCCG I T Red NA NA NA NA NA NA chr1:16532917-16534580 Island GGAAAGAGAGAGCGTCACGGCCCTAGTTCGCCTGCACCCTGTCTCGGACCCAAGCCCAAA[CG]GGAGGTGGTACCTGGGCTCGGACCAATCGCCCTCTCGTGAGTCACCAGGGGCTGGCCGCG CGGGAGGTGGTACCTGGGCTCGGACCAATCGCCCTCTCGTGAGTCACCAG ARHGEF19 NM_153213 Body 1:16406494-16407143 1:16532722-16534260 Promoter_Associated ARHGEF19;ARHGEF19 ENST00000478117.1;ENST00000270747.3 TSS1500;ExonBnd ARHGEF19;ARHGEF19;ARHGEF19;ARHGEF19;ARHGEF19 ENST00000478210.1;ENST00000449495.1;ENST00000471928.1;ENST00000478117.1;ENST00000270747.3 TSS1500;TSS1500;TSS1500;TSS1500;ExonBnd chr1:16533945-16534395 3 TRUE 6.10e-05 0.8414528 6.10e-05 0.8414528 0.0745545 0.1026686 0.0001786 FALSE -0.6625730
cg03270036 chr9 130270004
61617279 CAAAAAAAAACCCAAACTTACACCTAAAATCTAAATAAAACCCACTCTTC II NA NA NA NA NA NA chr9:130271237-130271447 N_Shore ACGCGAGAAGAAACTAAAGCACAGAGACGCAAGGCTAAGGCTGCTCCGGAGGCCACACAG[CG]AAGAGTGGGCTTCACCCAGACCCCAGGTGCAAGTCTGGGCTTTTCTTTGAGCCTTAGCTG AAAGAAAAGCCCAGACTTGCACCTGGGGTCTGGGTGAAGCCCACTCTTCG FAM129B;FAM129B NM_022833;NM_001035534 Body;Body TRUE 9:129309331-129310655 FAM129B ENST00000468379.1 3’UTR FAM129B;FAM129B;FAM129B ENST00000478917.1;ENST00000468379.1;ENST00000484348.1 3’UTR;3’UTR;3’UTR chr9:130267748-130272673 6 chr9:130265003-130270077 6 TRUE 6.33e-05 0.8414528 6.33e-05 0.8414528 0.0614434 0.0702602 0.0001222 FALSE 3.6921774
cg01692639 chr6 99273568
11803246 CACTTATAACCRAAACCTACCCTATATTCATCCCAAACAATCAAACAAAC II NA NA NA NA NA NA chr6:99272394-99273014 S_Shore CTCTCACTCCCCACTTGTGGCCGAGGCCTGCCCTGTGTTCATCCCAGACAGTCAGGCAGA[CG]CTGAGCTGTCCGCGTCGGCGTGGGCGCCTGTGGCCAGCGTATTGTGAGATTTCTAAATGT ACTTGTGGCCGAGGCCTGCCCTGTGTTCATCCCAGACAGTCAGGCAGACG TRUE 6:99380289-99380401 chr6:99273085-99273735 3 TRUE 6.39e-05 0.8414528 6.39e-05 0.8414528 -0.0592549 0.0654657 0.0001139 FALSE -3.8720322
cg24666046 chr2 159823997
93638314 TTACTATAAACCRATTTATTAATAACCCCTAACCTTACTATAACAAACCC II NA NA NA NA NA NA chr2:159824854-159826401 N_Shore TCTGCGTGAGGTTGAAGTTTTTATGAGGAAACTTGTTTAAAACACAATCGTATTTTAAGG[CG]GGTTTGCCACAGCAAGGTTAGGGGTTACCAATAAATCGGTTTACAGCAACTTCTCTATTC CGGGTTTGCCACAGCAAGGTTAGGGGTTACCAATAAATCGGTTTACAGCA TANC1;TANC1 NM_001145909;NM_033394 TSS1500;TSS1500 TANC1;TANC1 ENST00000454300.1;ENST00000263635.6 TSS1500;TSS1500 TANC1;TANC1 ENST00000454300.1;ENST00000263635.6 TSS1500;TSS1500 chr2:159823780-159824855 3 6.41e-05 0.8414528 6.41e-05 0.8414528 0.0574785 0.0616363 0.0001072 FALSE 0.3774693
cg04098297 chr13 114312482
54800969 97749381 AAATAAACATCTAACCTCAATCTAAACATAAAAAATACAAAAAACAAACA AAATAAACATCTAACCTCAATCTAAACGTAAAAAATACAAAAAACAAACG I A Red NA NA NA NA NA NA OpenSea CGCTGGCCACACGTCTTCTTCTCCTGCAGAGCCGCCATCGTCCCTGGCCTGAGATCCTCC[CG]TCTGCTCCCTGCACCCTCTACGCCCAGACTGAGGCCAGATGCCCACCTCTGGGCTTTATA AGGTGGGCATCTGGCCTCAGTCTGGGCGTAGAGGGTGCAGGGAGCAGACG ATP4B;ATP4B NM_000705;NM_000705 1stExon;5’UTR 13:113360106-113360765 ATP4B;ATP4B ENST00000335288.3;ENST00000335288.3 1stExon;5’UTR ATP4B;ATP4B ENST00000335288.3;ENST00000335288.3 1stExon;5’UTR chr13:114305668-114313712 6 TRUE 6.58e-05 0.8414528 6.58e-05 0.8414528 0.0601741 0.0678935 0.0001181 FALSE 2.5127605
cg27310889 chr19 56032497
90604170 ACACRAAAATTCACATCACAATATTCAAAATTTAAATCCCTAACTCTACC II NA NA NA NA NA NA chr19:56028236-56028824 S_Shelf TCCCCGTTTCAGAAAGGACTCTGAGGCTCAGAGAGGGCAGGCCCGTTGCCAAAGGGCACA[CG]GCAGAGCCAGGGACTCAAACCCTGAATACTGTGATGTGAATTTCCGTGCTTTGACCATGG CGGCAGAGCCAGGGACTCAAACCCTGAATACTGTGATGTGAATTTCCGTG chr19:56032210-56033487 5 chr19:56030543-56032575 6 6.83e-05 0.8414528 6.83e-05 0.8414528 0.0546608 0.0564490 0.0000982 FALSE 2.7820735
cg15992730 chr12 7848093
90793277 CAACRACCACTAAAATCTCCCRAAACTTATACTACRTAAAAAAACTAAAC II NA NA NA NA NA NA OpenSea CTGGATAACACTCTATTTCCTTACCTTGGTCTGGGAGAAAGCGAAGTACATTCCCGCGGA[CG]CCCAGCTCCTTTACGTAGCATAAGTCTCGGGAGACCCCAGTGGTCGCTGCTGCCTCGCGA CGCCCAGCTCCTTTACGTAGCATAAGTCTCGGGAGACCCCAGTGGTCGCT GDF3 NM_020634 1stExon TRUE GDF3 ENST00000329913.3 1stExon GDF3 ENST00000329913.3 1stExon chr12:7847640-7848110 3 TRUE TRUE 6.92e-05 0.8414528 6.92e-05 0.8414528 -0.0412180 0.0321790 0.0000560 FALSE -2.1239170
cg17798901 chr3 100236708
62809170 ACCTATAAACAAACCCATTTAAACCACAATCTTTCCTAAAAAAATACCAC II rs72943302 0.047075 NA NA NA NA OpenSea TGAGCATCTTCCTCTTCTTGAAAGGGCTGTGATGCTTGTGATTGGAAAAAGCCGTTCTTA[CG]TGGCATCCTTTTAGGAAAGACTGTGGTTTAAATGGGCTTGTTTACAGGCCTCACTTTTGG CGTGGCATCCTTTTAGGAAAGACTGTGGTTTAAATGGGCTTGTTTACAGG TMEM45A NM_018004 5’UTR TMEM45A;TMEM45A;TMEM45A ENST00000403410.1;ENST00000462884.1;ENST00000323523.4 5’UTR;TSS1500;5’UTR TMEM45A;TMEM45A;TMEM45A;TMEM45A ENST00000403410.1;ENST00000449609.1;ENST00000462884.1;ENST00000323523.4 5’UTR;5’UTR;TSS1500;5’UTR 6.97e-05 0.8414528 6.97e-05 0.8414528 -0.0752652 0.1074586 0.0001869 FALSE 1.8375590
cg16585214 chr10 121297227
19603902 CTTCCTTCTCRTTTATTTATAAACTAATAAAAAAAACCRCCTACCAACCC II NA NA NA NA NA NA OpenSea TAAAATGAATTGAGGAGAGGAGGCCAGGGGTTCCCAGGTGGCCGCCTGCCCACAGTGCCA[CG]GGCTGGTAGGCGGTTTTCTTCACTAGTCCACAAATAAACGAGAAGGAAGACTAGGAAATA CGGGCTGGTAGGCGGTTTTCTTCACTAGTCCACAAATAAACGAGAAGGAA RGS10;RGS10 NM_002925;NM_001005339 TSS1500;Body TRUE RGS10 ENST00000392865.1 TSS1500 RGS10 ENST00000392865.1 TSS1500 chr10:121297185-121297455 3 7.27e-05 0.8414528 7.27e-05 0.8414528 0.0634947 0.0771195 0.0001341 FALSE -3.5749317
cg09423836 chr4 77070069
7695393 AAAAATACCTAACACATAATAAACRCAACRTATTTACTACAATTCCATTC II NA NA NA NA NA NA chr4:77069066-77069672 S_Shore TGAAGGATGAGTAGACGGAGCAGAAGCAGTAAAGAGGATGCACGTTGGAAGAAATGAAAG[CG]AATGGAATTGTAGCAAACACGTTGCGTTTACTATGTGCCAGGCACTCTTCTAGACATTTT AGAGTGCCTGGCACATAGTAAACGCAACGTGTTTGCTACAATTCCATTCG NUP54 NM_017426 TSS1500 4:77068555-77070477 Promoter_Associated NUP54;NUP54;NUP54;NUP54 ENST00000515460.1;ENST00000514987.1;ENST00000342467.6;ENST00000264883.3 TSS1500;TSS1500;TSS1500;TSS1500 NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54 ENST00000506098.1;ENST00000510884.1;ENST00000514307.1;ENST00000515460.1;ENST00000514987.1;ENST00000514901.1;ENST00000342467.6;ENST00000502850.1;ENST00000507257.1;ENST00000508583.1;ENST00000504173.1;ENST00000264883.3 TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500 chr4:77068812-77070486 6 chr4:77068148-77071219 6 TRUE TRUE 8.46e-05 0.8414528 8.46e-05 0.8414528 -0.1153642 0.2624751 0.0004565 FALSE -3.8828315
cg24046505 chr10 25351544
45629175 TCCACCCAAACCTAAATATAACTAATCAAAATACRTACTATTACTAACAC II rs16925307 0.027132 NA NA NA NA OpenSea TGGTGACACAGCGGGTCTGGCCTCATTCATTACTAATTGTTGACGCTGTGTTCGGAGGAA[CG]TGTTAGTAACAGCACGTACCTTGATCAGCCACACTCAGGCCTGGGTGGAGGAGATAGGTG CGTGTTAGTAACAGCACGTACCTTGATCAGCCACACTCAGGCCTGGGTGG ENKUR NM_001270383 TSS1500 10:25351514-25351702 Unclassified_Cell_type_specific chr10:25350980-25351775 3 8.65e-05 0.8414528 8.65e-05 0.8414528 0.0524204 0.0544310 0.0000947 FALSE -1.9843389
cg18259003 chr17 72347925
99703156 TTAAAATTAACCTCAAACTAAAACTCCAAACAAATACCTAAAATCCAAAC II NA NA NA NA NA NA chr17:72347924-72348322 Island AGATTCAGCTTTTAGGGTTGGCCTCAGGCTGGAGCTCCAGACAGATGCCTGGAGTCCAGG[CG]CTGGAGGGTCGAGGACGAGCTGGGCAGGCGGATCGGACCCCTCTCCCCGCTGCCAGATGC CGCCTGGACTCCAGGCATCTGTCTGGAGCTCCAGCCTGAGGCCAACCCTA KIF19 NM_153209 Body 17:72347925-72348453 Unclassified_Cell_type_specific chr17:72347725-72348675 3 TRUE 8.69e-05 0.8414528 8.69e-05 0.8414528 0.0421317 0.0351944 0.0000612 FALSE -2.4552826
cg20117369 chr19 49061546
12626896 ACCTAACTATATCACRCCCTAAATAAAAAACTTAAAAATAAACAAAACCC II NA NA NA NA NA NA chr19:49061545-49061769 Island GAACCAGGGAAGCCTGACTGTATCACGCCCTGGATGGGGAACTTGGGGATGAGCAAGACC[CG]GAAGGAGCGAGATCCGGGAGGGAGACTCAGCCCAGACAGGAGGCGTTAGCAGGCGCTCTG CGGGTCTTGCTCATCCCCAAGTTCCCCATCCAGGGCGTGATACAGTCAGG SULT2B1 NM_177973 Body chr19:49061440-49061935 3 TRUE 8.78e-05 0.8414528 8.78e-05 0.8414528 0.0716314 0.1019551 0.0001773 FALSE -2.1040575
cg01208318 chr14 106329652
77751469 TCCTAAAAATAACTTAACAACCRTCTACTTACAATTAAACTTCCCAAACC II NA NA NA NA NA NA chr14:106333492-106333920 N_Shelf TCTGCTGGCCTCCCACGTACCCCACATTCTGGCCTGACCCCTCAGAAGCCAGACCACTGT[CG]GCCTGGGAAGTCCAACTGCAAGCAGACGGCTGCTAAGTCACCCCCAGGAGTCCAAAAACC CGGCCTGGGAAGTCCAACTGCAAGCAGACGGCTGCTAAGTCACCCCCAGG IGHJ6 ENST00000390560.2 TSS200 IGHJ6 ENST00000390560.2 TSS200 chr14:106329085-106330430 3 TRUE 8.82e-05 0.8414528 8.82e-05 0.8414528 -0.1079063 0.2315524 0.0004027 FALSE -0.0233932
cg01780326 chr14 75609136
23725895 TACCCAAAATAATCACAACATTTAAACTACCAAACTCTACTATAAAATTC II NA NA NA NA NA NA OpenSea TCTTAACTCCCTACCCAGGGTGATCACAGCATTTAAACTGCCAAACTCTGCTGTAAAATT[CG]AGAACAGAATCCCTGACTTCCAAAAGCATTTATGCTCTCTGTACCCAAATCTGACCTATC ACCCAGGGTGATCACAGCATTTAAACTGCCAAACTCTGCTGTAAAATTCG TMED10 NM_006827 Body TMED10 ENST00000557670.1 3’UTR TMED10;TMED10;TMED10 ENST00000556969.1;ENST00000557670.1;ENST00000555036.1 3’UTR;3’UTR;3’UTR chr14:75609040-75609490 3 8.82e-05 0.8414528 8.82e-05 0.8414528 0.0376005 0.0281181 0.0000489 FALSE 3.3619202
cg00564870 chr3 149018035
11736209 TAATAAATCATCCATTTCTCATTTTCCAAAACTAATTTCTACTTACTTAC II NA NA NA NA NA NA OpenSea TCGAACATGCCTCAGTATGCCCCATTCCTTCTAACCTTTAACCAGAATTCTTTCCTAAGG[CG]TAAGTAAGCAGAAACCAGCTCTGGAAAATGAGAAATGGATGACTCATTACTTTGTCAACT AATGAGTCATCCATTTCTCATTTTCCAGAGCTGGTTTCTGCTTACTTACG TRUE RP11-206M11.7 ENST00000489011.1 5’UTR RP11-206M11.7 ENST00000489011.1 5’UTR chr3:149017960-149018255 3 9.05e-05 0.8414528 9.05e-05 0.8414528 -0.0425898 0.0362561 0.0000631 FALSE 0.3797182
cg19021188 chr8 143859734
42736887 ATTACCTCTCRACAAACAAAACAACTAACTAACCTAATTTACTAAAAACC II NA NA NA NA NA NA chr8:143858279-143859411 S_Shore CTGAGGTACAGAATACACCATGCCTGCTGGGCAAGCGGGCTCCGGTTCCAGGAAATCCTT[CG]GTCCCCAGCAAACTAGGCCAGTTAGTTGTTCTGTTTGCCGAGAGGTAATGCGCACCCTCC CGGTCCCCAGCAAACTAGGCCAGTTAGTTGTTCTGTTTGCCGAGAGGTAA LYNX1;LYNX1;LYNX1;LYNX1 NM_023946;NM_177476;NM_177457;NM_177477 TSS200;TSS1500;TSS200;TSS1500 8:143856711-143857003 LYNX1;LYNX1;LYNX1;LYNX1;LYNX1 ENST00000345173.6;ENST00000522906.1;ENST00000398906.1;ENST00000395192.2;ENST00000335822.5 TSS1500;TSS1500;TSS1500;TSS1500;TSS200 LYNX1;LYNX1;LYNX1;LYNX1;LYNX1 ENST00000345173.6;ENST00000522906.1;ENST00000398906.1;ENST00000395192.2;ENST00000335822.5 TSS1500;TSS1500;TSS1500;TSS1500;TSS200 chr8:143859645-143859870 3 TRUE 9.17e-05 0.8414528 9.17e-05 0.8414528 0.0687873 0.0948478 0.0001650 FALSE 0.5775472
cg05912299 chr13 100217962
68746552 AAAAACAATTACRTTACRCATATTATAAAACCCCAATAACCAAAAACCCC II NA NA NA NA NA NA OpenSea TGGCCCGGGACAGAAACAGTTGCGTTGCGCATGTTGTGAGACCCCAGTGACCAGGAACCC[CG]GGGAGAAGTCTCCTCCATGAGTCATGATAGCCGCAGGCAGCTGCGAAGGAGGGGGACGTG GAAACAGTTGCGTTGCGCATGTTGTGAGACCCCAGTGACCAGGAACCCCG 13:99015793-99016308 chr13:100217645-100218095 3 TRUE 9.38e-05 0.8414528 9.38e-05 0.8414528 0.0898071 0.1624123 0.0002825 FALSE 1.2531977
cg21189356 chr19 30864709
74667137 CRACAAATCTACATACAAATAACAACACATTATTAACTTAATCAAATACC II NA NA NA NA NA NA chr19:30865683-30866490 N_Shore AGAGAAAGCCCCGGCAAATCTGCATGCAAATAGCAGCACATTGTTAACTTAATCAGATGC[CG]AGGAAAGATGTGCCAGAATTCCAGGATGATCTACACACACGGAGGAGGAGCGGCCTGAGC CGGCATCTGATTAAGTTAACAATGTGCTGCTATTTGCATGCAGATTTGCC ZNF536 NM_014717 5’UTR chr19:30864626-30864626 TRUE ZNF536 ENST00000355537.1 5’UTR ZNF536 ENST00000355537.1 5’UTR chr19:30864465-30864855 3 9.60e-05 0.8414528 9.60e-05 0.8414528 0.0791596 0.1267643 0.0002205 FALSE -2.6970873
cg02914097 chr3 31118948
91641400 CTCATCCTATCRAACTACCCACTACCAAACATAATATCCTAAAATAATAC II rs294286 0.132571 NA NA NA NA OpenSea TCAGCTCTCTCCTCATCCTGTCGGACTGCCCACTGCCAAACATGGTATCCTAAAATAGTG[CG]TCCAACATTTCACTATCCTGCTCAAACATCTGTGATTGCTCTCTAGTCTTTGTTGGCCTA CGCACTATTTTAGGATACCATGTTTGGCAGTGGGCAGTCCGACAGGATGA TRUE chr3:31118625-31119015 3 TRUE 9.60e-05 0.8414528 9.60e-05 0.8414528 0.0847018 0.1451549 0.0002525 FALSE 0.5130093
cg13950578 chr1 152467744
96789356 9755123 ATTTACTACCATTTCTAAACTCCTACCTCTAATTCTATCACCCCCTACCA ATTTACTACCATTTCTAAACTCCTACCTCTAATTCTATCGCCCCCTACCG I T Red rs12123907 0.179616 NA NA NA NA OpenSea TTAGCACTTCTGTGCACTGAGGGAGAGGTGGAGCATCCAATTTGCCTTACACACATGGGA[CG]GTAGGGGGCGACAGAATCAGAGGCAGGAGTTTAGAAATGGCAGTAAATCCTGGAGATTTT CGGTAGGGGGCGACAGAATCAGAGGCAGGAGTTTAGAAATGGCAGTAAAT 1:152467571-152467870 Unclassified_Cell_type_specific chr1:152467620-152467975 3 9.80e-05 0.8414528 9.80e-05 0.8414528 0.1521842 0.4705387 0.0008184 FALSE -1.9785758
cg03998606 chr3 32856104
6706315 RCTATCAAATTTAATTCCTCTTTCTAAAATTACAACCTTCCTACTCCTCC II NA NA NA NA NA NA chr3:32858194-32860506 N_Shelf ACCCACAAACCGCTGTCAAGTTTGATTCCTCTTTCTGGAGTTACAGCCTTCCTGCTCCTC[CG]TGATGGCTCAGCTGAAGTACCACCTCTGCTACTCGTTTTCAAAATACATGTTGAGTCTTA CGGAGGAGCAGGAAGGCTGTAACTCCAGAAAGAGGAATCAAACTTGACAG RDMR chr3:32855960-32856455 3 TRUE 9.84e-05 0.8414528 9.84e-05 0.8414528 -0.0548076 0.0610700 0.0001062 FALSE -2.7499845

Visualise the effect of RUV adjustment

#Madj <- getAdjusted(mValsSw, rfit2)
# cant get this part to work
mvals <- mvals[which(rownames(mvals) %in% colnames(rfit2$Y) ),]
#Madj <- missMethyl::getAdj(Y = mValsSq, fit = rfit2)

plotMDS(mvals, labels=targets_gen$Sample_Name,
  col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
  main="Unadjusted")

#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)

# i had to comment the below stuff out because Madj couldn't be generated.
#plotMDS(Madj, labels=targets_gen$Sample_Name, 
#  col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
#  main="Adjusted: RUV-inverse")
#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)

#Trying different k values
# mark fixed a major bug here
rfit_k1 <- RUVfit(Y=mvals, X=design_tw[,"ADOS"], ctl=ctl, 
  method = "ruv4", k=1) # Stage 2 analysis 

rfit_k1 <- RUVadj(Y=mvals, fit=rfit_k1)

rfit_k2 <- RUVfit(Y=mvals, X=design_tw[,"ADOS"], ctl=ctl,
  method = "ruv4", k=2) # Stage 2 analysis 

rfit_k2 <- RUVadj(Y=mvals, fit=rfit_k2)

rfit_k8 <- RUVfit(Y=mvals, X=design_tw[,"ADOS"], ctl=ctl,
  method = "ruv4", k=8) # Stage 2 analysis 

rfit_k8 <- RUVadj(Y=mvals, fit=rfit_k8)

#get adjusted values - doesn't work as at Apr 2022.
#Madj1 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k1)
#Madj2 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k2)
#Madj3 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k8)

#Visualise plots
par(mfrow=c(1,1))
plotMDS(mvals, labels=targets_gen$Sample_Name, 
  col=as.integer(factor(targets_gen$Diagnosis)),
  dim=c(14,15),
  main="Adjusted")

#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-inverse")
#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj1, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-4, k=1")
#legend("bottomleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj2, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-4, k=2")
#legend("bottomright",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)

#plotMDS(Madj3, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
#        main="Adjusted: RUV-4, k=8")
#legend

Plotting effect sizes of top DMPs

res <- read.csv("ASD_guthrie_topruv80%_dmps_onADOS_withintw_Nov29.csv", header = TRUE)
TOPPROBES <- head(res$Name,9)

effect_plot <- function(PROBE) {
  GENE <- res[which(res$Name==PROBE),"UCSC_RefGene_Name"]
  GENE <- paste0(unique(unlist(strsplit(GENE,";"))),collapse=";")
  HEADER=paste(GENE,PROBE)
  BETAS <- ilogit2(mDat[rownames(mDat) == PROBE,])
  plot(BETAS, ADOS, main = HEADER, ylab = "ADOS", xlab = "Beta Value")
  regl5 <- lm(ADOS ~ BETAS)
  SLOPE=signif(regl5$coefficients[2],3)
  abline(regl5)
  mycor <- cor.test(ADOS, BETAS, method = "pearson")
  mycor_stat <- signif(mycor$estimate,3)
  mycor_p <- signif(mycor$p.value,3)
  SUBHEAD <- paste("R=",mycor_stat," p=",mycor_p," slope=",SLOPE, sep="")
  mtext(SUBHEAD,cex=0.6)

}

par(mfrow=c(3,3))

sapply(TOPPROBES,effect_plot)

## $cg07926098
## NULL
## 
## $cg23088672
## NULL
## 
## $cg17331950
## NULL
## 
## $cg10593232
## NULL
## 
## $cg19893067
## NULL
## 
## $cg21431091
## NULL
## 
## $cg00546248
## NULL
## 
## $cg15310518
## NULL
## 
## $cg25295510
## NULL
pdf("effect_guthrie.pdf")
par(mfrow=c(3,3))
sapply(TOPPROBES,effect_plot)
## $cg07926098
## NULL
## 
## $cg23088672
## NULL
## 
## $cg17331950
## NULL
## 
## $cg10593232
## NULL
## 
## $cg19893067
## NULL
## 
## $cg21431091
## NULL
## 
## $cg00546248
## NULL
## 
## $cg15310518
## NULL
## 
## $cg25295510
## NULL
dev.off()
## png 
##   2
par(mfrow=c(1,1))

Gene ontology of top DMPs

Gene Ontology analysis (gometh): top 1000 probes

res <- read.csv("ASD_guthrie_topruv80%_dmps_onADOS_withintw_Nov29.csv", header = TRUE)
sigCpGs_1k = res$Name[1:1000]

sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)
## [1] 790658
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
##                                           Description   N DE       P.DE FDR
## path:hsa04060  Cytokine-cytokine receptor interaction 268  9 0.01911846   1
## path:hsa04310                   Wnt signaling pathway 163 10 0.02550854   1
## path:hsa05202 Transcriptional misregulation in cancer 182 10 0.02610983   1
## path:hsa04666        Fc gamma R-mediated phagocytosis  94  6 0.03780776   1
## path:hsa05231            Choline metabolism in cancer  94  6 0.04693433   1
## path:hsa04971                  Gastric acid secretion  75  5 0.06390185   1
## path:hsa04370                  VEGF signaling pathway  59  4 0.07533259   1
## path:hsa04664         Fc epsilon RI signaling pathway  66  4 0.08116771   1
## path:hsa04514                 Cell adhesion molecules 141  7 0.08361298   1
## path:hsa04730                    Long-term depression  58  4 0.08668244   1
## path:hsa04512                ECM-receptor interaction  86  5 0.10284213   1
## path:hsa04510                          Focal adhesion 193  9 0.11850936   1
## path:hsa04725                     Cholinergic synapse 111  6 0.12042518   1
## path:hsa04726                    Serotonergic synapse 108  5 0.12447230   1
## path:hsa04724                   Glutamatergic synapse 113  6 0.13016759   1
## path:hsa04921              Oxytocin signaling pathway 151  7 0.14568311   1
## path:hsa04972                    Pancreatic secretion  93  4 0.15621024   1
## path:hsa05220                Chronic myeloid leukemia  74  4 0.15907375   1
## path:hsa03320                  PPAR signaling pathway  72  3 0.16715691   1
## path:hsa04151              PI3K-Akt signaling pathway 331 12 0.17174184   1
write.csv(gometh_kegg, file = "GOmeth_kegg.csv", row.names = TRUE)

# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
##            ONTOLOGY
## GO:0071425       BP
## GO:0060100       BP
## GO:1905155       BP
## GO:0033299       BP
## GO:0060099       BP
## GO:1905153       BP
## GO:0043495       MF
## GO:0035025       BP
## GO:0006869       BP
## GO:0048406       MF
## GO:0009582       BP
## GO:0001738       BP
## GO:0072089       BP
## GO:0090556       MF
## GO:0002687       BP
## GO:0031638       BP
## GO:0014701       CC
## GO:0010876       BP
## GO:0040037       BP
## GO:0009581       BP
##                                                                                  TERM
## GO:0071425                                      hematopoietic stem cell proliferation
## GO:0060100                            positive regulation of phagocytosis, engulfment
## GO:1905155                               positive regulation of membrane invagination
## GO:0033299                                             secretion of lysosomal enzymes
## GO:0060099                                     regulation of phagocytosis, engulfment
## GO:1905153                                        regulation of membrane invagination
## GO:0043495                                          protein-membrane adaptor activity
## GO:0035025                     positive regulation of Rho protein signal transduction
## GO:0006869                                                            lipid transport
## GO:0048406                                                nerve growth factor binding
## GO:0009582                                              detection of abiotic stimulus
## GO:0001738                                    morphogenesis of a polarized epithelium
## GO:0072089                                                    stem cell proliferation
## GO:0090556                                       phosphatidylserine floppase activity
## GO:0002687                                 positive regulation of leukocyte migration
## GO:0031638                                                         zymogen activation
## GO:0014701                                 junctional sarcoplasmic reticulum membrane
## GO:0010876                                                         lipid localization
## GO:0040037 negative regulation of fibroblast growth factor receptor signaling pathway
## GO:0009581                                             detection of external stimulus
##              N DE         P.DE        FDR
## GO:0071425  32  7 1.709236e-05 0.03885093
## GO:0060100  13  4 2.617135e-04 0.59461297
## GO:1905155  13  4 2.617135e-04 0.59461297
## GO:0033299   7  3 2.649737e-04 0.60149030
## GO:0060099  15  4 3.475512e-04 0.78859363
## GO:1905153  15  4 3.475512e-04 0.78859363
## GO:0043495  26  5 4.561624e-04 1.00000000
## GO:0035025  29  5 5.391505e-04 1.00000000
## GO:0006869 425 20 7.331087e-04 1.00000000
## GO:0048406   6  3 8.269014e-04 1.00000000
## GO:0009582 130 10 1.152856e-03 1.00000000
## GO:0001738  90  9 1.575325e-03 1.00000000
## GO:0072089 118 10 1.613447e-03 1.00000000
## GO:0090556   8  3 1.767704e-03 1.00000000
## GO:0002687 139  9 1.854952e-03 1.00000000
## GO:0031638  65  6 2.316532e-03 1.00000000
## GO:0014701   9  3 2.384797e-03 1.00000000
## GO:0010876 474 20 2.878572e-03 1.00000000
## GO:0040037  20  4 2.953834e-03 1.00000000
## GO:0009581 127  9 3.566109e-03 1.00000000
write.csv(gometh_go, file = "GOmeth_go.csv", row.names = TRUE)

Now specifically analyse hypermethylated probes

res2 <- subset(res,b_X1>0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
##                                                       Description   N DE
## path:hsa04974                    Protein digestion and absorption  92  7
## path:hsa04512                            ECM-receptor interaction  86  7
## path:hsa04971                              Gastric acid secretion  75  6
## path:hsa04724                               Glutamatergic synapse 113  8
## path:hsa04730                                Long-term depression  58  5
## path:hsa04510                                      Focal adhesion 193 11
## path:hsa04666                    Fc gamma R-mediated phagocytosis  94  6
## path:hsa04928 Parathyroid hormone synthesis, secretion and action 105  7
## path:hsa04926                           Relaxin signaling pathway 124  7
## path:hsa04921                          Oxytocin signaling pathway 151  8
## path:hsa05146                                          Amoebiasis  98  5
## path:hsa04727                                   GABAergic synapse  84  5
## path:hsa04927                    Cortisol synthesis and secretion  63  4
## path:hsa04270                  Vascular smooth muscle contraction 132  6
## path:hsa04723                Retrograde endocannabinoid signaling 133  6
## path:hsa04725                                 Cholinergic synapse 111  6
## path:hsa04726                                Serotonergic synapse 108  5
## path:hsa04072                   Phospholipase D signaling pathway 144  7
## path:hsa05032                                  Morphine addiction  88  5
## path:hsa04972                                Pancreatic secretion  93  4
##                      P.DE       FDR
## path:hsa04974 0.008922657 0.9011883
## path:hsa04512 0.013539482 1.0000000
## path:hsa04971 0.022686667 1.0000000
## path:hsa04724 0.024517958 1.0000000
## path:hsa04730 0.027230008 1.0000000
## path:hsa04510 0.032093937 1.0000000
## path:hsa04666 0.039976997 1.0000000
## path:hsa04928 0.046270992 1.0000000
## path:hsa04926 0.053328462 1.0000000
## path:hsa04921 0.076833051 1.0000000
## path:hsa05146 0.082933928 1.0000000
## path:hsa04727 0.109164528 1.0000000
## path:hsa04927 0.118342411 1.0000000
## path:hsa04270 0.119660757 1.0000000
## path:hsa04723 0.127677303 1.0000000
## path:hsa04725 0.130946184 1.0000000
## path:hsa04726 0.132205622 1.0000000
## path:hsa04072 0.145169043 1.0000000
## path:hsa05032 0.155290477 1.0000000
## path:hsa04972 0.162843677 1.0000000
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
##            ONTOLOGY
## GO:0090556       MF
## GO:0045649       BP
## GO:0048251       BP
## GO:0051156       BP
## GO:0060100       BP
## GO:1905155       BP
## GO:0009582       BP
## GO:0006869       BP
## GO:0021697       BP
## GO:0035025       BP
## GO:0060099       BP
## GO:1905153       BP
## GO:0031644       BP
## GO:0051020       MF
## GO:0005604       CC
## GO:0030020       MF
## GO:0006098       BP
## GO:0036158       BP
## GO:0038065       BP
## GO:0002761       BP
##                                                                               TERM
## GO:0090556                                    phosphatidylserine floppase activity
## GO:0045649                                regulation of macrophage differentiation
## GO:0048251                                                  elastic fiber assembly
## GO:0051156                                   glucose 6-phosphate metabolic process
## GO:0060100                         positive regulation of phagocytosis, engulfment
## GO:1905155                            positive regulation of membrane invagination
## GO:0009582                                           detection of abiotic stimulus
## GO:0006869                                                         lipid transport
## GO:0021697                                             cerebellar cortex formation
## GO:0035025                  positive regulation of Rho protein signal transduction
## GO:0060099                                  regulation of phagocytosis, engulfment
## GO:1905153                                     regulation of membrane invagination
## GO:0031644                                    regulation of nervous system process
## GO:0051020                                                          GTPase binding
## GO:0005604                                                       basement membrane
## GO:0030020 extracellular matrix structural constituent conferring tensile strength
## GO:0006098                                                 pentose-phosphate shunt
## GO:0036158                                               outer dynein arm assembly
## GO:0038065                                    collagen-activated signaling pathway
## GO:0002761                         regulation of myeloid leukocyte differentiation
##              N DE        P.DE FDR
## GO:0090556   8  3 0.001900589   1
## GO:0045649  24  4 0.002905982   1
## GO:0048251  10  3 0.003495647   1
## GO:0051156  25  4 0.003957522   1
## GO:0060100  13  3 0.004192239   1
## GO:1905155  13  3 0.004192239   1
## GO:0009582 130  9 0.004538628   1
## GO:0006869 425 18 0.004605003   1
## GO:0021697  23  4 0.004791389   1
## GO:0035025  29  4 0.004942120   1
## GO:0060099  15  3 0.005096053   1
## GO:1905153  15  3 0.005096053   1
## GO:0031644 140 10 0.007215012   1
## GO:0051020 285 15 0.007478073   1
## GO:0005604  94  8 0.007796332   1
## GO:0030020  39  5 0.007815025   1
## GO:0006098  16  3 0.008162224   1
## GO:0036158  22  3 0.008199726   1
## GO:0038065  15  3 0.008288448   1
## GO:0002761 120  8 0.008336041   1

Now specifically analyse hypomethylated probes

res2 <- subset(res,b_X1<0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
##                                                   Description   N DE       P.DE
## path:hsa04964         Proximal tubule bicarbonate reclamation  21  3 0.01123802
## path:hsa00513          Various types of N-glycan biosynthesis  37  4 0.01483267
## path:hsa00561                         Glycerolipid metabolism  58  4 0.07017901
## path:hsa04710                                Circadian rhythm  33  3 0.07054507
## path:hsa04060          Cytokine-cytokine receptor interaction 268  8 0.10554424
## path:hsa00310                              Lysine degradation  60  4 0.10816862
## path:hsa04920                 Adipocytokine signaling pathway  66  4 0.11546899
## path:hsa04330                         Notch signaling pathway  57  4 0.11923254
## path:hsa04979                          Cholesterol metabolism  50  3 0.11992139
## path:hsa00600                         Sphingolipid metabolism  51  3 0.12370171
## path:hsa04213 Longevity regulating pathway - multiple species  61  4 0.13174057
## path:hsa04152                          AMPK signaling pathway 118  6 0.17803531
## path:hsa04072               Phospholipase D signaling pathway 144  7 0.20189169
## path:hsa05100          Bacterial invasion of epithelial cells  76  4 0.20746409
## path:hsa04520                               Adherens junction  70  4 0.22399185
## path:hsa04630                      JAK-STAT signaling pathway 145  5 0.23700761
## path:hsa04923           Regulation of lipolysis in adipocytes  56  3 0.24274384
## path:hsa04210                                       Apoptosis 132  5 0.25409543
## path:hsa05416                               Viral myocarditis  55  3 0.26621870
## path:hsa04922                      Glucagon signaling pathway  99  4 0.26753173
##               FDR
## path:hsa04964   1
## path:hsa00513   1
## path:hsa00561   1
## path:hsa04710   1
## path:hsa04060   1
## path:hsa00310   1
## path:hsa04920   1
## path:hsa04330   1
## path:hsa04979   1
## path:hsa00600   1
## path:hsa04213   1
## path:hsa04152   1
## path:hsa04072   1
## path:hsa05100   1
## path:hsa04520   1
## path:hsa04630   1
## path:hsa04923   1
## path:hsa04210   1
## path:hsa05416   1
## path:hsa04922   1
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
##            ONTOLOGY                                                       TERM
## GO:0031588       CC                nucleotide-activated protein kinase complex
## GO:2000401       BP                         regulation of lymphocyte migration
## GO:0031045       CC                                         dense core granule
## GO:0003402       BP   planar cell polarity pathway involved in axis elongation
## GO:0071425       BP                      hematopoietic stem cell proliferation
## GO:0030198       BP                          extracellular matrix organization
## GO:0043062       BP                       extracellular structure organization
## GO:0045229       BP              external encapsulating structure organization
## GO:0045019       BP   negative regulation of nitric oxide biosynthetic process
## GO:1904406       BP      negative regulation of nitric oxide metabolic process
## GO:0010171       BP                                         body morphogenesis
## GO:0071675       BP                   regulation of mononuclear cell migration
## GO:0043495       MF                          protein-membrane adaptor activity
## GO:0005667       CC                            transcription regulator complex
## GO:0003323       BP                         type B pancreatic cell development
## GO:0043124       BP negative regulation of I-kappaB kinase/NF-kappaB signaling
## GO:0005938       CC                                                cell cortex
## GO:1990907       CC                                   beta-catenin-TCF complex
## GO:0045780       BP                     positive regulation of bone resorption
## GO:0046903       BP                                                  secretion
##              N DE         P.DE FDR
## GO:0031588   9  3 0.0007080092   1
## GO:2000401  64  7 0.0007481308   1
## GO:0031045  26  5 0.0009954731   1
## GO:0003402   5  3 0.0012034415   1
## GO:0071425  32  5 0.0021399450   1
## GO:0030198 305 19 0.0027687159   1
## GO:0043062 306 19 0.0029324789   1
## GO:0045229 308 19 0.0029415922   1
## GO:0045019  15  3 0.0040392952   1
## GO:1904406  15  3 0.0040392952   1
## GO:0010171  45  6 0.0046942824   1
## GO:0071675 115  8 0.0051975776   1
## GO:0043495  26  4 0.0063730389   1
## GO:0005667 480 24 0.0072394321   1
## GO:0003323  25  4 0.0077556655   1
## GO:0043124  49  5 0.0079433702   1
## GO:0005938 302 18 0.0100909182   1
## GO:1990907  13  3 0.0105504803   1
## GO:0045780  18  3 0.0112589520   1
## GO:0046903 905 36 0.0112724213   1

Gene ontology of top DMPs LIMMA data

Gene Ontology analysis (gometh): top 1000 probes (limma data).

res <- read.csv("ASD_guthrie_top_dmps_onADOS_withintw_Nov27_limma.csv", header = TRUE)
sigCpGs_1k = res$Name[1:1000]

sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)
## [1] 790658
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
##                                                       Description   N DE
## path:hsa04974                    Protein digestion and absorption  92  6
## path:hsa05222                              Small cell lung cancer  88  6
## path:hsa05217                                Basal cell carcinoma  63  5
## path:hsa05165                      Human papillomavirus infection 314 15
## path:hsa04928 Parathyroid hormone synthesis, secretion and action 105  7
## path:hsa04930                           Type II diabetes mellitus  45  4
## path:hsa05216                                      Thyroid cancer  37  3
## path:hsa04658                    Th1 and Th2 cell differentiation  88  5
## path:hsa04976                                      Bile secretion  86  4
## path:hsa04512                            ECM-receptor interaction  86  5
## path:hsa04927                    Cortisol synthesis and secretion  63  4
## path:hsa01240                           Biosynthesis of cofactors 147  5
## path:hsa04934                                    Cushing syndrome 154  7
## path:hsa04514                             Cell adhesion molecules 141  6
## path:hsa04120                      Ubiquitin mediated proteolysis 134  5
## path:hsa05200                                  Pathways in cancer 507 18
## path:hsa05412     Arrhythmogenic right ventricular cardiomyopathy  74  4
## path:hsa05146                                          Amoebiasis  98  4
## path:hsa04922                          Glucagon signaling pathway  99  4
## path:hsa04350                          TGF-beta signaling pathway  93  4
##                     P.DE FDR
## path:hsa04974 0.04573500   1
## path:hsa05222 0.05319280   1
## path:hsa05217 0.06104992   1
## path:hsa05165 0.06332322   1
## path:hsa04928 0.06457472   1
## path:hsa04930 0.08183517   1
## path:hsa05216 0.11609193   1
## path:hsa04658 0.12312133   1
## path:hsa04976 0.12593024   1
## path:hsa04512 0.14078121   1
## path:hsa04927 0.15135620   1
## path:hsa01240 0.16372755   1
## path:hsa04934 0.22948376   1
## path:hsa04514 0.23987134   1
## path:hsa04120 0.24359821   1
## path:hsa05200 0.24739241   1
## path:hsa05412 0.25304819   1
## path:hsa05146 0.25386815   1
## path:hsa04922 0.25984483   1
## path:hsa04350 0.26267162   1
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
##            ONTOLOGY                                                        TERM
## GO:0048625       BP                                    myoblast fate commitment
## GO:0033157       BP               regulation of intracellular protein transport
## GO:0031526       CC                                       brush border membrane
## GO:0033077       BP                            T cell differentiation in thymus
## GO:0033089       BP     positive regulation of T cell differentiation in thymus
## GO:0006517       BP                                     protein deglycosylation
## GO:0090160       BP                                 Golgi to lysosome transport
## GO:0002076       BP                                      osteoblast development
## GO:0005903       CC                                                brush border
## GO:0090316       BP      positive regulation of intracellular protein transport
## GO:0099060       CC  integral component of postsynaptic specialization membrane
## GO:0045061       BP                                     thymic T cell selection
## GO:0098948       CC intrinsic component of postsynaptic specialization membrane
## GO:0045059       BP                            positive thymic T cell selection
## GO:0071218       BP                      cellular response to misfolded protein
## GO:0019827       BP                            stem cell population maintenance
## GO:0045582       BP               positive regulation of T cell differentiation
## GO:0038065       BP                        collagen-activated signaling pathway
## GO:0098727       BP                                  maintenance of cell number
## GO:0001518       CC                        voltage-gated sodium channel complex
##              N DE        P.DE FDR
## GO:0048625   6  3 0.001077301   1
## GO:0033157 215 14 0.001243492   1
## GO:0031526  54  6 0.001860036   1
## GO:0033077  83  8 0.002670463   1
## GO:0033089  10  3 0.002673247   1
## GO:0006517  25  4 0.003285097   1
## GO:0090160  11  3 0.003434713   1
## GO:0002076  18  4 0.004604004   1
## GO:0005903  95  8 0.005433833   1
## GO:0090316 149 10 0.005737002   1
## GO:0099060  72  7 0.006448335   1
## GO:0045061  22  4 0.006920239   1
## GO:0098948  75  7 0.008026663   1
## GO:0045059  14  3 0.009117215   1
## GO:0071218  21  3 0.010241166   1
## GO:0019827 170 11 0.010306227   1
## GO:0045582 113  8 0.011427071   1
## GO:0038065  15  3 0.011452777   1
## GO:0098727 174 11 0.012066697   1
## GO:0001518  17  3 0.013120728   1

Now specifically analyse hypermethylated probes

res2 <- subset(res,logFC>0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
##                                                       Description   N DE
## path:hsa05224                                       Breast cancer 145 10
## path:hsa05225                            Hepatocellular carcinoma 166 10
## path:hsa05216                                      Thyroid cancer  37  4
## path:hsa04150                              mTOR signaling pathway 152  9
## path:hsa04720                              Long-term potentiation  64  5
## path:hsa05217                                Basal cell carcinoma  63  5
## path:hsa04217                                         Necroptosis 139  6
## path:hsa04914             Progesterone-mediated oocyte maturation  88  5
## path:hsa04934                                    Cushing syndrome 154  8
## path:hsa04928 Parathyroid hormone synthesis, secretion and action 105  6
## path:hsa04657                             IL-17 signaling pathway  88  4
## path:hsa00562                       Inositol phosphate metabolism  69  4
## path:hsa04714                                       Thermogenesis 212  8
## path:hsa04115                               p53 signaling pathway  73  4
## path:hsa05322                        Systemic lupus erythematosus 114  4
## path:hsa04915                          Estrogen signaling pathway 137  6
## path:hsa05226                                      Gastric cancer 147  7
## path:hsa05200                                  Pathways in cancer 507 18
## path:hsa01522                                Endocrine resistance  95  5
## path:hsa04916                                       Melanogenesis 101  5
##                     P.DE FDR
## path:hsa05224 0.01171602   1
## path:hsa05225 0.01750885   1
## path:hsa05216 0.02161643   1
## path:hsa04150 0.02318340   1
## path:hsa04720 0.03297370   1
## path:hsa05217 0.04180630   1
## path:hsa04217 0.07427759   1
## path:hsa04914 0.07885532   1
## path:hsa04934 0.07888452   1
## path:hsa04928 0.09519971   1
## path:hsa04657 0.10295303   1
## path:hsa00562 0.10348458   1
## path:hsa04714 0.11537434   1
## path:hsa04115 0.12331543   1
## path:hsa05322 0.13689655   1
## path:hsa04915 0.14132892   1
## path:hsa05226 0.14137343   1
## path:hsa05200 0.14962804   1
## path:hsa01522 0.15062883   1
## path:hsa04916 0.16048469   1
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
##            ONTOLOGY
## GO:0003160       BP
## GO:0030274       MF
## GO:0002221       BP
## GO:0032486       BP
## GO:0019827       BP
## GO:0072538       BP
## GO:0098727       BP
## GO:0048671       BP
## GO:0003157       BP
## GO:0043620       BP
## GO:1901533       BP
## GO:0042813       MF
## GO:0060674       BP
## GO:0017147       MF
## GO:0048679       BP
## GO:0072148       BP
## GO:0006730       BP
## GO:0035019       BP
## GO:0061101       BP
## GO:0051303       BP
##                                                                            TERM
## GO:0003160                                            endocardium morphogenesis
## GO:0030274                                                   LIM domain binding
## GO:0002221                       pattern recognition receptor signaling pathway
## GO:0032486                                      Rap protein signal transduction
## GO:0019827                                     stem cell population maintenance
## GO:0072538                                     T-helper 17 type immune response
## GO:0098727                                           maintenance of cell number
## GO:0048671                          negative regulation of collateral sprouting
## GO:0003157                                              endocardium development
## GO:0043620      regulation of DNA-templated transcription in response to stress
## GO:1901533 negative regulation of hematopoietic progenitor cell differentiation
## GO:0042813                                                Wnt receptor activity
## GO:0060674                                    placenta blood vessel development
## GO:0017147                                                  Wnt-protein binding
## GO:0048679                                      regulation of axon regeneration
## GO:0072148                                      epithelial cell fate commitment
## GO:0006730                                         one-carbon metabolic process
## GO:0035019                             somatic stem cell population maintenance
## GO:0061101                                  neuroendocrine cell differentiation
## GO:0051303                             establishment of chromosome localization
##              N DE         P.DE FDR
## GO:0003160   6  3 0.0006138797   1
## GO:0030274   7  3 0.0006332011   1
## GO:0002221 174 11 0.0018780326   1
## GO:0032486  12  3 0.0020844027   1
## GO:0019827 170 12 0.0022847540   1
## GO:0072538  42  5 0.0026830987   1
## GO:0098727 174 12 0.0027581316   1
## GO:0048671   9  3 0.0028018163   1
## GO:0003157  12  3 0.0045685657   1
## GO:0043620  43  5 0.0056730822   1
## GO:1901533  14  3 0.0057257600   1
## GO:0042813  15  3 0.0069342464   1
## GO:0060674  30  4 0.0071786732   1
## GO:0017147  28  4 0.0078984898   1
## GO:0048679  28  4 0.0084541436   1
## GO:0072148  14  3 0.0091995210   1
## GO:0006730  37  4 0.0099768964   1
## GO:0035019  63  6 0.0102186789   1
## GO:0061101  16  3 0.0110564450   1
## GO:0051303  81  6 0.0114801430   1

Now specifically analyse hypomethylated probes

res2 <- subset(res,logFC<0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)

# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
##                                                        Description   N DE
## path:hsa04974                     Protein digestion and absorption  92  8
## path:hsa04975                         Fat digestion and absorption  43  3
## path:hsa04120                       Ubiquitin mediated proteolysis 134  6
## path:hsa04350                           TGF-beta signaling pathway  93  5
## path:hsa04976                                       Bile secretion  86  4
## path:hsa04940                             Type I diabetes mellitus  41  3
## path:hsa04115                                p53 signaling pathway  73  4
## path:hsa04512                             ECM-receptor interaction  86  5
## path:hsa01240                            Biosynthesis of cofactors 147  5
## path:hsa04933 AGE-RAGE signaling pathway in diabetic complications  95  5
## path:hsa05165                       Human papillomavirus infection 314 13
## path:hsa04936                              Alcoholic liver disease 129  5
## path:hsa04921                           Oxytocin signaling pathway 151  7
## path:hsa03320                               PPAR signaling pathway  72  3
## path:hsa05416                                    Viral myocarditis  55  3
## path:hsa04068                               FoxO signaling pathway 124  5
## path:hsa04920                      Adipocytokine signaling pathway  66  3
## path:hsa04390                              Hippo signaling pathway 154  7
## path:hsa04060               Cytokine-cytokine receptor interaction 268  6
## path:hsa05222                               Small cell lung cancer  88  4
##                      P.DE       FDR
## path:hsa04974 0.004150998 0.3860428
## path:hsa04975 0.047276633 1.0000000
## path:hsa04120 0.102121660 1.0000000
## path:hsa04350 0.109887797 1.0000000
## path:hsa04976 0.117372293 1.0000000
## path:hsa04940 0.132617570 1.0000000
## path:hsa04115 0.133096130 1.0000000
## path:hsa04512 0.136536499 1.0000000
## path:hsa01240 0.138126612 1.0000000
## path:hsa04933 0.139382208 1.0000000
## path:hsa05165 0.162594089 1.0000000
## path:hsa04936 0.169529707 1.0000000
## path:hsa04921 0.195740721 1.0000000
## path:hsa03320 0.197421285 1.0000000
## path:hsa05416 0.255512306 1.0000000
## path:hsa04068 0.256091354 1.0000000
## path:hsa04920 0.262004101 1.0000000
## path:hsa04390 0.262889360 1.0000000
## path:hsa04060 0.263291974 1.0000000
## path:hsa05222 0.272352535 1.0000000
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
##            ONTOLOGY
## GO:0060028       BP
## GO:0001518       CC
## GO:0007566       BP
## GO:0060371       BP
## GO:0033077       BP
## GO:0033089       BP
## GO:0021535       BP
## GO:0031210       MF
## GO:1901163       BP
## GO:0050321       MF
## GO:0061450       BP
## GO:0005248       MF
## GO:0086010       BP
## GO:0031526       CC
## GO:0044790       BP
## GO:0086012       BP
## GO:0050997       MF
## GO:1901981       MF
## GO:0034706       CC
## GO:1903533       BP
##                                                                           TERM
## GO:0060028                    convergent extension involved in axis elongation
## GO:0001518                                voltage-gated sodium channel complex
## GO:0007566                                                 embryo implantation
## GO:0060371    regulation of atrial cardiac muscle cell membrane depolarization
## GO:0033077                                    T cell differentiation in thymus
## GO:0033089             positive regulation of T cell differentiation in thymus
## GO:0021535                                         cell migration in hindbrain
## GO:0031210                                         phosphatidylcholine binding
## GO:1901163                            regulation of trophoblast cell migration
## GO:0050321                                         tau-protein kinase activity
## GO:0061450                                          trophoblast cell migration
## GO:0005248                               voltage-gated sodium channel activity
## GO:0086010                     membrane depolarization during action potential
## GO:0031526                                               brush border membrane
## GO:0044790                                suppression of viral release by host
## GO:0086012 membrane depolarization during cardiac muscle cell action potential
## GO:0050997                                   quaternary ammonium group binding
## GO:1901981                              phosphatidylinositol phosphate binding
## GO:0034706                                              sodium channel complex
## GO:1903533                                     regulation of protein targeting
##              N DE        P.DE FDR
## GO:0060028   6  3 0.001314104   1
## GO:0001518  17  4 0.001375654   1
## GO:0007566  54  6 0.001565736   1
## GO:0060371   9  3 0.001687609   1
## GO:0033077  83  8 0.002388282   1
## GO:0033089  10  3 0.002549476   1
## GO:0021535  16  4 0.002820309   1
## GO:0031210  28  4 0.003456386   1
## GO:1901163  15  3 0.003532168   1
## GO:0050321  22  4 0.004978615   1
## GO:0061450  16  3 0.005645237   1
## GO:0005248  24  4 0.006549588   1
## GO:0086010  36  5 0.007385247   1
## GO:0031526  54  5 0.007995118   1
## GO:0044790  15  3 0.008390680   1
## GO:0086012  22  4 0.008914192   1
## GO:0050997  37  4 0.009131104   1
## GO:1901981 170 11 0.009143120   1
## GO:0034706  26  4 0.010170846   1
## GO:1903533  75  6 0.010924579   1

DMRCate - differentially methylated region analysis

#design matrix in regression 
design_tw <- model.matrix(~ADOS+Twin_Group)
design_tw_dmrc <- model.matrix(~ADOS)
#myannotation <- cpg.annotate("array", mDat, analysis.type="differential", design=design_tw, coef=2, fdr=1)
myannotation <- cpg.annotate("array", mDat, what = "M", 
  arraytype = "EPIC", analysis.type="differential", design_tw, coef=2)
## Your contrast returned no individually significant probes. Try increasing the fdr. Alternatively, set pcutoff manually in dmrcate() to return DMRs, but be warned there is an increased risk of Type I errors.
dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2, pcutoff=0.001)
## Fitting chr1...
## Fitting chr2...
## Fitting chr3...
## Fitting chr4...
## Fitting chr5...
## Fitting chr6...
## Fitting chr7...
## Fitting chr8...
## Fitting chr9...
## Fitting chr10...
## Fitting chr11...
## Fitting chr12...
## Fitting chr13...
## Fitting chr14...
## Fitting chr15...
## Fitting chr16...
## Fitting chr17...
## Fitting chr18...
## Fitting chr19...
## Fitting chr20...
## Fitting chr21...
## Fitting chr22...
## Demarcating regions...
## Done!
results.ranges <- data.frame(extractRanges(dmrcoutput, genome = "hg19") )
## snapshotDate(): 2022-10-31
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache
head(results.ranges, 20)
##    seqnames     start       end width strand no.cpgs min_smoothed_fdr Stouffer
## 1      chr1 234792390 234792752   363      *       2     8.755567e-07        1
## 2      chr2  46525649  46525678    30      *       2     9.543493e-04        1
## 3      chr6  91113506  91113687   182      *       2     8.564702e-05        1
## 4      chr9  96077410  96077420    11      *       2     9.668722e-04        1
## 5     chr11  44613980  44614255   276      *       2     4.084688e-05        1
## 6     chr22  20102216  20102580   365      *       3     1.418446e-06        1
## 7     chr22  31318147  31318444   298      *       7     9.447998e-04        1
## 8     chr12  75784541  75785295   755      *      11     8.243423e-09        1
## 9      chr2  33359198  33359688   491      *      10     9.097454e-04        1
## 10     chr7 100481990 100482960   971      *       5     3.223939e-08        1
## 11    chr16  87780848  87781501   654      *       5     5.201435e-07        1
## 12    chr11  65820391  65820516   126      *       4     4.325928e-04        1
## 13    chr12 124788627 124789036   410      *       4     2.317340e-05        1
##        HMFDR Fisher      maxdiff     meandiff     overlapping.genes
## 1  0.9999951      1  0.011843398  0.007349988           RP4-781K5.4
## 2  0.9999951      1 -0.003715122 -0.002901168                 EPAS1
## 3  0.9999951      1  0.015641676  0.011832067                  <NA>
## 4  0.9999951      1 -0.004274340 -0.001878717                  WNK2
## 5  0.9999951      1  0.017079330  0.015007099                  CD82
## 6  0.9999951      1 -0.012596545 -0.003140646                TRMT2A
## 7  0.9999951      1  0.009559386  0.003625967             MORC2-AS1
## 8  0.9999951      1  0.006045794  0.001967226       GLIPR1L2, CAPS2
## 9  0.9999951      1  0.015386270  0.008549527                 LTBP1
## 10 0.9999951      1  0.005702107  0.002285903                  SRRT
## 11 0.9999951      1  0.015904041  0.006726073 KLHDC4, RP11-278A23.2
## 12 0.9999951      1  0.006046969 -0.001162702                 SF3B2
## 13 0.9999951      1 -0.009740326 -0.002426397               FAM101A
write.csv(results.ranges, file = "dmrcoutput_guthrie.csv", row.names = TRUE)

Session information

sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DMRcatedata_2.16.0                                 
##  [2] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [3] IlluminaHumanMethylationEPICmanifest_0.3.0         
##  [4] kableExtra_1.3.4                                   
##  [5] mitch_1.10.0                                       
##  [6] FlowSorted.Blood.EPIC_2.2.0                        
##  [7] ExperimentHub_2.6.0                                
##  [8] AnnotationHub_3.6.0                                
##  [9] BiocFileCache_2.6.0                                
## [10] dbplyr_2.3.0                                       
## [11] DMRcate_2.12.0                                     
## [12] ggplot2_3.4.0                                      
## [13] reshape2_1.4.4                                     
## [14] FlowSorted.Blood.450k_1.36.0                       
## [15] WGCNA_1.72-1                                       
## [16] fastcluster_1.2.3                                  
## [17] dynamicTreeCut_1.63-1                              
## [18] gplots_3.1.3                                       
## [19] RColorBrewer_1.1-3                                 
## [20] ruv_0.9.7.1                                        
## [21] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [22] limma_3.54.0                                       
## [23] missMethyl_1.32.0                                  
## [24] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [25] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [26] minfi_1.44.0                                       
## [27] bumphunter_1.40.0                                  
## [28] locfit_1.5-9.7                                     
## [29] iterators_1.0.14                                   
## [30] foreach_1.5.2                                      
## [31] Biostrings_2.66.0                                  
## [32] XVector_0.38.0                                     
## [33] SummarizedExperiment_1.28.0                        
## [34] Biobase_2.58.0                                     
## [35] MatrixGenerics_1.10.0                              
## [36] matrixStats_0.63.0                                 
## [37] GenomicRanges_1.50.2                               
## [38] GenomeInfoDb_1.34.6                                
## [39] IRanges_2.32.0                                     
## [40] S4Vectors_0.36.1                                   
## [41] BiocGenerics_0.44.0                                
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                rtracklayer_1.58.0           
##   [3] GGally_2.1.2                  R.methodsS3_1.8.2            
##   [5] tidyr_1.2.1                   bit64_4.0.5                  
##   [7] knitr_1.41                    DelayedArray_0.24.0          
##   [9] R.utils_2.12.2                data.table_1.14.6            
##  [11] rpart_4.1.19                  KEGGREST_1.38.0              
##  [13] RCurl_1.98-1.9                GEOquery_2.66.0              
##  [15] AnnotationFilter_1.22.0       doParallel_1.0.17            
##  [17] generics_0.1.3                GenomicFeatures_1.50.3       
##  [19] preprocessCore_1.60.1         RSQLite_2.2.20               
##  [21] bit_4.0.5                     tzdb_0.3.0                   
##  [23] webshot_0.5.4                 xml2_1.3.3                   
##  [25] httpuv_1.6.8                  assertthat_0.2.1             
##  [27] xfun_0.36                     hms_1.1.2                    
##  [29] jquerylib_0.1.4               evaluate_0.20                
##  [31] promises_1.2.0.1              fansi_1.0.3                  
##  [33] restfulr_0.0.15               scrime_1.3.5                 
##  [35] progress_1.2.2                readxl_1.4.1                 
##  [37] caTools_1.18.2                DBI_1.1.3                    
##  [39] htmlwidgets_1.6.1             reshape_0.8.9                
##  [41] purrr_1.0.1                   ellipsis_0.3.2               
##  [43] dplyr_1.0.10                  backports_1.4.1              
##  [45] permute_0.9-7                 annotate_1.76.0              
##  [47] biomaRt_2.54.0                deldir_1.0-6                 
##  [49] sparseMatrixStats_1.10.0      vctrs_0.5.1                  
##  [51] ensembldb_2.22.0              cachem_1.0.6                 
##  [53] withr_2.5.0                   Gviz_1.42.0                  
##  [55] BSgenome_1.66.2               checkmate_2.1.0              
##  [57] GenomicAlignments_1.34.0      prettyunits_1.1.1            
##  [59] mclust_6.0.0                  svglite_2.1.1                
##  [61] cluster_2.1.4                 lazyeval_0.2.2               
##  [63] crayon_1.5.2                  genefilter_1.80.2            
##  [65] labeling_0.4.2                edgeR_3.40.1                 
##  [67] pkgconfig_2.0.3               nlme_3.1-161                 
##  [69] ProtGenerics_1.30.0           nnet_7.3-18                  
##  [71] rlang_1.0.6                   lifecycle_1.0.3              
##  [73] filelock_1.0.2                dichromat_2.0-0.1            
##  [75] cellranger_1.1.0              rngtools_1.5.2               
##  [77] base64_2.0.1                  Matrix_1.5-3                 
##  [79] Rhdf5lib_1.20.0               base64enc_0.1-3              
##  [81] beeswarm_0.4.0                viridisLite_0.4.1            
##  [83] png_0.1-8                     rjson_0.2.21                 
##  [85] bitops_1.0-7                  R.oo_1.25.0                  
##  [87] KernSmooth_2.23-20            rhdf5filters_1.10.0          
##  [89] blob_1.2.3                    DelayedMatrixStats_1.20.0    
##  [91] doRNG_1.8.6                   stringr_1.5.0                
##  [93] nor1mix_1.3-0                 readr_2.1.3                  
##  [95] jpeg_0.1-10                   scales_1.2.1                 
##  [97] memoise_2.0.1                 magrittr_2.0.3               
##  [99] plyr_1.8.8                    zlibbioc_1.44.0              
## [101] compiler_4.2.2                BiocIO_1.8.0                 
## [103] illuminaio_0.40.0             Rsamtools_2.14.0             
## [105] cli_3.6.0                     DSS_2.46.0                   
## [107] htmlTable_2.4.1               Formula_1.2-4                
## [109] MASS_7.3-58.1                 tidyselect_1.2.0             
## [111] stringi_1.7.12                highr_0.10                   
## [113] yaml_2.3.6                    askpass_1.1                  
## [115] latticeExtra_0.6-30           grid_4.2.2                   
## [117] sass_0.4.4                    VariantAnnotation_1.44.0     
## [119] tools_4.2.2                   rstudioapi_0.14              
## [121] foreign_0.8-84                bsseq_1.34.0                 
## [123] gridExtra_2.3                 farver_2.1.1                 
## [125] digest_0.6.31                 BiocManager_1.30.19          
## [127] shiny_1.7.4                   quadprog_1.5-8               
## [129] Rcpp_1.0.9                    siggenes_1.72.0              
## [131] BiocVersion_3.16.0            later_1.3.0                  
## [133] org.Hs.eg.db_3.16.0           httr_1.4.4                   
## [135] AnnotationDbi_1.60.0          biovizBase_1.46.0            
## [137] colorspace_2.0-3              rvest_1.0.3                  
## [139] XML_3.99-0.13                 splines_4.2.2                
## [141] statmod_1.5.0                 multtest_2.54.0              
## [143] systemfonts_1.0.4             xtable_1.8-4                 
## [145] jsonlite_1.8.4                R6_2.5.1                     
## [147] echarts4r_0.4.4               Hmisc_4.7-2                  
## [149] pillar_1.8.1                  htmltools_0.5.4              
## [151] mime_0.12                     glue_1.6.2                   
## [153] fastmap_1.1.0                 BiocParallel_1.32.5          
## [155] interactiveDisplayBase_1.36.0 beanplot_1.3.1               
## [157] codetools_0.2-18              utf8_1.2.2                   
## [159] lattice_0.20-45               bslib_0.4.2                  
## [161] tibble_3.1.8                  BiasedUrn_2.0.8              
## [163] curl_5.0.0                    gtools_3.9.4                 
## [165] GO.db_3.16.0                  openssl_2.0.5                
## [167] interp_1.1-3                  survival_3.5-0               
## [169] rmarkdown_2.19                munsell_0.5.0                
## [171] rhdf5_2.42.0                  GenomeInfoDbData_1.2.9       
## [173] HDF5Array_1.26.0              impute_1.72.2                
## [175] gtable_0.3.1