Source: https://github.com/markziemann/asd_meth
Here we are analysing Guthrie card methylation in 9 twin pairs.
baseDir=getwd()
dataDir=paste(baseDir,"/ASD_EPIC_DATA",sep="")
library("missMethyl")
## Loading required package: IlluminaHumanMethylation450kanno.ilmn12.hg19
## Loading required package: minfi
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: bumphunter
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: locfit
## locfit 1.5-9.7 2023-01-02
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## Loading required package: IlluminaHumanMethylationEPICanno.ilm10b4.hg19
##
## Attaching package: 'IlluminaHumanMethylationEPICanno.ilm10b4.hg19'
## The following objects are masked from 'package:IlluminaHumanMethylation450kanno.ilmn12.hg19':
##
## Islands.UCSC, Locations, Manifest, Other, SNPs.132CommonSingle,
## SNPs.135CommonSingle, SNPs.137CommonSingle, SNPs.138CommonSingle,
## SNPs.141CommonSingle, SNPs.142CommonSingle, SNPs.144CommonSingle,
## SNPs.146CommonSingle, SNPs.147CommonSingle, SNPs.Illumina
##
library("limma")
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library("minfi")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("IlluminaHumanMethylationEPICanno.ilm10b2.hg19")
##
## Attaching package: 'IlluminaHumanMethylationEPICanno.ilm10b2.hg19'
## The following objects are masked from 'package:IlluminaHumanMethylationEPICanno.ilm10b4.hg19':
##
## Islands.UCSC, Locations, Manifest, Other, SNPs.132CommonSingle,
## SNPs.135CommonSingle, SNPs.137CommonSingle, SNPs.138CommonSingle,
## SNPs.141CommonSingle, SNPs.142CommonSingle, SNPs.144CommonSingle,
## SNPs.146CommonSingle, SNPs.147CommonSingle, SNPs.Illumina
## The following objects are masked from 'package:IlluminaHumanMethylation450kanno.ilmn12.hg19':
##
## Islands.UCSC, Locations, Manifest, Other, SNPs.132CommonSingle,
## SNPs.135CommonSingle, SNPs.137CommonSingle, SNPs.138CommonSingle,
## SNPs.141CommonSingle, SNPs.142CommonSingle, SNPs.144CommonSingle,
## SNPs.146CommonSingle, SNPs.147CommonSingle, SNPs.Illumina
library("ruv")
library("RColorBrewer")
library("matrixStats")
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
library("WGCNA")
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
##
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
##
## hclust
##
##
## Attaching package: 'WGCNA'
## The following object is masked from 'package:IRanges':
##
## cor
## The following object is masked from 'package:S4Vectors':
##
## cor
## The following object is masked from 'package:stats':
##
## cor
library("FlowSorted.Blood.450k")
library("reshape2")
library("ggplot2")
library("missMethyl")
library("DMRcate")
library("FlowSorted.Blood.EPIC")
## Loading required package: ExperimentHub
## Loading required package: AnnotationHub
## Loading required package: BiocFileCache
## Loading required package: dbplyr
##
## Attaching package: 'AnnotationHub'
## The following object is masked from 'package:Biobase':
##
## cache
library("mitch")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library("kableExtra")
Load the annotation data and the Epic methylation data.
This analysis is to be conducted on Ubuntu with R4.
ann = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
targets_gen = read.metharray.sheet(dataDir, pattern = "ASD_guthrie_summarysheet.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "/home/mdz/projects/asd_meth/ASD_EPIC_DATA/ASD_guthrie_summarysheet.csv"
#targets$ID = paste(targets$Sample_Group,targets_gen$Sample_Name,sep=".")
rgSet = read.metharray.exp(targets = targets_gen)
sampleNames(rgSet) = targets_gen$SampleID
snpBetas = getSnpBeta(rgSet)
## Loading required package: IlluminaHumanMethylationEPICmanifest
d = dist(t(snpBetas))
hr = hclust(d, method = "complete", members=NULL)
plot(hr)
detP = detectionP(rgSet)
qcReport(rgSet, sampNames = targets_gen$Sample_Name,
pdf="qc-report_ASD_guthrie_nov27.pdf")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## png
## 2
cols=brewer.pal(4,"Set1")
barplot(apply(detP,2,mean),
col=as.numeric(factor(targets_gen$Sample_Name)),
las=2,cex.names= 0.8, cex.axis=0.75,
main="Mean detection p-values of probe signals",
ylab="Mean detection p-value")
barplot(apply(detP,2,mean),
col=as.numeric(factor(targets_gen$Sample_Name)),
las=2,cex.names= 0.8, cex.axis=0.75,ylim=c(0,0.010),
main="Mean detection p-values of probe signals",
ylab="Mean detection p-value")
mset.raw = preprocessRaw(rgSet)
Using Multi-dimensional scaling (MDS) plots before filtering.
mdsPlot(mset.raw, sampGroups = targets_gen$Sample_Name,
sampNames=targets_gen$Social_interaction_on_ADOS,legendPos="bottom")
mdsPlot(mset.raw, sampGroups = targets_gen$Sex,
sampNames=targets_gen$SampleID,legendPos="bottom")
SQN is used as it gave a more stable result.
#SQN method
mSetSq = preprocessQuantile(rgSet)
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
densityPlot(rgSet, sampGroups = targets_gen$Social_interaction_on_ADOS,
main="Raw", legend = FALSE)
densityPlot(getBeta(mSetSq),
sampGroups = targets_gen$Social_interaction_on_ADOS,main="SQN",
legend = FALSE)
Using old method.
rgSet$Slide <- as.numeric(rgSet$Slide)
rgSet$Sex <- as.character(rgSet$Sex)
#rgSet$Sample_Name <- as.character(rgSet$Sample_Name)
#sampleNames(rgSet) = targets_gen$SampleID
cellCounts_new <- estimateCellCounts(rgSet, compositeCellType = "Blood",
processMethod = "auto", probeSelect = "auto",
cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono","Gran"),
referencePlatform = c("IlluminaHumanMethylation450k"),
returnAll = FALSE, meanPlot = TRUE)
## Loading required package: IlluminaHumanMethylation450kmanifest
## [estimateCellCounts] Combining user data with reference (flow sorted) data.
## Warning in DataFrame(sampleNames = c(colnames(rgSet),
## colnames(referenceRGset)), : 'stringsAsFactors' is ignored
## [estimateCellCounts] Processing user and reference data together.
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
## [estimateCellCounts] Picking probes for composition estimation.
## [estimateCellCounts] Estimating composition.
#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")
cellCounts_long <- melt(cellCounts_new, id = "celltype")
cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1, targets_gen$SampleID),"Diagnosis"]
colnames(cellCounts_long) <- c("diagnosis","celltype","value")
ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
geom_boxplot()
wx <- lapply(1:ncol(cellCounts_new) , function(i) {
wilcox.test(a[,i],c[,i], paired=FALSE)
} )
## Warning in wilcox.test.default(a[, i], c[, i], paired = FALSE): cannot compute
## exact p-value with ties
names(wx) <- colnames(cellCounts_new)
wx
## $CD8T
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 17, p-value = 0.2674
## alternative hypothesis: true location shift is not equal to 0
##
##
## $CD4T
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 37, p-value = 0.3196
## alternative hypothesis: true location shift is not equal to 0
##
##
## $NK
##
## Wilcoxon rank sum test with continuity correction
##
## data: a[, i] and c[, i]
## W = 22, p-value = 0.491
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Bcell
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 36, p-value = 0.3773
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Mono
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 23, p-value = 0.6612
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Gran
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 26, p-value = 0.913
## alternative hypothesis: true location shift is not equal to 0
Try estimatecellcounts2.
cells <- estimateCellCounts2(rgSet, referencePlatform= "IlluminaHumanMethylationEPIC",
returnAll = TRUE)
## snapshotDate(): 2022-10-31
## snapshotDate(): 2022-10-31
## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation
## loading from cache
## [estimateCellCounts2] Combining user data with reference (flow sorted) data.
## Warning in asMethod(object): NAs introduced by coercion
## [estimateCellCounts2] Processing user and reference data together.
## [estimateCellCounts2] Using IDOL L-DMR probes for composition estimation.
## [estimateCellCounts2] Estimating proportion composition (prop), if you provide cellcounts those will be provided as counts in the composition estimation.
mset <- cells$normalizedData
cellCounts_new <- cells[[1]]
#plot cell type composition by sample group
a = cellCounts_new[targets_gen$Diagnosis == "0",]
b = cellCounts_new[targets_gen$Diagnosis == "1",]
c = cellCounts_new[targets_gen$Diagnosis == "2",]
age.pal <- brewer.pal(8,"Set1")
cellCounts_long <- melt(cellCounts_new, id = "celltype")
cellCounts_long$Var1 <- targets_gen[match(cellCounts_long$Var1, targets_gen$SampleID),"Diagnosis"]
colnames(cellCounts_long) <- c("diagnosis","celltype","value")
ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
geom_boxplot()
pdf("cells_guthrie.pdf")
par(mar=c(3,8,2,2))
ggplot(cellCounts_long, aes(x = factor(celltype), y = value, fill = factor(diagnosis))) +
geom_boxplot()
dev.off()
## png
## 2
wx <- lapply(1:ncol(cellCounts_new) , function(i) {
wilcox.test(a[,i],c[,i], paired=FALSE)
} )
names(wx) <- colnames(cellCounts_new)
wx
## $CD8T
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 32, p-value = 0.6612
## alternative hypothesis: true location shift is not equal to 0
##
##
## $CD4T
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 36, p-value = 0.3773
## alternative hypothesis: true location shift is not equal to 0
##
##
## $NK
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 19, p-value = 0.3773
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Bcell
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 39, p-value = 0.2212
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Mono
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 20, p-value = 0.4409
## alternative hypothesis: true location shift is not equal to 0
##
##
## $Neu
##
## Wilcoxon rank sum exact test
##
## data: a[, i] and c[, i]
## W = 29, p-value = 0.913
## alternative hypothesis: true location shift is not equal to 0
#densityByProbeType(mSetSq, main = "Raw")
#densityByProbeType(mset[,1], main = "estimateCellCounts2")
#mset_reduced <- mset[which(rownames(mset) %in% names(keep[keep==TRUE])),]
#meth <- getMeth(mset_reduced)
#unmeth <- getUnmeth(mset_reduced)
#Mval <- log2((meth + 100)/(unmeth + 100))
#beta <- getBeta(mset_reduced)
#dim(Mval)
None of the p-values are less than 0.05.
Now filter for high detection p-value and overlap with SNP.
Need to get a copy of the Xreact probes from Namitha. In the mean time I have downloaded from github - link below.
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mset <- mset[keep,]
gmset <- mapToGenome(mset)
#remove SNPs
gmset_flt = dropLociWithSnps(gmset, snps = c("CpG", "SBE"))
#Removing cross-reactive probes
XURL="https://raw.githubusercontent.com/sirselim/illumina450k_filtering/master/EPIC/13059_2016_1066_MOESM1_ESM.csv"
Xreact <- read.csv(XURL)
#Xreact = read.csv(file="/group/canc2/puumba/Data/InfiniumData/NamithaData/Rprojects/Autism/Analysis_Sept11/EPIC_850k_crossreactiveProbes.csv", stringsAsFactors=FALSE)
#Xreact = read.csv(file="~/48639-non-specific-probes-Illumina450k.csv", stringsAsFactors=FALSE)
noXreact <- !(featureNames(gmset) %in% Xreact$X)
gmset <- gmset[noXreact,]
#Removing probes on X and Y chromosomes
autosomes <- !(featureNames(gmset) %in% ann$Name[ann$chr %in% c("chrX","chrY")])
gmset_flt <- gmset[autosomes,]
#Relative log expression (RLE plot)
mvals = getM(gmset_flt)
medSq = apply(mvals, 1, median)
YSq = mvals - medSq
boxplot(YSq,outline=FALSE,ylim=c(-1.5,1.5),
ylab="Relative Log Methylation Value",
cols=as.character(factor(targets_gen$Social_interaction_on_ADOS,)) )
pal = brewer.pal(8, "Dark2")
mds1Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,2))
mds2Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(1,3))
mds3Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(2,3))
mds4Sq = plotMDS(mvals, top=1000, gene.selection="common",dim.plot=c(3,4))
plotMDS(mds1Sq, xlab="Dimension 1", ylab="Dimension 2",
col=pal[as.factor(targets_gen$Diagnosis)],
dim=c(1,2), labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
plotMDS(mds2Sq, xlab="Dimension 1", ylab="Dimension 3",
col=pal[as.factor(targets_gen$Diagnosis)],dim=c(1,3),
labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
plotMDS(mds3Sq, xlab="Dimension 2", ylab="Dimension 3",
col=pal[as.factor(targets_gen$Diagnosis)],dim=c(2,3),
labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
plotMDS(mds4Sq, xlab="Dimension 3", ylab="Dimension 4",
col=pal[as.factor(targets_gen$Diagnosis)],dim=c(3,4),
labels=targets_gen$Sample_Name)
legend("bottomright",bg="white",col=pal,cex=.7,pch=1,legend=0:1)
fit <- prcomp(t(mvals),center = TRUE, scale = TRUE,retx=TRUE)
loadings = fit$x
plot(fit,type="lines")
nGenes = nrow(mvals)
nSamples = ncol(mvals)
datTraits = targets_gen[,7:15]
moduleTraitCor = cor(loadings[,1:6], datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
par(cex=0.75, mar = c(6, 8.5, 3, 3))
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = t(moduleTraitCor),
xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
cex.lab.y = 0.6, zlim = c(-1,1),
main = paste("PCA-trait relationships: Top principal components"))
pdf("pca_guthrie.pdf")
par(mar=c(3,8,2,2))
labeledHeatmap(Matrix = t(moduleTraitCor),
xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
cex.lab.y = 0.6, zlim = c(-1,1),
main = paste("PCA-trait relationships: Top principal components"))
dev.off()
## png
## 2
#blah()
ADOS is the outcome variable (continuous)
mDat = mvals
Twin_Group <- factor(targets_gen$Family_ID)
ADOS <- targets_gen$Social_interaction_on_ADOS
names(ADOS) <- targets_gen$SampleID
design_tw <- model.matrix(~ Twin_Group + ADOS )
fit_tw <- lmFit(mDat, design_tw)
fit2_tw <- eBayes(fit_tw)
summary(decideTests(fit2_tw))
## (Intercept) Twin_Group2 Twin_Group3 Twin_Group4 Twin_Group5 Twin_Group6
## Down 245993 2344 2566 2439 1909 3492
## NotSig 59114 786047 786358 786292 786913 785103
## Up 485551 2267 1734 1927 1836 2063
## Twin_Group7 Twin_Group8 Twin_Group9 Twin_Group12 Twin_Group13 ADOS
## Down 48678 2099 2939 1854 2557 0
## NotSig 728154 786567 785798 786943 786409 790658
## Up 13826 1992 1921 1861 1692 0
top <- topTable(fit2_tw,coef="ADOS",num=Inf, sort.by = "P")
head(top)
## logFC AveExpr t P.Value adj.P.Val B
## cg12565057 0.08266213 3.046014 7.387285 4.009335e-06 0.9999951 4.581264
## cg00546248 0.10347799 1.801484 6.865500 8.922285e-06 0.9999951 3.850448
## cg15740041 0.13176113 1.600846 6.823044 9.537411e-06 0.9999951 3.789115
## cg05116656 0.09157137 -3.375059 6.719336 1.123567e-05 0.9999951 3.638100
## cg11632273 -0.13325664 2.268115 -6.338476 2.076849e-05 0.9999951 3.068838
## cg01656620 -0.13309386 3.051199 -6.203748 2.593336e-05 0.9999951 2.861912
nsig <- sum(top$adj.P.Val < 0.05)
sum(top$P.Value< 0.05)
## [1] 21151
ann_sub =
ann[,c("chr","pos","strand","Name","Islands_Name",
"Relation_to_Island","UCSC_RefGene_Name","UCSC_RefGene_Group")]
output <-merge(ann_sub,top,by.x="Name",by.y="row.names")
write.csv(output[order(output$P.Value),],
file="ASD_guthrie_top_dmps_onADOS_withintw_Nov27_limma.csv",row.names=FALSE)
top1000 <- topTable(fit2_tw,coef="ADOS",num=1000, sort.by = "P")
output_1000 <- merge(ann_sub,top1000,by.x="Name",by.y="row.names")
write.csv(output_1000[order(output_1000$P.Value),],
file="ASD_guthrie_top_dmps_onADOS_Nov27_withintw_limma_top1k.csv",row.names=FALSE)
saveRDS(design_tw, "gu_design.rds")
saveRDS(mvals, "gu_mvals.rds")
output <- output[order(output$P.Value),]
output <- subset(output,P.Value<1e-4)
output %>% kbl() %>% kable_paper("hover", full_width = F)
Name | chr | pos | strand | Islands_Name | Relation_to_Island | UCSC_RefGene_Name | UCSC_RefGene_Group | logFC | AveExpr | t | P.Value | adj.P.Val | B |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
cg12565057 | chr7 | 100482617 |
|
chr7:100484425-100484651 | N_Shore | SRRT;SRRT;SRRT;SRRT | Body;Body;Body;Body | 0.0826621 | 3.0460140 | 7.387285 | 4.00e-06 | 0.9999951 | 4.581264 |
cg00546248 | chr1 | 2412743 |
|
chr1:2411125-2411404 | S_Shore | PLCH2 | Body | 0.1034780 | 1.8014843 | 6.865500 | 8.90e-06 | 0.9999951 | 3.850448 |
cg15740041 | chr16 | 87781336 |
|
OpenSea | KLHDC4;KLHDC4;KLHDC4 | Body;Body;Body | 0.1317611 | 1.6008461 | 6.823044 | 9.50e-06 | 0.9999951 | 3.789115 | |
cg05116656 | chr5 | 94417892 |
|
OpenSea | MCTP1;MCTP1;MCTP1 | TSS1500;TSS1500;Body | 0.0915714 | -3.3750586 | 6.719336 | 1.12e-05 | 0.9999951 | 3.638100 | |
cg11632273 | chr22 | 20102580 |
|
chr22:20104374-20105729 | N_Shore | TRMT2A;TRMT2A;TRMT2A;RANBP1;MIR6816 | Body;Body;Body;TSS1500;TSS1500 | -0.1332566 | 2.2681145 | -6.338476 | 2.08e-05 | 0.9999951 | 3.068838 |
cg01656620 | chr12 | 124788698 |
|
chr12:124788559-124788884 | Island | FAM101A | 5’UTR | -0.1330939 | 3.0511986 | -6.203748 | 2.59e-05 | 0.9999951 | 2.861912 |
cg10881749 | chr1 | 234792390 |
|
OpenSea | 0.0804887 | 0.9589997 | 6.114550 | 3.01e-05 | 0.9999951 | 2.723318 | |||
cg13699963 | chr17 | 71548861 |
|
OpenSea | SDK2 | Body | -0.0663828 | 2.9801617 | -5.750106 | 5.58e-05 | 0.9999951 | 2.143861 | |
cg22656126 | chr17 | 1637206 |
|
chr17:1637053-1637492 | Island | WDR81;WDR81;WDR81;WDR81 | Body;Body;Body;Body | 0.1395930 | 5.7130309 | 5.698989 | 6.09e-05 | 0.9999951 | 2.060902 |
cg07162704 | chr6 | 91113687 |
|
OpenSea | 0.1009010 | 0.5704454 | 5.695702 | 6.13e-05 | 0.9999951 | 2.055553 | |||
cg13277464 | chr2 | 43393040 |
|
OpenSea | 0.1211560 | 2.3025832 | 5.606707 | 7.15e-05 | 0.9999951 | 1.910100 | |||
cg18731055 | chr13 | 49108131 |
|
chr13:49106411-49107463 | S_Shore | RCBTB2 | TSS1500 | 0.1016068 | 0.0882235 | 5.584670 | 7.43e-05 | 0.9999951 | 1.873891 |
cg03188919 | chr10 | 31372663 |
|
OpenSea | -0.1030369 | 2.0662356 | -5.526631 | 8.23e-05 | 0.9999951 | 1.778167 | |||
cg01128603 | chr11 | 65820391 |
|
chr11:65819367-65820224 | S_Shore | SF3B2 | Body | 0.0946490 | -3.2440188 | 5.486222 | 8.83e-05 | 0.9999951 | 1.711213 |
cg03177876 | chr4 | 71701931 |
|
chr4:71704664-71705736 | N_Shelf | GRSF1;GRSF1 | 5’UTR;Body | 0.0982965 | -1.2874739 | 5.481370 | 8.91e-05 | 0.9999951 | 1.703158 |
cg05791256 | chr9 | 96077420 |
|
chr9:96080113-96080330 | N_Shelf | WNK2;WNK2 | Body;Body | -0.0749658 | 3.6607111 | -5.477688 | 8.96e-05 | 0.9999951 | 1.697042 |
bDat = ilogit2(mDat)
bDat_new = getBeta(gmset_flt)
#View(bDat)
write.csv(bDat,file="ASD_guthrie_beta_onADOS_withintw_Nov27.csv",row.names=TRUE)
Extract illumina negative control
#Extract illumina negative control
INCs <- getINCs(rgSet)
head(INCs)
## S3 S4 S31 S32 S11 S12
## 10609447 -1.8776490 -1.1632518 -1.18519176 -0.7143278 -1.6703127 -0.9326960
## 10627500 -1.1555411 -0.9977650 -1.41063232 -0.6264921 -0.6773834 -0.7425038
## 10676356 -0.4866985 -0.1531999 -0.44704789 -0.9144243 -0.5461735 -0.5811102
## 10731326 -0.5239331 -0.2664330 -0.53287399 -0.3677318 -0.3942789 -0.3690454
## 10732387 -0.5808464 -0.3698891 0.02445304 -0.6246116 -0.8225041 -0.4441000
## 10740401 -0.6247424 -0.4382929 -0.12261929 -0.5911945 -0.3335561 -0.1339268
## S13 S14 S17 S18 S21 S22
## 10609447 -1.4266255 -1.65503374 -1.7325197 -2.0211992 -1.7301044 -1.70675098
## 10627500 -1.4321922 -1.11090931 -1.5051335 -1.4742964 -1.0479345 -1.97638744
## 10676356 -0.7391834 -0.71443903 -1.1014453 -0.9098022 -0.8210299 -0.09709869
## 10731326 -0.2289412 -0.71175503 -1.0727563 -0.1216786 -0.6648158 0.42950798
## 10732387 -1.0397263 -0.97893838 -0.9015374 -0.9911761 -0.7004397 -0.98943076
## 10740401 -0.2777810 0.07871198 -1.5360529 -0.1107035 -0.7868908 -0.74068388
## S27 S28 S7 S8 S23 S24
## 10609447 -1.5824091 -1.5864146 -1.3351474 -1.2598671 -1.0475135 -0.86742584
## 10627500 -0.6953229 -0.9076853 -1.1789701 -0.8329807 -1.0114048 -0.26615375
## 10676356 -0.7231598 0.1184415 -1.2825134 -0.6059221 -0.1861513 -0.37664526
## 10731326 -0.5672294 -0.1586977 -0.1512436 -0.3645157 -0.4609045 -0.27633123
## 10732387 -1.1776194 -0.6284205 0.1619675 -0.6341286 -0.7256376 0.07352904
## 10740401 -0.3617955 -0.5713134 -0.9259994 -0.7932338 -1.1415963 -0.14597931
## S39 S40 S35 S36
## 10609447 -1.2592127 -0.75375018 -1.5450238 -1.2677164
## 10627500 -1.1759877 -0.99288727 -1.1163866 -1.0860710
## 10676356 -0.1570212 0.27408959 -0.4942642 -0.9655116
## 10731326 -0.8314943 0.05578778 0.1271119 -0.5770573
## 10732387 -0.2847295 -0.21367520 -0.8371023 -0.1720994
## 10740401 -1.5156998 -0.65151369 -0.8073549 -0.8467537
#add negative control data to M-values
Mc <- rbind(mvals,INCs)
ctl <- rownames(Mc) %in% rownames(INCs)
table(ctl)
## ctl
## FALSE TRUE
## 790658 411
#old RUV syntax is updated
#rfit1 <- RUVfit(data=Mc, design=design_tw, coef=2, ctl=ctl) # Stage 1 analysis
# this is the updatedcommand - not sure if correct
rfit1 <- RUVfit(Y = Mc, X = design_tw[,"ADOS"] , ctl = ctl) # Stage 1 analysis
rfit2 <- RUVadj(Y = Mc, fit = rfit1)
top1 <- topRUV(rfit2, num=Inf)
head(top1)
## F.p F.p.BH p_X1 p.BH_X1 b_X1 sigma2
## cg00738934 1.749734e-05 0.9999976 1.749734e-05 0.9999976 0.07012855 0.08075806
## cg23184556 2.012823e-05 0.9999976 2.012823e-05 0.9999976 0.08913465 0.13356894
## cg20718640 2.062933e-05 0.9999976 2.062933e-05 0.9999976 0.13884017 0.32542004
## cg00779672 3.786057e-05 0.9999976 3.786057e-05 0.9999976 0.06977061 0.09121294
## cg21941354 4.877250e-05 0.9999976 4.877250e-05 0.9999976 -0.21282376 0.88743705
## cg15828854 4.911974e-05 0.9999976 4.911974e-05 0.9999976 0.09896672 0.19214239
## var.b_X1 fit.ctl mean
## cg00738934 0.0001458951 FALSE 2.5400187
## cg23184556 0.0002413017 FALSE 0.4946566
## cg20718640 0.0005878942 FALSE 2.8093465
## cg00779672 0.0001647826 FALSE 2.1754652
## cg21941354 0.0016032174 FALSE -3.2212907
## cg15828854 0.0003471187 FALSE -0.4868451
#ctl <- rownames(mValsSq) %in% rownames(top1[top1$p.ebayes > 0.5,])
#bottom 80% used as negative controls
ctl <- rownames(mvals) %in% rownames(top1[ceiling(0.2*nrow(top1)):nrow(top1),])
table(ctl)
## ctl
## FALSE TRUE
## 158105 632553
# Perform RUV adjustment and fit
rfit1 <- RUVfit(Y = mvals, X=design_tw[,"ADOS"], ctl=ctl) # Stage 2 analysis
rfit2 <- RUVadj(Y = mvals, fit=rfit1)
top_ruv <- topRUV(rfit2, number = Inf)
top_ruv_1000 <- topRUV(rfit2, number = 1000)
head(top_ruv_1000)
## F.p F.p.BH p_X1 p.BH_X1 b_X1 sigma2
## cg07926098 4.006899e-06 0.8414528 4.006899e-06 0.8414528 0.07112171 0.05646704
## cg23088672 1.313169e-05 0.8414528 1.313169e-05 0.8414528 0.07896674 0.08605586
## cg17331950 1.565399e-05 0.8414528 1.565399e-05 0.8414528 0.05847637 0.04873836
## cg10593232 1.652526e-05 0.8414528 1.652526e-05 0.8414528 0.05969707 0.05130503
## cg19893067 1.769350e-05 0.8414528 1.769350e-05 0.8414528 0.08232386 0.09880945
## cg21431091 2.229518e-05 0.8414528 2.229518e-05 0.8414528 -0.07643017 0.08891945
## var.b_X1 fit.ctl mean
## cg07926098 9.820970e-05 FALSE 1.3940876
## cg23088672 1.496717e-04 FALSE 1.6058434
## cg17331950 8.476767e-05 FALSE 2.3723731
## cg10593232 8.923173e-05 FALSE 0.6769301
## cg19893067 1.718533e-04 FALSE 0.2863827
## cg21431091 1.546522e-04 FALSE -1.7599098
#dat_subset_ruv = as.matrix(subset(mValsSq, rownames(mValsSq) %in% rownames(top_ruv_1000)))
#heatmap.2(dat_subset_ruv, cexRow=0.8, cexCol=0.8, col=colorRampPalette(c("Blue","Yellow")), density.info="none", trace="none", scale="row", main="diff meth probes due to epilepsy")
output_ruv = merge(ann,top_ruv,by.x="Name",by.y="row.names")
output_ruv_1000 = merge(ann,top_ruv_1000,by.x="Name",by.y="row.names")
#output_full_epicanno = merge(ann_epic,top,by.x="Name",by.y="row.names")
write.csv(output_ruv[order(output_ruv$F.p),],
file="ASD_guthrie_topruv80%_dmps_onADOS_withintw_Nov29.csv",
row.names=FALSE)
write.csv(output_ruv_1000[order(output_ruv_1000$F.p),],
file="ASD_guthrie_topruv80%_1kdmps_onADOS_withintw_Nov29.csv",
row.names=FALSE)
output_ruv <- data.frame(output_ruv[order(output_ruv$F.p),])
output_ruv <- output_ruv[order(output_ruv$p_X1),]
output_ruv <- subset(output_ruv,p_X1<1e-4)
output_ruv %>% kbl() %>% kable_paper("hover", full_width = F)
Name | chr | pos | strand | AddressA | AddressB | ProbeSeqA | ProbeSeqB | Type | NextBase | Color | Probe_rs | Probe_maf | CpG_rs | CpG_maf | SBE_rs | SBE_maf | Islands_Name | Relation_to_Island | Forward_Sequence | SourceSeq | UCSC_RefGene_Name | UCSC_RefGene_Accession | UCSC_RefGene_Group | Phantom4_Enhancers | Phantom5_Enhancers | DMR | X450k_Enhancer | HMM_Island | Regulatory_Feature_Name | Regulatory_Feature_Group | GencodeBasicV12_NAME | GencodeBasicV12_Accession | GencodeBasicV12_Group | GencodeCompV12_NAME | GencodeCompV12_Accession | GencodeCompV12_Group | DNase_Hypersensitivity_NAME | DNase_Hypersensitivity_Evidence_Count | OpenChromatin_NAME | OpenChromatin_Evidence_Count | TFBS_NAME | TFBS_Evidence_Count | Methyl27_Loci | Methyl450_Loci | Random_Loci | F.p | F.p.BH | p_X1 | p.BH_X1 | b_X1 | sigma2 | var.b_X1 | fit.ctl | mean |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
cg07926098 | chr21 | 31044189 |
|
55802984 | TTCTCCCAAATTCAATCATCTAATCTTTTATTAATACTTCTATTTTAAAC | II | NA | NA | NA | NA | NA | NA | OpenSea | ATATTTTTTCTTTCTCCCAAATTCAATCATCTGATCTTTTATTAATGCTTCTATTTTAAA[CG]TCTCACTAGCCTTTCCAGATGTGTATTACAAGATGTTTAGCAGTACCCACTAGTTGCCAG | TCTCCCAAATTCAATCATCTGATCTTTTATTAATGCTTCTATTTTAAACG | GRIK1;GRIK1 | NM_175611;NM_000830 | Body;Body | GRIK1 | ENST00000472429.1 | 3’UTR | GRIK1 | ENST00000472429.1 | 3’UTR | chr21:31044146-31044323 | 5 | chr21:31043811-31044512 | 6 | 4.00e-06 | 0.8414528 | 4.00e-06 | 0.8414528 | 0.0711217 | 0.0564670 | 0.0000982 | FALSE | 1.3940876 | |||||||||||||||||
cg23088672 | chr12 | 132403634 |
|
86647334 | ATAAAATACAAAATATATAAAATATCRAACRTAACACATCCCAACTACCC | II | NA | NA | NA | NA | NA | NA | OpenSea | GTGTACGGTGTGTGGGGTGCAGGGTGTGTGGGGTGTCGGGCGTGGCACATCCCAGCTGCC[CG]GGCTGCAGCAGGGGAGGGTACCCTGCCCTGCCTCCCCAAGGCGTCATCGGTGAGTCTGAA | CGGGCAGCTGGGATGTGCCACGCCCGACACCCCACACACCCTGCACCCCA | ULK1 | NM_003565 | Body | ULK1 | ENST00000542419.1 | TSS1500 | ULK1;ULK1;ULK1 | ENST00000542419.1;ENST00000540568.1;ENST00000544718.1 | TSS1500;TSS1500;TSS200 | chr12:132403585-132403830 | 3 | 1.31e-05 | 0.8414528 | 1.31e-05 | 0.8414528 | 0.0789667 | 0.0860559 | 0.0001497 | FALSE | 1.6058434 | |||||||||||||||||||
cg17331950 | chr1 | 181075666 |
|
56632211 | ATCTCTCTCCCTCCATCTCTACTTAATTACTCTAATTTATTTCRAACACC | II | NA | NA | NA | NA | NA | NA | chr1:181073917-181075002 | S_Shore | GAGGACTAAACATCTCTCTCCCTCCATCTCTGCTTGGTTACTCTGGTTTGTTTCGAACAC[CG]GCTGCTTCTTCCCACTTCTTGACTCCCTGCACCCCTACCCGCCCCCCCAGCAATCACATT | TCTCTCTCCCTCCATCTCTGCTTGGTTACTCTGGTTTGTTTCGAACACCG | chr1:181075648-181075648 | RDMR | chr1:181055591-181075926 | 6 | TRUE | 1.57e-05 | 0.8414528 | 1.57e-05 | 0.8414528 | 0.0584764 | 0.0487384 | 0.0000848 | FALSE | 2.3723731 | ||||||||||||||||||||||||
cg10593232 | chr17 | 4378180 |
|
65747240 | TCTTTCTAATACCCTCRCCACACATATTTTAATATTTTAAATAACTCCCC | II | NA | NA | NA | NA | NA | NA | OpenSea | TATAGTCATCATCTTTCTGGTGCCCTCGCCACACATGTTTTGGTATTTTGAATGGCTCCC[CG]CATATGTACGTAGTTGTGGCGTGTGAGTTCTTCGTGTGTGTCTCAGATTGTGGGTGTCCG | CGGGGAGCCATTCAAAATACCAAAACATGTGTGGCGAGGGCACCAGAAAG | SPNS3 | NM_182538 | Body | 17:4324799-4325035 | SPNS3 | ENST00000575194.1 | 3’UTR | chr17:4377291-4381173 | 6 | TRUE | 1.65e-05 | 0.8414528 | 1.65e-05 | 0.8414528 | 0.0596971 | 0.0513050 | 0.0000892 | FALSE | 0.6769301 | ||||||||||||||||||||
cg19893067 | chr17 | 80732956 |
|
40769910 | AAATACRATACCTATAAACCAAAAAACACCATCTAAATACTTCTCCCTCC | II | NA | NA | NA | NA | NA | NA | OpenSea | TTGGCTCTGTGGAGTGCGGTGCCTGTGGGCCAGGAGGCACCATCTGAGTACTTCTCCCTC[CG]TGGGGTCCTCCGGGAAAAGAGCGCGCTGGCTCGGGATCCCGGGCGGACTCCCGGCGAGAC | CGGAGGGAGAAGTACTCAGATGGTGCCTCCTGGCCCACAGGCACCGCACT | TBCD | NM_005993 | Body | 17:78326180-78326457 | 17:80732631-80734340 | Unclassified | TBCD;TBCD | ENST00000334614.8;ENST00000397466.2 | 5’UTR;5’UTR | TBCD;TBCD | ENST00000334614.8;ENST00000397466.2 | 5’UTR;5’UTR | chr17:80732665-80733595 | 3 | TRUE | 1.77e-05 | 0.8414528 | 1.77e-05 | 0.8414528 | 0.0823239 | 0.0988094 | 0.0001719 | FALSE | 0.2863827 | |||||||||||||||
cg21431091 | chr1 | 201124025 |
|
84650942 | ATTATTTAAAAATCCACRTCTTTACTACAAAAAACTTTCTACRCRACAAC | II | NA | NA | NA | NA | NA | NA | chr1:201123245-201123746 | S_Shore | AATCTGGAAATGTTATTTAGGGATCCACGTCTTTGCTGCAGAAGACTTTCTGCGCGGCAA[CG]TCTCTACCCCTACACACACCTCTGAGAAGGAGAAATCTAGGGTTGGCGATGGGTGGTGTG | TTATTTAGGGATCCACGTCTTTGCTGCAGAAGACTTTCTGCGCGGCAACG | TMEM9 | NM_016456 | TSS1500 | 1:201122822-201124261 | Promoter_Associated | TMEM9;TMEM9;TMEM9;TMEM9;TMEM9 | ENST00000367329.1;ENST00000367330.1;ENST00000367334.5;ENST00000367332.1;ENST00000367333.1 | TSS1500;TSS1500;TSS1500;TSS1500;5’UTR | TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9;TMEM9 | ENST00000455367.1;ENST00000495205.1;ENST00000435310.1;ENST00000367329.1;ENST00000497582.1;ENST00000367330.1;ENST00000414605.1;ENST00000367334.5;ENST00000367332.1;ENST00000367333.1 | TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;5’UTR | chr1:201123845-201124275 | 3 | TRUE | TRUE | 2.23e-05 | 0.8414528 | 2.23e-05 | 0.8414528 | -0.0764302 | 0.0889195 | 0.0001547 | FALSE | -1.7599098 | ||||||||||||||
cg00546248 | chr1 | 2412743 |
|
1768293 | ATATACRTAAATTAAAAACTAAATACTTACAAACACTCTAAAAACTACCC | II | NA | NA | NA | NA | NA | NA | chr1:2411125-2411404 | S_Shore | CAGGACAGGGTGTGTGCGTGAGTTAGGGGCTGGGTGCTTGCAGACACTCTGGGGACTGCC[CG]GGGCTTGTGAAGGCCCCAGTGGGGACACAGTCTGCCTGGGCTGTGGGGGCTGGCAGGTGG | TGTGCGTGAGTTAGGGGCTGGGTGCTTGCAGACACTCTGGGGACTGCCCG | PLCH2 | NM_014638 | Body | 1:2412565-2413038 | Unclassified_Cell_type_specific | PLCH2 | ENST00000464895.1 | 5’UTR | chr1:2391098-2419210 | 6 | chr1:2411589-2415067 | 6 | TRUE | 2.56e-05 | 0.8414528 | 2.56e-05 | 0.8414528 | 0.0813031 | 0.1032822 | 0.0001796 | FALSE | 1.8014843 | ||||||||||||||||
cg15310518 | chr14 | 106330520 |
|
96658329 | ATCACATTATAAAAAACCCCATTAAAAAATACACAAAAACCTAACTCTCC | II | NA | NA | NA | NA | NA | NA | chr14:106333492-106333920 | N_Shelf | TAGTCAAAGTAGTCACATTGTGGGAGGCCCCATTAAGGGGTGCACAAAAACCTGACTCTC[CG]ACTGTCCCGGGCCGGCCGTGGCAGCCAGCCCCGTGTCCCAAGGTCATTTTGTCCCCAGCA | CGGAGAGTCAGGTTTTTGTGCACCCCTTAATGGGGCCTCCCACAATGTGA | IGHJ6;IGHJ5;IGHJ4 | ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 | TSS1500;TSS1500;TSS200 | IGHJ6;IGHJ5;IGHJ4 | ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 | TSS1500;TSS1500;TSS200 | chr14:106330520-106331295 | 3 | 2.79e-05 | 0.8414528 | 2.79e-05 | 0.8414528 | -0.0436969 | 0.0303180 | 0.0000527 | FALSE | -0.3222181 | |||||||||||||||||||||
cg25295510 | chr19 | 38631842 |
|
52702188 | CCAAAAATAATACTTTCCTACTCCRTCTTAAACTCCRAAATATTCATATC | II | NA | NA | NA | NA | NA | NA | OpenSea | GTGGGCTGGGGCCTTTCTGAGTAATGCAGTTTTTCTCCCCAGGGGTTGGCCGGAGACCTA[CG]ACATGAATACCTCGGAGCCCAAGACGGAGCAGGAAAGCATCACTCCTGGGGGCCGGCCCC | CGACATGAATACCTCGGAGCCCAAGACGGAGCAGGAAAGCATCACTCCTG | SIPA1L3;SIPA1L3 | NM_015073;NM_015073 | ExonBnd;Body | SIPA1L3 | ENST00000222345.5 | ExonBnd | SIPA1L3 | ENST00000222345.5 | ExonBnd | chr19:38631744-38632303 | 3 | 3.04e-05 | 0.8414528 | 3.04e-05 | 0.8414528 | 0.0800266 | 0.1033673 | 0.0001798 | FALSE | 3.6819004 | |||||||||||||||||||
cg00779672 | chr18 | 47155847 |
|
55628264 | AAATAAAAACATATTCAACATACRACTACCAAATACAAAAACRAACACCC | II | NA | NA | NA | NA | NA | NA | OpenSea | GCATTTTAGGATTCCAAGTGCAGGGTGCCCTGGCAGGGCTGTACTCAGCCGGCGGTCCAA[CG]GGTGCTCGCCCTTGCACCTGGCAGTCGCATGTTGAATATGCTCCTATTTTCCCTACTTCT | CGGGTGCTCGCCCTTGCACCTGGCAGTCGCATGTTGAATATGCTCCTATT | chr18:47155300-47155855 | 3 | 3.34e-05 | 0.8414528 | 3.34e-05 | 0.8414528 | 0.0622395 | 0.0636649 | 0.0001107 | FALSE | 2.1754652 | ||||||||||||||||||||||||||||
cg07696757 | chr7 | 44229001 |
|
88769850 | ACAAATAACTATATCTCTCACATCCTAACCTACTTCCCTAAAACTCAAAC | II | NA | NA | NA | NA | NA | NA | OpenSea | GTAATTAGGCTGCAGGTGACTGTGTCTCTCACATCCTAGCCTGCTTCCCTGGGGCTCAGG[CG]CCGCTCGGCATTTCCTGCTCCAGCCAGGTGTGGAGTGGGGGATTCTCCTGCCAGGGCTTA | CAGGTGACTGTGTCTCTCACATCCTAGCCTGCTTCCCTGGGGCTCAGGCG | GCK;GCK | NM_000162;NM_000162 | 1stExon;5’UTR | GCK;GCK;GCK;GCK | ENST00000403799.3;ENST00000403799.3;ENST00000437084.1;ENST00000476008.1 | 1stExon;5’UTR;TSS1500;3’UTR | GCK;GCK;GCK;GCK | ENST00000403799.3;ENST00000403799.3;ENST00000437084.1;ENST00000476008.1 | 1stExon;5’UTR;TSS1500;3’UTR | chr7:44228925-44229250 | 3 | TRUE | 3.62e-05 | 0.8414528 | 3.62e-05 | 0.8414528 | 0.0568647 | 0.0539742 | 0.0000939 | FALSE | 2.9591324 | ||||||||||||||||||
cg21941354 | chr8 | 143751709 |
|
14670238 | 3669838 | AACCCCAACACCTTCAACACAAAATTCACTATCATAAAAAAAATATAACA | AACCCCAACGCCTTCAACGCAAAATTCACTATCATAAAAAAAATATAACG | I | C | Grn | NA | NA | NA | NA | NA | NA | chr8:143750759-143751448 | S_Shore | GCGCGCGGCCTGGGCCCCGGGCTCCGCCCTGCTCTCTCCATCTGTAGAATGGGCGCAGGG[CG]TCACATTCCTTTCATGACAGTGAACCCTGCGCTGAAGGCGTTGGGGCTCCTGCAGTTCTG | AGCCCCAACGCCTTCAGCGCAGGGTTCACTGTCATGAAAGGAATGTGACG | PSCA;JRK;JRK;JRK | NR_033343;NM_003724;NM_001279352;NM_001077527 | TSS200;TSS1500;TSS1500;TSS1500 | 8:143747991-143748827 | 8:143750099-143751838 | Promoter_Associated | PSCA;JRK | ENST00000505305.1;ENST00000507178.1 | TSS200;TSS1500 | PSCA;PSCA;JRK;JRK;JRK;JRK | ENST00000505305.1;ENST00000510969.1;ENST00000507178.1;ENST00000422119.2;ENST00000503272.1;ENST00000512113.1 | TSS200;TSS200;TSS1500;TSS1500;TSS1500;3’UTR | chr8:143751305-143751935 | 3 | 4.08e-05 | 0.8414528 | 4.08e-05 | 0.8414528 | -0.1751678 | 0.5240854 | 0.0009115 | FALSE | -3.2212907 | |||||||||||
cg05611414 | chr19 | 50643613 |
|
25655843 | 47675873 | ACAATAACTCCAAATATCAAAACTATCCTTTCTTCCCTTTCTCCCTCTCA | ACAATAACTCCGAATATCGAAACTATCCTTTCTTCCCTTTCTCCCTCTCG | I | T | Red | NA | NA | NA | NA | NA | NA | OpenSea | CCAGCCGGAGCCACAGTGGCTCCGGGTGTCGGGGCTGTCCTTTCTTCCCTTTCTCCCTCT[CG]TCTCCTGTCTCCCGTCTCCCTCTCCCCACGGTCTCCCTCTCCCTCTCCCTCTCCTGTCTC | CGAGAGGGAGAAAGGGAAGAAAGGACAGCCCCGACACCCGGAGCCACTGT | SNAR-D;SNAR-B1;SNAR-B2 | NR_024243;NR_024231;NR_024230 | TSS200;TSS1500;TSS1500 | 19:55335318-55336361 | TRUE | 4.34e-05 | 0.8414528 | 4.34e-05 | 0.8414528 | 0.0690128 | 0.0823146 | 0.0001432 | FALSE | 3.3701183 | |||||||||||||||||||||
cg08648042 | chr17 | 40086195 |
|
41750536 | RATACCCTAAATAACTTACTTAACTACCTTTACTACRTTACTTCCAAAAC | II | NA | NA | NA | NA | NA | NA | chr17:40086751-40087098 | N_Shore | GTGTGCATTTGTAGTTAAAGACATCTTTTTACTATTTAAACTTTAACAATGACATTAGGA[CG]TTTTGGAAGTAACGTAGTAAAGGCAGTTAAGCAAGCTACTTAGGGTATCGTTGGAGCCCA | ATACCCTAAGTAGCTTGCTTAACTGCCTTTACTACGTTACTTCCAAAACG | TTC25;TTC25 | NM_031421;NR_110662 | TSS1500;TSS1500 | 17:40086180-40087180 | Promoter_Associated | AC091172.1 | ENST00000377540.1 | TSS1500 | AC091172.1 | ENST00000377540.1 | TSS1500 | chr17:40086140-40086410 | 3 | 4.54e-05 | 0.8414528 | 4.54e-05 | 0.8414528 | 0.0551645 | 0.0530580 | 0.0000923 | FALSE | -0.7160085 | ||||||||||||||||
cg18036623 | chr1 | 5300056 |
|
69658162 | CCCATTTATATCAAAATAAATAACTCAAATATCCACCTTCAACCAAAAAC | II | NA | NA | NA | NA | NA | NA | OpenSea | GCTCCTGTGTCCCCATTTGTATCAGAGTGGGTGACTCAAGTGTCCACCTTCAGCCAAGAG[CG]TGGCCAGGGCTGGGGCACGCCACATGACCTCACTTGTGTGTCTACCAGCTAAGGCACCTC | CGCTCTTGGCTGAAGGTGGACACTTGAGTCACCCACTCTGATACAAATGG | chr1:5299945-5300270 | 3 | 4.94e-05 | 0.8414528 | 4.94e-05 | 0.8414528 | -0.0518544 | 0.0476559 | 0.0000829 | FALSE | 2.1850403 | ||||||||||||||||||||||||||||
cg10258214 | chr14 | 106330534 |
|
85771960 | TTATACTAAAAACAAAATAACCTTAAAACACRAAACTAACTACCACRACC | II | rs141509289 | 0.011000 | NA | NA | NA | NA | chr14:106333492-106333920 | N_Shelf | ACATTGTGGGAGGCCCCATTAAGGGGTGCACAAAAACCTGACTCTCCGACTGTCCCGGGC[CG]GCCGTGGCAGCCAGCCCCGTGTCCCAAGGTCATTTTGTCCCCAGCACAAGCATGACTCTG | TGTGCTGGGGACAAAATGACCTTGGGACACGGGGCTGGCTGCCACGGCCG | IGHJ6;IGHJ5;IGHJ4 | ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 | TSS1500;TSS1500;TSS200 | IGHJ6;IGHJ5;IGHJ4 | ENST00000390560.2;ENST00000488476.1;ENST00000461719.1 | TSS1500;TSS1500;TSS200 | chr14:106330520-106331295 | 3 | 5.36e-05 | 0.8414528 | 5.36e-05 | 0.8414528 | -0.0851995 | 0.1307293 | 0.0002274 | FALSE | 0.2716556 | |||||||||||||||||||||
cg11879620 | chr1 | 211499932 |
|
89702346 | 17768264 | CATCTAATTTCCCTCACATACATCTAAATTATAACCATCTTACTCTTCCA | CATCTAATTTCCCTCACGTACGTCTAAATTATAACCGTCTTACTCTTCCG | I | T | Red | NA | NA | NA | NA | NA | NA | chr1:211500151-211500636 | N_Shore | CAAGGCCTTGAGCAAATGGGTCAACACAGGAGAGGACGGGCTGAGCCTTGTTGTGCCTGA[CG]GAAGAGCAAGACGGCCACAATTCAGACGCACGTGAGGGAAATCAGATGACTGGACTTGTA | CGGAAGAGCAAGACGGCCACAATTCAGACGCACGTGAGGGAAATCAGATG | TRAF5;TRAF5 | NM_001033910;NM_004619 | TSS1500;TSS200 | 1:211499503-211501341 | Promoter_Associated | TRAF5;TRAF5;TRAF5 | ENST00000427925.2;ENST00000261464.5;ENST00000462410.1 | TSS1500;TSS1500;TSS200 | TRAF5;TRAF5;TRAF5;TRAF5;TRAF5 | ENST00000427925.2;ENST00000494355.1;ENST00000261464.5;ENST00000462410.1;ENST00000488428.1 | TSS1500;TSS1500;TSS1500;TSS200;TSS200 | chr1:211499485-211501315 | 3 | 5.49e-05 | 0.8414528 | 5.49e-05 | 0.8414528 | -0.0833373 | 0.1256712 | 0.0002186 | FALSE | -4.6193643 | ||||||||||||
cg20695515 | chr20 | 61605688 |
|
4796962 | TTACAATTAAACACAATAAAATAAATTCCCRTCAAAAATATATTCCAACC | II | NA | NA | NA | NA | NA | NA | OpenSea | CTGCCCAGCCCCTCCCCTCCCCTGGGTCCTGCCAAGTCGGTCAGAAGGAAAGATGGGCAG[CG]GTTGGAATATACTTTTGACGGGAATTTACTTTATTGTGTTTAATTGTAAAGCGTGGAAGT | TACAATTAAACACAATAAAGTAAATTCCCGTCAAAAGTATATTCCAACCG | TRUE | 20:61605334-61607081 | Unclassified | chr20:61605300-61605955 | 3 | 5.72e-05 | 0.8414528 | 5.72e-05 | 0.8414528 | 0.0513965 | 0.0481834 | 0.0000838 | FALSE | -3.1116354 | |||||||||||||||||||||||||
cg11150807 | chr10 | 43354902 |
|
70613211 | CCAATCCAATAACCTAACCTACRTTTTCTACATAACTACTAATTCCAACC | II | rs2796561 | 0.460346 | NA | NA | NA | NA | OpenSea | GGAGCAGATGAGCGGGTAAGACCGGCAAATCCGTCACATACCCTCAGGACGCAGGGAGCG[CG]GCTGGAATCAGCAGTTATGCAGAAAACGCAGGCCAGGTTACTGGACTGGAACATGCGGGC | CAGTCCAGTAACCTGGCCTGCGTTTTCTGCATAACTGCTGATTCCAGCCG | 10:42674870-42674970 | TRUE | 5.74e-05 | 0.8414528 | 5.74e-05 | 0.8414528 | 0.0683731 | 0.0853268 | 0.0001484 | FALSE | 1.8086412 | ||||||||||||||||||||||||||||
cg15447535 | chr1 | 182808025 |
|
77671883 | CTTTCAAAAAACACTAACRTTCAAACTATCTAAATCAATCCRATAACTAC | II | NA | NA | NA | NA | NA | NA | chr1:182808312-182809122 | N_Shore | CCAGTTTATTTCTTTCAAGAGACACTGGCGTTCAGACTATCTGAATCAATCCGGTAACTA[CG]CCCACTGAACACCTACTACATGCCAGGTATTGAACTAAGCCGCCCTTCATATGCCATCTC | TTTCAAGAGACACTGGCGTTCAGACTATCTGAATCAATCCGGTAACTACG | DHX9;DHX9 | NM_001357;NR_033302 | TSS1500;TSS1500 | 1:182807913-182809448 | Promoter_Associated | DHX9;DHX9 | ENST00000399175.2;ENST00000367549.3 | TSS1500;TSS1500 | DHX9;DHX9;DHX9 | ENST00000399175.2;ENST00000367549.3;ENST00000483416.1 | TSS1500;TSS1500;TSS1500 | chr1:182808000-182808570 | 3 | 5.98e-05 | 0.8414528 | 5.98e-05 | 0.8414528 | 0.0400614 | 0.0295342 | 0.0000514 | FALSE | -0.8898368 | ||||||||||||||||
cg20430773 | chr1 | 16534157 |
|
13655147 | 80758121 | CTAATAACTCACAAAAAAACAATTAATCCAAACCCAAATACCACCTCCCA | CTAATAACTCACGAAAAAACGATTAATCCGAACCCAAATACCACCTCCCG | I | T | Red | NA | NA | NA | NA | NA | NA | chr1:16532917-16534580 | Island | GGAAAGAGAGAGCGTCACGGCCCTAGTTCGCCTGCACCCTGTCTCGGACCCAAGCCCAAA[CG]GGAGGTGGTACCTGGGCTCGGACCAATCGCCCTCTCGTGAGTCACCAGGGGCTGGCCGCG | CGGGAGGTGGTACCTGGGCTCGGACCAATCGCCCTCTCGTGAGTCACCAG | ARHGEF19 | NM_153213 | Body | 1:16406494-16407143 | 1:16532722-16534260 | Promoter_Associated | ARHGEF19;ARHGEF19 | ENST00000478117.1;ENST00000270747.3 | TSS1500;ExonBnd | ARHGEF19;ARHGEF19;ARHGEF19;ARHGEF19;ARHGEF19 | ENST00000478210.1;ENST00000449495.1;ENST00000471928.1;ENST00000478117.1;ENST00000270747.3 | TSS1500;TSS1500;TSS1500;TSS1500;ExonBnd | chr1:16533945-16534395 | 3 | TRUE | 6.10e-05 | 0.8414528 | 6.10e-05 | 0.8414528 | 0.0745545 | 0.1026686 | 0.0001786 | FALSE | -0.6625730 | ||||||||||
cg03270036 | chr9 | 130270004 |
|
61617279 | CAAAAAAAAACCCAAACTTACACCTAAAATCTAAATAAAACCCACTCTTC | II | NA | NA | NA | NA | NA | NA | chr9:130271237-130271447 | N_Shore | ACGCGAGAAGAAACTAAAGCACAGAGACGCAAGGCTAAGGCTGCTCCGGAGGCCACACAG[CG]AAGAGTGGGCTTCACCCAGACCCCAGGTGCAAGTCTGGGCTTTTCTTTGAGCCTTAGCTG | AAAGAAAAGCCCAGACTTGCACCTGGGGTCTGGGTGAAGCCCACTCTTCG | FAM129B;FAM129B | NM_022833;NM_001035534 | Body;Body | TRUE | 9:129309331-129310655 | FAM129B | ENST00000468379.1 | 3’UTR | FAM129B;FAM129B;FAM129B | ENST00000478917.1;ENST00000468379.1;ENST00000484348.1 | 3’UTR;3’UTR;3’UTR | chr9:130267748-130272673 | 6 | chr9:130265003-130270077 | 6 | TRUE | 6.33e-05 | 0.8414528 | 6.33e-05 | 0.8414528 | 0.0614434 | 0.0702602 | 0.0001222 | FALSE | 3.6921774 | |||||||||||||
cg01692639 | chr6 | 99273568 |
|
11803246 | CACTTATAACCRAAACCTACCCTATATTCATCCCAAACAATCAAACAAAC | II | NA | NA | NA | NA | NA | NA | chr6:99272394-99273014 | S_Shore | CTCTCACTCCCCACTTGTGGCCGAGGCCTGCCCTGTGTTCATCCCAGACAGTCAGGCAGA[CG]CTGAGCTGTCCGCGTCGGCGTGGGCGCCTGTGGCCAGCGTATTGTGAGATTTCTAAATGT | ACTTGTGGCCGAGGCCTGCCCTGTGTTCATCCCAGACAGTCAGGCAGACG | TRUE | 6:99380289-99380401 | chr6:99273085-99273735 | 3 | TRUE | 6.39e-05 | 0.8414528 | 6.39e-05 | 0.8414528 | -0.0592549 | 0.0654657 | 0.0001139 | FALSE | -3.8720322 | ||||||||||||||||||||||||
cg24666046 | chr2 | 159823997 |
|
93638314 | TTACTATAAACCRATTTATTAATAACCCCTAACCTTACTATAACAAACCC | II | NA | NA | NA | NA | NA | NA | chr2:159824854-159826401 | N_Shore | TCTGCGTGAGGTTGAAGTTTTTATGAGGAAACTTGTTTAAAACACAATCGTATTTTAAGG[CG]GGTTTGCCACAGCAAGGTTAGGGGTTACCAATAAATCGGTTTACAGCAACTTCTCTATTC | CGGGTTTGCCACAGCAAGGTTAGGGGTTACCAATAAATCGGTTTACAGCA | TANC1;TANC1 | NM_001145909;NM_033394 | TSS1500;TSS1500 | TANC1;TANC1 | ENST00000454300.1;ENST00000263635.6 | TSS1500;TSS1500 | TANC1;TANC1 | ENST00000454300.1;ENST00000263635.6 | TSS1500;TSS1500 | chr2:159823780-159824855 | 3 | 6.41e-05 | 0.8414528 | 6.41e-05 | 0.8414528 | 0.0574785 | 0.0616363 | 0.0001072 | FALSE | 0.3774693 | ||||||||||||||||||
cg04098297 | chr13 | 114312482 |
|
54800969 | 97749381 | AAATAAACATCTAACCTCAATCTAAACATAAAAAATACAAAAAACAAACA | AAATAAACATCTAACCTCAATCTAAACGTAAAAAATACAAAAAACAAACG | I | A | Red | NA | NA | NA | NA | NA | NA | OpenSea | CGCTGGCCACACGTCTTCTTCTCCTGCAGAGCCGCCATCGTCCCTGGCCTGAGATCCTCC[CG]TCTGCTCCCTGCACCCTCTACGCCCAGACTGAGGCCAGATGCCCACCTCTGGGCTTTATA | AGGTGGGCATCTGGCCTCAGTCTGGGCGTAGAGGGTGCAGGGAGCAGACG | ATP4B;ATP4B | NM_000705;NM_000705 | 1stExon;5’UTR | 13:113360106-113360765 | ATP4B;ATP4B | ENST00000335288.3;ENST00000335288.3 | 1stExon;5’UTR | ATP4B;ATP4B | ENST00000335288.3;ENST00000335288.3 | 1stExon;5’UTR | chr13:114305668-114313712 | 6 | TRUE | 6.58e-05 | 0.8414528 | 6.58e-05 | 0.8414528 | 0.0601741 | 0.0678935 | 0.0001181 | FALSE | 2.5127605 | |||||||||||||
cg27310889 | chr19 | 56032497 |
|
90604170 | ACACRAAAATTCACATCACAATATTCAAAATTTAAATCCCTAACTCTACC | II | NA | NA | NA | NA | NA | NA | chr19:56028236-56028824 | S_Shelf | TCCCCGTTTCAGAAAGGACTCTGAGGCTCAGAGAGGGCAGGCCCGTTGCCAAAGGGCACA[CG]GCAGAGCCAGGGACTCAAACCCTGAATACTGTGATGTGAATTTCCGTGCTTTGACCATGG | CGGCAGAGCCAGGGACTCAAACCCTGAATACTGTGATGTGAATTTCCGTG | chr19:56032210-56033487 | 5 | chr19:56030543-56032575 | 6 | 6.83e-05 | 0.8414528 | 6.83e-05 | 0.8414528 | 0.0546608 | 0.0564490 | 0.0000982 | FALSE | 2.7820735 | |||||||||||||||||||||||||
cg15992730 | chr12 | 7848093 |
|
90793277 | CAACRACCACTAAAATCTCCCRAAACTTATACTACRTAAAAAAACTAAAC | II | NA | NA | NA | NA | NA | NA | OpenSea | CTGGATAACACTCTATTTCCTTACCTTGGTCTGGGAGAAAGCGAAGTACATTCCCGCGGA[CG]CCCAGCTCCTTTACGTAGCATAAGTCTCGGGAGACCCCAGTGGTCGCTGCTGCCTCGCGA | CGCCCAGCTCCTTTACGTAGCATAAGTCTCGGGAGACCCCAGTGGTCGCT | GDF3 | NM_020634 | 1stExon | TRUE | GDF3 | ENST00000329913.3 | 1stExon | GDF3 | ENST00000329913.3 | 1stExon | chr12:7847640-7848110 | 3 | TRUE | TRUE | 6.92e-05 | 0.8414528 | 6.92e-05 | 0.8414528 | -0.0412180 | 0.0321790 | 0.0000560 | FALSE | -2.1239170 | ||||||||||||||||
cg17798901 | chr3 | 100236708 |
|
62809170 | ACCTATAAACAAACCCATTTAAACCACAATCTTTCCTAAAAAAATACCAC | II | rs72943302 | 0.047075 | NA | NA | NA | NA | OpenSea | TGAGCATCTTCCTCTTCTTGAAAGGGCTGTGATGCTTGTGATTGGAAAAAGCCGTTCTTA[CG]TGGCATCCTTTTAGGAAAGACTGTGGTTTAAATGGGCTTGTTTACAGGCCTCACTTTTGG | CGTGGCATCCTTTTAGGAAAGACTGTGGTTTAAATGGGCTTGTTTACAGG | TMEM45A | NM_018004 | 5’UTR | TMEM45A;TMEM45A;TMEM45A | ENST00000403410.1;ENST00000462884.1;ENST00000323523.4 | 5’UTR;TSS1500;5’UTR | TMEM45A;TMEM45A;TMEM45A;TMEM45A | ENST00000403410.1;ENST00000449609.1;ENST00000462884.1;ENST00000323523.4 | 5’UTR;5’UTR;TSS1500;5’UTR | 6.97e-05 | 0.8414528 | 6.97e-05 | 0.8414528 | -0.0752652 | 0.1074586 | 0.0001869 | FALSE | 1.8375590 | |||||||||||||||||||||
cg16585214 | chr10 | 121297227 |
|
19603902 | CTTCCTTCTCRTTTATTTATAAACTAATAAAAAAAACCRCCTACCAACCC | II | NA | NA | NA | NA | NA | NA | OpenSea | TAAAATGAATTGAGGAGAGGAGGCCAGGGGTTCCCAGGTGGCCGCCTGCCCACAGTGCCA[CG]GGCTGGTAGGCGGTTTTCTTCACTAGTCCACAAATAAACGAGAAGGAAGACTAGGAAATA | CGGGCTGGTAGGCGGTTTTCTTCACTAGTCCACAAATAAACGAGAAGGAA | RGS10;RGS10 | NM_002925;NM_001005339 | TSS1500;Body | TRUE | RGS10 | ENST00000392865.1 | TSS1500 | RGS10 | ENST00000392865.1 | TSS1500 | chr10:121297185-121297455 | 3 | 7.27e-05 | 0.8414528 | 7.27e-05 | 0.8414528 | 0.0634947 | 0.0771195 | 0.0001341 | FALSE | -3.5749317 | ||||||||||||||||||
cg09423836 | chr4 | 77070069 |
|
7695393 | AAAAATACCTAACACATAATAAACRCAACRTATTTACTACAATTCCATTC | II | NA | NA | NA | NA | NA | NA | chr4:77069066-77069672 | S_Shore | TGAAGGATGAGTAGACGGAGCAGAAGCAGTAAAGAGGATGCACGTTGGAAGAAATGAAAG[CG]AATGGAATTGTAGCAAACACGTTGCGTTTACTATGTGCCAGGCACTCTTCTAGACATTTT | AGAGTGCCTGGCACATAGTAAACGCAACGTGTTTGCTACAATTCCATTCG | NUP54 | NM_017426 | TSS1500 | 4:77068555-77070477 | Promoter_Associated | NUP54;NUP54;NUP54;NUP54 | ENST00000515460.1;ENST00000514987.1;ENST00000342467.6;ENST00000264883.3 | TSS1500;TSS1500;TSS1500;TSS1500 | NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54;NUP54 | ENST00000506098.1;ENST00000510884.1;ENST00000514307.1;ENST00000515460.1;ENST00000514987.1;ENST00000514901.1;ENST00000342467.6;ENST00000502850.1;ENST00000507257.1;ENST00000508583.1;ENST00000504173.1;ENST00000264883.3 | TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500;TSS1500 | chr4:77068812-77070486 | 6 | chr4:77068148-77071219 | 6 | TRUE | TRUE | 8.46e-05 | 0.8414528 | 8.46e-05 | 0.8414528 | -0.1153642 | 0.2624751 | 0.0004565 | FALSE | -3.8828315 | ||||||||||||
cg24046505 | chr10 | 25351544 |
|
45629175 | TCCACCCAAACCTAAATATAACTAATCAAAATACRTACTATTACTAACAC | II | rs16925307 | 0.027132 | NA | NA | NA | NA | OpenSea | TGGTGACACAGCGGGTCTGGCCTCATTCATTACTAATTGTTGACGCTGTGTTCGGAGGAA[CG]TGTTAGTAACAGCACGTACCTTGATCAGCCACACTCAGGCCTGGGTGGAGGAGATAGGTG | CGTGTTAGTAACAGCACGTACCTTGATCAGCCACACTCAGGCCTGGGTGG | ENKUR | NM_001270383 | TSS1500 | 10:25351514-25351702 | Unclassified_Cell_type_specific | chr10:25350980-25351775 | 3 | 8.65e-05 | 0.8414528 | 8.65e-05 | 0.8414528 | 0.0524204 | 0.0544310 | 0.0000947 | FALSE | -1.9843389 | |||||||||||||||||||||||
cg18259003 | chr17 | 72347925 |
|
99703156 | TTAAAATTAACCTCAAACTAAAACTCCAAACAAATACCTAAAATCCAAAC | II | NA | NA | NA | NA | NA | NA | chr17:72347924-72348322 | Island | AGATTCAGCTTTTAGGGTTGGCCTCAGGCTGGAGCTCCAGACAGATGCCTGGAGTCCAGG[CG]CTGGAGGGTCGAGGACGAGCTGGGCAGGCGGATCGGACCCCTCTCCCCGCTGCCAGATGC | CGCCTGGACTCCAGGCATCTGTCTGGAGCTCCAGCCTGAGGCCAACCCTA | KIF19 | NM_153209 | Body | 17:72347925-72348453 | Unclassified_Cell_type_specific | chr17:72347725-72348675 | 3 | TRUE | 8.69e-05 | 0.8414528 | 8.69e-05 | 0.8414528 | 0.0421317 | 0.0351944 | 0.0000612 | FALSE | -2.4552826 | |||||||||||||||||||||
cg20117369 | chr19 | 49061546 |
|
12626896 | ACCTAACTATATCACRCCCTAAATAAAAAACTTAAAAATAAACAAAACCC | II | NA | NA | NA | NA | NA | NA | chr19:49061545-49061769 | Island | GAACCAGGGAAGCCTGACTGTATCACGCCCTGGATGGGGAACTTGGGGATGAGCAAGACC[CG]GAAGGAGCGAGATCCGGGAGGGAGACTCAGCCCAGACAGGAGGCGTTAGCAGGCGCTCTG | CGGGTCTTGCTCATCCCCAAGTTCCCCATCCAGGGCGTGATACAGTCAGG | SULT2B1 | NM_177973 | Body | chr19:49061440-49061935 | 3 | TRUE | 8.78e-05 | 0.8414528 | 8.78e-05 | 0.8414528 | 0.0716314 | 0.1019551 | 0.0001773 | FALSE | -2.1040575 | |||||||||||||||||||||||
cg01208318 | chr14 | 106329652 |
|
77751469 | TCCTAAAAATAACTTAACAACCRTCTACTTACAATTAAACTTCCCAAACC | II | NA | NA | NA | NA | NA | NA | chr14:106333492-106333920 | N_Shelf | TCTGCTGGCCTCCCACGTACCCCACATTCTGGCCTGACCCCTCAGAAGCCAGACCACTGT[CG]GCCTGGGAAGTCCAACTGCAAGCAGACGGCTGCTAAGTCACCCCCAGGAGTCCAAAAACC | CGGCCTGGGAAGTCCAACTGCAAGCAGACGGCTGCTAAGTCACCCCCAGG | IGHJ6 | ENST00000390560.2 | TSS200 | IGHJ6 | ENST00000390560.2 | TSS200 | chr14:106329085-106330430 | 3 | TRUE | 8.82e-05 | 0.8414528 | 8.82e-05 | 0.8414528 | -0.1079063 | 0.2315524 | 0.0004027 | FALSE | -0.0233932 | ||||||||||||||||||||
cg01780326 | chr14 | 75609136 |
|
23725895 | TACCCAAAATAATCACAACATTTAAACTACCAAACTCTACTATAAAATTC | II | NA | NA | NA | NA | NA | NA | OpenSea | TCTTAACTCCCTACCCAGGGTGATCACAGCATTTAAACTGCCAAACTCTGCTGTAAAATT[CG]AGAACAGAATCCCTGACTTCCAAAAGCATTTATGCTCTCTGTACCCAAATCTGACCTATC | ACCCAGGGTGATCACAGCATTTAAACTGCCAAACTCTGCTGTAAAATTCG | TMED10 | NM_006827 | Body | TMED10 | ENST00000557670.1 | 3’UTR | TMED10;TMED10;TMED10 | ENST00000556969.1;ENST00000557670.1;ENST00000555036.1 | 3’UTR;3’UTR;3’UTR | chr14:75609040-75609490 | 3 | 8.82e-05 | 0.8414528 | 8.82e-05 | 0.8414528 | 0.0376005 | 0.0281181 | 0.0000489 | FALSE | 3.3619202 | |||||||||||||||||||
cg00564870 | chr3 | 149018035 |
|
11736209 | TAATAAATCATCCATTTCTCATTTTCCAAAACTAATTTCTACTTACTTAC | II | NA | NA | NA | NA | NA | NA | OpenSea | TCGAACATGCCTCAGTATGCCCCATTCCTTCTAACCTTTAACCAGAATTCTTTCCTAAGG[CG]TAAGTAAGCAGAAACCAGCTCTGGAAAATGAGAAATGGATGACTCATTACTTTGTCAACT | AATGAGTCATCCATTTCTCATTTTCCAGAGCTGGTTTCTGCTTACTTACG | TRUE | RP11-206M11.7 | ENST00000489011.1 | 5’UTR | RP11-206M11.7 | ENST00000489011.1 | 5’UTR | chr3:149017960-149018255 | 3 | 9.05e-05 | 0.8414528 | 9.05e-05 | 0.8414528 | -0.0425898 | 0.0362561 | 0.0000631 | FALSE | 0.3797182 | |||||||||||||||||||||
cg19021188 | chr8 | 143859734 |
|
42736887 | ATTACCTCTCRACAAACAAAACAACTAACTAACCTAATTTACTAAAAACC | II | NA | NA | NA | NA | NA | NA | chr8:143858279-143859411 | S_Shore | CTGAGGTACAGAATACACCATGCCTGCTGGGCAAGCGGGCTCCGGTTCCAGGAAATCCTT[CG]GTCCCCAGCAAACTAGGCCAGTTAGTTGTTCTGTTTGCCGAGAGGTAATGCGCACCCTCC | CGGTCCCCAGCAAACTAGGCCAGTTAGTTGTTCTGTTTGCCGAGAGGTAA | LYNX1;LYNX1;LYNX1;LYNX1 | NM_023946;NM_177476;NM_177457;NM_177477 | TSS200;TSS1500;TSS200;TSS1500 | 8:143856711-143857003 | LYNX1;LYNX1;LYNX1;LYNX1;LYNX1 | ENST00000345173.6;ENST00000522906.1;ENST00000398906.1;ENST00000395192.2;ENST00000335822.5 | TSS1500;TSS1500;TSS1500;TSS1500;TSS200 | LYNX1;LYNX1;LYNX1;LYNX1;LYNX1 | ENST00000345173.6;ENST00000522906.1;ENST00000398906.1;ENST00000395192.2;ENST00000335822.5 | TSS1500;TSS1500;TSS1500;TSS1500;TSS200 | chr8:143859645-143859870 | 3 | TRUE | 9.17e-05 | 0.8414528 | 9.17e-05 | 0.8414528 | 0.0687873 | 0.0948478 | 0.0001650 | FALSE | 0.5775472 | ||||||||||||||||
cg05912299 | chr13 | 100217962 |
|
68746552 | AAAAACAATTACRTTACRCATATTATAAAACCCCAATAACCAAAAACCCC | II | NA | NA | NA | NA | NA | NA | OpenSea | TGGCCCGGGACAGAAACAGTTGCGTTGCGCATGTTGTGAGACCCCAGTGACCAGGAACCC[CG]GGGAGAAGTCTCCTCCATGAGTCATGATAGCCGCAGGCAGCTGCGAAGGAGGGGGACGTG | GAAACAGTTGCGTTGCGCATGTTGTGAGACCCCAGTGACCAGGAACCCCG | 13:99015793-99016308 | chr13:100217645-100218095 | 3 | TRUE | 9.38e-05 | 0.8414528 | 9.38e-05 | 0.8414528 | 0.0898071 | 0.1624123 | 0.0002825 | FALSE | 1.2531977 | ||||||||||||||||||||||||||
cg21189356 | chr19 | 30864709 |
|
74667137 | CRACAAATCTACATACAAATAACAACACATTATTAACTTAATCAAATACC | II | NA | NA | NA | NA | NA | NA | chr19:30865683-30866490 | N_Shore | AGAGAAAGCCCCGGCAAATCTGCATGCAAATAGCAGCACATTGTTAACTTAATCAGATGC[CG]AGGAAAGATGTGCCAGAATTCCAGGATGATCTACACACACGGAGGAGGAGCGGCCTGAGC | CGGCATCTGATTAAGTTAACAATGTGCTGCTATTTGCATGCAGATTTGCC | ZNF536 | NM_014717 | 5’UTR | chr19:30864626-30864626 | TRUE | ZNF536 | ENST00000355537.1 | 5’UTR | ZNF536 | ENST00000355537.1 | 5’UTR | chr19:30864465-30864855 | 3 | 9.60e-05 | 0.8414528 | 9.60e-05 | 0.8414528 | 0.0791596 | 0.1267643 | 0.0002205 | FALSE | -2.6970873 | ||||||||||||||||
cg02914097 | chr3 | 31118948 |
|
91641400 | CTCATCCTATCRAACTACCCACTACCAAACATAATATCCTAAAATAATAC | II | rs294286 | 0.132571 | NA | NA | NA | NA | OpenSea | TCAGCTCTCTCCTCATCCTGTCGGACTGCCCACTGCCAAACATGGTATCCTAAAATAGTG[CG]TCCAACATTTCACTATCCTGCTCAAACATCTGTGATTGCTCTCTAGTCTTTGTTGGCCTA | CGCACTATTTTAGGATACCATGTTTGGCAGTGGGCAGTCCGACAGGATGA | TRUE | chr3:31118625-31119015 | 3 | TRUE | 9.60e-05 | 0.8414528 | 9.60e-05 | 0.8414528 | 0.0847018 | 0.1451549 | 0.0002525 | FALSE | 0.5130093 | ||||||||||||||||||||||||||
cg13950578 | chr1 | 152467744 |
|
96789356 | 9755123 | ATTTACTACCATTTCTAAACTCCTACCTCTAATTCTATCACCCCCTACCA | ATTTACTACCATTTCTAAACTCCTACCTCTAATTCTATCGCCCCCTACCG | I | T | Red | rs12123907 | 0.179616 | NA | NA | NA | NA | OpenSea | TTAGCACTTCTGTGCACTGAGGGAGAGGTGGAGCATCCAATTTGCCTTACACACATGGGA[CG]GTAGGGGGCGACAGAATCAGAGGCAGGAGTTTAGAAATGGCAGTAAATCCTGGAGATTTT | CGGTAGGGGGCGACAGAATCAGAGGCAGGAGTTTAGAAATGGCAGTAAAT | 1:152467571-152467870 | Unclassified_Cell_type_specific | chr1:152467620-152467975 | 3 | 9.80e-05 | 0.8414528 | 9.80e-05 | 0.8414528 | 0.1521842 | 0.4705387 | 0.0008184 | FALSE | -1.9785758 | ||||||||||||||||||||||
cg03998606 | chr3 | 32856104 |
|
6706315 | RCTATCAAATTTAATTCCTCTTTCTAAAATTACAACCTTCCTACTCCTCC | II | NA | NA | NA | NA | NA | NA | chr3:32858194-32860506 | N_Shelf | ACCCACAAACCGCTGTCAAGTTTGATTCCTCTTTCTGGAGTTACAGCCTTCCTGCTCCTC[CG]TGATGGCTCAGCTGAAGTACCACCTCTGCTACTCGTTTTCAAAATACATGTTGAGTCTTA | CGGAGGAGCAGGAAGGCTGTAACTCCAGAAAGAGGAATCAAACTTGACAG | RDMR | chr3:32855960-32856455 | 3 | TRUE | 9.84e-05 | 0.8414528 | 9.84e-05 | 0.8414528 | -0.0548076 | 0.0610700 | 0.0001062 | FALSE | -2.7499845 |
#Madj <- getAdjusted(mValsSw, rfit2)
# cant get this part to work
mvals <- mvals[which(rownames(mvals) %in% colnames(rfit2$Y) ),]
#Madj <- missMethyl::getAdj(Y = mValsSq, fit = rfit2)
plotMDS(mvals, labels=targets_gen$Sample_Name,
col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
main="Unadjusted")
#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)
# i had to comment the below stuff out because Madj couldn't be generated.
#plotMDS(Madj, labels=targets_gen$Sample_Name,
# col=as.integer(factor(targets_gen$Social_interaction_on_ADOS)),
# main="Adjusted: RUV-inverse")
#legend(legend=c("1","0"),pch=7,cex=0.7,col=1:2)
#Trying different k values
# mark fixed a major bug here
rfit_k1 <- RUVfit(Y=mvals, X=design_tw[,"ADOS"], ctl=ctl,
method = "ruv4", k=1) # Stage 2 analysis
rfit_k1 <- RUVadj(Y=mvals, fit=rfit_k1)
rfit_k2 <- RUVfit(Y=mvals, X=design_tw[,"ADOS"], ctl=ctl,
method = "ruv4", k=2) # Stage 2 analysis
rfit_k2 <- RUVadj(Y=mvals, fit=rfit_k2)
rfit_k8 <- RUVfit(Y=mvals, X=design_tw[,"ADOS"], ctl=ctl,
method = "ruv4", k=8) # Stage 2 analysis
rfit_k8 <- RUVadj(Y=mvals, fit=rfit_k8)
#get adjusted values - doesn't work as at Apr 2022.
#Madj1 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k1)
#Madj2 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k2)
#Madj3 <- missMethyl::getAdj(Y=mValsSq, fit=rfit_k8)
#Visualise plots
par(mfrow=c(1,1))
plotMDS(mvals, labels=targets_gen$Sample_Name,
col=as.integer(factor(targets_gen$Diagnosis)),
dim=c(14,15),
main="Adjusted")
#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-inverse")
#legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj1, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-4, k=1")
#legend("bottomleft",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj2, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-4, k=2")
#legend("bottomright",legend=c("Cancer","Normal"),pch=16,cex=1,col=1:2)
#plotMDS(Madj3, labels=targets_gen$Sample_Name, col=as.integer(factor(targets_gen$Diagnosis..L.0..M.1..H.2.)),dim=c(14,15),
# main="Adjusted: RUV-4, k=8")
#legend
res <- read.csv("ASD_guthrie_topruv80%_dmps_onADOS_withintw_Nov29.csv", header = TRUE)
TOPPROBES <- head(res$Name,9)
effect_plot <- function(PROBE) {
GENE <- res[which(res$Name==PROBE),"UCSC_RefGene_Name"]
GENE <- paste0(unique(unlist(strsplit(GENE,";"))),collapse=";")
HEADER=paste(GENE,PROBE)
BETAS <- ilogit2(mDat[rownames(mDat) == PROBE,])
plot(BETAS, ADOS, main = HEADER, ylab = "ADOS", xlab = "Beta Value")
regl5 <- lm(ADOS ~ BETAS)
SLOPE=signif(regl5$coefficients[2],3)
abline(regl5)
mycor <- cor.test(ADOS, BETAS, method = "pearson")
mycor_stat <- signif(mycor$estimate,3)
mycor_p <- signif(mycor$p.value,3)
SUBHEAD <- paste("R=",mycor_stat," p=",mycor_p," slope=",SLOPE, sep="")
mtext(SUBHEAD,cex=0.6)
}
par(mfrow=c(3,3))
sapply(TOPPROBES,effect_plot)
## $cg07926098
## NULL
##
## $cg23088672
## NULL
##
## $cg17331950
## NULL
##
## $cg10593232
## NULL
##
## $cg19893067
## NULL
##
## $cg21431091
## NULL
##
## $cg00546248
## NULL
##
## $cg15310518
## NULL
##
## $cg25295510
## NULL
pdf("effect_guthrie.pdf")
par(mfrow=c(3,3))
sapply(TOPPROBES,effect_plot)
## $cg07926098
## NULL
##
## $cg23088672
## NULL
##
## $cg17331950
## NULL
##
## $cg10593232
## NULL
##
## $cg19893067
## NULL
##
## $cg21431091
## NULL
##
## $cg00546248
## NULL
##
## $cg15310518
## NULL
##
## $cg25295510
## NULL
dev.off()
## png
## 2
par(mfrow=c(1,1))
Gene Ontology analysis (gometh): top 1000 probes
res <- read.csv("ASD_guthrie_topruv80%_dmps_onADOS_withintw_Nov29.csv", header = TRUE)
sigCpGs_1k = res$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)
## [1] 790658
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
## Description N DE P.DE FDR
## path:hsa04060 Cytokine-cytokine receptor interaction 268 9 0.01911846 1
## path:hsa04310 Wnt signaling pathway 163 10 0.02550854 1
## path:hsa05202 Transcriptional misregulation in cancer 182 10 0.02610983 1
## path:hsa04666 Fc gamma R-mediated phagocytosis 94 6 0.03780776 1
## path:hsa05231 Choline metabolism in cancer 94 6 0.04693433 1
## path:hsa04971 Gastric acid secretion 75 5 0.06390185 1
## path:hsa04370 VEGF signaling pathway 59 4 0.07533259 1
## path:hsa04664 Fc epsilon RI signaling pathway 66 4 0.08116771 1
## path:hsa04514 Cell adhesion molecules 141 7 0.08361298 1
## path:hsa04730 Long-term depression 58 4 0.08668244 1
## path:hsa04512 ECM-receptor interaction 86 5 0.10284213 1
## path:hsa04510 Focal adhesion 193 9 0.11850936 1
## path:hsa04725 Cholinergic synapse 111 6 0.12042518 1
## path:hsa04726 Serotonergic synapse 108 5 0.12447230 1
## path:hsa04724 Glutamatergic synapse 113 6 0.13016759 1
## path:hsa04921 Oxytocin signaling pathway 151 7 0.14568311 1
## path:hsa04972 Pancreatic secretion 93 4 0.15621024 1
## path:hsa05220 Chronic myeloid leukemia 74 4 0.15907375 1
## path:hsa03320 PPAR signaling pathway 72 3 0.16715691 1
## path:hsa04151 PI3K-Akt signaling pathway 331 12 0.17174184 1
write.csv(gometh_kegg, file = "GOmeth_kegg.csv", row.names = TRUE)
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
## ONTOLOGY
## GO:0071425 BP
## GO:0060100 BP
## GO:1905155 BP
## GO:0033299 BP
## GO:0060099 BP
## GO:1905153 BP
## GO:0043495 MF
## GO:0035025 BP
## GO:0006869 BP
## GO:0048406 MF
## GO:0009582 BP
## GO:0001738 BP
## GO:0072089 BP
## GO:0090556 MF
## GO:0002687 BP
## GO:0031638 BP
## GO:0014701 CC
## GO:0010876 BP
## GO:0040037 BP
## GO:0009581 BP
## TERM
## GO:0071425 hematopoietic stem cell proliferation
## GO:0060100 positive regulation of phagocytosis, engulfment
## GO:1905155 positive regulation of membrane invagination
## GO:0033299 secretion of lysosomal enzymes
## GO:0060099 regulation of phagocytosis, engulfment
## GO:1905153 regulation of membrane invagination
## GO:0043495 protein-membrane adaptor activity
## GO:0035025 positive regulation of Rho protein signal transduction
## GO:0006869 lipid transport
## GO:0048406 nerve growth factor binding
## GO:0009582 detection of abiotic stimulus
## GO:0001738 morphogenesis of a polarized epithelium
## GO:0072089 stem cell proliferation
## GO:0090556 phosphatidylserine floppase activity
## GO:0002687 positive regulation of leukocyte migration
## GO:0031638 zymogen activation
## GO:0014701 junctional sarcoplasmic reticulum membrane
## GO:0010876 lipid localization
## GO:0040037 negative regulation of fibroblast growth factor receptor signaling pathway
## GO:0009581 detection of external stimulus
## N DE P.DE FDR
## GO:0071425 32 7 1.709236e-05 0.03885093
## GO:0060100 13 4 2.617135e-04 0.59461297
## GO:1905155 13 4 2.617135e-04 0.59461297
## GO:0033299 7 3 2.649737e-04 0.60149030
## GO:0060099 15 4 3.475512e-04 0.78859363
## GO:1905153 15 4 3.475512e-04 0.78859363
## GO:0043495 26 5 4.561624e-04 1.00000000
## GO:0035025 29 5 5.391505e-04 1.00000000
## GO:0006869 425 20 7.331087e-04 1.00000000
## GO:0048406 6 3 8.269014e-04 1.00000000
## GO:0009582 130 10 1.152856e-03 1.00000000
## GO:0001738 90 9 1.575325e-03 1.00000000
## GO:0072089 118 10 1.613447e-03 1.00000000
## GO:0090556 8 3 1.767704e-03 1.00000000
## GO:0002687 139 9 1.854952e-03 1.00000000
## GO:0031638 65 6 2.316532e-03 1.00000000
## GO:0014701 9 3 2.384797e-03 1.00000000
## GO:0010876 474 20 2.878572e-03 1.00000000
## GO:0040037 20 4 2.953834e-03 1.00000000
## GO:0009581 127 9 3.566109e-03 1.00000000
write.csv(gometh_go, file = "GOmeth_go.csv", row.names = TRUE)
Now specifically analyse hypermethylated probes
res2 <- subset(res,b_X1>0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
## Description N DE
## path:hsa04974 Protein digestion and absorption 92 7
## path:hsa04512 ECM-receptor interaction 86 7
## path:hsa04971 Gastric acid secretion 75 6
## path:hsa04724 Glutamatergic synapse 113 8
## path:hsa04730 Long-term depression 58 5
## path:hsa04510 Focal adhesion 193 11
## path:hsa04666 Fc gamma R-mediated phagocytosis 94 6
## path:hsa04928 Parathyroid hormone synthesis, secretion and action 105 7
## path:hsa04926 Relaxin signaling pathway 124 7
## path:hsa04921 Oxytocin signaling pathway 151 8
## path:hsa05146 Amoebiasis 98 5
## path:hsa04727 GABAergic synapse 84 5
## path:hsa04927 Cortisol synthesis and secretion 63 4
## path:hsa04270 Vascular smooth muscle contraction 132 6
## path:hsa04723 Retrograde endocannabinoid signaling 133 6
## path:hsa04725 Cholinergic synapse 111 6
## path:hsa04726 Serotonergic synapse 108 5
## path:hsa04072 Phospholipase D signaling pathway 144 7
## path:hsa05032 Morphine addiction 88 5
## path:hsa04972 Pancreatic secretion 93 4
## P.DE FDR
## path:hsa04974 0.008922657 0.9011883
## path:hsa04512 0.013539482 1.0000000
## path:hsa04971 0.022686667 1.0000000
## path:hsa04724 0.024517958 1.0000000
## path:hsa04730 0.027230008 1.0000000
## path:hsa04510 0.032093937 1.0000000
## path:hsa04666 0.039976997 1.0000000
## path:hsa04928 0.046270992 1.0000000
## path:hsa04926 0.053328462 1.0000000
## path:hsa04921 0.076833051 1.0000000
## path:hsa05146 0.082933928 1.0000000
## path:hsa04727 0.109164528 1.0000000
## path:hsa04927 0.118342411 1.0000000
## path:hsa04270 0.119660757 1.0000000
## path:hsa04723 0.127677303 1.0000000
## path:hsa04725 0.130946184 1.0000000
## path:hsa04726 0.132205622 1.0000000
## path:hsa04072 0.145169043 1.0000000
## path:hsa05032 0.155290477 1.0000000
## path:hsa04972 0.162843677 1.0000000
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
## ONTOLOGY
## GO:0090556 MF
## GO:0045649 BP
## GO:0048251 BP
## GO:0051156 BP
## GO:0060100 BP
## GO:1905155 BP
## GO:0009582 BP
## GO:0006869 BP
## GO:0021697 BP
## GO:0035025 BP
## GO:0060099 BP
## GO:1905153 BP
## GO:0031644 BP
## GO:0051020 MF
## GO:0005604 CC
## GO:0030020 MF
## GO:0006098 BP
## GO:0036158 BP
## GO:0038065 BP
## GO:0002761 BP
## TERM
## GO:0090556 phosphatidylserine floppase activity
## GO:0045649 regulation of macrophage differentiation
## GO:0048251 elastic fiber assembly
## GO:0051156 glucose 6-phosphate metabolic process
## GO:0060100 positive regulation of phagocytosis, engulfment
## GO:1905155 positive regulation of membrane invagination
## GO:0009582 detection of abiotic stimulus
## GO:0006869 lipid transport
## GO:0021697 cerebellar cortex formation
## GO:0035025 positive regulation of Rho protein signal transduction
## GO:0060099 regulation of phagocytosis, engulfment
## GO:1905153 regulation of membrane invagination
## GO:0031644 regulation of nervous system process
## GO:0051020 GTPase binding
## GO:0005604 basement membrane
## GO:0030020 extracellular matrix structural constituent conferring tensile strength
## GO:0006098 pentose-phosphate shunt
## GO:0036158 outer dynein arm assembly
## GO:0038065 collagen-activated signaling pathway
## GO:0002761 regulation of myeloid leukocyte differentiation
## N DE P.DE FDR
## GO:0090556 8 3 0.001900589 1
## GO:0045649 24 4 0.002905982 1
## GO:0048251 10 3 0.003495647 1
## GO:0051156 25 4 0.003957522 1
## GO:0060100 13 3 0.004192239 1
## GO:1905155 13 3 0.004192239 1
## GO:0009582 130 9 0.004538628 1
## GO:0006869 425 18 0.004605003 1
## GO:0021697 23 4 0.004791389 1
## GO:0035025 29 4 0.004942120 1
## GO:0060099 15 3 0.005096053 1
## GO:1905153 15 3 0.005096053 1
## GO:0031644 140 10 0.007215012 1
## GO:0051020 285 15 0.007478073 1
## GO:0005604 94 8 0.007796332 1
## GO:0030020 39 5 0.007815025 1
## GO:0006098 16 3 0.008162224 1
## GO:0036158 22 3 0.008199726 1
## GO:0038065 15 3 0.008288448 1
## GO:0002761 120 8 0.008336041 1
Now specifically analyse hypomethylated probes
res2 <- subset(res,b_X1<0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
## Description N DE P.DE
## path:hsa04964 Proximal tubule bicarbonate reclamation 21 3 0.01123802
## path:hsa00513 Various types of N-glycan biosynthesis 37 4 0.01483267
## path:hsa00561 Glycerolipid metabolism 58 4 0.07017901
## path:hsa04710 Circadian rhythm 33 3 0.07054507
## path:hsa04060 Cytokine-cytokine receptor interaction 268 8 0.10554424
## path:hsa00310 Lysine degradation 60 4 0.10816862
## path:hsa04920 Adipocytokine signaling pathway 66 4 0.11546899
## path:hsa04330 Notch signaling pathway 57 4 0.11923254
## path:hsa04979 Cholesterol metabolism 50 3 0.11992139
## path:hsa00600 Sphingolipid metabolism 51 3 0.12370171
## path:hsa04213 Longevity regulating pathway - multiple species 61 4 0.13174057
## path:hsa04152 AMPK signaling pathway 118 6 0.17803531
## path:hsa04072 Phospholipase D signaling pathway 144 7 0.20189169
## path:hsa05100 Bacterial invasion of epithelial cells 76 4 0.20746409
## path:hsa04520 Adherens junction 70 4 0.22399185
## path:hsa04630 JAK-STAT signaling pathway 145 5 0.23700761
## path:hsa04923 Regulation of lipolysis in adipocytes 56 3 0.24274384
## path:hsa04210 Apoptosis 132 5 0.25409543
## path:hsa05416 Viral myocarditis 55 3 0.26621870
## path:hsa04922 Glucagon signaling pathway 99 4 0.26753173
## FDR
## path:hsa04964 1
## path:hsa00513 1
## path:hsa00561 1
## path:hsa04710 1
## path:hsa04060 1
## path:hsa00310 1
## path:hsa04920 1
## path:hsa04330 1
## path:hsa04979 1
## path:hsa00600 1
## path:hsa04213 1
## path:hsa04152 1
## path:hsa04072 1
## path:hsa05100 1
## path:hsa04520 1
## path:hsa04630 1
## path:hsa04923 1
## path:hsa04210 1
## path:hsa05416 1
## path:hsa04922 1
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
## ONTOLOGY TERM
## GO:0031588 CC nucleotide-activated protein kinase complex
## GO:2000401 BP regulation of lymphocyte migration
## GO:0031045 CC dense core granule
## GO:0003402 BP planar cell polarity pathway involved in axis elongation
## GO:0071425 BP hematopoietic stem cell proliferation
## GO:0030198 BP extracellular matrix organization
## GO:0043062 BP extracellular structure organization
## GO:0045229 BP external encapsulating structure organization
## GO:0045019 BP negative regulation of nitric oxide biosynthetic process
## GO:1904406 BP negative regulation of nitric oxide metabolic process
## GO:0010171 BP body morphogenesis
## GO:0071675 BP regulation of mononuclear cell migration
## GO:0043495 MF protein-membrane adaptor activity
## GO:0005667 CC transcription regulator complex
## GO:0003323 BP type B pancreatic cell development
## GO:0043124 BP negative regulation of I-kappaB kinase/NF-kappaB signaling
## GO:0005938 CC cell cortex
## GO:1990907 CC beta-catenin-TCF complex
## GO:0045780 BP positive regulation of bone resorption
## GO:0046903 BP secretion
## N DE P.DE FDR
## GO:0031588 9 3 0.0007080092 1
## GO:2000401 64 7 0.0007481308 1
## GO:0031045 26 5 0.0009954731 1
## GO:0003402 5 3 0.0012034415 1
## GO:0071425 32 5 0.0021399450 1
## GO:0030198 305 19 0.0027687159 1
## GO:0043062 306 19 0.0029324789 1
## GO:0045229 308 19 0.0029415922 1
## GO:0045019 15 3 0.0040392952 1
## GO:1904406 15 3 0.0040392952 1
## GO:0010171 45 6 0.0046942824 1
## GO:0071675 115 8 0.0051975776 1
## GO:0043495 26 4 0.0063730389 1
## GO:0005667 480 24 0.0072394321 1
## GO:0003323 25 4 0.0077556655 1
## GO:0043124 49 5 0.0079433702 1
## GO:0005938 302 18 0.0100909182 1
## GO:1990907 13 3 0.0105504803 1
## GO:0045780 18 3 0.0112589520 1
## GO:0046903 905 36 0.0112724213 1
Gene Ontology analysis (gometh): top 1000 probes (limma data).
res <- read.csv("ASD_guthrie_top_dmps_onADOS_withintw_Nov27_limma.csv", header = TRUE)
sigCpGs_1k = res$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
all = res$Name
length(all)
## [1] 790658
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
## Description N DE
## path:hsa04974 Protein digestion and absorption 92 6
## path:hsa05222 Small cell lung cancer 88 6
## path:hsa05217 Basal cell carcinoma 63 5
## path:hsa05165 Human papillomavirus infection 314 15
## path:hsa04928 Parathyroid hormone synthesis, secretion and action 105 7
## path:hsa04930 Type II diabetes mellitus 45 4
## path:hsa05216 Thyroid cancer 37 3
## path:hsa04658 Th1 and Th2 cell differentiation 88 5
## path:hsa04976 Bile secretion 86 4
## path:hsa04512 ECM-receptor interaction 86 5
## path:hsa04927 Cortisol synthesis and secretion 63 4
## path:hsa01240 Biosynthesis of cofactors 147 5
## path:hsa04934 Cushing syndrome 154 7
## path:hsa04514 Cell adhesion molecules 141 6
## path:hsa04120 Ubiquitin mediated proteolysis 134 5
## path:hsa05200 Pathways in cancer 507 18
## path:hsa05412 Arrhythmogenic right ventricular cardiomyopathy 74 4
## path:hsa05146 Amoebiasis 98 4
## path:hsa04922 Glucagon signaling pathway 99 4
## path:hsa04350 TGF-beta signaling pathway 93 4
## P.DE FDR
## path:hsa04974 0.04573500 1
## path:hsa05222 0.05319280 1
## path:hsa05217 0.06104992 1
## path:hsa05165 0.06332322 1
## path:hsa04928 0.06457472 1
## path:hsa04930 0.08183517 1
## path:hsa05216 0.11609193 1
## path:hsa04658 0.12312133 1
## path:hsa04976 0.12593024 1
## path:hsa04512 0.14078121 1
## path:hsa04927 0.15135620 1
## path:hsa01240 0.16372755 1
## path:hsa04934 0.22948376 1
## path:hsa04514 0.23987134 1
## path:hsa04120 0.24359821 1
## path:hsa05200 0.24739241 1
## path:hsa05412 0.25304819 1
## path:hsa05146 0.25386815 1
## path:hsa04922 0.25984483 1
## path:hsa04350 0.26267162 1
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
## ONTOLOGY TERM
## GO:0048625 BP myoblast fate commitment
## GO:0033157 BP regulation of intracellular protein transport
## GO:0031526 CC brush border membrane
## GO:0033077 BP T cell differentiation in thymus
## GO:0033089 BP positive regulation of T cell differentiation in thymus
## GO:0006517 BP protein deglycosylation
## GO:0090160 BP Golgi to lysosome transport
## GO:0002076 BP osteoblast development
## GO:0005903 CC brush border
## GO:0090316 BP positive regulation of intracellular protein transport
## GO:0099060 CC integral component of postsynaptic specialization membrane
## GO:0045061 BP thymic T cell selection
## GO:0098948 CC intrinsic component of postsynaptic specialization membrane
## GO:0045059 BP positive thymic T cell selection
## GO:0071218 BP cellular response to misfolded protein
## GO:0019827 BP stem cell population maintenance
## GO:0045582 BP positive regulation of T cell differentiation
## GO:0038065 BP collagen-activated signaling pathway
## GO:0098727 BP maintenance of cell number
## GO:0001518 CC voltage-gated sodium channel complex
## N DE P.DE FDR
## GO:0048625 6 3 0.001077301 1
## GO:0033157 215 14 0.001243492 1
## GO:0031526 54 6 0.001860036 1
## GO:0033077 83 8 0.002670463 1
## GO:0033089 10 3 0.002673247 1
## GO:0006517 25 4 0.003285097 1
## GO:0090160 11 3 0.003434713 1
## GO:0002076 18 4 0.004604004 1
## GO:0005903 95 8 0.005433833 1
## GO:0090316 149 10 0.005737002 1
## GO:0099060 72 7 0.006448335 1
## GO:0045061 22 4 0.006920239 1
## GO:0098948 75 7 0.008026663 1
## GO:0045059 14 3 0.009117215 1
## GO:0071218 21 3 0.010241166 1
## GO:0019827 170 11 0.010306227 1
## GO:0045582 113 8 0.011427071 1
## GO:0038065 15 3 0.011452777 1
## GO:0098727 174 11 0.012066697 1
## GO:0001518 17 3 0.013120728 1
Now specifically analyse hypermethylated probes
res2 <- subset(res,logFC>0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
## Description N DE
## path:hsa05224 Breast cancer 145 10
## path:hsa05225 Hepatocellular carcinoma 166 10
## path:hsa05216 Thyroid cancer 37 4
## path:hsa04150 mTOR signaling pathway 152 9
## path:hsa04720 Long-term potentiation 64 5
## path:hsa05217 Basal cell carcinoma 63 5
## path:hsa04217 Necroptosis 139 6
## path:hsa04914 Progesterone-mediated oocyte maturation 88 5
## path:hsa04934 Cushing syndrome 154 8
## path:hsa04928 Parathyroid hormone synthesis, secretion and action 105 6
## path:hsa04657 IL-17 signaling pathway 88 4
## path:hsa00562 Inositol phosphate metabolism 69 4
## path:hsa04714 Thermogenesis 212 8
## path:hsa04115 p53 signaling pathway 73 4
## path:hsa05322 Systemic lupus erythematosus 114 4
## path:hsa04915 Estrogen signaling pathway 137 6
## path:hsa05226 Gastric cancer 147 7
## path:hsa05200 Pathways in cancer 507 18
## path:hsa01522 Endocrine resistance 95 5
## path:hsa04916 Melanogenesis 101 5
## P.DE FDR
## path:hsa05224 0.01171602 1
## path:hsa05225 0.01750885 1
## path:hsa05216 0.02161643 1
## path:hsa04150 0.02318340 1
## path:hsa04720 0.03297370 1
## path:hsa05217 0.04180630 1
## path:hsa04217 0.07427759 1
## path:hsa04914 0.07885532 1
## path:hsa04934 0.07888452 1
## path:hsa04928 0.09519971 1
## path:hsa04657 0.10295303 1
## path:hsa00562 0.10348458 1
## path:hsa04714 0.11537434 1
## path:hsa04115 0.12331543 1
## path:hsa05322 0.13689655 1
## path:hsa04915 0.14132892 1
## path:hsa05226 0.14137343 1
## path:hsa05200 0.14962804 1
## path:hsa01522 0.15062883 1
## path:hsa04916 0.16048469 1
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
## ONTOLOGY
## GO:0003160 BP
## GO:0030274 MF
## GO:0002221 BP
## GO:0032486 BP
## GO:0019827 BP
## GO:0072538 BP
## GO:0098727 BP
## GO:0048671 BP
## GO:0003157 BP
## GO:0043620 BP
## GO:1901533 BP
## GO:0042813 MF
## GO:0060674 BP
## GO:0017147 MF
## GO:0048679 BP
## GO:0072148 BP
## GO:0006730 BP
## GO:0035019 BP
## GO:0061101 BP
## GO:0051303 BP
## TERM
## GO:0003160 endocardium morphogenesis
## GO:0030274 LIM domain binding
## GO:0002221 pattern recognition receptor signaling pathway
## GO:0032486 Rap protein signal transduction
## GO:0019827 stem cell population maintenance
## GO:0072538 T-helper 17 type immune response
## GO:0098727 maintenance of cell number
## GO:0048671 negative regulation of collateral sprouting
## GO:0003157 endocardium development
## GO:0043620 regulation of DNA-templated transcription in response to stress
## GO:1901533 negative regulation of hematopoietic progenitor cell differentiation
## GO:0042813 Wnt receptor activity
## GO:0060674 placenta blood vessel development
## GO:0017147 Wnt-protein binding
## GO:0048679 regulation of axon regeneration
## GO:0072148 epithelial cell fate commitment
## GO:0006730 one-carbon metabolic process
## GO:0035019 somatic stem cell population maintenance
## GO:0061101 neuroendocrine cell differentiation
## GO:0051303 establishment of chromosome localization
## N DE P.DE FDR
## GO:0003160 6 3 0.0006138797 1
## GO:0030274 7 3 0.0006332011 1
## GO:0002221 174 11 0.0018780326 1
## GO:0032486 12 3 0.0020844027 1
## GO:0019827 170 12 0.0022847540 1
## GO:0072538 42 5 0.0026830987 1
## GO:0098727 174 12 0.0027581316 1
## GO:0048671 9 3 0.0028018163 1
## GO:0003157 12 3 0.0045685657 1
## GO:0043620 43 5 0.0056730822 1
## GO:1901533 14 3 0.0057257600 1
## GO:0042813 15 3 0.0069342464 1
## GO:0060674 30 4 0.0071786732 1
## GO:0017147 28 4 0.0078984898 1
## GO:0048679 28 4 0.0084541436 1
## GO:0072148 14 3 0.0091995210 1
## GO:0006730 37 4 0.0099768964 1
## GO:0035019 63 6 0.0102186789 1
## GO:0061101 16 3 0.0110564450 1
## GO:0051303 81 6 0.0114801430 1
Now specifically analyse hypomethylated probes
res2 <- subset(res,logFC<0)
sigCpGs_1k = res2$Name[1:1000]
sigCpGs_1k = as.character(sigCpGs_1k)
# kegg
gometh_kegg <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "KEGG", prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_kegg <- subset(gometh_kegg,DE>2)
gometh_kegg$FDR <- p.adjust(gometh_kegg$P.DE)
gometh_kegg <- gometh_kegg[order(gometh_kegg$P.DE),]
head( gometh_kegg , 20)
## Description N DE
## path:hsa04974 Protein digestion and absorption 92 8
## path:hsa04975 Fat digestion and absorption 43 3
## path:hsa04120 Ubiquitin mediated proteolysis 134 6
## path:hsa04350 TGF-beta signaling pathway 93 5
## path:hsa04976 Bile secretion 86 4
## path:hsa04940 Type I diabetes mellitus 41 3
## path:hsa04115 p53 signaling pathway 73 4
## path:hsa04512 ECM-receptor interaction 86 5
## path:hsa01240 Biosynthesis of cofactors 147 5
## path:hsa04933 AGE-RAGE signaling pathway in diabetic complications 95 5
## path:hsa05165 Human papillomavirus infection 314 13
## path:hsa04936 Alcoholic liver disease 129 5
## path:hsa04921 Oxytocin signaling pathway 151 7
## path:hsa03320 PPAR signaling pathway 72 3
## path:hsa05416 Viral myocarditis 55 3
## path:hsa04068 FoxO signaling pathway 124 5
## path:hsa04920 Adipocytokine signaling pathway 66 3
## path:hsa04390 Hippo signaling pathway 154 7
## path:hsa04060 Cytokine-cytokine receptor interaction 268 6
## path:hsa05222 Small cell lung cancer 88 4
## P.DE FDR
## path:hsa04974 0.004150998 0.3860428
## path:hsa04975 0.047276633 1.0000000
## path:hsa04120 0.102121660 1.0000000
## path:hsa04350 0.109887797 1.0000000
## path:hsa04976 0.117372293 1.0000000
## path:hsa04940 0.132617570 1.0000000
## path:hsa04115 0.133096130 1.0000000
## path:hsa04512 0.136536499 1.0000000
## path:hsa01240 0.138126612 1.0000000
## path:hsa04933 0.139382208 1.0000000
## path:hsa05165 0.162594089 1.0000000
## path:hsa04936 0.169529707 1.0000000
## path:hsa04921 0.195740721 1.0000000
## path:hsa03320 0.197421285 1.0000000
## path:hsa05416 0.255512306 1.0000000
## path:hsa04068 0.256091354 1.0000000
## path:hsa04920 0.262004101 1.0000000
## path:hsa04390 0.262889360 1.0000000
## path:hsa04060 0.263291974 1.0000000
## path:hsa05222 0.272352535 1.0000000
# GO terms
gometh_go <- gometh(sig.cpg = sigCpGs_1k, all.cpg = all, collection = "GO" , prior.prob=TRUE)
## All input CpGs are used for testing.
gometh_go <- subset(gometh_go,DE>2)
gometh_go$FDR <- p.adjust(gometh_go$P.DE)
gometh_go <- gometh_go[order(gometh_go$P.DE),]
head( gometh_go , 20)
## ONTOLOGY
## GO:0060028 BP
## GO:0001518 CC
## GO:0007566 BP
## GO:0060371 BP
## GO:0033077 BP
## GO:0033089 BP
## GO:0021535 BP
## GO:0031210 MF
## GO:1901163 BP
## GO:0050321 MF
## GO:0061450 BP
## GO:0005248 MF
## GO:0086010 BP
## GO:0031526 CC
## GO:0044790 BP
## GO:0086012 BP
## GO:0050997 MF
## GO:1901981 MF
## GO:0034706 CC
## GO:1903533 BP
## TERM
## GO:0060028 convergent extension involved in axis elongation
## GO:0001518 voltage-gated sodium channel complex
## GO:0007566 embryo implantation
## GO:0060371 regulation of atrial cardiac muscle cell membrane depolarization
## GO:0033077 T cell differentiation in thymus
## GO:0033089 positive regulation of T cell differentiation in thymus
## GO:0021535 cell migration in hindbrain
## GO:0031210 phosphatidylcholine binding
## GO:1901163 regulation of trophoblast cell migration
## GO:0050321 tau-protein kinase activity
## GO:0061450 trophoblast cell migration
## GO:0005248 voltage-gated sodium channel activity
## GO:0086010 membrane depolarization during action potential
## GO:0031526 brush border membrane
## GO:0044790 suppression of viral release by host
## GO:0086012 membrane depolarization during cardiac muscle cell action potential
## GO:0050997 quaternary ammonium group binding
## GO:1901981 phosphatidylinositol phosphate binding
## GO:0034706 sodium channel complex
## GO:1903533 regulation of protein targeting
## N DE P.DE FDR
## GO:0060028 6 3 0.001314104 1
## GO:0001518 17 4 0.001375654 1
## GO:0007566 54 6 0.001565736 1
## GO:0060371 9 3 0.001687609 1
## GO:0033077 83 8 0.002388282 1
## GO:0033089 10 3 0.002549476 1
## GO:0021535 16 4 0.002820309 1
## GO:0031210 28 4 0.003456386 1
## GO:1901163 15 3 0.003532168 1
## GO:0050321 22 4 0.004978615 1
## GO:0061450 16 3 0.005645237 1
## GO:0005248 24 4 0.006549588 1
## GO:0086010 36 5 0.007385247 1
## GO:0031526 54 5 0.007995118 1
## GO:0044790 15 3 0.008390680 1
## GO:0086012 22 4 0.008914192 1
## GO:0050997 37 4 0.009131104 1
## GO:1901981 170 11 0.009143120 1
## GO:0034706 26 4 0.010170846 1
## GO:1903533 75 6 0.010924579 1
#design matrix in regression
design_tw <- model.matrix(~ADOS+Twin_Group)
design_tw_dmrc <- model.matrix(~ADOS)
#myannotation <- cpg.annotate("array", mDat, analysis.type="differential", design=design_tw, coef=2, fdr=1)
myannotation <- cpg.annotate("array", mDat, what = "M",
arraytype = "EPIC", analysis.type="differential", design_tw, coef=2)
## Your contrast returned no individually significant probes. Try increasing the fdr. Alternatively, set pcutoff manually in dmrcate() to return DMRs, but be warned there is an increased risk of Type I errors.
dmrcoutput <- dmrcate(myannotation, lambda=1000, C=2, pcutoff=0.001)
## Fitting chr1...
## Fitting chr2...
## Fitting chr3...
## Fitting chr4...
## Fitting chr5...
## Fitting chr6...
## Fitting chr7...
## Fitting chr8...
## Fitting chr9...
## Fitting chr10...
## Fitting chr11...
## Fitting chr12...
## Fitting chr13...
## Fitting chr14...
## Fitting chr15...
## Fitting chr16...
## Fitting chr17...
## Fitting chr18...
## Fitting chr19...
## Fitting chr20...
## Fitting chr21...
## Fitting chr22...
## Demarcating regions...
## Done!
results.ranges <- data.frame(extractRanges(dmrcoutput, genome = "hg19") )
## snapshotDate(): 2022-10-31
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache
head(results.ranges, 20)
## seqnames start end width strand no.cpgs min_smoothed_fdr Stouffer
## 1 chr1 234792390 234792752 363 * 2 8.755567e-07 1
## 2 chr2 46525649 46525678 30 * 2 9.543493e-04 1
## 3 chr6 91113506 91113687 182 * 2 8.564702e-05 1
## 4 chr9 96077410 96077420 11 * 2 9.668722e-04 1
## 5 chr11 44613980 44614255 276 * 2 4.084688e-05 1
## 6 chr22 20102216 20102580 365 * 3 1.418446e-06 1
## 7 chr22 31318147 31318444 298 * 7 9.447998e-04 1
## 8 chr12 75784541 75785295 755 * 11 8.243423e-09 1
## 9 chr2 33359198 33359688 491 * 10 9.097454e-04 1
## 10 chr7 100481990 100482960 971 * 5 3.223939e-08 1
## 11 chr16 87780848 87781501 654 * 5 5.201435e-07 1
## 12 chr11 65820391 65820516 126 * 4 4.325928e-04 1
## 13 chr12 124788627 124789036 410 * 4 2.317340e-05 1
## HMFDR Fisher maxdiff meandiff overlapping.genes
## 1 0.9999951 1 0.011843398 0.007349988 RP4-781K5.4
## 2 0.9999951 1 -0.003715122 -0.002901168 EPAS1
## 3 0.9999951 1 0.015641676 0.011832067 <NA>
## 4 0.9999951 1 -0.004274340 -0.001878717 WNK2
## 5 0.9999951 1 0.017079330 0.015007099 CD82
## 6 0.9999951 1 -0.012596545 -0.003140646 TRMT2A
## 7 0.9999951 1 0.009559386 0.003625967 MORC2-AS1
## 8 0.9999951 1 0.006045794 0.001967226 GLIPR1L2, CAPS2
## 9 0.9999951 1 0.015386270 0.008549527 LTBP1
## 10 0.9999951 1 0.005702107 0.002285903 SRRT
## 11 0.9999951 1 0.015904041 0.006726073 KLHDC4, RP11-278A23.2
## 12 0.9999951 1 0.006046969 -0.001162702 SF3B2
## 13 0.9999951 1 -0.009740326 -0.002426397 FAM101A
write.csv(results.ranges, file = "dmrcoutput_guthrie.csv", row.names = TRUE)
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] DMRcatedata_2.16.0
## [2] IlluminaHumanMethylation450kmanifest_0.4.0
## [3] IlluminaHumanMethylationEPICmanifest_0.3.0
## [4] kableExtra_1.3.4
## [5] mitch_1.10.0
## [6] FlowSorted.Blood.EPIC_2.2.0
## [7] ExperimentHub_2.6.0
## [8] AnnotationHub_3.6.0
## [9] BiocFileCache_2.6.0
## [10] dbplyr_2.3.0
## [11] DMRcate_2.12.0
## [12] ggplot2_3.4.0
## [13] reshape2_1.4.4
## [14] FlowSorted.Blood.450k_1.36.0
## [15] WGCNA_1.72-1
## [16] fastcluster_1.2.3
## [17] dynamicTreeCut_1.63-1
## [18] gplots_3.1.3
## [19] RColorBrewer_1.1-3
## [20] ruv_0.9.7.1
## [21] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [22] limma_3.54.0
## [23] missMethyl_1.32.0
## [24] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [25] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [26] minfi_1.44.0
## [27] bumphunter_1.40.0
## [28] locfit_1.5-9.7
## [29] iterators_1.0.14
## [30] foreach_1.5.2
## [31] Biostrings_2.66.0
## [32] XVector_0.38.0
## [33] SummarizedExperiment_1.28.0
## [34] Biobase_2.58.0
## [35] MatrixGenerics_1.10.0
## [36] matrixStats_0.63.0
## [37] GenomicRanges_1.50.2
## [38] GenomeInfoDb_1.34.6
## [39] IRanges_2.32.0
## [40] S4Vectors_0.36.1
## [41] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.58.0
## [3] GGally_2.1.2 R.methodsS3_1.8.2
## [5] tidyr_1.2.1 bit64_4.0.5
## [7] knitr_1.41 DelayedArray_0.24.0
## [9] R.utils_2.12.2 data.table_1.14.6
## [11] rpart_4.1.19 KEGGREST_1.38.0
## [13] RCurl_1.98-1.9 GEOquery_2.66.0
## [15] AnnotationFilter_1.22.0 doParallel_1.0.17
## [17] generics_0.1.3 GenomicFeatures_1.50.3
## [19] preprocessCore_1.60.1 RSQLite_2.2.20
## [21] bit_4.0.5 tzdb_0.3.0
## [23] webshot_0.5.4 xml2_1.3.3
## [25] httpuv_1.6.8 assertthat_0.2.1
## [27] xfun_0.36 hms_1.1.2
## [29] jquerylib_0.1.4 evaluate_0.20
## [31] promises_1.2.0.1 fansi_1.0.3
## [33] restfulr_0.0.15 scrime_1.3.5
## [35] progress_1.2.2 readxl_1.4.1
## [37] caTools_1.18.2 DBI_1.1.3
## [39] htmlwidgets_1.6.1 reshape_0.8.9
## [41] purrr_1.0.1 ellipsis_0.3.2
## [43] dplyr_1.0.10 backports_1.4.1
## [45] permute_0.9-7 annotate_1.76.0
## [47] biomaRt_2.54.0 deldir_1.0-6
## [49] sparseMatrixStats_1.10.0 vctrs_0.5.1
## [51] ensembldb_2.22.0 cachem_1.0.6
## [53] withr_2.5.0 Gviz_1.42.0
## [55] BSgenome_1.66.2 checkmate_2.1.0
## [57] GenomicAlignments_1.34.0 prettyunits_1.1.1
## [59] mclust_6.0.0 svglite_2.1.1
## [61] cluster_2.1.4 lazyeval_0.2.2
## [63] crayon_1.5.2 genefilter_1.80.2
## [65] labeling_0.4.2 edgeR_3.40.1
## [67] pkgconfig_2.0.3 nlme_3.1-161
## [69] ProtGenerics_1.30.0 nnet_7.3-18
## [71] rlang_1.0.6 lifecycle_1.0.3
## [73] filelock_1.0.2 dichromat_2.0-0.1
## [75] cellranger_1.1.0 rngtools_1.5.2
## [77] base64_2.0.1 Matrix_1.5-3
## [79] Rhdf5lib_1.20.0 base64enc_0.1-3
## [81] beeswarm_0.4.0 viridisLite_0.4.1
## [83] png_0.1-8 rjson_0.2.21
## [85] bitops_1.0-7 R.oo_1.25.0
## [87] KernSmooth_2.23-20 rhdf5filters_1.10.0
## [89] blob_1.2.3 DelayedMatrixStats_1.20.0
## [91] doRNG_1.8.6 stringr_1.5.0
## [93] nor1mix_1.3-0 readr_2.1.3
## [95] jpeg_0.1-10 scales_1.2.1
## [97] memoise_2.0.1 magrittr_2.0.3
## [99] plyr_1.8.8 zlibbioc_1.44.0
## [101] compiler_4.2.2 BiocIO_1.8.0
## [103] illuminaio_0.40.0 Rsamtools_2.14.0
## [105] cli_3.6.0 DSS_2.46.0
## [107] htmlTable_2.4.1 Formula_1.2-4
## [109] MASS_7.3-58.1 tidyselect_1.2.0
## [111] stringi_1.7.12 highr_0.10
## [113] yaml_2.3.6 askpass_1.1
## [115] latticeExtra_0.6-30 grid_4.2.2
## [117] sass_0.4.4 VariantAnnotation_1.44.0
## [119] tools_4.2.2 rstudioapi_0.14
## [121] foreign_0.8-84 bsseq_1.34.0
## [123] gridExtra_2.3 farver_2.1.1
## [125] digest_0.6.31 BiocManager_1.30.19
## [127] shiny_1.7.4 quadprog_1.5-8
## [129] Rcpp_1.0.9 siggenes_1.72.0
## [131] BiocVersion_3.16.0 later_1.3.0
## [133] org.Hs.eg.db_3.16.0 httr_1.4.4
## [135] AnnotationDbi_1.60.0 biovizBase_1.46.0
## [137] colorspace_2.0-3 rvest_1.0.3
## [139] XML_3.99-0.13 splines_4.2.2
## [141] statmod_1.5.0 multtest_2.54.0
## [143] systemfonts_1.0.4 xtable_1.8-4
## [145] jsonlite_1.8.4 R6_2.5.1
## [147] echarts4r_0.4.4 Hmisc_4.7-2
## [149] pillar_1.8.1 htmltools_0.5.4
## [151] mime_0.12 glue_1.6.2
## [153] fastmap_1.1.0 BiocParallel_1.32.5
## [155] interactiveDisplayBase_1.36.0 beanplot_1.3.1
## [157] codetools_0.2-18 utf8_1.2.2
## [159] lattice_0.20-45 bslib_0.4.2
## [161] tibble_3.1.8 BiasedUrn_2.0.8
## [163] curl_5.0.0 gtools_3.9.4
## [165] GO.db_3.16.0 openssl_2.0.5
## [167] interp_1.1-3 survival_3.5-0
## [169] rmarkdown_2.19 munsell_0.5.0
## [171] rhdf5_2.42.0 GenomeInfoDbData_1.2.9
## [173] HDF5Array_1.26.0 impute_1.72.2
## [175] gtable_0.3.1