Source: https://github.com/markziemann/asd_meth

Introduction

Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.

Here are the files that I’m using:

  • limma_guthrie_ADOS.csv

  • limma_blood_ADOS.csv

suppressPackageStartupMessages({
  library("parallel")
  library("mitch")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  source("https://raw.githubusercontent.com/markziemann/gmea/main/meth_functions.R")
  library("data.table")
  library("kableExtra")
  library("eulerr")
  library("RIdeogram")
  library("GenomicRanges")
  library("tictoc")
})

source("meth_functions.R")

Load data

bl_mvals <- readRDS(file="bl_mvals.rds")
gu_mvals <- readRDS(file="gu_mvals.rds")

Probe sets

For each gene, extract out the probes.

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
promoters <- myann[promoters,]
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   24.00   49.68   55.00 6778.00

Load Limma and RUV data for manhattan plot

First I will generate plots for limmma analysis.

  • limma_guthrie_ADOS.csv

  • limma_blood_ADOS.csv

# blood at assessment
top <- read.csv("limma_blood_ADOS.csv")
nrow(top)
## [1] 802647
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 3573
-log10(min(top$P.Value))
## [1] 4.641884
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)

par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")

pdf("manhat_limma_bl.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")
dev.off()
## png 
##   2
# neonatal guthrie card
top <- read.csv("limma_guthrie_ADOS.csv")
nrow(top)
## [1] 790658
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 3052
-log10(min(top$P.Value))
## [1] 5.396928
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)

par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
  annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
  annotateTop=FALSE)

pdf("manhat_limma_gu.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
  annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
  annotateTop=FALSE)
dev.off()
## png 
##   2

Compartment enrichment

Looking for enrichments in different genomic compartments.

par(mfrow=c(2,1))

# guthrie
dma1 <- read.csv("limma_guthrie_ADOS.csv")
if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots(comp)
}

# blood
dma2 <- read.csv("limma_blood_ADOS.csv")
if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots(comp)
}

Compartments top 1000

par(mfrow=c(2,1))
# guthrie
comp <- compartment_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##               all  up        OR fisherPval   lowerCI  upperCI
## Intergenic  43381  23 0.8389085 0.48706693 0.5258383 1.276341
## 3'UTR       22433  18 1.2948872 0.27561695 0.7597998 2.072428
## 5'UTR      102937  51 0.7643920 0.07889085 0.5592212 1.025627
## Body       333689 227 1.1814543 0.08494842 0.9751244 1.431904
## TSS1500    115845  81 1.1443742 0.27418805 0.8876763 1.460454
## TSS200      75104  38 0.7904865 0.18865183 0.5511121 1.104070
## 
## $dn_comp
##               all  dn        OR   fisherPval   lowerCI   upperCI
## Intergenic  43381  15 0.5761750 0.0308946406 0.3193848 0.9621765
## 3'UTR       22433  16 1.2300273 0.3975719033 0.6960289 2.0232371
## 5'UTR      102937  50 0.8078831 0.1629589719 0.5884785 1.0884038
## Body       333689 234 1.4685899 0.0001316616 1.2021212 1.7968522
## ExonBnd      6713   4 1.0201702 0.8003323414 0.2765425 2.6334465
## TSS1500    115845  63 0.9182427 0.5942944466 0.6905059 1.2041218
## TSS200      75104  27 0.5880403 0.0050933600 0.3824177 0.8693314
make_forest_plots(comp)

# blood
comp <- compartment_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##               all  up        OR fisherPval     lowerCI  upperCI
## Intergenic  43528  26 1.4859557 0.06636519 0.953038174 2.226311
## 3'UTR       22758   5 0.5224343 0.18233626 0.168373824 1.233495
## 5'UTR      104442  51 1.2171947 0.21605071 0.881706511 1.652061
## Body       339559 125 0.8070945 0.07043460 0.635256389 1.023158
## ExonBnd      6862   1 0.3496924 0.53965923 0.008825083 1.964196
## TSS1500    117509  53 1.1091360 0.48032288 0.807675308 1.498631
## TSS200      75457  33 1.0634901 0.70520650 0.716808409 1.531364
## 
## $dn_comp
##               all  dn        OR  fisherPval   lowerCI   upperCI
## Intergenic  43528  25 0.7417994 0.151754452 0.4755129 1.1077977
## 3'UTR       22758  23 1.3413967 0.176996771 0.8425370 2.0355856
## 5'UTR      104442  80 1.0063621 0.951579367 0.7834649 1.2785220
## Body       339559 289 1.2517246 0.009742042 1.0534633 1.4881382
## ExonBnd      6862  10 1.9314000 0.045232660 0.9203789 3.5825126
## TSS1500    117509  67 0.7126767 0.009075108 0.5433220 0.9220956
## TSS200      75457  47 0.8001268 0.162291260 0.5800384 1.0806029
make_forest_plots(comp)

Looking for enrichments in proximity to CGI contexts

par(mfrow=c(2,1))

if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
# guthrie
comp <- cgi_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots(comp)
}

if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
# blood
comp <- cgi_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots(comp)
}

CGI enrichments in top 1000 probes

par(mfrow=c(2,1))
# guthrie
comp <- cgi_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##            all  up        OR fisherPval   lowerCI  upperCI
## Island  150182  80 0.7914758 0.05507706 0.6154100 1.006919
## OpenSea 442427 291 1.0411421 0.65617216 0.8708430 1.246143
## Shelf    55491  30 0.8262023 0.34108662 0.5512779 1.195275
## Shore   142558 110 1.2472827 0.04379063 1.0006516 1.543986
## 
## $dn_comp
##            all  dn        OR fisherPval   lowerCI  upperCI
## Island  150182  83 0.8717702 0.27330781 0.6799065 1.106451
## OpenSea 442427 255 0.8576492 0.09192572 0.7153376 1.028657
## Shelf    55491  48 1.4423707 0.02080876 1.0475126 1.946126
## Shore   142558 103 1.2132335 0.08770710 0.9665051 1.511589
make_forest_plots(comp)

# blood
comp <- cgi_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##            all  up        OR  fisherPval   lowerCI  upperCI
## Island  150186  57 0.9523840 0.773732545 0.7020288 1.272907
## OpenSea 452360 167 0.8620659 0.192891313 0.6872938 1.082060
## Shelf    56174  16 0.7062707 0.187293830 0.3985352 1.166031
## Shore   143927  77 1.4686262 0.004221713 1.1209174 1.905982
## 
## $dn_comp
##            all  dn        OR   fisherPval   lowerCI   upperCI
## Island  150186  60 0.4181603 6.780836e-13 0.3153031 0.5456317
## OpenSea 452360 478 1.8064067 2.870589e-13 1.5304745 2.1381921
## Shelf    56174  41 0.8485316 3.299684e-01 0.6030135 1.1643462
## Shore   143927 104 0.8219508 6.497237e-02 0.6605231 1.0143919
make_forest_plots(comp)

Session information

For reproducibility

sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] tictoc_1.1                                         
##  [2] RIdeogram_0.2.2                                    
##  [3] data.table_1.14.8                                  
##  [4] ENmix_1.34.0                                       
##  [5] doParallel_1.0.17                                  
##  [6] qqman_0.1.8                                        
##  [7] RCircos_1.2.2                                      
##  [8] beeswarm_0.4.0                                     
##  [9] forestplot_3.1.1                                   
## [10] abind_1.4-5                                        
## [11] checkmate_2.1.0                                    
## [12] eulerr_7.0.0                                       
## [13] GEOquery_2.66.0                                    
## [14] topconfects_1.14.0                                 
## [15] R.utils_2.12.2                                     
## [16] R.oo_1.25.0                                        
## [17] R.methodsS3_1.8.2                                  
## [18] plyr_1.8.8                                         
## [19] DMRcatedata_2.16.0                                 
## [20] IlluminaHumanMethylation450kmanifest_0.4.0         
## [21] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [22] kableExtra_1.3.4                                   
## [23] mitch_1.10.0                                       
## [24] FlowSorted.Blood.EPIC_2.2.0                        
## [25] ExperimentHub_2.6.0                                
## [26] AnnotationHub_3.6.0                                
## [27] BiocFileCache_2.6.0                                
## [28] dbplyr_2.3.1                                       
## [29] DMRcate_2.12.0                                     
## [30] ggplot2_3.4.1                                      
## [31] reshape2_1.4.4                                     
## [32] FlowSorted.Blood.450k_1.36.0                       
## [33] WGCNA_1.72-1                                       
## [34] fastcluster_1.2.3                                  
## [35] dynamicTreeCut_1.63-1                              
## [36] gplots_3.1.3                                       
## [37] RColorBrewer_1.1-3                                 
## [38] ruv_0.9.7.1                                        
## [39] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [40] limma_3.54.0                                       
## [41] missMethyl_1.32.0                                  
## [42] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [43] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [44] minfi_1.44.0                                       
## [45] bumphunter_1.40.0                                  
## [46] locfit_1.5-9.7                                     
## [47] iterators_1.0.14                                   
## [48] foreach_1.5.2                                      
## [49] Biostrings_2.66.0                                  
## [50] XVector_0.38.0                                     
## [51] SummarizedExperiment_1.28.0                        
## [52] Biobase_2.58.0                                     
## [53] MatrixGenerics_1.10.0                              
## [54] matrixStats_0.63.0                                 
## [55] GenomicRanges_1.50.2                               
## [56] GenomeInfoDb_1.34.6                                
## [57] IRanges_2.32.0                                     
## [58] S4Vectors_0.36.1                                   
## [59] BiocGenerics_0.44.0                                
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                rtracklayer_1.58.0           
##   [3] GGally_2.1.2                  tidyr_1.3.0                  
##   [5] bit64_4.0.5                   knitr_1.42                   
##   [7] DelayedArray_0.24.0           rpart_4.1.19                 
##   [9] KEGGREST_1.38.0               RCurl_1.98-1.10              
##  [11] AnnotationFilter_1.22.0       generics_0.1.3               
##  [13] GenomicFeatures_1.50.3        preprocessCore_1.60.2        
##  [15] RSQLite_2.3.0                 bit_4.0.5                    
##  [17] tzdb_0.3.0                    webshot_0.5.4                
##  [19] xml2_1.3.3                    httpuv_1.6.9                 
##  [21] assertthat_0.2.1              xfun_0.37                    
##  [23] hms_1.1.2                     jquerylib_0.1.4              
##  [25] evaluate_0.20                 promises_1.2.0.1             
##  [27] fansi_1.0.4                   restfulr_0.0.15              
##  [29] scrime_1.3.5                  progress_1.2.2               
##  [31] readxl_1.4.2                  caTools_1.18.2               
##  [33] geneplotter_1.76.0            DBI_1.1.3                    
##  [35] htmlwidgets_1.6.2             reshape_0.8.9                
##  [37] purrr_1.0.1                   ellipsis_0.3.2               
##  [39] dplyr_1.1.0                   backports_1.4.1              
##  [41] calibrate_1.7.7               permute_0.9-7                
##  [43] grImport2_0.2-0               annotate_1.76.0              
##  [45] biomaRt_2.54.0                deldir_1.0-6                 
##  [47] sparseMatrixStats_1.10.0      vctrs_0.6.0                  
##  [49] ensembldb_2.22.0              cachem_1.0.7                 
##  [51] withr_2.5.0                   Gviz_1.42.0                  
##  [53] BSgenome_1.66.2               GenomicAlignments_1.34.0     
##  [55] prettyunits_1.1.1             mclust_6.0.0                 
##  [57] svglite_2.1.1                 cluster_2.1.4                
##  [59] RPMM_1.25                     lazyeval_0.2.2               
##  [61] crayon_1.5.2                  genefilter_1.80.3            
##  [63] labeling_0.4.2                edgeR_3.40.2                 
##  [65] pkgconfig_2.0.3               nlme_3.1-162                 
##  [67] ProtGenerics_1.30.0           nnet_7.3-18                  
##  [69] rlang_1.1.0                   lifecycle_1.0.3              
##  [71] filelock_1.0.2                dichromat_2.0-0.1            
##  [73] rsvg_2.4.0                    cellranger_1.1.0             
##  [75] rngtools_1.5.2                base64_2.0.1                 
##  [77] Matrix_1.5-3                  Rhdf5lib_1.20.0              
##  [79] base64enc_0.1-3               viridisLite_0.4.1            
##  [81] png_0.1-8                     rjson_0.2.21                 
##  [83] bitops_1.0-7                  KernSmooth_2.23-20           
##  [85] rhdf5filters_1.10.0           blob_1.2.4                   
##  [87] DelayedMatrixStats_1.20.0     doRNG_1.8.6                  
##  [89] stringr_1.5.0                 nor1mix_1.3-0                
##  [91] readr_2.1.4                   jpeg_0.1-10                  
##  [93] scales_1.2.1                  memoise_2.0.1                
##  [95] magrittr_2.0.3                zlibbioc_1.44.0              
##  [97] compiler_4.2.2                BiocIO_1.8.0                 
##  [99] illuminaio_0.40.0             Rsamtools_2.14.0             
## [101] cli_3.6.0                     DSS_2.46.0                   
## [103] htmlTable_2.4.1               Formula_1.2-5                
## [105] MASS_7.3-58.3                 tidyselect_1.2.0             
## [107] stringi_1.7.12                highr_0.10                   
## [109] yaml_2.3.7                    askpass_1.1                  
## [111] latticeExtra_0.6-30           sass_0.4.5                   
## [113] VariantAnnotation_1.44.0      tools_4.2.2                  
## [115] rstudioapi_0.14               foreign_0.8-84               
## [117] bsseq_1.34.0                  gridExtra_2.3                
## [119] farver_2.1.1                  digest_0.6.31                
## [121] BiocManager_1.30.20           shiny_1.7.4                  
## [123] quadprog_1.5-8                Rcpp_1.0.10                  
## [125] siggenes_1.72.0               BiocVersion_3.16.0           
## [127] later_1.3.0                   org.Hs.eg.db_3.16.0          
## [129] httr_1.4.5                    AnnotationDbi_1.60.0         
## [131] biovizBase_1.46.0             colorspace_2.1-0             
## [133] rvest_1.0.3                   XML_3.99-0.13                
## [135] splines_4.2.2                 statmod_1.5.0                
## [137] multtest_2.54.0               systemfonts_1.0.4            
## [139] xtable_1.8-4                  jsonlite_1.8.4               
## [141] R6_2.5.1                      echarts4r_0.4.4              
## [143] Hmisc_5.0-1                   pillar_1.8.1                 
## [145] htmltools_0.5.4               mime_0.12                    
## [147] glue_1.6.2                    fastmap_1.1.1                
## [149] BiocParallel_1.32.5           interactiveDisplayBase_1.36.0
## [151] beanplot_1.3.1                codetools_0.2-19             
## [153] utf8_1.2.3                    lattice_0.20-45              
## [155] bslib_0.4.2                   tibble_3.2.0                 
## [157] curl_5.0.0                    gtools_3.9.4                 
## [159] GO.db_3.16.0                  openssl_2.0.6                
## [161] interp_1.1-3                  survival_3.5-5               
## [163] rmarkdown_2.20                munsell_0.5.0                
## [165] rhdf5_2.42.0                  GenomeInfoDbData_1.2.9       
## [167] HDF5Array_1.26.0              impute_1.72.3                
## [169] gtable_0.3.2