Source: https://github.com/markziemann/asd_meth

Introduction

Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.

Here are the files that I’m using:

  • limma_guthrie_ADOS.csv

  • limma_blood_ADOS.csv

suppressPackageStartupMessages({
  library("parallel")
  library("mitch")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  source("meth_functions.R")
  library("data.table")
  library("kableExtra")
  library("eulerr")
  library("GenomicRanges")
  library("qqman")
  library("forestplot")
  library("RhpcBLASctl")
})

source("meth_functions.R")

RhpcBLASctl::blas_set_num_threads(1)

Load Limma data for manhattan plot

First I will generate plots for limmma analysis.

  • limma_guthrie_ADOS.csv

  • limma_blood_ADOS.csv

# blood at assessment
top <- read.csv("limma_blood_ADOS.csv")
nrow(top)
## [1] 802647
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 3557
-log10(min(top$P.Value))
## [1] 4.672452
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)

par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")

pdf("manhat_limma_bl.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")
dev.off()
## png 
##   2
# neonatal guthrie card
top <- read.csv("limma_guthrie_ADOS.csv")
nrow(top)
## [1] 790658
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 3013
-log10(min(top$P.Value))
## [1] 5.044086
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)

par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
  annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
  annotateTop=FALSE)

pdf("manhat_limma_gu.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
  annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
  ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
  annotateTop=FALSE)
dev.off()
## png 
##   2

Compartment enrichment

Looking for enrichments in different genomic compartments.

Guthrie cards.

par(mfrow=c(2,1))

# guthrie
dma1 <- read.csv("limma_guthrie_ADOS.csv")
if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}

Blood at assessment.

# blood
dma2 <- read.csv("limma_blood_ADOS.csv")
if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}

Compartments top 1000

Guthrie cards.

par(mfrow=c(2,1))
# guthrie
comp <- compartment_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##               all  up        OR fisherPval    lowerCI  upperCI
## Intergenic  43381  51 0.9245339 0.67535385 0.68258691 1.227434
## 3'UTR       22433  35 1.2427972 0.21335929 0.85959826 1.742877
## 5'UTR      102937 119 0.8999233 0.29663116 0.73548370 1.093056
## Body       333689 426 1.0169302 0.81380803 0.88918787 1.162815
## ExonBnd      6713   3 0.3506500 0.05618644 0.07214843 1.028514
## TSS1500    115845 166 1.1629940 0.08545854 0.97629972 1.378833
## TSS200      75104  86 0.8944576 0.35565472 0.70748073 1.118575
## 
## $dn_comp
##               all  dn        OR   fisherPval   lowerCI   upperCI
## Intergenic  43381  35 0.6474352 1.028891e-02 0.4477765 0.9080900
## 3'UTR       22433  33 1.2160265 2.830640e-01 0.8311904 1.7221584
## 5'UTR      102937 106 0.8230234 6.581997e-02 0.6650275 1.0099966
## Body       333689 474 1.3738310 4.133803e-06 1.1976552 1.5768086
## ExonBnd      6713   8 0.9778717 1.000000e+00 0.4206235 1.9366982
## TSS1500    115845 132 0.9232365 4.331234e-01 0.7609279 1.1130633
## TSS200      75104  65 0.6861691 2.740879e-03 0.5243328 0.8844783
make_forest_plots_up(comp)

make_forest_plots_dn(comp)

Blood at assessment.

# blood
comp <- compartment_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##               all  up        OR fisherPval   lowerCI  upperCI
## Intergenic  43528  58 1.1861089  0.2114994 0.8917286 1.550848
## 3'UTR       22758  19 0.7280137  0.1929947 0.4358963 1.144822
## 5'UTR      104442 127 1.0831976  0.3981696 0.8889404 1.311071
## Body       339559 369 0.9193118  0.2446580 0.7981453 1.058347
## ExonBnd      6862   4 0.5102260  0.2072360 0.1386423 1.312107
## TSS1500    117509 151 1.1610028  0.1068939 0.9661410 1.387944
## TSS200      75457  79 0.9126172  0.4925945 0.7140596 1.152529
## 
## $dn_comp
##               all  dn        OR   fisherPval   lowerCI   upperCI
## Intergenic  43528  33 0.6708691 2.102774e-02 0.4583358 0.9506316
## 3'UTR       22758  31 1.2404430 2.242715e-01 0.8365982 1.7763942
## 5'UTR      104442 118 1.0244279 8.011151e-01 0.8349697 1.2477260
## Body       339559 432 1.3321546 6.192567e-05 1.1547604 1.5376157
## ExonBnd      6862  12 1.5899242 1.384529e-01 0.8175566 2.7952187
## TSS1500    117509  98 0.7181107 1.768932e-03 0.5750239 0.8885583
## TSS200      75457  62 0.7200436 1.260460e-02 0.5462686 0.9341952
make_forest_plots_up(comp)

make_forest_plots_dn(comp)

Looking for enrichments in proximity to CGI contexts

Guthrie cards.

par(mfrow=c(2,1))

if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
# guthrie
comp <- cgi_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}

Blood at assessment.

if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
# blood
comp <- cgi_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}

CGI enrichments in top 1000 probes

Guthrie cards.

par(mfrow=c(2,1))
# guthrie
comp <- cgi_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##            all  up        OR   fisherPval   lowerCI   upperCI
## Island  150182 146 0.7288272 0.0002798720 0.6072189 0.8696999
## OpenSea 442427 576 1.0693647 0.3078147713 0.9415907 1.2152392
## Shelf    55491  54 0.7560012 0.0471462422 0.5636143 0.9951031
## Shore   142558 224 1.3128010 0.0004602369 1.1262334 1.5253509
## 
## $dn_comp
##            all  dn        OR  fisherPval   lowerCI  upperCI
## Island  150182 170 0.8733429 0.115573520 0.7359925 1.031310
## OpenSea 442427 521 0.8559396 0.014134967 0.7545218 0.971116
## Shelf    55491  91 1.3268315 0.013032779 1.0574206 1.647678
## Shore   142558 218 1.2677690 0.002311228 1.0859120 1.475127
make_forest_plots_up(comp)

make_forest_plots_dn(comp)

Blood at assessment.

# blood
comp <- cgi_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
##            all  up        OR   fisherPval   lowerCI   upperCI
## Island  150186 145 0.7365097 0.0004790467 0.6132566 0.8793070
## OpenSea 452360 617 1.2477956 0.0006368253 1.0965893 1.4213292
## Shelf    56174  52 0.7286441 0.0253341559 0.5401462 0.9637818
## Shore   143927 186 1.0458621 0.5917870441 0.8869173 1.2280459
## 
## $dn_comp
##            all  dn        OR   fisherPval   lowerCI   upperCI
## Island  150186  92 0.4398291 7.197931e-17 0.3509793 0.5455411
## OpenSea 452360 703 1.8341813 1.358832e-19 1.5991311 2.1078631
## Shelf    56174  60 0.8480394 2.383446e-01 0.6419233 1.1016926
## Shore   143927 145 0.7759522 4.388490e-03 0.6461142 0.9263797
make_forest_plots_up(comp)

make_forest_plots_dn(comp)

Session information

For reproducibility

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forestplot_3.1.3                                   
##  [2] abind_1.4-5                                        
##  [3] checkmate_2.3.2                                    
##  [4] qqman_0.1.9                                        
##  [5] eulerr_7.0.2                                       
##  [6] data.table_1.15.4                                  
##  [7] DMRcatedata_2.22.0                                 
##  [8] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [9] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [10] RhpcBLASctl_0.23-42                                
## [11] vioplot_0.5.0                                      
## [12] zoo_1.8-12                                         
## [13] sm_2.2-6.0                                         
## [14] kableExtra_1.4.0                                   
## [15] mitch_1.16.0                                       
## [16] FlowSorted.Blood.EPIC_2.8.0                        
## [17] ExperimentHub_2.12.0                               
## [18] AnnotationHub_3.12.0                               
## [19] BiocFileCache_2.12.0                               
## [20] dbplyr_2.5.0                                       
## [21] DMRcate_3.0.8                                      
## [22] ggplot2_3.5.1                                      
## [23] reshape2_1.4.4                                     
## [24] FlowSorted.Blood.450k_1.42.0                       
## [25] WGCNA_1.72-5                                       
## [26] fastcluster_1.2.6                                  
## [27] dynamicTreeCut_1.63-1                              
## [28] gplots_3.1.3.1                                     
## [29] RColorBrewer_1.1-3                                 
## [30] limma_3.60.4                                       
## [31] missMethyl_1.38.0                                  
## [32] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [33] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [34] minfi_1.50.0                                       
## [35] bumphunter_1.46.0                                  
## [36] locfit_1.5-9.10                                    
## [37] iterators_1.0.14                                   
## [38] foreach_1.5.2                                      
## [39] Biostrings_2.72.1                                  
## [40] XVector_0.44.0                                     
## [41] SummarizedExperiment_1.34.0                        
## [42] Biobase_2.64.0                                     
## [43] MatrixGenerics_1.16.0                              
## [44] matrixStats_1.3.0                                  
## [45] GenomicRanges_1.56.1                               
## [46] GenomeInfoDb_1.40.1                                
## [47] IRanges_2.38.1                                     
## [48] S4Vectors_0.42.1                                   
## [49] BiocGenerics_0.50.0                                
## 
## loaded via a namespace (and not attached):
##   [1] ProtGenerics_1.36.0       bitops_1.0-8             
##   [3] httr_1.4.7                doParallel_1.0.17        
##   [5] tools_4.4.1               doRNG_1.8.6              
##   [7] backports_1.5.0           utf8_1.2.4               
##   [9] R6_2.5.1                  HDF5Array_1.32.1         
##  [11] lazyeval_0.2.2            Gviz_1.48.0              
##  [13] rhdf5filters_1.16.0       permute_0.9-7            
##  [15] withr_3.0.0               GGally_2.2.1             
##  [17] prettyunits_1.2.0         gridExtra_2.3            
##  [19] base64_2.0.1              preprocessCore_1.61.0    
##  [21] cli_3.6.3                 labeling_0.4.3           
##  [23] sass_0.4.9                readr_2.1.5              
##  [25] genefilter_1.86.0         askpass_1.2.0            
##  [27] systemfonts_1.1.0         Rsamtools_2.20.0         
##  [29] foreign_0.8-86            svglite_2.1.3            
##  [31] siggenes_1.78.0           illuminaio_0.46.0        
##  [33] R.utils_2.12.3            dichromat_2.0-0.1        
##  [35] scrime_1.3.5              BSgenome_1.72.0          
##  [37] readxl_1.4.3              rstudioapi_0.16.0        
##  [39] impute_1.78.0             RSQLite_2.3.7            
##  [41] generics_0.1.3            BiocIO_1.14.0            
##  [43] gtools_3.9.5              dplyr_1.1.4              
##  [45] GO.db_3.19.1              Matrix_1.7-0             
##  [47] interp_1.1-6              fansi_1.0.6              
##  [49] R.methodsS3_1.8.2         lifecycle_1.0.4          
##  [51] edgeR_4.2.1               yaml_2.3.8               
##  [53] rhdf5_2.48.0              SparseArray_1.4.8        
##  [55] blob_1.2.4                promises_1.3.0           
##  [57] crayon_1.5.3              lattice_0.22-6           
##  [59] echarts4r_0.4.5           GenomicFeatures_1.56.0   
##  [61] annotate_1.82.0           KEGGREST_1.44.1          
##  [63] pillar_1.9.0              knitr_1.47               
##  [65] beanplot_1.3.1            rjson_0.2.22             
##  [67] codetools_0.2-20          glue_1.7.0               
##  [69] vctrs_0.6.5               png_0.1-8                
##  [71] cellranger_1.1.0          gtable_0.3.5             
##  [73] cachem_1.1.0              xfun_0.45                
##  [75] mime_0.12                 S4Arrays_1.4.1           
##  [77] survival_3.7-0            statmod_1.5.0            
##  [79] nlme_3.1-165              bit64_4.0.5              
##  [81] bsseq_1.40.0              progress_1.2.3           
##  [83] filelock_1.0.3            bslib_0.7.0              
##  [85] nor1mix_1.3-3             KernSmooth_2.23-24       
##  [87] rpart_4.1.23              colorspace_2.1-1         
##  [89] DBI_1.2.3                 Hmisc_5.1-3              
##  [91] nnet_7.3-19               tidyselect_1.2.1         
##  [93] bit_4.0.5                 compiler_4.4.1           
##  [95] curl_5.2.1                httr2_1.0.1              
##  [97] htmlTable_2.4.3           BiasedUrn_2.0.12         
##  [99] xml2_1.3.6                DelayedArray_0.30.1      
## [101] rtracklayer_1.64.0        scales_1.3.0             
## [103] caTools_1.18.2            quadprog_1.5-8           
## [105] rappdirs_0.3.3            stringr_1.5.1            
## [107] digest_0.6.36             rmarkdown_2.27           
## [109] GEOquery_2.72.0           htmltools_0.5.8.1        
## [111] pkgconfig_2.0.3           jpeg_0.1-10              
## [113] base64enc_0.1-3           sparseMatrixStats_1.16.0 
## [115] highr_0.11                fastmap_1.2.0            
## [117] ensembldb_2.28.1          rlang_1.1.4              
## [119] htmlwidgets_1.6.4         UCSC.utils_1.0.0         
## [121] shiny_1.8.1.1             DelayedMatrixStats_1.26.0
## [123] farver_2.1.2              jquerylib_0.1.4          
## [125] jsonlite_1.8.8            BiocParallel_1.38.0      
## [127] mclust_6.1.1              R.oo_1.26.0              
## [129] VariantAnnotation_1.50.0  RCurl_1.98-1.16          
## [131] magrittr_2.0.3            Formula_1.2-5            
## [133] GenomeInfoDbData_1.2.12   Rhdf5lib_1.26.0          
## [135] munsell_0.5.1             Rcpp_1.0.12              
## [137] stringi_1.8.4             zlibbioc_1.50.0          
## [139] MASS_7.3-61               plyr_1.8.9               
## [141] org.Hs.eg.db_3.19.1       ggstats_0.6.0            
## [143] deldir_2.0-4              splines_4.4.1            
## [145] multtest_2.60.0           hms_1.1.3                
## [147] rngtools_1.5.2            biomaRt_2.60.1           
## [149] BiocVersion_3.19.1        XML_3.99-0.17            
## [151] evaluate_0.24.0           calibrate_1.7.7          
## [153] latticeExtra_0.6-30       biovizBase_1.52.0        
## [155] BiocManager_1.30.23       httpuv_1.6.15            
## [157] tzdb_0.4.0                tidyr_1.3.1              
## [159] openssl_2.2.0             purrr_1.0.2              
## [161] reshape_0.8.9             xtable_1.8-4             
## [163] restfulr_0.0.15           AnnotationFilter_1.28.0  
## [165] later_1.3.2               viridisLite_0.4.2        
## [167] tibble_3.2.1              beeswarm_0.4.0           
## [169] memoise_2.0.1             AnnotationDbi_1.66.0     
## [171] GenomicAlignments_1.40.0  cluster_2.1.6