Source: https://github.com/markziemann/asd_meth
Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.
Here are the files that I’m using:
limma_guthrie_ADOS.csv
limma_blood_ADOS.csv
suppressPackageStartupMessages({
library("parallel")
library("mitch")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
source("meth_functions.R")
library("data.table")
library("kableExtra")
library("eulerr")
library("GenomicRanges")
library("qqman")
library("forestplot")
library("RhpcBLASctl")
})
source("meth_functions.R")
RhpcBLASctl::blas_set_num_threads(1)
First I will generate plots for limmma analysis.
limma_guthrie_ADOS.csv
limma_blood_ADOS.csv
# blood at assessment
top <- read.csv("limma_blood_ADOS.csv")
nrow(top)
## [1] 802647
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 3557
-log10(min(top$P.Value))
## [1] 4.672452
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")
pdf("manhat_limma_bl.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")
dev.off()
## png
## 2
# neonatal guthrie card
top <- read.csv("limma_guthrie_ADOS.csv")
nrow(top)
## [1] 790658
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 3013
-log10(min(top$P.Value))
## [1] 5.044086
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
annotateTop=FALSE)
pdf("manhat_limma_gu.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
annotateTop=FALSE)
dev.off()
## png
## 2
Looking for enrichments in different genomic compartments.
Guthrie cards.
par(mfrow=c(2,1))
# guthrie
dma1 <- read.csv("limma_guthrie_ADOS.csv")
if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Blood at assessment.
# blood
dma2 <- read.csv("limma_blood_ADOS.csv")
if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Guthrie cards.
par(mfrow=c(2,1))
# guthrie
comp <- compartment_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Intergenic 43381 51 0.9245339 0.67535385 0.68258691 1.227434
## 3'UTR 22433 35 1.2427972 0.21335929 0.85959826 1.742877
## 5'UTR 102937 119 0.8999233 0.29663116 0.73548370 1.093056
## Body 333689 426 1.0169302 0.81380803 0.88918787 1.162815
## ExonBnd 6713 3 0.3506500 0.05618644 0.07214843 1.028514
## TSS1500 115845 166 1.1629940 0.08545854 0.97629972 1.378833
## TSS200 75104 86 0.8944576 0.35565472 0.70748073 1.118575
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Intergenic 43381 35 0.6474352 1.028891e-02 0.4477765 0.9080900
## 3'UTR 22433 33 1.2160265 2.830640e-01 0.8311904 1.7221584
## 5'UTR 102937 106 0.8230234 6.581997e-02 0.6650275 1.0099966
## Body 333689 474 1.3738310 4.133803e-06 1.1976552 1.5768086
## ExonBnd 6713 8 0.9778717 1.000000e+00 0.4206235 1.9366982
## TSS1500 115845 132 0.9232365 4.331234e-01 0.7609279 1.1130633
## TSS200 75104 65 0.6861691 2.740879e-03 0.5243328 0.8844783
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
Blood at assessment.
# blood
comp <- compartment_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Intergenic 43528 58 1.1861089 0.2114994 0.8917286 1.550848
## 3'UTR 22758 19 0.7280137 0.1929947 0.4358963 1.144822
## 5'UTR 104442 127 1.0831976 0.3981696 0.8889404 1.311071
## Body 339559 369 0.9193118 0.2446580 0.7981453 1.058347
## ExonBnd 6862 4 0.5102260 0.2072360 0.1386423 1.312107
## TSS1500 117509 151 1.1610028 0.1068939 0.9661410 1.387944
## TSS200 75457 79 0.9126172 0.4925945 0.7140596 1.152529
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Intergenic 43528 33 0.6708691 2.102774e-02 0.4583358 0.9506316
## 3'UTR 22758 31 1.2404430 2.242715e-01 0.8365982 1.7763942
## 5'UTR 104442 118 1.0244279 8.011151e-01 0.8349697 1.2477260
## Body 339559 432 1.3321546 6.192567e-05 1.1547604 1.5376157
## ExonBnd 6862 12 1.5899242 1.384529e-01 0.8175566 2.7952187
## TSS1500 117509 98 0.7181107 1.768932e-03 0.5750239 0.8885583
## TSS200 75457 62 0.7200436 1.260460e-02 0.5462686 0.9341952
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
Guthrie cards.
par(mfrow=c(2,1))
if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
# guthrie
comp <- cgi_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Blood at assessment.
if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
# blood
comp <- cgi_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Guthrie cards.
par(mfrow=c(2,1))
# guthrie
comp <- cgi_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Island 150182 146 0.7288272 0.0002798720 0.6072189 0.8696999
## OpenSea 442427 576 1.0693647 0.3078147713 0.9415907 1.2152392
## Shelf 55491 54 0.7560012 0.0471462422 0.5636143 0.9951031
## Shore 142558 224 1.3128010 0.0004602369 1.1262334 1.5253509
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Island 150182 170 0.8733429 0.115573520 0.7359925 1.031310
## OpenSea 442427 521 0.8559396 0.014134967 0.7545218 0.971116
## Shelf 55491 91 1.3268315 0.013032779 1.0574206 1.647678
## Shore 142558 218 1.2677690 0.002311228 1.0859120 1.475127
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
Blood at assessment.
# blood
comp <- cgi_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Island 150186 145 0.7365097 0.0004790467 0.6132566 0.8793070
## OpenSea 452360 617 1.2477956 0.0006368253 1.0965893 1.4213292
## Shelf 56174 52 0.7286441 0.0253341559 0.5401462 0.9637818
## Shore 143927 186 1.0458621 0.5917870441 0.8869173 1.2280459
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Island 150186 92 0.4398291 7.197931e-17 0.3509793 0.5455411
## OpenSea 452360 703 1.8341813 1.358832e-19 1.5991311 2.1078631
## Shelf 56174 60 0.8480394 2.383446e-01 0.6419233 1.1016926
## Shore 143927 145 0.7759522 4.388490e-03 0.6461142 0.9263797
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
For reproducibility
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] forestplot_3.1.3
## [2] abind_1.4-5
## [3] checkmate_2.3.2
## [4] qqman_0.1.9
## [5] eulerr_7.0.2
## [6] data.table_1.15.4
## [7] DMRcatedata_2.22.0
## [8] IlluminaHumanMethylation450kmanifest_0.4.0
## [9] IlluminaHumanMethylationEPICmanifest_0.3.0
## [10] RhpcBLASctl_0.23-42
## [11] vioplot_0.5.0
## [12] zoo_1.8-12
## [13] sm_2.2-6.0
## [14] kableExtra_1.4.0
## [15] mitch_1.16.0
## [16] FlowSorted.Blood.EPIC_2.8.0
## [17] ExperimentHub_2.12.0
## [18] AnnotationHub_3.12.0
## [19] BiocFileCache_2.12.0
## [20] dbplyr_2.5.0
## [21] DMRcate_3.0.8
## [22] ggplot2_3.5.1
## [23] reshape2_1.4.4
## [24] FlowSorted.Blood.450k_1.42.0
## [25] WGCNA_1.72-5
## [26] fastcluster_1.2.6
## [27] dynamicTreeCut_1.63-1
## [28] gplots_3.1.3.1
## [29] RColorBrewer_1.1-3
## [30] limma_3.60.4
## [31] missMethyl_1.38.0
## [32] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [33] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [34] minfi_1.50.0
## [35] bumphunter_1.46.0
## [36] locfit_1.5-9.10
## [37] iterators_1.0.14
## [38] foreach_1.5.2
## [39] Biostrings_2.72.1
## [40] XVector_0.44.0
## [41] SummarizedExperiment_1.34.0
## [42] Biobase_2.64.0
## [43] MatrixGenerics_1.16.0
## [44] matrixStats_1.3.0
## [45] GenomicRanges_1.56.1
## [46] GenomeInfoDb_1.40.1
## [47] IRanges_2.38.1
## [48] S4Vectors_0.42.1
## [49] BiocGenerics_0.50.0
##
## loaded via a namespace (and not attached):
## [1] ProtGenerics_1.36.0 bitops_1.0-8
## [3] httr_1.4.7 doParallel_1.0.17
## [5] tools_4.4.1 doRNG_1.8.6
## [7] backports_1.5.0 utf8_1.2.4
## [9] R6_2.5.1 HDF5Array_1.32.1
## [11] lazyeval_0.2.2 Gviz_1.48.0
## [13] rhdf5filters_1.16.0 permute_0.9-7
## [15] withr_3.0.0 GGally_2.2.1
## [17] prettyunits_1.2.0 gridExtra_2.3
## [19] base64_2.0.1 preprocessCore_1.61.0
## [21] cli_3.6.3 labeling_0.4.3
## [23] sass_0.4.9 readr_2.1.5
## [25] genefilter_1.86.0 askpass_1.2.0
## [27] systemfonts_1.1.0 Rsamtools_2.20.0
## [29] foreign_0.8-86 svglite_2.1.3
## [31] siggenes_1.78.0 illuminaio_0.46.0
## [33] R.utils_2.12.3 dichromat_2.0-0.1
## [35] scrime_1.3.5 BSgenome_1.72.0
## [37] readxl_1.4.3 rstudioapi_0.16.0
## [39] impute_1.78.0 RSQLite_2.3.7
## [41] generics_0.1.3 BiocIO_1.14.0
## [43] gtools_3.9.5 dplyr_1.1.4
## [45] GO.db_3.19.1 Matrix_1.7-0
## [47] interp_1.1-6 fansi_1.0.6
## [49] R.methodsS3_1.8.2 lifecycle_1.0.4
## [51] edgeR_4.2.1 yaml_2.3.8
## [53] rhdf5_2.48.0 SparseArray_1.4.8
## [55] blob_1.2.4 promises_1.3.0
## [57] crayon_1.5.3 lattice_0.22-6
## [59] echarts4r_0.4.5 GenomicFeatures_1.56.0
## [61] annotate_1.82.0 KEGGREST_1.44.1
## [63] pillar_1.9.0 knitr_1.47
## [65] beanplot_1.3.1 rjson_0.2.22
## [67] codetools_0.2-20 glue_1.7.0
## [69] vctrs_0.6.5 png_0.1-8
## [71] cellranger_1.1.0 gtable_0.3.5
## [73] cachem_1.1.0 xfun_0.45
## [75] mime_0.12 S4Arrays_1.4.1
## [77] survival_3.7-0 statmod_1.5.0
## [79] nlme_3.1-165 bit64_4.0.5
## [81] bsseq_1.40.0 progress_1.2.3
## [83] filelock_1.0.3 bslib_0.7.0
## [85] nor1mix_1.3-3 KernSmooth_2.23-24
## [87] rpart_4.1.23 colorspace_2.1-1
## [89] DBI_1.2.3 Hmisc_5.1-3
## [91] nnet_7.3-19 tidyselect_1.2.1
## [93] bit_4.0.5 compiler_4.4.1
## [95] curl_5.2.1 httr2_1.0.1
## [97] htmlTable_2.4.3 BiasedUrn_2.0.12
## [99] xml2_1.3.6 DelayedArray_0.30.1
## [101] rtracklayer_1.64.0 scales_1.3.0
## [103] caTools_1.18.2 quadprog_1.5-8
## [105] rappdirs_0.3.3 stringr_1.5.1
## [107] digest_0.6.36 rmarkdown_2.27
## [109] GEOquery_2.72.0 htmltools_0.5.8.1
## [111] pkgconfig_2.0.3 jpeg_0.1-10
## [113] base64enc_0.1-3 sparseMatrixStats_1.16.0
## [115] highr_0.11 fastmap_1.2.0
## [117] ensembldb_2.28.1 rlang_1.1.4
## [119] htmlwidgets_1.6.4 UCSC.utils_1.0.0
## [121] shiny_1.8.1.1 DelayedMatrixStats_1.26.0
## [123] farver_2.1.2 jquerylib_0.1.4
## [125] jsonlite_1.8.8 BiocParallel_1.38.0
## [127] mclust_6.1.1 R.oo_1.26.0
## [129] VariantAnnotation_1.50.0 RCurl_1.98-1.16
## [131] magrittr_2.0.3 Formula_1.2-5
## [133] GenomeInfoDbData_1.2.12 Rhdf5lib_1.26.0
## [135] munsell_0.5.1 Rcpp_1.0.12
## [137] stringi_1.8.4 zlibbioc_1.50.0
## [139] MASS_7.3-61 plyr_1.8.9
## [141] org.Hs.eg.db_3.19.1 ggstats_0.6.0
## [143] deldir_2.0-4 splines_4.4.1
## [145] multtest_2.60.0 hms_1.1.3
## [147] rngtools_1.5.2 biomaRt_2.60.1
## [149] BiocVersion_3.19.1 XML_3.99-0.17
## [151] evaluate_0.24.0 calibrate_1.7.7
## [153] latticeExtra_0.6-30 biovizBase_1.52.0
## [155] BiocManager_1.30.23 httpuv_1.6.15
## [157] tzdb_0.4.0 tidyr_1.3.1
## [159] openssl_2.2.0 purrr_1.0.2
## [161] reshape_0.8.9 xtable_1.8-4
## [163] restfulr_0.0.15 AnnotationFilter_1.28.0
## [165] later_1.3.2 viridisLite_0.4.2
## [167] tibble_3.2.1 beeswarm_0.4.0
## [169] memoise_2.0.1 AnnotationDbi_1.66.0
## [171] GenomicAlignments_1.40.0 cluster_2.1.6