Source: https://github.com/markziemann/asd_meth
Here I will be running a comparison of differential methylation data from guthrie and fresh blood samples.
Here are the files that I’m using:
limma_guthrie_iIQ.csv
limma_blood_iIQ.csv
suppressPackageStartupMessages({
library("parallel")
library("mitch")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
source("https://raw.githubusercontent.com/markziemann/gmea/main/meth_functions.R")
library("data.table")
library("kableExtra")
library("eulerr")
library("RIdeogram")
library("GenomicRanges")
library("tictoc")
})
source("meth_functions.R")
First I will generate plots for limmma analysis.
# blood at assessment
top <- read.csv("limma_blood_iIQ.csv")
nrow(top)
## [1] 802647
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 135236
-log10(min(top$P.Value))
## [1] 6.458586
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")
pdf("manhat_limma_bl.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Blood at assessment limma hypermethylated")
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Blood at assessment limma hypomethylated")
dev.off()
## png
## 2
# neonatal guthrie card
top <- read.csv("limma_guthrie_iIQ.csv")
nrow(top)
## [1] 790658
top <- subset(top,P.Value<1e-2)
nrow(top)
## [1] 14750
-log10(min(top$P.Value))
## [1] 4.87714
top$chr <- as.integer(gsub("chr","",top$chr))
top$snp <- paste(top$Name,top$UCSC_RefGene_Name)
top$snp <- sapply(strsplit(top$snp,";"),"[[",1)
up <- subset(top,logFC>0)
dn <- subset(top,logFC<0)
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
annotateTop=FALSE)
pdf("manhat_limma_gu.pdf")
par(mfrow=c(2,1))
par(mar=c(4,4,3,5))
manhattan(x=up,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 ,main="Neonatal Guthrie card limma hypermethylated",
annotateTop=FALSE)
manhattan(x=dn,chr="chr",bp="pos",p="P.Value",snp="snp",suggestiveline = -log10(1e-04),
ylim=c(2,6), annotatePval= 1e-04 , main="Neonatal Guthrie card limma hypomethylated",
annotateTop=FALSE)
dev.off()
## png
## 2
Looking for enrichments in different genomic compartments.
Guthrie cards.
par(mfrow=c(2,1))
# guthrie
dma1 <- read.csv("limma_guthrie_iIQ.csv")
if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Blood at assessment.
# blood
dma2 <- read.csv("limma_blood_iIQ.csv")
if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
comp <- compartment_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Guthrie cards.
par(mfrow=c(2,1))
# guthrie
comp <- compartment_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Intergenic 43381 82 1.3128013 2.315209e-02 1.0343350 1.6467386
## 3'UTR 22433 9 0.2665139 1.260482e-06 0.1214940 0.5080348
## 5'UTR 102937 182 1.2484552 8.119349e-03 1.0576305 1.4671679
## Body 333689 320 0.4958336 5.572010e-27 0.4330962 0.5666457
## ExonBnd 6713 4 0.4031351 7.444627e-02 0.1095886 1.0357822
## TSS1500 115845 257 1.6826541 5.653619e-12 1.4551851 1.9405700
## TSS200 75104 174 1.6971429 2.096980e-09 1.4332076 2.0003441
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Intergenic 43381 23 0.4873647 0.0002246935 0.3069297 0.7372909
## 3'UTR 22433 26 1.1047943 0.5999534839 0.7165317 1.6329547
## 5'UTR 102937 98 0.8896326 0.2981027406 0.7116388 1.1020120
## Body 333689 403 1.3252996 0.0001420147 1.1434250 1.5368914
## ExonBnd 6713 7 0.9904467 1.0000000000 0.3966398 2.0521073
## TSS1500 115845 123 1.0103417 0.9209822482 0.8255280 1.2282240
## TSS200 75104 57 0.6973307 0.0072978308 0.5224867 0.9146612
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
Blood at assessment.
# blood
comp <- compartment_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Intergenic 43528 13 0.3203799 1.794093e-06 0.1696305 0.5519925
## 3'UTR 22758 15 0.7317172 2.596702e-01 0.4069807 1.2166229
## 5'UTR 104442 101 1.0990235 3.699278e-01 0.8793542 1.3619540
## Body 339559 385 1.6880936 7.310680e-11 1.4355337 1.9880934
## ExonBnd 6862 4 0.6504958 5.399943e-01 0.1766529 1.6745640
## TSS1500 117509 95 0.8887766 3.097687e-01 0.7068615 1.1072783
## TSS200 75457 21 0.2879391 1.096355e-11 0.1769272 0.4441419
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Intergenic 43528 12 0.2206601 2.073474e-11 0.1135493 0.3875733
## 3'UTR 22758 38 1.4246415 3.925012e-02 1.0004821 1.9730623
## 5'UTR 104442 117 0.9331978 5.272447e-01 0.7609384 1.1359581
## Body 339559 504 1.6185740 4.376035e-12 1.4076722 1.8628902
## ExonBnd 6862 6 0.7335510 5.956398e-01 0.2683465 1.6042279
## TSS1500 117509 139 0.9943003 1.000000e+00 0.8228036 1.1943376
## TSS200 75457 28 0.2883440 3.586048e-15 0.1903863 0.4199389
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
Guthrie cards.
par(mfrow=c(2,1))
if (nrow(subset(dma1,P.Value <1e-4)) > 100 ) {
# guthrie
comp <- cgi_enrichment(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Blood at assessment.
if (nrow(subset(dma2,P.Value <1e-4)) > 100 ) {
# blood
comp <- cgi_enrichment(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
}
Guthrie cards.
par(mfrow=c(2,1))
# guthrie
comp <- cgi_enrichment2(dma1)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Island 150182 235 1.3105454 3.809875e-04 1.1273616 1.5189923
## OpenSea 442427 413 0.5533733 1.740577e-20 0.4866870 0.6287060
## Shelf 55491 45 0.6239442 1.237693e-03 0.4519913 0.8416203
## Shore 142558 307 2.0162438 2.876050e-22 1.7566861 2.3097508
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Island 150182 107 0.5106539 1.229374e-12 0.4138368 0.6246727
## OpenSea 442427 654 1.4884602 1.265497e-09 1.3045056 1.7006442
## Shelf 55491 73 1.0433648 7.101004e-01 0.8104514 1.3252248
## Shore 142558 166 0.9047870 2.492677e-01 0.7611987 1.0700679
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
Blood at assessment.
# blood
comp <- cgi_enrichment2(dma2)
comp <- lapply(comp,function(x) {subset(x,OR!=0)} )
comp
## $up_comp
## all up OR fisherPval lowerCI upperCI
## Island 150186 21 0.09306164 9.711431e-61 0.05732012 0.1430515
## OpenSea 452360 832 3.84006002 4.129213e-73 3.24922913 4.5602108
## Shelf 56174 50 0.69911498 1.290409e-02 0.51512263 0.9295346
## Shore 143927 97 0.49129703 3.771782e-13 0.39423772 0.6063576
##
## $dn_comp
## all dn OR fisherPval lowerCI upperCI
## Island 150186 44 0.1997145 2.951776e-41 0.1441291 0.2702244
## OpenSea 452360 754 2.3756797 6.546337e-36 2.0544428 2.7546897
## Shelf 56174 69 0.9848496 9.505837e-01 0.7595925 1.2587767
## Shore 143927 133 0.7018143 8.697716e-05 0.5802284 0.8433119
make_forest_plots_up(comp)
make_forest_plots_dn(comp)
For reproducibility
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] tictoc_1.1
## [2] RIdeogram_0.2.2
## [3] kableExtra_1.3.4
## [4] data.table_1.14.8
## [5] ENmix_1.34.0
## [6] doParallel_1.0.17
## [7] qqman_0.1.8
## [8] RCircos_1.2.2
## [9] beeswarm_0.4.0
## [10] forestplot_3.1.1
## [11] abind_1.4-5
## [12] checkmate_2.1.0
## [13] reshape2_1.4.4
## [14] gplots_3.1.3
## [15] eulerr_7.0.0
## [16] GEOquery_2.66.0
## [17] RColorBrewer_1.1-3
## [18] IlluminaHumanMethylation450kmanifest_0.4.0
## [19] topconfects_1.14.0
## [20] DMRcatedata_2.16.0
## [21] ExperimentHub_2.6.0
## [22] AnnotationHub_3.6.0
## [23] BiocFileCache_2.6.0
## [24] dbplyr_2.3.1
## [25] DMRcate_2.12.0
## [26] limma_3.54.0
## [27] missMethyl_1.32.0
## [28] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [29] R.utils_2.12.2
## [30] R.oo_1.25.0
## [31] R.methodsS3_1.8.2
## [32] plyr_1.8.8
## [33] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [34] minfi_1.44.0
## [35] bumphunter_1.40.0
## [36] locfit_1.5-9.7
## [37] iterators_1.0.14
## [38] foreach_1.5.2
## [39] Biostrings_2.66.0
## [40] XVector_0.38.0
## [41] SummarizedExperiment_1.28.0
## [42] Biobase_2.58.0
## [43] MatrixGenerics_1.10.0
## [44] matrixStats_0.63.0
## [45] GenomicRanges_1.50.2
## [46] GenomeInfoDb_1.34.6
## [47] IRanges_2.32.0
## [48] S4Vectors_0.36.1
## [49] BiocGenerics_0.44.0
## [50] mitch_1.10.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.58.0
## [3] GGally_2.1.2 tidyr_1.3.0
## [5] ggplot2_3.4.1 bit64_4.0.5
## [7] knitr_1.42 DelayedArray_0.24.0
## [9] rpart_4.1.19 KEGGREST_1.38.0
## [11] RCurl_1.98-1.10 AnnotationFilter_1.22.0
## [13] generics_0.1.3 GenomicFeatures_1.50.3
## [15] preprocessCore_1.60.2 RSQLite_2.3.0
## [17] bit_4.0.5 tzdb_0.3.0
## [19] webshot_0.5.4 xml2_1.3.3
## [21] httpuv_1.6.9 assertthat_0.2.1
## [23] xfun_0.37 hms_1.1.2
## [25] jquerylib_0.1.4 evaluate_0.20
## [27] promises_1.2.0.1 fansi_1.0.4
## [29] restfulr_0.0.15 scrime_1.3.5
## [31] progress_1.2.2 caTools_1.18.2
## [33] readxl_1.4.2 DBI_1.1.3
## [35] geneplotter_1.76.0 htmlwidgets_1.6.2
## [37] reshape_0.8.9 purrr_1.0.1
## [39] ellipsis_0.3.2 dplyr_1.1.0
## [41] backports_1.4.1 permute_0.9-7
## [43] calibrate_1.7.7 grImport2_0.2-0
## [45] annotate_1.76.0 biomaRt_2.54.0
## [47] deldir_1.0-6 sparseMatrixStats_1.10.0
## [49] vctrs_0.6.0 ensembldb_2.22.0
## [51] withr_2.5.0 cachem_1.0.7
## [53] Gviz_1.42.0 BSgenome_1.66.2
## [55] GenomicAlignments_1.34.0 prettyunits_1.1.1
## [57] mclust_6.0.0 svglite_2.1.1
## [59] cluster_2.1.4 RPMM_1.25
## [61] lazyeval_0.2.2 crayon_1.5.2
## [63] genefilter_1.80.3 edgeR_3.40.2
## [65] pkgconfig_2.0.3 nlme_3.1-162
## [67] ProtGenerics_1.30.0 nnet_7.3-18
## [69] rlang_1.1.0 lifecycle_1.0.3
## [71] filelock_1.0.2 dichromat_2.0-0.1
## [73] rsvg_2.4.0 cellranger_1.1.0
## [75] rngtools_1.5.2 base64_2.0.1
## [77] Matrix_1.5-3 Rhdf5lib_1.20.0
## [79] base64enc_0.1-3 viridisLite_0.4.1
## [81] png_0.1-8 rjson_0.2.21
## [83] bitops_1.0-7 KernSmooth_2.23-20
## [85] rhdf5filters_1.10.0 blob_1.2.4
## [87] DelayedMatrixStats_1.20.0 doRNG_1.8.6
## [89] stringr_1.5.0 nor1mix_1.3-0
## [91] readr_2.1.4 jpeg_0.1-10
## [93] scales_1.2.1 memoise_2.0.1
## [95] magrittr_2.0.3 zlibbioc_1.44.0
## [97] compiler_4.2.2 BiocIO_1.8.0
## [99] illuminaio_0.40.0 Rsamtools_2.14.0
## [101] cli_3.6.0 DSS_2.46.0
## [103] htmlTable_2.4.1 Formula_1.2-5
## [105] MASS_7.3-58.3 tidyselect_1.2.0
## [107] stringi_1.7.12 highr_0.10
## [109] yaml_2.3.7 askpass_1.1
## [111] latticeExtra_0.6-30 sass_0.4.5
## [113] VariantAnnotation_1.44.0 tools_4.2.2
## [115] rstudioapi_0.14 foreign_0.8-84
## [117] bsseq_1.34.0 gridExtra_2.3
## [119] digest_0.6.31 BiocManager_1.30.20
## [121] shiny_1.7.4 quadprog_1.5-8
## [123] Rcpp_1.0.10 siggenes_1.72.0
## [125] BiocVersion_3.16.0 later_1.3.0
## [127] org.Hs.eg.db_3.16.0 httr_1.4.5
## [129] AnnotationDbi_1.60.0 biovizBase_1.46.0
## [131] colorspace_2.1-0 rvest_1.0.3
## [133] XML_3.99-0.13 splines_4.2.2
## [135] statmod_1.5.0 multtest_2.54.0
## [137] systemfonts_1.0.4 xtable_1.8-4
## [139] jsonlite_1.8.4 dynamicTreeCut_1.63-1
## [141] R6_2.5.1 echarts4r_0.4.4
## [143] Hmisc_5.0-1 pillar_1.8.1
## [145] htmltools_0.5.4 mime_0.12
## [147] glue_1.6.2 fastmap_1.1.1
## [149] BiocParallel_1.32.5 interactiveDisplayBase_1.36.0
## [151] beanplot_1.3.1 codetools_0.2-19
## [153] utf8_1.2.3 lattice_0.20-45
## [155] bslib_0.4.2 tibble_3.2.0
## [157] curl_5.0.0 gtools_3.9.4
## [159] openssl_2.0.6 interp_1.1-3
## [161] survival_3.5-5 rmarkdown_2.20
## [163] munsell_0.5.0 rhdf5_2.42.0
## [165] GenomeInfoDbData_1.2.9 HDF5Array_1.26.0
## [167] impute_1.72.3 gtable_0.3.2