Import meta-regression results to run mitch.
IMPORTANT Remove the mitch package and reinstall with devtools directly from GitHub.
devtools::install_github("markziemann/mitch")
Load packages.
suppressPackageStartupMessages({
library("limma")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("HGNChelper")
library("tictoc")
library("mitch")
library("kableExtra")
library("beeswarm")
library("missMethyl")
library("gridExtra")
library("png")
})
CORES=8
Reactome pathways were downloaded on the 11th January.
if(!file.exists("ReactomePathways.gmt")) {
download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip",
destfile="ReactomePathways.gmt.zip")
unzip("ReactomePathways.gmt.zip")
}
gs <- gmt_import("ReactomePathways.gmt")
head(gs)
## $`2-LTR circle formation`
## [1] "BANF1" "HMGA1" "LIG4" "PSIP1" "XRCC4" "XRCC5" "XRCC6"
## [8] "gag" "gag-pol" "rev" "vif" "vpr" "vpu"
##
## $`5-Phosphoribose 1-diphosphate biosynthesis`
## [1] "PRPS1" "PRPS1L1" "PRPS2"
##
## $`A tetrasaccharide linker sequence is required for GAG synthesis`
## [1] "AGRN" "B3GALT6" "B3GAT1" "B3GAT2" "B3GAT3" "B4GALT7" "BCAN"
## [8] "BGN" "CSPG4" "CSPG5" "DCN" "GPC1" "GPC2" "GPC3"
## [15] "GPC4" "GPC5" "GPC6" "HSPG2" "NCAN" "SDC1" "SDC2"
## [22] "SDC3" "SDC4" "VCAN" "XYLT1" "XYLT2"
##
## $`ABC transporter disorders`
## [1] "ABCA1" "ABCA12" "ABCA3" "ABCB11" "ABCB4" "ABCB6" "ABCC2" "ABCC6"
## [9] "ABCC8" "ABCC9" "ABCD1" "ABCD4" "ABCG5" "ABCG8" "APOA1" "CFTR"
## [17] "DERL1" "DERL2" "DERL3" "ERLEC1" "ERLIN1" "ERLIN2" "KCNJ11" "LMBRD1"
## [25] "OS9" "PSMA1" "PSMA2" "PSMA3" "PSMA4" "PSMA5" "PSMA6" "PSMA7"
## [33] "PSMA8" "PSMB1" "PSMB10" "PSMB11" "PSMB2" "PSMB3" "PSMB4" "PSMB5"
## [41] "PSMB6" "PSMB7" "PSMB8" "PSMB9" "PSMC1" "PSMC2" "PSMC3" "PSMC4"
## [49] "PSMC5" "PSMC6" "PSMD1" "PSMD10" "PSMD11" "PSMD12" "PSMD13" "PSMD14"
## [57] "PSMD2" "PSMD3" "PSMD4" "PSMD5" "PSMD6" "PSMD7" "PSMD8" "PSMD9"
## [65] "PSME1" "PSME2" "PSME3" "PSME4" "PSMF1" "RNF185" "RNF5" "RPS27A"
## [73] "SEL1L" "SEM1" "UBA52" "UBB" "UBC" "VCP"
##
## $`ABC transporters in lipid homeostasis`
## [1] "ABCA10" "ABCA12" "ABCA2" "ABCA3" "ABCA5" "ABCA6" "ABCA7" "ABCA9"
## [9] "ABCD1" "ABCD2" "ABCD3" "ABCG1" "ABCG4" "ABCG5" "ABCG8" "APOA1"
## [17] "PEX19" "PEX3"
##
## $`ABC-family proteins mediated transport`
## [1] "ABCA10" "ABCA12" "ABCA2" "ABCA3" "ABCA4" "ABCA5" "ABCA6" "ABCA7"
## [9] "ABCA8" "ABCA9" "ABCB1" "ABCB10" "ABCB4" "ABCB5" "ABCB6" "ABCB7"
## [17] "ABCB8" "ABCB9" "ABCC1" "ABCC10" "ABCC11" "ABCC2" "ABCC3" "ABCC4"
## [25] "ABCC5" "ABCC6" "ABCC9" "ABCD1" "ABCD2" "ABCD3" "ABCF1" "ABCG1"
## [33] "ABCG4" "ABCG5" "ABCG8" "APOA1" "CFTR" "DERL1" "DERL2" "DERL3"
## [41] "EIF2S1" "EIF2S2" "EIF2S3" "ERLEC1" "ERLIN1" "ERLIN2" "KCNJ11" "OS9"
## [49] "PEX19" "PEX3" "PSMA1" "PSMA2" "PSMA3" "PSMA4" "PSMA5" "PSMA6"
## [57] "PSMA7" "PSMA8" "PSMB1" "PSMB10" "PSMB11" "PSMB2" "PSMB3" "PSMB4"
## [65] "PSMB5" "PSMB6" "PSMB7" "PSMB8" "PSMB9" "PSMC1" "PSMC2" "PSMC3"
## [73] "PSMC4" "PSMC5" "PSMC6" "PSMD1" "PSMD10" "PSMD11" "PSMD12" "PSMD13"
## [81] "PSMD14" "PSMD2" "PSMD3" "PSMD4" "PSMD5" "PSMD6" "PSMD7" "PSMD8"
## [89] "PSMD9" "PSME1" "PSME2" "PSME3" "PSME4" "PSMF1" "RNF185" "RNF5"
## [97] "RPS27A" "SEL1L" "SEM1" "UBA52" "UBB" "UBC" "VCP"
Use all probes on the chip.
Updating gene names using the HGNC package because a lot of them have been updated over time.
tic()
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","UCSC_RefGene_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
gp2 <- lapply(gp2,unique)
gt <- stack(gp2)
colnames(gt) <- c("gene","probe")
gt$probe <- as.character(gt$probe)
dim(gt)
## [1] 684970 2
str(gt)
## 'data.frame': 684970 obs. of 2 variables:
## $ gene : chr "YTHDF1" "EIF2S3" "PKN3" "CCDC57" ...
## $ probe: chr "cg18478105" "cg09835024" "cg14361672" "cg01763666" ...
toc() #9.0s
## 4.532 sec elapsed
tic()
#new.hgnc.table <- getCurrentHumanMap() Dec 2023
if(!file.exists("new.hgnc.table.rds")) {
download.file("https://github.com/markziemann/gmea/raw/main/figs/new.hgnc.table.rds",
destfile="new.hgnc.table.rds")
}
new.hgnc.table <- readRDS("new.hgnc.table.rds")
fix <- checkGeneSymbols(gt$gene,map=new.hgnc.table)
## Warning in checkGeneSymbols(gt$gene, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(gt$gene, map = new.hgnc.table): x contains
## non-approved gene symbols
fix2 <- fix[which(fix$x != fix$Suggested.Symbol),]
length(unique(fix2$x))
## [1] 3253
gt$gene <- fix$Suggested.Symbol
toc()
## 55.464 sec elapsed
x <- readRDS("models_sex_all_blood_need_cleaning>=10_5000.rds")
df <- as.data.frame(as.matrix(unlist(lapply(x,function(y) { y$model$zval[2] }))))
colnames(df) <- "zval"
hist(df$zval)
which(is.na(df$zval))
## integer(0)
rm(x)
Ensure that mitch version has aggregate by mean, not sum. Delete the bioconductor version and install with devtools::install_github
# mitch aggregate function needs to be changes to `mean`
tic()
m <- mitch_import(x=df,DEtype="prescored",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 659363
## Note: no. genes in output = 21625
## Warning in mitch_import(x = df, DEtype = "prescored", geneTable = gt): Warning: less than half of the input genes are also in the
## output
toc() # 7.25 sec
## 3.841 sec elapsed
hist(m$x)
dim(m)
## [1] 21625 1
head(m) %>%
kbl(caption = "Example of differential gene methylation scores used for pathway analysis") %>%
kable_paper("hover", full_width = F)
x | |
---|---|
A1BG | -0.1281221 |
A1BG-AS1 | -0.1265037 |
A1CF | -0.2257970 |
A2M | -0.0016135 |
A2M-AS1 | 1.3166603 |
A2ML1 | -0.3595390 |
genenames <- unique(unname(unlist(gs)))
length(which(rownames(m) %in% genenames))
## [1] 10357
length(which(!rownames(m) %in% genenames))
## [1] 11268
tic()
mres <- mitch_calc(x=m,genesets=gs,minsetsize=5,cores=8, priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
toc() #82.639 sec
## 7.653 sec elapsed
mtable <- mres$enrichment_result
up <- subset(mtable,s.dist>0 & p.adjustANOVA<0.05)
dn <- subset(mtable,s.dist<0 & p.adjustANOVA<0.05)
nrow(up)
## [1] 57
nrow(dn)
## [1] 238
head(up,10) %>%
kbl(caption = "Top significant pathways with higher methylation") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
1454 | Regulation of gene expression in endocrine-committed (NEUROG3+) progenitor cells | 5 | 0.0017587 | 0.8077521 | 0.0170660 |
1453 | Regulation of gene expression in early pancreatic precursor cells | 8 | 0.0001424 | 0.7765300 | 0.0029260 |
594 | Formation of lateral plate mesoderm | 5 | 0.0032659 | 0.7595560 | 0.0269748 |
609 | Free fatty acid receptors | 5 | 0.0041344 | 0.7404995 | 0.0317875 |
593 | Formation of intermediate mesoderm | 8 | 0.0017347 | 0.6394504 | 0.0169157 |
605 | Formation of the ureteric bud | 21 | 0.0000005 | 0.6314395 | 0.0000196 |
1824 | Thyroxine biosynthesis | 10 | 0.0005983 | 0.6267592 | 0.0082485 |
1860 | Transcriptional regulation of testis differentiation | 12 | 0.0002046 | 0.6189947 | 0.0039701 |
599 | Formation of the anterior neural plate | 10 | 0.0017939 | 0.5701689 | 0.0170924 |
1703 | Specification of the neural plate border | 16 | 0.0000964 | 0.5629657 | 0.0020957 |
head(dn,10) %>%
kbl(caption = "Top significant pathways with lower methylation") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
1543 | SRP-dependent cotranslational protein targeting to membrane | 105 | 0.0000000 | -0.4181280 | 0.0000000 |
1511 | SARS-CoV-1 modulates host translation machinery | 33 | 0.0000822 | -0.3960165 | 0.0018065 |
524 | Eukaryotic Translation Termination | 86 | 0.0000000 | -0.3952549 | 0.0000000 |
184 | Butyrate Response Factor 1 (BRF1) binds and destabilizes mRNA | 17 | 0.0051326 | -0.3920226 | 0.0370394 |
1155 | PINK1-PRKN Mediated Mitophagy | 21 | 0.0020045 | -0.3894164 | 0.0186376 |
1933 | Viral mRNA Translation | 83 | 0.0000000 | -0.3885120 | 0.0000001 |
1901 | Tristetraprolin (TTP, ZFP36) binds and destabilizes mRNA | 17 | 0.0055547 | -0.3884346 | 0.0393751 |
1185 | Peptide chain elongation | 83 | 0.0000000 | -0.3733374 | 0.0000003 |
1094 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 88 | 0.0000000 | -0.3695585 | 0.0000002 |
1571 | Selenocysteine synthesis | 86 | 0.0000000 | -0.3670843 | 0.0000003 |
top <- rbind(up[1:10,],dn[1:10,])
top <- top[order(top$s.dist),]
barnames <- gsub("_"," ",gsub("REACTOME_","",top$set))
cols <- as.character(((sign(top$s.dist)+1)/2)+1)
cols <- gsub("1","blue",gsub("2","red",cols))
par(mar = c(5.1, 29.1, 4.1, 2.1))
barplot(abs(top$s.dist),horiz=TRUE,las=1,names.arg=barnames, cex.names=0.8,
cex.axis=0.8,col=cols, xlab="Enrichment score",main="Reactomes")
mtext("whole gene")
grid()
par( mar = c(5.1, 4.1, 4.1, 2.1) )
Make a html report and some charts.
mitch_report(res=mres,outfile="mandy_mitchreport.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmpnMvthp/mandy_mitchreport.rds ".
##
##
## processing file: mitch.Rmd
## 1/34
## 2/34 [checklibraries]
## 3/34
## 4/34 [peek]
## 5/34
## 6/34 [metrics]
## 7/34
## 8/34 [scatterplot]
## 9/34
## 10/34 [contourplot]
## 11/34
## 12/34 [input_geneset_metrics1]
## 13/34
## 14/34 [input_geneset_metrics2]
## 15/34
## 16/34 [input_geneset_metrics3]
## 17/34
## 18/34 [echart1d]
## 19/34 [echart2d]
## 20/34
## 21/34 [heatmap]
## 22/34
## 23/34 [effectsize]
## 24/34
## 25/34 [results_table]
## 26/34
## 27/34 [results_table_complete]
## 28/34
## 29/34 [detailed_geneset_reports1d]
## 30/34
## 31/34 [detailed_geneset_reports2d]
## 32/34
## 33/34 [session_info]
## 34/34
## output file: /home/mark.ziemann@domain.internal.burnet.edu.au/projects/mandy/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mark.ziemann@domain.internal.burnet.edu.au/projects/mandy/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpnMvthp/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpnMvthp/rmarkdown-str133b9435d978bb.html
##
## Output created: /tmp/RtmpnMvthp/mitch_report.html
## [1] TRUE
mitch_plots(res=mres,outfile="mandy_mitchcharts.pdf")
## png
## 2
save.image("mandy_gmea.Rdata")
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pkgload_1.3.3
## [2] GGally_2.2.0
## [3] ggplot2_3.4.4
## [4] reshape2_1.4.4
## [5] gplots_3.1.3
## [6] gtools_3.9.5
## [7] tibble_3.2.1
## [8] dplyr_1.1.4
## [9] echarts4r_0.4.5
## [10] png_0.1-8
## [11] gridExtra_2.3
## [12] missMethyl_1.36.0
## [13] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [14] beeswarm_0.4.0
## [15] kableExtra_1.3.4
## [16] mitch_1.15.0
## [17] tictoc_1.2
## [18] HGNChelper_0.8.1
## [19] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [20] IlluminaHumanMethylation450kmanifest_0.4.0
## [21] minfi_1.48.0
## [22] bumphunter_1.44.0
## [23] locfit_1.5-9.8
## [24] iterators_1.0.14
## [25] foreach_1.5.2
## [26] Biostrings_2.70.1
## [27] XVector_0.42.0
## [28] SummarizedExperiment_1.32.0
## [29] Biobase_2.62.0
## [30] MatrixGenerics_1.14.0
## [31] matrixStats_1.2.0
## [32] GenomicRanges_1.54.1
## [33] GenomeInfoDb_1.38.1
## [34] IRanges_2.36.0
## [35] S4Vectors_0.40.1
## [36] BiocGenerics_0.48.1
## [37] eulerr_7.0.0
## [38] limma_3.58.1
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.2 later_1.3.2
## [3] BiocIO_1.12.0 bitops_1.0-7
## [5] filelock_1.0.3 preprocessCore_1.64.0
## [7] XML_3.99-0.16 lifecycle_1.0.4
## [9] lattice_0.22-5 MASS_7.3-60
## [11] base64_2.0.1 scrime_1.3.5
## [13] magrittr_2.0.3 sass_0.4.8
## [15] rmarkdown_2.25 jquerylib_0.1.4
## [17] yaml_2.3.8 httpuv_1.6.13
## [19] doRNG_1.8.6 askpass_1.2.0
## [21] DBI_1.2.0 RColorBrewer_1.1-3
## [23] abind_1.4-5 zlibbioc_1.48.0
## [25] rvest_1.0.3 quadprog_1.5-8
## [27] purrr_1.0.2 RCurl_1.98-1.13
## [29] rappdirs_0.3.3 GenomeInfoDbData_1.2.11
## [31] genefilter_1.84.0 annotate_1.80.0
## [33] svglite_2.1.3 DelayedMatrixStats_1.24.0
## [35] codetools_0.2-19 DelayedArray_0.28.0
## [37] xml2_1.3.6 tidyselect_1.2.0
## [39] beanplot_1.3.1 BiocFileCache_2.10.1
## [41] webshot_0.5.5 illuminaio_0.44.0
## [43] GenomicAlignments_1.38.0 jsonlite_1.8.8
## [45] multtest_2.58.0 ellipsis_0.3.2
## [47] survival_3.5-7 systemfonts_1.0.5
## [49] tools_4.3.2 progress_1.2.3
## [51] Rcpp_1.0.11 glue_1.6.2
## [53] SparseArray_1.2.2 xfun_0.41
## [55] HDF5Array_1.30.0 withr_2.5.2
## [57] fastmap_1.1.1 rhdf5filters_1.14.1
## [59] fansi_1.0.6 openssl_2.1.1
## [61] caTools_1.18.2 digest_0.6.33
## [63] R6_2.5.1 mime_0.12
## [65] colorspace_2.1-0 biomaRt_2.58.0
## [67] RSQLite_2.3.4 utf8_1.2.4
## [69] tidyr_1.3.0 generics_0.1.3
## [71] data.table_1.14.10 rtracklayer_1.62.0
## [73] prettyunits_1.2.0 httr_1.4.7
## [75] htmlwidgets_1.6.4 S4Arrays_1.2.0
## [77] ggstats_0.5.1 pkgconfig_2.0.3
## [79] gtable_0.3.4 blob_1.2.4
## [81] siggenes_1.76.0 htmltools_0.5.7
## [83] scales_1.3.0 rstudioapi_0.15.0
## [85] knitr_1.45 tzdb_0.4.0
## [87] rjson_0.2.21 nlme_3.1-164
## [89] curl_5.2.0 org.Hs.eg.db_3.18.0
## [91] cachem_1.0.8 rhdf5_2.46.0
## [93] stringr_1.5.1 KernSmooth_2.23-22
## [95] AnnotationDbi_1.64.1 restfulr_0.0.15
## [97] GEOquery_2.70.0 pillar_1.9.0
## [99] grid_4.3.2 reshape_0.8.9
## [101] vctrs_0.6.5 promises_1.2.1
## [103] dbplyr_2.4.0 xtable_1.8-4
## [105] evaluate_0.23 readr_2.1.4
## [107] GenomicFeatures_1.54.1 cli_3.6.2
## [109] compiler_4.3.2 Rsamtools_2.18.0
## [111] rlang_1.1.2 crayon_1.5.2
## [113] rngtools_1.5.2 nor1mix_1.3-2
## [115] mclust_6.0.1 plyr_1.8.9
## [117] stringi_1.8.3 viridisLite_0.4.2
## [119] BiocParallel_1.36.0 munsell_0.5.0
## [121] Matrix_1.6-4 hms_1.1.3
## [123] sparseMatrixStats_1.14.0 bit64_4.0.5
## [125] Rhdf5lib_1.24.0 KEGGREST_1.42.0
## [127] statmod_1.5.0 shiny_1.8.0
## [129] highr_0.10 memoise_2.0.1.9000
## [131] bslib_0.6.1 bit_4.0.5