Introduction

Import meta-regression results to run mitch.

Requirements

IMPORTANT Remove the mitch package and reinstall with devtools directly from GitHub.

devtools::install_github("markziemann/mitch")

Load packages.

suppressPackageStartupMessages({
  library("limma")
  library("eulerr")
  library("IlluminaHumanMethylation450kmanifest")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library("HGNChelper")
  library("tictoc")
  library("mitch")
  library("kableExtra")
  library("beeswarm")
  library("missMethyl")
  library("gridExtra")
  library("png")
})

CORES=8

Load data

Reactome pathways were downloaded on the 11th January.

if(!file.exists("ReactomePathways.gmt")) {
  download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip",
    destfile="ReactomePathways.gmt.zip")
  unzip("ReactomePathways.gmt.zip")
}

gs <- gmt_import("ReactomePathways.gmt")
head(gs)
## $`2-LTR circle formation`
##  [1] "BANF1"   "HMGA1"   "LIG4"    "PSIP1"   "XRCC4"   "XRCC5"   "XRCC6"  
##  [8] "gag"     "gag-pol" "rev"     "vif"     "vpr"     "vpu"    
## 
## $`5-Phosphoribose 1-diphosphate biosynthesis`
## [1] "PRPS1"   "PRPS1L1" "PRPS2"  
## 
## $`A tetrasaccharide linker sequence is required for GAG synthesis`
##  [1] "AGRN"    "B3GALT6" "B3GAT1"  "B3GAT2"  "B3GAT3"  "B4GALT7" "BCAN"   
##  [8] "BGN"     "CSPG4"   "CSPG5"   "DCN"     "GPC1"    "GPC2"    "GPC3"   
## [15] "GPC4"    "GPC5"    "GPC6"    "HSPG2"   "NCAN"    "SDC1"    "SDC2"   
## [22] "SDC3"    "SDC4"    "VCAN"    "XYLT1"   "XYLT2"  
## 
## $`ABC transporter disorders`
##  [1] "ABCA1"  "ABCA12" "ABCA3"  "ABCB11" "ABCB4"  "ABCB6"  "ABCC2"  "ABCC6" 
##  [9] "ABCC8"  "ABCC9"  "ABCD1"  "ABCD4"  "ABCG5"  "ABCG8"  "APOA1"  "CFTR"  
## [17] "DERL1"  "DERL2"  "DERL3"  "ERLEC1" "ERLIN1" "ERLIN2" "KCNJ11" "LMBRD1"
## [25] "OS9"    "PSMA1"  "PSMA2"  "PSMA3"  "PSMA4"  "PSMA5"  "PSMA6"  "PSMA7" 
## [33] "PSMA8"  "PSMB1"  "PSMB10" "PSMB11" "PSMB2"  "PSMB3"  "PSMB4"  "PSMB5" 
## [41] "PSMB6"  "PSMB7"  "PSMB8"  "PSMB9"  "PSMC1"  "PSMC2"  "PSMC3"  "PSMC4" 
## [49] "PSMC5"  "PSMC6"  "PSMD1"  "PSMD10" "PSMD11" "PSMD12" "PSMD13" "PSMD14"
## [57] "PSMD2"  "PSMD3"  "PSMD4"  "PSMD5"  "PSMD6"  "PSMD7"  "PSMD8"  "PSMD9" 
## [65] "PSME1"  "PSME2"  "PSME3"  "PSME4"  "PSMF1"  "RNF185" "RNF5"   "RPS27A"
## [73] "SEL1L"  "SEM1"   "UBA52"  "UBB"    "UBC"    "VCP"   
## 
## $`ABC transporters in lipid homeostasis`
##  [1] "ABCA10" "ABCA12" "ABCA2"  "ABCA3"  "ABCA5"  "ABCA6"  "ABCA7"  "ABCA9" 
##  [9] "ABCD1"  "ABCD2"  "ABCD3"  "ABCG1"  "ABCG4"  "ABCG5"  "ABCG8"  "APOA1" 
## [17] "PEX19"  "PEX3"  
## 
## $`ABC-family proteins mediated transport`
##   [1] "ABCA10" "ABCA12" "ABCA2"  "ABCA3"  "ABCA4"  "ABCA5"  "ABCA6"  "ABCA7" 
##   [9] "ABCA8"  "ABCA9"  "ABCB1"  "ABCB10" "ABCB4"  "ABCB5"  "ABCB6"  "ABCB7" 
##  [17] "ABCB8"  "ABCB9"  "ABCC1"  "ABCC10" "ABCC11" "ABCC2"  "ABCC3"  "ABCC4" 
##  [25] "ABCC5"  "ABCC6"  "ABCC9"  "ABCD1"  "ABCD2"  "ABCD3"  "ABCF1"  "ABCG1" 
##  [33] "ABCG4"  "ABCG5"  "ABCG8"  "APOA1"  "CFTR"   "DERL1"  "DERL2"  "DERL3" 
##  [41] "EIF2S1" "EIF2S2" "EIF2S3" "ERLEC1" "ERLIN1" "ERLIN2" "KCNJ11" "OS9"   
##  [49] "PEX19"  "PEX3"   "PSMA1"  "PSMA2"  "PSMA3"  "PSMA4"  "PSMA5"  "PSMA6" 
##  [57] "PSMA7"  "PSMA8"  "PSMB1"  "PSMB10" "PSMB11" "PSMB2"  "PSMB3"  "PSMB4" 
##  [65] "PSMB5"  "PSMB6"  "PSMB7"  "PSMB8"  "PSMB9"  "PSMC1"  "PSMC2"  "PSMC3" 
##  [73] "PSMC4"  "PSMC5"  "PSMC6"  "PSMD1"  "PSMD10" "PSMD11" "PSMD12" "PSMD13"
##  [81] "PSMD14" "PSMD2"  "PSMD3"  "PSMD4"  "PSMD5"  "PSMD6"  "PSMD7"  "PSMD8" 
##  [89] "PSMD9"  "PSME1"  "PSME2"  "PSME3"  "PSME4"  "PSMF1"  "RNF185" "RNF5"  
##  [97] "RPS27A" "SEL1L"  "SEM1"   "UBA52"  "UBB"    "UBC"    "VCP"

Curate the annotation

Use all probes on the chip.

Updating gene names using the HGNC package because a lot of them have been updated over time.

tic()
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","UCSC_RefGene_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
gp2 <- lapply(gp2,unique)
gt <- stack(gp2)
colnames(gt) <- c("gene","probe")
gt$probe <- as.character(gt$probe)
dim(gt)
## [1] 684970      2
str(gt)
## 'data.frame':    684970 obs. of  2 variables:
##  $ gene : chr  "YTHDF1" "EIF2S3" "PKN3" "CCDC57" ...
##  $ probe: chr  "cg18478105" "cg09835024" "cg14361672" "cg01763666" ...
toc() #9.0s
## 4.532 sec elapsed
tic()
#new.hgnc.table <- getCurrentHumanMap() Dec 2023
if(!file.exists("new.hgnc.table.rds")) {
  download.file("https://github.com/markziemann/gmea/raw/main/figs/new.hgnc.table.rds",
    destfile="new.hgnc.table.rds")
}
new.hgnc.table <- readRDS("new.hgnc.table.rds")
fix <- checkGeneSymbols(gt$gene,map=new.hgnc.table)
## Warning in checkGeneSymbols(gt$gene, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(gt$gene, map = new.hgnc.table): x contains
## non-approved gene symbols
fix2 <- fix[which(fix$x != fix$Suggested.Symbol),]
length(unique(fix2$x))
## [1] 3253
gt$gene <- fix$Suggested.Symbol
toc()
## 55.464 sec elapsed

Load meta-regression data

x <- readRDS("models_sex_all_blood_need_cleaning>=10_5000.rds")
df <- as.data.frame(as.matrix(unlist(lapply(x,function(y) { y$model$zval[2] }))))
colnames(df) <- "zval"
hist(df$zval)

which(is.na(df$zval))
## integer(0)
rm(x)

Mitch import

Ensure that mitch version has aggregate by mean, not sum. Delete the bioconductor version and install with devtools::install_github

# mitch aggregate function needs to be changes to `mean`
tic()
m <- mitch_import(x=df,DEtype="prescored",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 659363
## Note: no. genes in output = 21625
## Warning in mitch_import(x = df, DEtype = "prescored", geneTable = gt): Warning: less than half of the input genes are also in the
##         output
toc() # 7.25 sec
## 3.841 sec elapsed
hist(m$x)

dim(m)
## [1] 21625     1
head(m) %>%
  kbl(caption = "Example of differential gene methylation scores used for pathway analysis") %>%
  kable_paper("hover", full_width = F)
Example of differential gene methylation scores used for pathway analysis
x
A1BG -0.1281221
A1BG-AS1 -0.1265037
A1CF -0.2257970
A2M -0.0016135
A2M-AS1 1.3166603
A2ML1 -0.3595390

Run mitch

genenames <- unique(unname(unlist(gs)))
length(which(rownames(m) %in% genenames))
## [1] 10357
length(which(!rownames(m) %in% genenames))
## [1] 11268
tic()
mres <- mitch_calc(x=m,genesets=gs,minsetsize=5,cores=8, priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
toc() #82.639 sec
## 7.653 sec elapsed
mtable <- mres$enrichment_result
up <- subset(mtable,s.dist>0 & p.adjustANOVA<0.05)
dn <- subset(mtable,s.dist<0 & p.adjustANOVA<0.05)
nrow(up)
## [1] 57
nrow(dn)
## [1] 238
head(up,10) %>%
  kbl(caption = "Top significant pathways with higher methylation") %>%
  kable_paper("hover", full_width = F)
Top significant pathways with higher methylation
set setSize pANOVA s.dist p.adjustANOVA
1454 Regulation of gene expression in endocrine-committed (NEUROG3+) progenitor cells 5 0.0017587 0.8077521 0.0170660
1453 Regulation of gene expression in early pancreatic precursor cells 8 0.0001424 0.7765300 0.0029260
594 Formation of lateral plate mesoderm 5 0.0032659 0.7595560 0.0269748
609 Free fatty acid receptors 5 0.0041344 0.7404995 0.0317875
593 Formation of intermediate mesoderm 8 0.0017347 0.6394504 0.0169157
605 Formation of the ureteric bud 21 0.0000005 0.6314395 0.0000196
1824 Thyroxine biosynthesis 10 0.0005983 0.6267592 0.0082485
1860 Transcriptional regulation of testis differentiation 12 0.0002046 0.6189947 0.0039701
599 Formation of the anterior neural plate 10 0.0017939 0.5701689 0.0170924
1703 Specification of the neural plate border 16 0.0000964 0.5629657 0.0020957
head(dn,10) %>%
  kbl(caption = "Top significant pathways with lower methylation") %>%
  kable_paper("hover", full_width = F)
Top significant pathways with lower methylation
set setSize pANOVA s.dist p.adjustANOVA
1543 SRP-dependent cotranslational protein targeting to membrane 105 0.0000000 -0.4181280 0.0000000
1511 SARS-CoV-1 modulates host translation machinery 33 0.0000822 -0.3960165 0.0018065
524 Eukaryotic Translation Termination 86 0.0000000 -0.3952549 0.0000000
184 Butyrate Response Factor 1 (BRF1) binds and destabilizes mRNA 17 0.0051326 -0.3920226 0.0370394
1155 PINK1-PRKN Mediated Mitophagy 21 0.0020045 -0.3894164 0.0186376
1933 Viral mRNA Translation 83 0.0000000 -0.3885120 0.0000001
1901 Tristetraprolin (TTP, ZFP36) binds and destabilizes mRNA 17 0.0055547 -0.3884346 0.0393751
1185 Peptide chain elongation 83 0.0000000 -0.3733374 0.0000003
1094 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 88 0.0000000 -0.3695585 0.0000002
1571 Selenocysteine synthesis 86 0.0000000 -0.3670843 0.0000003
top <- rbind(up[1:10,],dn[1:10,])
top <- top[order(top$s.dist),]

barnames <- gsub("_"," ",gsub("REACTOME_","",top$set))
cols <- as.character(((sign(top$s.dist)+1)/2)+1)
cols <- gsub("1","blue",gsub("2","red",cols))

par(mar = c(5.1, 29.1, 4.1, 2.1))
barplot(abs(top$s.dist),horiz=TRUE,las=1,names.arg=barnames, cex.names=0.8,
  cex.axis=0.8,col=cols, xlab="Enrichment score",main="Reactomes")
mtext("whole gene")
grid()

par( mar = c(5.1, 4.1, 4.1, 2.1) )

Make a html report and some charts.

mitch_report(res=mres,outfile="mandy_mitchreport.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmpnMvthp/mandy_mitchreport.rds ".
## 
## 
## processing file: mitch.Rmd
## 1/34                             
## 2/34 [checklibraries]            
## 3/34                             
## 4/34 [peek]                      
## 5/34                             
## 6/34 [metrics]                   
## 7/34                             
## 8/34 [scatterplot]
## 9/34                             
## 10/34 [contourplot]               
## 11/34                             
## 12/34 [input_geneset_metrics1]    
## 13/34                             
## 14/34 [input_geneset_metrics2]
## 15/34                             
## 16/34 [input_geneset_metrics3]
## 17/34                             
## 18/34 [echart1d]                  
## 19/34 [echart2d]                  
## 20/34                             
## 21/34 [heatmap]                   
## 22/34                             
## 23/34 [effectsize]                
## 24/34                             
## 25/34 [results_table]             
## 26/34                             
## 27/34 [results_table_complete]    
## 28/34                             
## 29/34 [detailed_geneset_reports1d]
## 30/34                             
## 31/34 [detailed_geneset_reports2d]
## 32/34                             
## 33/34 [session_info]              
## 34/34
## output file: /home/mark.ziemann@domain.internal.burnet.edu.au/projects/mandy/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mark.ziemann@domain.internal.burnet.edu.au/projects/mandy/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpnMvthp/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpnMvthp/rmarkdown-str133b9435d978bb.html
## 
## Output created: /tmp/RtmpnMvthp/mitch_report.html
## [1] TRUE
mitch_plots(res=mres,outfile="mandy_mitchcharts.pdf")
## png 
##   2

Session information

save.image("mandy_gmea.Rdata")

sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pkgload_1.3.3                                      
##  [2] GGally_2.2.0                                       
##  [3] ggplot2_3.4.4                                      
##  [4] reshape2_1.4.4                                     
##  [5] gplots_3.1.3                                       
##  [6] gtools_3.9.5                                       
##  [7] tibble_3.2.1                                       
##  [8] dplyr_1.1.4                                        
##  [9] echarts4r_0.4.5                                    
## [10] png_0.1-8                                          
## [11] gridExtra_2.3                                      
## [12] missMethyl_1.36.0                                  
## [13] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [14] beeswarm_0.4.0                                     
## [15] kableExtra_1.3.4                                   
## [16] mitch_1.15.0                                       
## [17] tictoc_1.2                                         
## [18] HGNChelper_0.8.1                                   
## [19] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [20] IlluminaHumanMethylation450kmanifest_0.4.0         
## [21] minfi_1.48.0                                       
## [22] bumphunter_1.44.0                                  
## [23] locfit_1.5-9.8                                     
## [24] iterators_1.0.14                                   
## [25] foreach_1.5.2                                      
## [26] Biostrings_2.70.1                                  
## [27] XVector_0.42.0                                     
## [28] SummarizedExperiment_1.32.0                        
## [29] Biobase_2.62.0                                     
## [30] MatrixGenerics_1.14.0                              
## [31] matrixStats_1.2.0                                  
## [32] GenomicRanges_1.54.1                               
## [33] GenomeInfoDb_1.38.1                                
## [34] IRanges_2.36.0                                     
## [35] S4Vectors_0.40.1                                   
## [36] BiocGenerics_0.48.1                                
## [37] eulerr_7.0.0                                       
## [38] limma_3.58.1                                       
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.2             later_1.3.2              
##   [3] BiocIO_1.12.0             bitops_1.0-7             
##   [5] filelock_1.0.3            preprocessCore_1.64.0    
##   [7] XML_3.99-0.16             lifecycle_1.0.4          
##   [9] lattice_0.22-5            MASS_7.3-60              
##  [11] base64_2.0.1              scrime_1.3.5             
##  [13] magrittr_2.0.3            sass_0.4.8               
##  [15] rmarkdown_2.25            jquerylib_0.1.4          
##  [17] yaml_2.3.8                httpuv_1.6.13            
##  [19] doRNG_1.8.6               askpass_1.2.0            
##  [21] DBI_1.2.0                 RColorBrewer_1.1-3       
##  [23] abind_1.4-5               zlibbioc_1.48.0          
##  [25] rvest_1.0.3               quadprog_1.5-8           
##  [27] purrr_1.0.2               RCurl_1.98-1.13          
##  [29] rappdirs_0.3.3            GenomeInfoDbData_1.2.11  
##  [31] genefilter_1.84.0         annotate_1.80.0          
##  [33] svglite_2.1.3             DelayedMatrixStats_1.24.0
##  [35] codetools_0.2-19          DelayedArray_0.28.0      
##  [37] xml2_1.3.6                tidyselect_1.2.0         
##  [39] beanplot_1.3.1            BiocFileCache_2.10.1     
##  [41] webshot_0.5.5             illuminaio_0.44.0        
##  [43] GenomicAlignments_1.38.0  jsonlite_1.8.8           
##  [45] multtest_2.58.0           ellipsis_0.3.2           
##  [47] survival_3.5-7            systemfonts_1.0.5        
##  [49] tools_4.3.2               progress_1.2.3           
##  [51] Rcpp_1.0.11               glue_1.6.2               
##  [53] SparseArray_1.2.2         xfun_0.41                
##  [55] HDF5Array_1.30.0          withr_2.5.2              
##  [57] fastmap_1.1.1             rhdf5filters_1.14.1      
##  [59] fansi_1.0.6               openssl_2.1.1            
##  [61] caTools_1.18.2            digest_0.6.33            
##  [63] R6_2.5.1                  mime_0.12                
##  [65] colorspace_2.1-0          biomaRt_2.58.0           
##  [67] RSQLite_2.3.4             utf8_1.2.4               
##  [69] tidyr_1.3.0               generics_0.1.3           
##  [71] data.table_1.14.10        rtracklayer_1.62.0       
##  [73] prettyunits_1.2.0         httr_1.4.7               
##  [75] htmlwidgets_1.6.4         S4Arrays_1.2.0           
##  [77] ggstats_0.5.1             pkgconfig_2.0.3          
##  [79] gtable_0.3.4              blob_1.2.4               
##  [81] siggenes_1.76.0           htmltools_0.5.7          
##  [83] scales_1.3.0              rstudioapi_0.15.0        
##  [85] knitr_1.45                tzdb_0.4.0               
##  [87] rjson_0.2.21              nlme_3.1-164             
##  [89] curl_5.2.0                org.Hs.eg.db_3.18.0      
##  [91] cachem_1.0.8              rhdf5_2.46.0             
##  [93] stringr_1.5.1             KernSmooth_2.23-22       
##  [95] AnnotationDbi_1.64.1      restfulr_0.0.15          
##  [97] GEOquery_2.70.0           pillar_1.9.0             
##  [99] grid_4.3.2                reshape_0.8.9            
## [101] vctrs_0.6.5               promises_1.2.1           
## [103] dbplyr_2.4.0              xtable_1.8-4             
## [105] evaluate_0.23             readr_2.1.4              
## [107] GenomicFeatures_1.54.1    cli_3.6.2                
## [109] compiler_4.3.2            Rsamtools_2.18.0         
## [111] rlang_1.1.2               crayon_1.5.2             
## [113] rngtools_1.5.2            nor1mix_1.3-2            
## [115] mclust_6.0.1              plyr_1.8.9               
## [117] stringi_1.8.3             viridisLite_0.4.2        
## [119] BiocParallel_1.36.0       munsell_0.5.0            
## [121] Matrix_1.6-4              hms_1.1.3                
## [123] sparseMatrixStats_1.14.0  bit64_4.0.5              
## [125] Rhdf5lib_1.24.0           KEGGREST_1.42.0          
## [127] statmod_1.5.0             shiny_1.8.0              
## [129] highr_0.10                memoise_2.0.1.9000       
## [131] bslib_0.6.1               bit_4.0.5