The purpose of this analysis is to explore different enrichment approaches to the cancer methylation data: Infinium EPIC array (GSE158422) data for matched control and cancer tissues.
We will specifically be testing the GMEA approach versus existing approaches:
1-way Wilcox text
1-way t-test
FGSEA test
roast test
After each of these tests, downstream GSEA can be conducted.
suppressPackageStartupMessages({
library("plyr")
library("R.utils")
library("missMethyl")
library("limma")
library("DMRcate")
library("DMRcatedata")
library("topconfects")
library("minfi")
library("IlluminaHumanMethylation450kmanifest")
library("RColorBrewer")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("GEOquery")
library("eulerr")
library("plyr")
library("gplots")
library("reshape2")
library("forestplot")
library("beeswarm")
library("RCircos")
library("qqman")
library("ENmix")
library("tictoc")
library("mitch")
library("kableExtra")
library("fgsea")
})
source("../meth_functions.R")
CORES=8
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name",
"Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
Gene probe sets
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
hist(unlist(lapply(sets,length)),xlim=c(0,200),breaks=500,xlab="set size",main="probes per gene")
GSE158422 is the accession number for the array data.
#GSE158422 <- readRDS("GSE158422.rds")
dm <- read.table("GSE158422_limma.tsv",header=TRUE,sep="\t")
head(dm,50) %>%
kbl(caption = "Top significant probes with limma") %>%
kable_paper("hover", full_width = F)
Row.names | UCSC_RefGene_Name | Regulatory_Feature_Group | Islands_Name | Relation_to_Island | logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|---|---|---|---|---|
5777 | cg00159780 | REXO1L2P;REXO1L1 | chr8:86573421-86575248 | Island | -1.6810156 | 1.1199758 | -13.41989 | 0 | 0 | 38.61314 | |
61184 | cg01772014 | C20orf160 | Unclassified_Cell_type_specific | chr20:30606492-30607174 | N_Shore | -1.2581347 | -2.1384838 | -13.36031 | 0 | 0 | 38.38467 |
468211 | cg14205001 | NOTCH1 | chr9:139405089-139405292 | S_Shore | 1.0609528 | 1.0240980 | 13.34777 | 0 | 0 | 38.33651 | |
221392 | cg06569947 | DCUN1D2 | OpenSea | 1.6933468 | 1.5977978 | 13.21072 | 0 | 0 | 37.80891 | ||
577564 | cg17655624 | ZNF385A;ZNF385A | Unclassified_Cell_type_specific | chr12:54784900-54785238 | S_Shore | -1.6062843 | -2.4260117 | -13.17366 | 0 | 0 | 37.66580 |
444856 | cg13531667 | MCC | chr5:112823256-112824304 | S_Shore | -1.9451786 | -2.4725269 | -13.13667 | 0 | 0 | 37.52278 | |
669100 | cg20979986 | SERINC5;SERINC5;SERINC5;SERINC5;SERINC5 | Unclassified_Cell_type_specific | OpenSea | -1.9389375 | -1.5165570 | -13.05957 | 0 | 0 | 37.22412 | |
657981 | cg20568402 | DCUN1D2 | OpenSea | 1.6831260 | 1.3499641 | 13.03319 | 0 | 0 | 37.12174 | ||
60887 | cg01762663 | DOCK9;DOCK9 | OpenSea | 1.4190052 | 1.6213375 | 12.94365 | 0 | 0 | 36.77360 | ||
760860 | cg24334634 | DCUN1D2 | OpenSea | 1.8952109 | 1.3472386 | 12.89559 | 0 | 0 | 36.58628 | ||
791527 | cg25422938 | RPA3;UMAD1;UMAD1;UMAD1 | OpenSea | 2.0931417 | 0.9054807 | 12.88130 | 0 | 0 | 36.53053 | ||
102318 | cg02973960 | CAMKK2;CAMKK2;CAMKK2;CAMKK2;CAMKK2;CAMKK2;CAMKK2;CAMKK2 | OpenSea | 1.8653298 | 0.9885988 | 12.82480 | 0 | 0 | 36.30981 | ||
159551 | cg04690379 | SPTLC2 | OpenSea | 1.4808359 | 1.6632219 | 12.81647 | 0 | 0 | 36.27725 | ||
401316 | cg12131862 | ATP2B4;ATP2B4 | OpenSea | 1.3214447 | 1.1562250 | 12.73097 | 0 | 0 | 35.94239 | ||
549143 | cg16732616 | DMRTA2 | chr1:50884228-50891471 | Island | 3.0282534 | -2.2765252 | 12.67359 | 0 | 0 | 35.71713 | |
560139 | cg17094249 | Unclassified_Cell_type_specific | OpenSea | -1.6984142 | -1.5599415 | -12.62766 | 0 | 0 | 35.53653 | ||
91439 | cg02655980 | GALK2;GALK2;GALK2;MIR4716;GALK2 | OpenSea | 1.1562534 | 1.9223113 | 12.59459 | 0 | 0 | 35.40631 | ||
15682 | cg00446235 | TSTD1;TSTD1;TSTD1 | Promoter_Associated | chr1:161008377-161008830 | Island | -1.4800380 | -2.5291407 | -12.57768 | 0 | 0 | 35.33968 |
440784 | cg13417058 | TKT;TKT;TKT;TKT | chr3:53289533-53290213 | N_Shore | -1.2138572 | -3.1919784 | -12.56018 | 0 | 0 | 35.27064 | |
576795 | cg17627973 | SRPK2;SRPK2 | chr7:104885049-104885400 | Island | 1.2313437 | 1.4827501 | 12.55575 | 0 | 0 | 35.25318 | |
397225 | cg12003230 | C21orf84 | OpenSea | 1.2476557 | 1.2625415 | 12.51287 | 0 | 0 | 35.08392 | ||
736989 | cg23480296 | DAB2IP | OpenSea | 1.5228320 | 2.0541757 | 12.49776 | 0 | 0 | 35.02422 | ||
667781 | cg20934096 | C7orf50;C7orf50;C7orf50 | chr7:1119980-1120248 | N_Shelf | 1.8073593 | 1.0858697 | 12.48786 | 0 | 0 | 34.98510 | |
74972 | cg02174232 | APEH | Promoter_Associated | chr3:49710968-49712279 | Island | -1.3008066 | -2.2997937 | -12.47090 | 0 | 0 | 34.91804 |
224178 | cg06651376 | SMYD4 | Unclassified_Cell_type_specific | OpenSea | 1.7518820 | 2.1327280 | 12.39961 | 0 | 0 | 34.63569 | |
48818 | cg01399319 | CCNL2;CCNL2;CCNL2;CCNL2 | chr1:1322644-1322924 | S_Shelf | 1.5152093 | 1.6887531 | 12.39857 | 0 | 0 | 34.63157 | |
836007 | cg26993251 | MACROD1 | Promoter_Associated | chr11:63932990-63934070 | Island | -1.8619005 | -2.5408778 | -12.38902 | 0 | 0 | 34.59373 |
317907 | cg09496762 | Promoter_Associated | chr20:42285961-42286535 | Island | -1.3804253 | -3.4226436 | -12.38487 | 0 | 0 | 34.57725 | |
141548 | cg04147064 | TBC1D22A;TBC1D22A;TBC1D22A;TBC1D22A;TBC1D22A | chr22:47558272-47558513 | S_Shelf | 1.6643797 | 1.1404515 | 12.38173 | 0 | 0 | 34.56481 | |
689315 | cg21722612 | TPCN1;TPCN1;TPCN1 | OpenSea | 1.7694572 | 1.3631921 | 12.37873 | 0 | 0 | 34.55291 | ||
303960 | cg09067993 | SUCLG2;SUCLG2 | OpenSea | 1.6770912 | 1.0487424 | 12.34700 | 0 | 0 | 34.42698 | ||
327281 | cg09792881 | DMRTA2 | chr1:50884228-50891471 | Island | 2.6518400 | -2.3846188 | 12.30475 | 0 | 0 | 34.25906 | |
811041 | cg26134703 | FAM49B;FAM49B;FAM49B;FAM49B;FAM49B;FAM49B;FAM49B;FAM49B | OpenSea | 1.1456692 | 2.1751297 | 12.30127 | 0 | 0 | 34.24524 | ||
536592 | cg16350950 | ARFGEF2;ARFGEF2 | OpenSea | 1.5891250 | 1.7433091 | 12.28112 | 0 | 0 | 34.16508 | ||
577091 | cg17639795 | chr19:2950359-2950962 | N_Shore | 0.9854179 | 1.4310298 | 12.25750 | 0 | 0 | 34.07103 | ||
500102 | cg15169038 | PLEKHG1 | OpenSea | 1.2350751 | 1.2815770 | 12.25413 | 0 | 0 | 34.05762 | ||
833500 | cg26908825 | GMDS | OpenSea | 1.5139160 | 0.9404246 | 12.24443 | 0 | 0 | 34.01897 | ||
515979 | cg15698065 | DCUN1D2 | OpenSea | 1.1600645 | 1.1265920 | 12.24075 | 0 | 0 | 34.00432 | ||
281220 | cg08365845 | GNAI2;GNAI2 | Promoter_Associated | chr3:50264402-50265101 | S_Shore | -1.1317946 | -3.7762019 | -12.22984 | 0 | 0 | 33.96084 |
27631 | cg00781839 | DCUN1D2 | OpenSea | 1.5928682 | 2.1251831 | 12.19970 | 0 | 0 | 33.84066 | ||
663755 | cg20778786 | OpenSea | -2.2891897 | -1.4266297 | -12.19677 | 0 | 0 | 33.82896 | |||
601050 | cg18483766 | PPP4R1;PPP4R1;PPP4R1 | OpenSea | 1.3919045 | 1.7771650 | 12.17365 | 0 | 0 | 33.73668 | ||
690514 | cg21768042 | ARAP3 | Unclassified | OpenSea | 0.9038892 | 1.1291209 | 12.17317 | 0 | 0 | 33.73475 | |
66144 | cg01915688 | PPP1R12A;PPP1R12A;PPP1R12A;PPP1R12A;PPP1R12A | OpenSea | 1.5092716 | 2.2025612 | 12.13652 | 0 | 0 | 33.58836 | ||
134752 | cg03946762 | ZNF385A;ZNF385A;ZNF385A;ZNF385A | Unclassified_Cell_type_specific | chr12:54784900-54785238 | Island | -1.7206885 | -2.0731893 | -12.13557 | 0 | 0 | 33.58454 |
228200 | cg06767326 | DCUN1D2 | OpenSea | 1.8092946 | 1.9834216 | 12.13503 | 0 | 0 | 33.58240 | ||
693283 | cg21870038 | RFFL;RFFL;RFFL | Promoter_Associated | OpenSea | -1.8490646 | -3.1666138 | -12.11240 | 0 | 0 | 33.49190 | |
351767 | cg10546600 | BRD7;BRD7 | OpenSea | 1.4049959 | 1.7498277 | 12.11131 | 0 | 0 | 33.48755 | ||
524497 | cg15970145 | LARGE;LARGE | OpenSea | -1.8418429 | -0.8440018 | -12.09043 | 0 | 0 | 33.40397 | ||
54185 | cg01558390 | PIP4K2A | OpenSea | 1.6277425 | 0.6732985 | 12.08821 | 0 | 0 | 33.39509 |
rownames(dm) <- dm[,1]
dm <- dm[,c("UCSC_RefGene_Name","t")]
hist(dm$t)
tic() ; res <- calc_sc(dm) ; toc() #32 cores 95.5
## 116.156 sec elapsed
res2 <- res[[2]]
head(res2,20) %>%
kbl(caption = "Top significant genes with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | mean | median | p-value(sc) | sig | fdr(sc) | |
---|---|---|---|---|---|---|
PTPRN2 | 1474 | -3.813234 | -4.397006 | 0 | 196.48861 | 0 |
MAD1L1 | 817 | -2.042400 | -2.277810 | 0 | 70.11979 | 0 |
TNXB | 529 | -3.156354 | -3.605676 | 0 | 69.75063 | 0 |
DIP2C | 607 | -2.479070 | -3.095174 | 0 | 63.78267 | 0 |
CDH4 | 382 | -3.919010 | -4.543389 | 0 | 54.63858 | 0 |
PCDHGA1 | 439 | 3.122674 | 3.776895 | 0 | 52.91420 | 0 |
SHANK2 | 491 | -2.794116 | -3.589533 | 0 | 51.36588 | 0 |
PCDHGA2 | 422 | 3.159710 | 3.783492 | 0 | 51.21125 | 0 |
PCDHGA3 | 399 | 3.179048 | 3.797327 | 0 | 48.12942 | 0 |
ADARB2 | 475 | -2.507251 | -3.021943 | 0 | 48.00564 | 0 |
PCDHGB1 | 380 | 3.182828 | 3.785979 | 0 | 45.95262 | 0 |
PRDM16 | 663 | -2.082507 | -2.808744 | 0 | 44.79882 | 0 |
PCDHGA4 | 362 | 3.243741 | 3.881572 | 0 | 44.50243 | 0 |
RASA3 | 361 | -2.612493 | -2.761520 | 0 | 42.79716 | 0 |
PCDHGB2 | 343 | 3.255906 | 3.949639 | 0 | 41.81006 | 0 |
PCDHGA5 | 326 | 3.258105 | 4.026341 | 0 | 39.45533 | 0 |
TRAPPC9 | 420 | -2.548773 | -3.071687 | 0 | 38.99707 | 0 |
LMF1 | 262 | -3.279107 | -3.705261 | 0 | 38.85506 | 0 |
RPS6KA2 | 355 | -2.828566 | -3.489451 | 0 | 38.78985 | 0 |
CACNA1C | 290 | -3.605957 | -4.538620 | 0 | 37.76518 | 0 |
sig <- subset(res2,`fdr(sc)`<0.05)
es <- sig[order(-abs(sig$median)),]
head(es,20) %>%
kbl(caption = "Top effect size probes with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | mean | median | p-value(sc) | sig | fdr(sc) | |
---|---|---|---|---|---|---|
HOXA5 | 44 | 7.623040 | 7.780885 | 0.0e+00 | 12.944290 | 0.0000000 |
HOXD10 | 21 | 7.142220 | 7.426734 | 1.0e-06 | 6.020600 | 0.0249023 |
HOXD9 | 25 | 7.665227 | 7.412387 | 1.0e-07 | 7.224720 | 0.0015815 |
HOXD4 | 28 | 7.471039 | 7.349427 | 0.0e+00 | 8.127810 | 0.0001990 |
SFTA3 | 40 | 5.835427 | 7.108292 | 0.0e+00 | 9.089862 | 0.0000218 |
CCDC140 | 52 | 6.792560 | 6.995631 | 0.0e+00 | 9.442748 | 0.0000097 |
DLX6AS | 66 | 5.720625 | 6.862673 | 0.0e+00 | 11.222806 | 0.0000002 |
OTX2 | 38 | 6.288114 | 6.766082 | 0.0e+00 | 10.837080 | 0.0000004 |
HOXA3 | 77 | 6.546720 | 6.671353 | 0.0e+00 | 13.600021 | 0.0000000 |
IRX1 | 24 | 6.319978 | 6.616094 | 1.0e-07 | 6.923690 | 0.0031527 |
SOX1 | 25 | 6.058103 | 6.516175 | 1.0e-07 | 7.224720 | 0.0015815 |
SIM1 | 73 | 4.353270 | 6.467176 | 0.0e+00 | 9.145769 | 0.0000192 |
DRD4 | 20 | 5.670473 | 6.440472 | 1.9e-06 | 5.719570 | 0.0495014 |
MUC19 | 21 | -6.200077 | -6.435404 | 1.0e-06 | 6.020600 | 0.0249023 |
OR9Q1 | 32 | -5.049954 | -6.398015 | 1.7e-06 | 5.761153 | 0.0450092 |
SIX6 | 23 | 6.101058 | 6.375368 | 2.0e-07 | 6.622660 | 0.0062802 |
PAX3 | 58 | 5.432323 | 6.178637 | 0.0e+00 | 9.927403 | 0.0000032 |
UNC13C | 22 | -5.520481 | -6.155292 | 1.0e-06 | 6.020600 | 0.0249023 |
CNTNAP4 | 35 | -5.354329 | -6.137429 | 0.0e+00 | 9.389922 | 0.0000110 |
GHSR | 21 | 5.456063 | 6.134691 | 1.0e-06 | 6.020600 | 0.0249023 |
gmea_volc(res2)
gmea_boxplot(res)
res2_wg <- res2
Investigate how probe number influences significance.
plot(res2$nprobes,res2$sig,log="x",ylim=c(0,50),pch=19,cex=0.6)
points(sig$nprobes,sig$sig,col="red",pch=19,cex=0.62)
MIN = min(sig$nprobes)
LEFT = nrow(subset(res2,nprobes<MIN))
RIGHT = nrow(subset(res2,nprobes>MIN))
SIG = nrow(sig)
TOT = nrow(res2)
HEADER <- paste(TOT, "genes in total.", SIG, "with FDR<0.05.",
RIGHT, "well covered and", LEFT, "poorly covered")
mtext(HEADER)
abline(v=MIN,lty=2)
dm <- read.table("GSE158422_limma.tsv",header=TRUE,sep="\t")
rownames(dm) <- dm[,1]
dm <- dm[,c("UCSC_RefGene_Name","t")]
hist(dm$t)
tic()
res <- calc_sc2(dm)
toc()
## 254.451 sec elapsed
res2 <- res[[2]]
head(res2,20) %>%
kbl(caption = "Top significant genes with GMEA-t") %>%
kable_paper("hover", full_width = F)
nprobes | mean | median | p-value(sc) | sig | fdr(sc) | |
---|---|---|---|---|---|---|
PTPRN2 | 1474 | -3.813234 | -4.397006 | 0 | Inf | 0 |
TNXB | 529 | -3.156354 | -3.605676 | 0 | 113.00665 | 0 |
CDH4 | 382 | -3.919010 | -4.543389 | 0 | 99.12338 | 0 |
DIP2C | 607 | -2.479070 | -3.095174 | 0 | 86.68677 | 0 |
MAD1L1 | 817 | -2.042400 | -2.277810 | 0 | 81.22935 | 0 |
PCDHGA1 | 439 | 3.122674 | 3.776895 | 0 | 75.90437 | 0 |
PCDHGA2 | 422 | 3.159710 | 3.783492 | 0 | 73.92040 | 0 |
SHANK2 | 491 | -2.794116 | -3.589533 | 0 | 73.17547 | 0 |
SNORD115-15 | 81 | -6.138454 | -6.052379 | 0 | 70.09110 | 0 |
SNORD115-21 | 81 | -6.138454 | -6.052379 | 0 | 70.09110 | 0 |
PCDHGA3 | 399 | 3.179048 | 3.797327 | 0 | 69.19488 | 0 |
LMF1 | 262 | -3.279107 | -3.705261 | 0 | 67.83114 | 0 |
PCDHGB1 | 380 | 3.182828 | 3.785979 | 0 | 66.11569 | 0 |
PCDHGA4 | 362 | 3.243741 | 3.881572 | 0 | 64.49388 | 0 |
MYT1L | 245 | -3.956712 | -4.423715 | 0 | 64.37781 | 0 |
ADARB2 | 475 | -2.507251 | -3.021943 | 0 | 62.24055 | 0 |
PCDHGB2 | 343 | 3.255906 | 3.949639 | 0 | 60.22098 | 0 |
RASA3 | 361 | -2.612493 | -2.761520 | 0 | 59.68705 | 0 |
PCDHGA5 | 326 | 3.258105 | 4.026341 | 0 | 56.53669 | 0 |
CACNA1C | 290 | -3.605957 | -4.538620 | 0 | 56.17918 | 0 |
sig <- subset(res2,`fdr(sc)`<0.05)
es <- sig[order(-abs(sig$median)),]
head(es,20) %>%
kbl(caption = "Top effect size probes with GMEA") %>%
kable_paper("hover", full_width = F)
nprobes | mean | median | p-value(sc) | sig | fdr(sc) | |
---|---|---|---|---|---|---|
MIR10B | 13 | 7.587783 | 8.134873 | 0.0e+00 | 8.007009 | 0.0002488 |
HOXD12 | 14 | 7.862712 | 7.922158 | 0.0e+00 | 12.765511 | 0.0000000 |
HOXA5 | 44 | 7.623040 | 7.780885 | 0.0e+00 | 40.923039 | 0.0000000 |
LINC01246 | 7 | -7.478710 | -7.729335 | 1.0e-07 | 6.973414 | 0.0026483 |
PFN3 | 10 | 7.723714 | 7.630209 | 0.0e+00 | 7.650953 | 0.0005621 |
MIR196A1 | 7 | 7.437659 | 7.478137 | 7.0e-07 | 6.160796 | 0.0169168 |
C17orf112 | 6 | -7.509326 | -7.471408 | 1.5e-06 | 5.816728 | 0.0370409 |
MIR205 | 7 | -6.909420 | -7.466330 | 2.0e-06 | 5.704269 | 0.0478546 |
HOXD10 | 21 | 7.142220 | 7.426734 | 0.0e+00 | 12.079764 | 0.0000000 |
HOXD9 | 25 | 7.665227 | 7.412387 | 0.0e+00 | 22.120455 | 0.0000000 |
HOXD4 | 28 | 7.471039 | 7.349427 | 0.0e+00 | 25.596279 | 0.0000000 |
MIR503 | 12 | 6.473450 | 7.288326 | 0.0e+00 | 7.337316 | 0.0011527 |
HOXB1 | 16 | 6.521135 | 7.287845 | 9.0e-07 | 6.062606 | 0.0211564 |
TAS2R1 | 6 | -6.937028 | -7.169484 | 1.8e-06 | 5.750589 | 0.0430630 |
MIR411 | 11 | -6.635723 | -7.159973 | 1.0e-07 | 7.229529 | 0.0014751 |
SFTA3 | 40 | 5.835427 | 7.108292 | 0.0e+00 | 13.314641 | 0.0000000 |
LOC101929681 | 9 | -6.576090 | -7.075011 | 9.0e-07 | 6.052007 | 0.0216720 |
MNDA | 10 | -6.561520 | -7.040268 | 0.0e+00 | 7.404846 | 0.0009876 |
CCDC140 | 52 | 6.792560 | 6.995631 | 0.0e+00 | 39.740933 | 0.0000000 |
KRTAP7-1 | 6 | -6.916205 | -6.989707 | 0.0e+00 | 7.365598 | 0.0010804 |
gmea_volc(res2)
gmea_boxplot(res)
res2_t <- res2
plot(res2$nprobes,res2$sig,log="x",ylim=c(0,50),pch=19,cex=0.6)
points(sig$nprobes,sig$sig,col="red",pch=19,cex=0.62)
MIN = min(sig$nprobes)
LEFT = nrow(subset(res2,nprobes<MIN))
RIGHT = nrow(subset(res2,nprobes>MIN))
SIG = nrow(sig)
TOT = nrow(res2)
HEADER <- paste(TOT, "genes in total.", SIG, "with FDR<0.05.",
RIGHT, "well covered and", LEFT, "poorly covered")
mtext(HEADER)
abline(v=MIN,lty=2)
head(dm)
## UCSC_RefGene_Name t
## cg00159780 REXO1L2P;REXO1L1 -13.41989
## cg01772014 C20orf160 -13.36031
## cg14205001 NOTCH1 13.34777
## cg06569947 DCUN1D2 13.21072
## cg17655624 ZNF385A;ZNF385A -13.17366
## cg13531667 MCC -13.13667
tstats <- dm$t
names(tstats) <- rownames(dm)
tic()
fgseaRes <- fgsea(pathways = sets,
stats = tstats,
minSize = 5,
nproc = 16)
## Warning in fgseaMultilevel(...): There were 24 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some of the pathways the P-values were
## likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-50. You can set the `eps` argument to zero for better estimation.
toc()
## 49.495 sec elapsed
fgseaRes <- as.data.frame(fgseaRes)[,1:7]
fgseaRes <- fgseaRes[order(fgseaRes$pval),]
head(fgseaRes,20) %>%
kbl(caption = "Top significant genes with GMEA-FGSEA") %>%
kable_paper("hover", full_width = F)
pathway | pval | padj | log2err | ES | NES | size | |
---|---|---|---|---|---|---|---|
8626 | HOXA5 | 0 | 0 | NA | 0.9751158 | 4.264259 | 44 |
15480 | PAX6 | 0 | 0 | NA | 0.8355458 | 4.540388 | 131 |
15576 | PCDHGA8 | 0 | 0 | NA | 0.7066343 | 4.257646 | 234 |
15582 | PCDHGB5 | 0 | 0 | NA | 0.6960881 | 4.129469 | 215 |
16969 | PTPRN2 | 0 | 0 | NA | -0.5493220 | -2.371055 | 1474 |
15577 | PCDHGA9 | 0 | 0 | 1.825919 | 0.6740625 | 3.896916 | 198 |
3333 | CCDC140 | 0 | 0 | 1.762439 | 0.9413319 | 4.313300 | 52 |
22866 | ZIC4 | 0 | 0 | 1.750650 | 0.8765661 | 4.361757 | 77 |
15583 | PCDHGB6 | 0 | 0 | 1.714797 | 0.6586990 | 3.867038 | 187 |
8624 | HOXA3 | 0 | 0 | 1.690472 | 0.8627231 | 4.292875 | 77 |
16008 | PITX2 | 0 | 0 | 1.671997 | 0.8576116 | 4.235153 | 74 |
5408 | DLX6AS | 0 | 0 | 1.640742 | 0.8782157 | 4.255537 | 66 |
20520 | TBX5 | 0 | 0 | 1.621700 | 0.8292883 | 4.115278 | 81 |
15540 | PCDHA5 | 0 | 0 | 1.602431 | 0.5833933 | 3.407617 | 221 |
3724 | CDH4 | 0 | 0 | 1.589456 | -0.5596017 | -2.353428 | 382 |
15477 | PAX3 | 0 | 0 | 1.582928 | 0.8881333 | 4.117349 | 58 |
19451 | SNORD115-15 | 0 | 0 | 1.582928 | -0.8243442 | -3.073420 | 81 |
19457 | SNORD115-21 | 0 | 0 | 1.582928 | -0.8243442 | -3.073420 | 81 |
8647 | HOXC4 | 0 | 0 | 1.569792 | 0.6886474 | 3.830130 | 137 |
15567 | PCDHGA10 | 0 | 0 | 1.563182 | 0.6308482 | 3.688264 | 172 |
sig <- subset(fgseaRes,padj<0.05)
SIG = nrow(sig)
plot(fgseaRes$ES,-log10(fgseaRes$pval))
points(sig$ES,-log10(sig$pval),col="red")
plot(fgseaRes$size,-log10(fgseaRes$pval),log="x")
points(sig$size,-log10(sig$pval),col="red")
gmea_volc(res2)
gmea_boxplot(res)
res2_wg <- res2
TODO
Methylation only.
genesets <- gmt_import("../ReactomePathways.gmt")
meth <- res2$sig * res2$median
meth <- as.data.frame(meth)
rownames(meth) <- rownames(res2)
mres <- mitch_calc(x=meth, genesets=genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres$enrichment_result,20) %>%
kbl(caption = "Top differential pathways with GMEA+mitch") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
323 | Digestion of dietary carbohydrate | 10 | 0.0000238 | -0.7715770 | 0.0006371 |
405 | Expression and translocation of olfactory receptors | 351 | 0.0000000 | -0.6913602 | 0.0000000 |
861 | Olfactory Signaling Pathway | 359 | 0.0000000 | -0.6848450 | 0.0000000 |
291 | Defective GALNT3 causes HFTC | 18 | 0.0000007 | -0.6768037 | 0.0000311 |
390 | Endosomal/Vacuolar pathway | 12 | 0.0000529 | 0.6738191 | 0.0012828 |
119 | Beta defensins | 31 | 0.0000000 | -0.6641412 | 0.0000000 |
290 | Defective GALNT12 causes CRCS1 | 18 | 0.0000054 | -0.6193648 | 0.0001978 |
304 | Defensins | 39 | 0.0000000 | -0.6163304 | 0.0000000 |
281 | Dectin-2 family | 28 | 0.0000000 | -0.5959173 | 0.0000032 |
286 | Defective C1GALT1C1 causes TNPS | 19 | 0.0000137 | -0.5760934 | 0.0004434 |
321 | Digestion | 22 | 0.0000038 | -0.5690575 | 0.0001514 |
1404 | Termination of O-glycan biosynthesis | 25 | 0.0000021 | -0.5481980 | 0.0000947 |
87 | Antimicrobial peptides | 83 | 0.0000000 | -0.5470219 | 0.0000000 |
380 | ERKs are inactivated | 13 | 0.0007751 | 0.5383799 | 0.0096231 |
742 | Metallothioneins bind metals | 11 | 0.0020694 | 0.5362633 | 0.0187082 |
1157 | Response to metal ions | 14 | 0.0006043 | 0.5293178 | 0.0081553 |
56 | Activation of the TFAP2 (AP-2) family of transcription factors | 12 | 0.0015324 | 0.5281737 | 0.0151485 |
963 | Presynaptic depolarization and calcium channel opening | 12 | 0.0016093 | -0.5257980 | 0.0154173 |
1449 | Transcriptional regulation of testis differentiation | 12 | 0.0017594 | 0.5214446 | 0.0166501 |
1220 | Sensory Perception | 575 | 0.0000000 | -0.5209378 | 0.0000000 |
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] fgsea_1.22.0
## [2] kableExtra_1.3.4
## [3] mitch_1.8.0
## [4] tictoc_1.1
## [5] ENmix_1.32.0
## [6] doParallel_1.0.17
## [7] qqman_0.1.8
## [8] RCircos_1.2.2
## [9] beeswarm_0.4.0
## [10] forestplot_2.0.1
## [11] checkmate_2.1.0
## [12] magrittr_2.0.3
## [13] reshape2_1.4.4
## [14] gplots_3.1.3
## [15] eulerr_6.1.1
## [16] GEOquery_2.64.2
## [17] RColorBrewer_1.1-3
## [18] IlluminaHumanMethylation450kmanifest_0.4.0
## [19] topconfects_1.12.0
## [20] DMRcatedata_2.14.0
## [21] ExperimentHub_2.4.0
## [22] AnnotationHub_3.4.0
## [23] BiocFileCache_2.4.0
## [24] dbplyr_2.2.1
## [25] DMRcate_2.10.0
## [26] limma_3.52.1
## [27] missMethyl_1.30.0
## [28] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [29] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [30] minfi_1.42.0
## [31] bumphunter_1.38.0
## [32] locfit_1.5-9.5
## [33] iterators_1.0.14
## [34] foreach_1.5.2
## [35] Biostrings_2.64.0
## [36] XVector_0.36.0
## [37] SummarizedExperiment_1.26.1
## [38] Biobase_2.56.0
## [39] MatrixGenerics_1.8.0
## [40] matrixStats_0.62.0
## [41] GenomicRanges_1.48.0
## [42] GenomeInfoDb_1.32.2
## [43] IRanges_2.30.0
## [44] S4Vectors_0.34.0
## [45] BiocGenerics_0.42.0
## [46] R.utils_2.12.0
## [47] R.oo_1.25.0
## [48] R.methodsS3_1.8.2
## [49] plyr_1.8.7
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.56.0
## [3] GGally_2.1.2 tidyr_1.2.0
## [5] ggplot2_3.3.6 bit64_4.0.5
## [7] knitr_1.39 DelayedArray_0.22.0
## [9] data.table_1.14.2 rpart_4.1.16
## [11] KEGGREST_1.36.2 RCurl_1.98-1.7
## [13] AnnotationFilter_1.20.0 generics_0.1.2
## [15] GenomicFeatures_1.48.3 preprocessCore_1.58.0
## [17] RSQLite_2.2.14 bit_4.0.4
## [19] tzdb_0.3.0 webshot_0.5.3
## [21] xml2_1.3.3 httpuv_1.6.5
## [23] assertthat_0.2.1 xfun_0.31
## [25] hms_1.1.1 jquerylib_0.1.4
## [27] evaluate_0.15 promises_1.2.0.1
## [29] fansi_1.0.3 restfulr_0.0.15
## [31] scrime_1.3.5 progress_1.2.2
## [33] caTools_1.18.2 readxl_1.4.0
## [35] DBI_1.1.3 geneplotter_1.74.0
## [37] htmlwidgets_1.5.4 reshape_0.8.9
## [39] purrr_0.3.4 ellipsis_0.3.2
## [41] dplyr_1.0.9 backports_1.4.1
## [43] permute_0.9-7 calibrate_1.7.7
## [45] annotate_1.74.0 biomaRt_2.52.0
## [47] sparseMatrixStats_1.8.0 vctrs_0.4.1
## [49] ensembldb_2.20.2 cachem_1.0.6
## [51] Gviz_1.40.1 BSgenome_1.64.0
## [53] GenomicAlignments_1.32.0 prettyunits_1.1.1
## [55] mclust_5.4.10 svglite_2.1.0
## [57] cluster_2.1.3 RPMM_1.25
## [59] lazyeval_0.2.2 crayon_1.5.1
## [61] genefilter_1.78.0 edgeR_3.38.1
## [63] pkgconfig_2.0.3 nlme_3.1-159
## [65] ProtGenerics_1.28.0 nnet_7.3-17
## [67] rlang_1.0.3 lifecycle_1.0.1
## [69] filelock_1.0.2 dichromat_2.0-0.1
## [71] cellranger_1.1.0 rngtools_1.5.2
## [73] base64_2.0 Matrix_1.4-1
## [75] Rhdf5lib_1.18.2 base64enc_0.1-3
## [77] viridisLite_0.4.0 png_0.1-7
## [79] rjson_0.2.21 bitops_1.0-7
## [81] KernSmooth_2.23-20 rhdf5filters_1.8.0
## [83] blob_1.2.3 DelayedMatrixStats_1.18.0
## [85] doRNG_1.8.2 stringr_1.4.0
## [87] nor1mix_1.3-0 readr_2.1.2
## [89] jpeg_0.1-9 scales_1.2.0
## [91] memoise_2.0.1 zlibbioc_1.42.0
## [93] compiler_4.2.1 BiocIO_1.6.0
## [95] illuminaio_0.38.0 Rsamtools_2.12.0
## [97] cli_3.3.0 DSS_2.44.0
## [99] htmlTable_2.4.0 Formula_1.2-4
## [101] MASS_7.3-58 tidyselect_1.1.2
## [103] stringi_1.7.6 highr_0.9
## [105] yaml_2.3.5 askpass_1.1
## [107] latticeExtra_0.6-29 sass_0.4.1
## [109] VariantAnnotation_1.42.1 fastmatch_1.1-3
## [111] tools_4.2.1 rstudioapi_0.13
## [113] foreign_0.8-82 bsseq_1.32.0
## [115] gridExtra_2.3 digest_0.6.29
## [117] BiocManager_1.30.18 shiny_1.7.1
## [119] quadprog_1.5-8 Rcpp_1.0.8.3
## [121] siggenes_1.70.0 BiocVersion_3.15.2
## [123] later_1.3.0 org.Hs.eg.db_3.15.0
## [125] httr_1.4.3 AnnotationDbi_1.58.0
## [127] biovizBase_1.44.0 colorspace_2.0-3
## [129] rvest_1.0.2 XML_3.99-0.10
## [131] splines_4.2.1 statmod_1.4.36
## [133] multtest_2.52.0 systemfonts_1.0.4
## [135] xtable_1.8-4 jsonlite_1.8.0
## [137] dynamicTreeCut_1.63-1 R6_2.5.1
## [139] echarts4r_0.4.4 Hmisc_4.7-0
## [141] pillar_1.7.0 htmltools_0.5.2
## [143] mime_0.12 glue_1.6.2
## [145] fastmap_1.1.0 BiocParallel_1.30.3
## [147] interactiveDisplayBase_1.34.0 beanplot_1.3.1
## [149] codetools_0.2-18 utf8_1.2.2
## [151] lattice_0.20-45 bslib_0.3.1
## [153] tibble_3.1.7 curl_4.3.2
## [155] gtools_3.9.2.2 openssl_2.0.2
## [157] survival_3.4-0 rmarkdown_2.14
## [159] munsell_0.5.0 rhdf5_2.40.0
## [161] GenomeInfoDbData_1.2.8 HDF5Array_1.24.1
## [163] impute_1.70.0 gtable_0.3.0