Functional class sorting is widely used for pathway analysis using tools like GSEA, yet there is no consensus in how this can be conducted for Illumina Infinium methylation array data.
Here we propose a simple approach which involves the following:
Limma test on probes.
For each gene, calculate the median t-statistic from step 1.
Use this median t-stat in Camera pre-ranked test for gene sets.
In this example, I’m using matched infinium EPIC 850k data from (n=37) normal and lung cancer samples (GSE158422). The data was previously preprocessed and normalised using the minfi package (see the folder called “misc”).
Here the gene sets are obtained from Reactome.
This analysis was run on a 8C/16T computer with 64GB RAM running at 3.8 GHz. This workflow used 34 GB RAM and took RAM usage can be moderated by reducing the parallel core count.
suppressPackageStartupMessages({
library("limma")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("tictoc")
library("mitch")
library("kableExtra")
library("beeswarm")
})
CORES=12
annotations
probe sets
gene sets
design matrix
mval matrix
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")
Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.
gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")
gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")
There are two obvious ways to perform GMEA:
limma-agg-1stt (la1). In this approach, the limma test is conducted on probes, followed by aggregation of the differential methylation t-scores. Each gene gets a median t-stat value which is evaluated downstream by a 1 sample t-test.
agg-limma-1stt (al1). Here, the probe beta values for each gene are aggregated to a median and then limma is run on these values. To determine gene set enrichment, a 1-sample t-test is conducted.
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) patient10 patient11 patient12 patient13 patient14 patient15
## Down 282082 1128 1138 1016 1041 1317 1856
## NotSig 183057 836640 836611 837158 837010 836739 836327
## Up 374334 1705 1724 1299 1422 1417 1290
## patient16 patient17 patient18 patient19 patient2 patient20 patient21
## Down 1030 1057 977 2349 1118 1194 949
## NotSig 837016 837288 836963 836069 837383 836706 837099
## Up 1427 1128 1533 1055 972 1573 1425
## patient22 patient23 patient24 patient25 patient26 patient27 patient28
## Down 1219 1188 1190 1234 1714 1440 1153
## NotSig 836311 836592 837059 836983 836483 836911 836975
## Up 1943 1693 1224 1256 1276 1122 1345
## patient29 patient3 patient30 patient31 patient32 patient33 patient34
## Down 1022 1034 1876 1177 1013 988 1231
## NotSig 837324 837014 824837 837041 837246 837342 836640
## Up 1127 1425 12760 1255 1214 1143 1602
## patient35 patient36 patient37 patient4 patient5 patient6 patient7
## Down 1109 973 1058 1065 980 938 1061
## NotSig 837232 837426 837258 837226 837345 837426 837391
## Up 1132 1074 1157 1182 1148 1109 1021
## patient8 patient9 tumor
## Down 1466 1218 353787
## NotSig 836987 836463 365901
## Up 1020 1792 119785
dm <- topTable(fit.reduced,coef=4, number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
head(dm) %>%
kbl(caption="Top DM probes") %>%
kable_styling("hover",full_width=FALSE)
UCSC_RefGene_Name | Regulatory_Feature_Group | Islands_Name | Relation_to_Island | logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|---|---|---|---|
cg10463322 | OpenSea | 9.106330 | -4.0979376 | 32.08586 | 0 | 0 | 42.42719 | |||
cg07996772 | CRTC3-AS1;BLM;BLM;BLM;BLM | Promoter_Associated | chr15:91260384-91260919 | S_Shore | -8.541045 | -3.7529802 | -31.83397 | 0 | 0 | 42.31059 |
cg23418075 | TM9SF1;TM9SF1 | Promoter_Associated | chr14:24664440-24665507 | Island | -7.241558 | -2.5119857 | -30.51168 | 0 | 0 | 41.66510 |
cg01891583 | USP7 | OpenSea | 7.523968 | -2.1055565 | 30.16887 | 0 | 0 | 41.48810 | ||
cg21463262 | ATP11A;ATP11A | chr13:113540350-113540558 | N_Shore | 8.025921 | -0.6446701 | 29.09256 | 0 | 0 | 40.90450 | |
cg03966315 | LINC00371;LINC00371 | OpenSea | -7.345208 | 0.9638935 | -28.43208 | 0 | 0 | 40.52419 |
agg <- function(dm,cores=1) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a["t"],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=cores)
df <- do.call(rbind,l)
keep <- names(which(table(df$genes)>1))
df <- df[df$genes %in% keep,]
gn <- unique(df$genes)
gme_res <- lapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( length(tstats) > 2 ) {
ttest <- t.test(tstats)
pval <- ttest$p.value
} else {
pval = 1
}
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
"median"=mymedian, pval=pval)
} )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,4])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$fdr <- p.adjust(gme_res_df$pval)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
tic()
dmagg <- agg(dm,cores=CORES)
toc()
## 186.047 sec elapsed
head(dmagg$gme_res_df,20) %>%
kbl(caption="top gmea genes (t-test)") %>%
kable_paper("hover", full_width = F)
nprobes | mean | median | pval | sig | fdr | |
---|---|---|---|---|---|---|
PRDM16 | 663 | 0.6576825 | 0.6867538 | 0 | 89.30626 | 0 |
PTPRN2 | 1474 | 0.3271851 | 0.3019060 | 0 | 78.37428 | 0 |
CDH4 | 382 | 0.6589410 | 0.6410030 | 0 | 58.40283 | 0 |
TNXB | 529 | 0.3818763 | 0.3856856 | 0 | 31.96489 | 0 |
C22orf34 | 103 | 0.8954618 | 0.9098073 | 0 | 31.63700 | 0 |
MYT1L | 245 | 0.5382258 | 0.5414581 | 0 | 28.35089 | 0 |
NRXN3 | 189 | 0.7300821 | 0.7460453 | 0 | 28.22942 | 0 |
DLG2 | 222 | 0.6546920 | 0.6542848 | 0 | 27.66551 | 0 |
DPP6 | 229 | 0.5756898 | 0.5445770 | 0 | 27.40978 | 0 |
PCDHGA1 | 439 | 0.5274875 | 0.4176149 | 0 | 27.23162 | 0 |
PCDHGA2 | 422 | 0.5249113 | 0.4172050 | 0 | 26.31114 | 0 |
GALNT9 | 279 | 0.4926182 | 0.4588122 | 0 | 25.82180 | 0 |
TMEM132D | 140 | 0.6090544 | 0.5931978 | 0 | 25.37428 | 0 |
AFF3 | 137 | 0.5644502 | 0.5434736 | 0 | 25.26323 | 0 |
FBXL7 | 91 | 0.7055520 | 0.6927748 | 0 | 24.24975 | 0 |
APBA2 | 111 | 0.6003242 | 0.6297787 | 0 | 23.47857 | 0 |
BRSK2 | 129 | 0.6059711 | 0.6201595 | 0 | 23.10604 | 0 |
TMEM132C | 104 | 0.6317377 | 0.6058494 | 0 | 22.88108 | 0 |
CACNA1H | 171 | 0.5393801 | 0.4784796 | 0 | 22.83555 | 0 |
CTNNA2 | 174 | 0.5111421 | 0.3962578 | 0 | 22.67566 | 0 |
Check whether the distribution is even - it’s not.
hist(dmagg$gme_res_df$median,breaks=30) ; grid()
1-sample t-test for gene set enrichment.
ttenrich <- function(m,genesets,cores=1) {
res <- mclapply( 1:length(genesets), function(i) {
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
wt <- t.test(tstats)
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
# need to get the median column before analysis
m <- dmagg$gme_res_df[,"median",drop=FALSE]
tic()
lares <- ttenrich(m=m,genesets=gs_symbols,cores=CORES)
toc()
## 2.351 sec elapsed
head(lares,30) %>%
kbl(caption = "Top significant genesets using LA approach") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_POST_TRANSLATIONAL_PROTEIN_MODIFICATION | 1290 | 0.2998819 | 0.2991537 | 0 | 233.15569 | 0 |
REACTOME_RNA_POLYMERASE_II_TRANSCRIPTION | 1264 | 0.2960719 | 0.2950221 | 0 | 219.89598 | 0 |
REACTOME_INFECTIOUS_DISEASE | 843 | 0.2864686 | 0.2808871 | 0 | 141.62252 | 0 |
REACTOME_DEVELOPMENTAL_BIOLOGY | 1059 | 0.2627351 | 0.2740292 | 0 | 139.51392 | 0 |
REACTOME_INNATE_IMMUNE_SYSTEM | 1003 | 0.2668969 | 0.2718274 | 0 | 132.55638 | 0 |
REACTOME_TRANSPORT_OF_SMALL_MOLECULES | 690 | 0.3047491 | 0.2940394 | 0 | 129.72666 | 0 |
REACTOME_ADAPTIVE_IMMUNE_SYSTEM | 728 | 0.3043997 | 0.2894737 | 0 | 123.83064 | 0 |
REACTOME_VESICLE_MEDIATED_TRANSPORT | 642 | 0.3034908 | 0.3068687 | 0 | 116.67725 | 0 |
REACTOME_SIGNALING_BY_GPCR | 668 | 0.3238436 | 0.3049452 | 0 | 113.86719 | 0 |
REACTOME_MEMBRANE_TRAFFICKING | 603 | 0.3034801 | 0.3070516 | 0 | 111.51246 | 0 |
REACTOME_CELLULAR_RESPONSES_TO_STIMULI | 708 | 0.2853291 | 0.2822160 | 0 | 109.99926 | 0 |
REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES_AND_RHOBTB3 | 614 | 0.2775567 | 0.2778777 | 0 | 107.02355 | 0 |
REACTOME_METABOLISM_OF_LIPIDS | 690 | 0.2822451 | 0.2811934 | 0 | 105.62667 | 0 |
REACTOME_SIGNALING_BY_RECEPTOR_TYROSINE_KINASES | 508 | 0.2955823 | 0.3046843 | 0 | 102.93157 | 0 |
REACTOME_CELL_CYCLE | 594 | 0.2901179 | 0.2751444 | 0 | 102.53801 | 0 |
REACTOME_HEMOSTASIS | 572 | 0.2910908 | 0.2955291 | 0 | 96.05974 | 0 |
REACTOME_NERVOUS_SYSTEM_DEVELOPMENT | 547 | 0.2892142 | 0.2918507 | 0 | 95.62451 | 0 |
REACTOME_METABOLISM_OF_RNA | 650 | 0.2739300 | 0.2710402 | 0 | 93.86478 | 0 |
REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 673 | 0.2679751 | 0.2665137 | 0 | 87.66679 | 0 |
REACTOME_NEURONAL_SYSTEM | 387 | 0.3539050 | 0.3565131 | 0 | 86.21708 | 0 |
REACTOME_DISEASES_OF_SIGNAL_TRANSDUCTION_BY_GROWTH_FACTOR_RECEPTORS_AND_SECOND_MESSENGERS | 411 | 0.2992599 | 0.3004987 | 0 | 79.51307 | 0 |
REACTOME_CELL_CYCLE_MITOTIC | 473 | 0.2863082 | 0.2688549 | 0 | 78.19745 | 0 |
REACTOME_GPCR_LIGAND_BINDING | 440 | 0.3454031 | 0.3391080 | 0 | 75.13377 | 0 |
REACTOME_RHO_GTPASE_CYCLE | 416 | 0.2615049 | 0.2668531 | 0 | 71.86053 | 0 |
REACTOME_CLASS_I_MHC_MEDIATED_ANTIGEN_PROCESSING_PRESENTATION | 359 | 0.3229966 | 0.3097359 | 0 | 70.78455 | 0 |
REACTOME_SENSORY_PERCEPTION | 558 | 0.3447258 | 0.3557804 | 0 | 70.36348 | 0 |
REACTOME_SARS_COV_INFECTIONS | 383 | 0.2905156 | 0.2733905 | 0 | 68.54454 | 0 |
REACTOME_ANTIGEN_PROCESSING_UBIQUITINATION_PROTEASOME_DEGRADATION | 291 | 0.3186704 | 0.3097359 | 0 | 61.90848 | 0 |
REACTOME_TRANSCRIPTIONAL_REGULATION_BY_TP53 | 342 | 0.2866424 | 0.2970055 | 0 | 61.15333 | 0 |
REACTOME_SIGNALING_BY_INTERLEUKINS | 434 | 0.2635798 | 0.2741694 | 0 | 59.09986 | 0 |
head(lares[order(-abs(lares$t_median)),],30) %>%
kbl(caption = "Top effect size genesets using LA approach") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_SYNTHESIS_OF_EPOXY_EET_AND_DIHYDROXYEICOSATRIENOIC_ACIDS_DHET | 8 | 0.6647780 | 0.6511859 | 0.0000156 | 4.807886 | 0.0000314 |
REACTOME_BIOSYNTHESIS_OF_MARESINS | 8 | 0.6883028 | 0.6454084 | 0.0036305 | 2.440039 | 0.0047101 |
REACTOME_BIOSYNTHESIS_OF_MARESIN_LIKE_SPMS | 6 | 0.7401036 | 0.6454084 | 0.0175174 | 1.756531 | 0.0201820 |
REACTOME_SYNTHESIS_OF_16_20_HYDROXYEICOSATETRAENOIC_ACIDS_HETE | 9 | 0.5550265 | 0.6419164 | 0.0001601 | 3.795701 | 0.0002727 |
REACTOME_HORMONE_LIGAND_BINDING_RECEPTORS | 12 | 0.5959606 | 0.6171239 | 0.0000069 | 5.160469 | 0.0000148 |
REACTOME_POLB_DEPENDENT_LONG_PATCH_BASE_EXCISION_REPAIR | 7 | 0.5627699 | 0.6093286 | 0.0002669 | 3.573722 | 0.0004331 |
REACTOME_THYROXINE_BIOSYNTHESIS | 10 | 0.4943075 | 0.5892574 | 0.0016059 | 2.794292 | 0.0022359 |
REACTOME_CYP2E1_REACTIONS | 11 | 0.5047935 | 0.5885427 | 0.0166391 | 1.778870 | 0.0192515 |
REACTOME_PTK6_REGULATES_PROTEINS_INVOLVED_IN_RNA_PROCESSING | 5 | 0.6246832 | 0.5754640 | 0.0093297 | 2.030133 | 0.0112479 |
REACTOME_ACTIVATION_OF_RAS_IN_B_CELLS | 5 | 0.4691901 | 0.5544113 | 0.0047675 | 2.321712 | 0.0060792 |
REACTOME_XENOBIOTICS | 23 | 0.5237150 | 0.5537705 | 0.0000178 | 4.748828 | 0.0000356 |
REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA | 13 | 0.4513543 | 0.5517358 | 0.0010631 | 2.973417 | 0.0015337 |
REACTOME_RELEASE_OF_HH_NP_FROM_THE_SECRETING_CELL | 8 | 0.4168124 | 0.5515797 | 0.0027224 | 2.565053 | 0.0036387 |
REACTOME_N_GLYCAN_TRIMMING_AND_ELONGATION_IN_THE_CIS_GOLGI | 5 | 0.5429720 | 0.5422029 | 0.0094753 | 2.023409 | 0.0113731 |
REACTOME_POU5F1_OCT4_SOX2_NANOG_REPRESS_GENES_RELATED_TO_DIFFERENTIATION | 10 | 0.5487481 | 0.5372936 | 0.0006495 | 3.187405 | 0.0009713 |
REACTOME_FGFR1B_LIGAND_BINDING_AND_ACTIVATION | 6 | 0.5240330 | 0.5333029 | 0.0027539 | 2.560051 | 0.0036779 |
REACTOME_POU5F1_OCT4_SOX2_NANOG_ACTIVATE_GENES_RELATED_TO_PROLIFERATION | 12 | 0.4455582 | 0.5301791 | 0.0010124 | 2.994629 | 0.0014671 |
REACTOME_RECYCLING_OF_EIF2_GDP | 7 | 0.4415141 | 0.5299023 | 0.0026349 | 2.579242 | 0.0035304 |
REACTOME_SHC_MEDIATED_CASCADE_FGFR1 | 21 | 0.4492479 | 0.5250671 | 0.0000003 | 6.577303 | 0.0000007 |
REACTOME_FGFR1_LIGAND_BINDING_AND_ACTIVATION | 15 | 0.4623513 | 0.5250671 | 0.0000098 | 5.008179 | 0.0000206 |
REACTOME_FGFR1C_LIGAND_BINDING_AND_ACTIVATION | 11 | 0.4605713 | 0.5250671 | 0.0001418 | 3.848329 | 0.0002439 |
REACTOME_IRAK2_MEDIATED_ACTIVATION_OF_TAK1_COMPLEX | 9 | 0.4119899 | 0.5242935 | 0.0023498 | 2.628966 | 0.0031771 |
REACTOME_DOPAMINE_RECEPTORS | 5 | 0.4762660 | 0.5224986 | 0.0172276 | 1.763775 | 0.0198762 |
REACTOME_IRS_ACTIVATION | 5 | 0.4288802 | 0.5191630 | 0.0028008 | 2.552726 | 0.0037313 |
REACTOME_SOS_MEDIATED_SIGNALLING | 7 | 0.3884767 | 0.5191630 | 0.0040506 | 2.392477 | 0.0052139 |
REACTOME_MINERALOCORTICOID_BIOSYNTHESIS | 6 | 0.4322265 | 0.5134655 | 0.0076227 | 2.117893 | 0.0093204 |
REACTOME_PEPTIDE_HORMONE_BIOSYNTHESIS | 13 | 0.4870750 | 0.5073463 | 0.0000465 | 4.332189 | 0.0000872 |
REACTOME_ANDROGEN_BIOSYNTHESIS | 11 | 0.3698550 | 0.5011304 | 0.0064208 | 2.192410 | 0.0079760 |
REACTOME_DIGESTION_OF_DIETARY_CARBOHYDRATE | 10 | 0.4250851 | 0.4987482 | 0.0045883 | 2.338349 | 0.0058598 |
REACTOME_THE_ACTIVATION_OF_ARYLSULFATASES | 8 | 0.3915444 | 0.4957261 | 0.0080778 | 2.092707 | 0.0098474 |
lasig <- subset(lares,fdr<0.05)
nrow(lasig)
## [1] 1497
head(lasig[order(-abs(lasig$t_median)),],30) %>%
kbl(caption = "Top effect size genesets after FDR filtering using LA approach") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_SYNTHESIS_OF_EPOXY_EET_AND_DIHYDROXYEICOSATRIENOIC_ACIDS_DHET | 8 | 0.6647780 | 0.6511859 | 0.0000156 | 4.807886 | 0.0000314 |
REACTOME_BIOSYNTHESIS_OF_MARESINS | 8 | 0.6883028 | 0.6454084 | 0.0036305 | 2.440039 | 0.0047101 |
REACTOME_BIOSYNTHESIS_OF_MARESIN_LIKE_SPMS | 6 | 0.7401036 | 0.6454084 | 0.0175174 | 1.756531 | 0.0201820 |
REACTOME_SYNTHESIS_OF_16_20_HYDROXYEICOSATETRAENOIC_ACIDS_HETE | 9 | 0.5550265 | 0.6419164 | 0.0001601 | 3.795701 | 0.0002727 |
REACTOME_HORMONE_LIGAND_BINDING_RECEPTORS | 12 | 0.5959606 | 0.6171239 | 0.0000069 | 5.160469 | 0.0000148 |
REACTOME_POLB_DEPENDENT_LONG_PATCH_BASE_EXCISION_REPAIR | 7 | 0.5627699 | 0.6093286 | 0.0002669 | 3.573722 | 0.0004331 |
REACTOME_THYROXINE_BIOSYNTHESIS | 10 | 0.4943075 | 0.5892574 | 0.0016059 | 2.794292 | 0.0022359 |
REACTOME_CYP2E1_REACTIONS | 11 | 0.5047935 | 0.5885427 | 0.0166391 | 1.778870 | 0.0192515 |
REACTOME_PTK6_REGULATES_PROTEINS_INVOLVED_IN_RNA_PROCESSING | 5 | 0.6246832 | 0.5754640 | 0.0093297 | 2.030133 | 0.0112479 |
REACTOME_ACTIVATION_OF_RAS_IN_B_CELLS | 5 | 0.4691901 | 0.5544113 | 0.0047675 | 2.321712 | 0.0060792 |
REACTOME_XENOBIOTICS | 23 | 0.5237150 | 0.5537705 | 0.0000178 | 4.748828 | 0.0000356 |
REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA | 13 | 0.4513543 | 0.5517358 | 0.0010631 | 2.973417 | 0.0015337 |
REACTOME_RELEASE_OF_HH_NP_FROM_THE_SECRETING_CELL | 8 | 0.4168124 | 0.5515797 | 0.0027224 | 2.565053 | 0.0036387 |
REACTOME_N_GLYCAN_TRIMMING_AND_ELONGATION_IN_THE_CIS_GOLGI | 5 | 0.5429720 | 0.5422029 | 0.0094753 | 2.023409 | 0.0113731 |
REACTOME_POU5F1_OCT4_SOX2_NANOG_REPRESS_GENES_RELATED_TO_DIFFERENTIATION | 10 | 0.5487481 | 0.5372936 | 0.0006495 | 3.187405 | 0.0009713 |
REACTOME_FGFR1B_LIGAND_BINDING_AND_ACTIVATION | 6 | 0.5240330 | 0.5333029 | 0.0027539 | 2.560051 | 0.0036779 |
REACTOME_POU5F1_OCT4_SOX2_NANOG_ACTIVATE_GENES_RELATED_TO_PROLIFERATION | 12 | 0.4455582 | 0.5301791 | 0.0010124 | 2.994629 | 0.0014671 |
REACTOME_RECYCLING_OF_EIF2_GDP | 7 | 0.4415141 | 0.5299023 | 0.0026349 | 2.579242 | 0.0035304 |
REACTOME_SHC_MEDIATED_CASCADE_FGFR1 | 21 | 0.4492479 | 0.5250671 | 0.0000003 | 6.577303 | 0.0000007 |
REACTOME_FGFR1_LIGAND_BINDING_AND_ACTIVATION | 15 | 0.4623513 | 0.5250671 | 0.0000098 | 5.008179 | 0.0000206 |
REACTOME_FGFR1C_LIGAND_BINDING_AND_ACTIVATION | 11 | 0.4605713 | 0.5250671 | 0.0001418 | 3.848329 | 0.0002439 |
REACTOME_IRAK2_MEDIATED_ACTIVATION_OF_TAK1_COMPLEX | 9 | 0.4119899 | 0.5242935 | 0.0023498 | 2.628966 | 0.0031771 |
REACTOME_DOPAMINE_RECEPTORS | 5 | 0.4762660 | 0.5224986 | 0.0172276 | 1.763775 | 0.0198762 |
REACTOME_IRS_ACTIVATION | 5 | 0.4288802 | 0.5191630 | 0.0028008 | 2.552726 | 0.0037313 |
REACTOME_SOS_MEDIATED_SIGNALLING | 7 | 0.3884767 | 0.5191630 | 0.0040506 | 2.392477 | 0.0052139 |
REACTOME_MINERALOCORTICOID_BIOSYNTHESIS | 6 | 0.4322265 | 0.5134655 | 0.0076227 | 2.117893 | 0.0093204 |
REACTOME_PEPTIDE_HORMONE_BIOSYNTHESIS | 13 | 0.4870750 | 0.5073463 | 0.0000465 | 4.332189 | 0.0000872 |
REACTOME_ANDROGEN_BIOSYNTHESIS | 11 | 0.3698550 | 0.5011304 | 0.0064208 | 2.192410 | 0.0079760 |
REACTOME_DIGESTION_OF_DIETARY_CARBOHYDRATE | 10 | 0.4250851 | 0.4987482 | 0.0045883 | 2.338349 | 0.0058598 |
REACTOME_THE_ACTIVATION_OF_ARYLSULFATASES | 8 | 0.3915444 | 0.4957261 | 0.0080778 | 2.092707 | 0.0098474 |
plot(lares$t_median,-log10(lares$pval),pch=19,col="gray")
grid()
points(lasig$t_median,-log10(lasig$pval),pch=19,col="red")
Determine the median of probe values
mx <- mval
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=CORES)
myann$gnl <- gnl
mymed <- function(g) {
probes <- rownames(myann[grep(g,myann$gnl),])
rows <- which(rownames(mx) %in% probes)
if ( length(rows) > 1 ) {
b <- mx[rows,]
med <- apply(b,2,median)
med <- matrix(med,nrow=1)
colnames(med) <- colnames(b)
rownames(med) <- g
return(med)
}
}
med <- mclapply(gn,mymed,mc.cores=CORES)
med <- med[lapply(med,length)>0]
medf <- do.call(rbind,med)
Then do a limma test on median gene methylation. See if it is more robust than other approaches.
fit.reduced <- lmFit(medf,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) patient10 patient11 patient12 patient13 patient14 patient15
## Down 9705 10 4 14 10 11 19
## NotSig 6090 25491 25505 25491 25501 25497 25496
## Up 9726 20 12 16 10 13 6
## patient16 patient17 patient18 patient19 patient2 patient20 patient21
## Down 5 8 8 21 13 10 9
## NotSig 25509 25505 25499 25492 25497 25494 25502
## Up 7 8 14 8 11 17 10
## patient22 patient23 patient24 patient25 patient26 patient27 patient28
## Down 10 6 11 9 11 16 4
## NotSig 25501 25502 25501 25505 25501 25502 25513
## Up 10 13 9 7 9 3 4
## patient29 patient3 patient30 patient31 patient32 patient33 patient34
## Down 9 7 19 12 6 11 11
## NotSig 25498 25507 24824 25504 25501 25502 25498
## Up 14 7 678 5 14 8 12
## patient35 patient36 patient37 patient4 patient5 patient6 patient7
## Down 9 10 13 15 10 6 7
## NotSig 25502 25498 25501 25496 25502 25504 25508
## Up 10 13 7 10 9 11 6
## patient8 patient9 tumor
## Down 21 13 11958
## NotSig 25493 25486 10638
## Up 7 22 2925
magg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
head(magg, 30) %>%
kbl(caption="limma gene methylation results") %>%
kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
ATIC | -1.4254425 | -2.0443778 | -12.181158 | 0 | 0 | 23.78827 |
FAM53B-AS1 | 1.1521510 | 0.7705936 | 12.026717 | 0 | 0 | 23.40774 |
REXO1L2P | -1.7026400 | 1.9479949 | -11.445804 | 0 | 0 | 21.94675 |
MIR18A | -1.5591587 | -2.0685843 | -11.326911 | 0 | 0 | 21.64192 |
TSTD1 | -1.5161721 | -1.2357071 | -11.231168 | 0 | 0 | 21.39500 |
SMAD5OS | -0.8330970 | -2.4700007 | -11.024233 | 0 | 0 | 20.85690 |
MIR1204 | -2.1708455 | -1.0523946 | -10.992054 | 0 | 0 | 20.77268 |
PFN3 | 0.9712278 | 0.1898591 | 10.914662 | 0 | 0 | 20.56953 |
MIR19A | -1.6269020 | -2.0096904 | -10.870465 | 0 | 0 | 20.45313 |
PTBP3 | -1.0022582 | -1.9922371 | -10.824212 | 0 | 0 | 20.33103 |
MIR17HG | -0.8068688 | -2.7695862 | -10.745257 | 0 | 0 | 20.12190 |
MIR17 | -0.8068688 | -2.7695862 | -10.745257 | 0 | 0 | 20.12190 |
LOC100506472 | 1.0406665 | 0.1575938 | 10.577875 | 0 | 0 | 19.67563 |
COX7A1 | 1.3986564 | 0.4180681 | 10.499226 | 0 | 0 | 19.46458 |
MAB21L2 | 0.9563229 | 2.0672402 | 10.369290 | 0 | 0 | 19.11399 |
TASP1 | -0.8331848 | -2.0591260 | -10.218288 | 0 | 0 | 18.70357 |
LINC00961 | 0.9633201 | 0.4383619 | 10.207708 | 0 | 0 | 18.67469 |
RBKS | -0.8711210 | -1.2057390 | -10.115722 | 0 | 0 | 18.42298 |
ZNRF2 | -0.8431140 | -0.4362518 | -10.110227 | 0 | 0 | 18.40790 |
MAP1LC3B | -1.3422158 | -1.3174271 | -10.104103 | 0 | 0 | 18.39110 |
DMRTA2 | 1.3868374 | -2.0173113 | 10.036068 | 0 | 0 | 18.20405 |
ANGPT2 | 0.5974818 | 0.7035021 | 10.026692 | 0 | 0 | 18.17822 |
PPTC7 | -1.2224394 | -1.2993429 | -10.001082 | 0 | 0 | 18.10761 |
GSR | -0.8360882 | -3.2793538 | -9.991651 | 0 | 0 | 18.08159 |
LOC158376 | 1.1202875 | 0.1315688 | 9.879906 | 0 | 0 | 17.77228 |
ARPP19 | -1.1259520 | 0.1471944 | -9.848139 | 0 | 0 | 17.68403 |
GTF2H2D | -0.8556522 | -3.1558091 | -9.823241 | 0 | 0 | 17.61477 |
RNF216L | -0.4826783 | -3.0664417 | -9.770609 | 0 | 0 | 17.46807 |
MGC4473 | -1.0629077 | 2.2814382 | -9.762656 | 0 | 0 | 17.44587 |
DHX57 | -1.4900868 | 0.5152841 | -9.750397 | 0 | 0 | 17.41163 |
1-sample t-test for enrichment
ttenrich <- function(m,genesets,cores=1) {
res <- mclapply( 1:length(genesets), function(i) {
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
wt <- t.test(tstats)
pval = wt$p.value
res <- c(name,myn,mymean,mymedian,pval)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
m <- as.data.frame(magg$t)
rownames(m) <- rownames(magg)
colnames(m) <- "t"
alres <- ttenrich(m=m,genesets=gs_symbols,cores=CORES)
alsig <- subset(alres,fdr<0.05)
nrow(alsig)
## [1] 846
nrow(subset(alsig,t_median>0))
## [1] 4
head(alsig[order(-alsig$t_median),],20) %>%
kbl(caption="hypermethylated pathways") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_REGULATION_OF_GENE_EXPRESSION_IN_EARLY_PANCREATIC_PRECURSOR_CELLS | 8 | 5.2567635 | 5.8699372 | 0.0001656 | 3.781019 | 0.0007533 |
REACTOME_FORMATION_OF_LATERAL_PLATE_MESODERM | 5 | 3.9275041 | 3.9126425 | 0.0010900 | 2.962569 | 0.0036149 |
REACTOME_ENDOSOMAL_VACUOLAR_PATHWAY | 11 | 2.2324168 | 2.6402485 | 0.0196618 | 1.706377 | 0.0395652 |
REACTOME_ACTIVATION_OF_ANTERIOR_HOX_GENES_IN_HINDBRAIN_DEVELOPMENT_DURING_EARLY_EMBRYOGENESIS | 59 | 1.5885811 | 1.0732915 | 0.0021290 | 2.671814 | 0.0063871 |
REACTOME_G_BETA_GAMMA_SIGNALLING_THROUGH_PI3KGAMMA | 25 | -1.2914237 | -0.1583230 | 0.0221544 | 1.654540 | 0.0440935 |
REACTOME_SIGNAL_AMPLIFICATION | 33 | -0.9376559 | -0.1583230 | 0.0249389 | 1.603123 | 0.0485730 |
REACTOME_EXTRA_NUCLEAR_ESTROGEN_SIGNALING | 74 | -0.6703062 | -0.2355707 | 0.0169817 | 1.770018 | 0.0349448 |
REACTOME_AUF1_HNRNP_D0_BINDS_AND_DESTABILIZES_MRNA | 53 | -1.0352630 | -0.2445826 | 0.0033043 | 2.480922 | 0.0090207 |
REACTOME_SIGNALING_BY_NOTCH4 | 79 | -0.6844068 | -0.2445826 | 0.0117999 | 1.928122 | 0.0260379 |
REACTOME_UCH_PROTEINASES | 83 | -0.6599484 | -0.2445826 | 0.0191832 | 1.717078 | 0.0387928 |
REACTOME_REGULATION_OF_MRNA_STABILITY_BY_PROTEINS_THAT_BIND_AU_RICH_ELEMENTS | 85 | -0.9045005 | -0.2476116 | 0.0007261 | 3.139021 | 0.0025798 |
REACTOME_DEFECTIVE_CFTR_CAUSES_CYSTIC_FIBROSIS | 59 | -1.1489203 | -0.2476116 | 0.0008997 | 3.045893 | 0.0030703 |
REACTOME_DEGRADATION_OF_AXIN | 53 | -0.9284598 | -0.2476116 | 0.0090207 | 2.044760 | 0.0207527 |
REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA | 71 | -0.9385234 | -0.2525490 | 0.0063416 | 2.197804 | 0.0155968 |
REACTOME_REGULATION_OF_PTEN_STABILITY_AND_ACTIVITY | 66 | -1.0530603 | -0.2802151 | 0.0014545 | 2.837290 | 0.0046172 |
REACTOME_NEGATIVE_REGULATION_OF_NOTCH4_SIGNALING | 52 | -0.9497642 | -0.2802151 | 0.0053230 | 2.273845 | 0.0135179 |
REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS | 62 | -0.8951187 | -0.2957337 | 0.0078342 | 2.106006 | 0.0184109 |
REACTOME_G_PROTEIN_BETA_GAMMA_SIGNALLING | 31 | -1.4011251 | -0.3128185 | 0.0060581 | 2.217661 | 0.0150580 |
REACTOME_ADORA2B_MEDIATED_ANTI_INFLAMMATORY_CYTOKINES_PRODUCTION | 43 | -0.9698413 | -0.3138115 | 0.0233186 | 1.632298 | 0.0458533 |
REACTOME_DEGRADATION_OF_GLI1_BY_THE_PROTEASOME | 58 | -1.0574075 | -0.3446614 | 0.0042067 | 2.376061 | 0.0110393 |
nrow(subset(alsig,t_median<0))
## [1] 842
head(alsig[order(alsig$t_median),],20) %>%
kbl(caption="hypomethylated pathways") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_DIGESTION_OF_DIETARY_CARBOHYDRATE | 10 | -5.995775 | -5.919723 | 0.0000004 | 6.400727 | 0.0000034 |
REACTOME_OLFACTORY_SIGNALING_PATHWAY | 357 | -5.457057 | -5.870819 | 0.0000000 | 173.944218 | 0.0000000 |
REACTOME_BETA_DEFENSINS | 34 | -5.312710 | -5.753725 | 0.0000000 | 18.362433 | 0.0000000 |
REACTOME_DEFENSINS | 43 | -5.130054 | -5.622719 | 0.0000000 | 22.965874 | 0.0000000 |
REACTOME_MET_RECEPTOR_ACTIVATION | 6 | -5.113725 | -5.311435 | 0.0000166 | 4.779260 | 0.0001043 |
REACTOME_METAL_SEQUESTRATION_BY_ANTIMICROBIAL_PROTEINS | 6 | -4.834315 | -5.303438 | 0.0006691 | 3.174477 | 0.0024303 |
REACTOME_ATORVASTATIN_ADME | 9 | -4.163363 | -5.205971 | 0.0017938 | 2.746235 | 0.0055125 |
REACTOME_ANTIMICROBIAL_PEPTIDES | 86 | -4.505592 | -5.157756 | 0.0000000 | 28.323998 | 0.0000000 |
REACTOME_TERMINATION_OF_O_GLYCAN_BIOSYNTHESIS | 23 | -4.337216 | -5.093173 | 0.0000000 | 8.740008 | 0.0000000 |
REACTOME_MATURATION_OF_PROTEIN_3A | 9 | -3.477797 | -5.093173 | 0.0072892 | 2.137322 | 0.0173542 |
REACTOME_SYNTHESIS_OF_KETONE_BODIES | 8 | -4.509386 | -5.048903 | 0.0009806 | 3.008510 | 0.0033186 |
REACTOME_SENSORY_PERCEPTION | 565 | -4.377463 | -5.046318 | 0.0000000 | 156.723470 | 0.0000000 |
REACTOME_AMINO_ACID_CONJUGATION | 9 | -4.895310 | -5.041090 | 0.0001353 | 3.868676 | 0.0006406 |
REACTOME_DEFECTIVE_GALNT3_CAUSES_HFTC | 16 | -4.731596 | -5.034515 | 0.0000000 | 9.794677 | 0.0000000 |
REACTOME_DECTIN_2_FAMILY | 26 | -4.162059 | -5.027594 | 0.0000000 | 8.069990 | 0.0000001 |
REACTOME_DEFECTIVE_C1GALT1C1_CAUSES_TNPS | 17 | -4.469243 | -5.010493 | 0.0000000 | 8.671523 | 0.0000000 |
REACTOME_SODIUM_COUPLED_SULPHATE_DI_AND_TRI_CARBOXYLATE_TRANSPORTERS | 5 | -4.027892 | -4.943080 | 0.0253917 | 1.595308 | 0.0492814 |
REACTOME_RUNX1_AND_FOXP3_CONTROL_THE_DEVELOPMENT_OF_REGULATORY_T_LYMPHOCYTES_TREGS | 9 | -5.191716 | -4.903460 | 0.0000301 | 4.521504 | 0.0001748 |
REACTOME_CLASS_C_3_METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS | 39 | -4.508825 | -4.831654 | 0.0000000 | 14.686070 | 0.0000000 |
REACTOME_BIOSYNTHESIS_OF_MARESIN_LIKE_SPMS | 6 | -4.596805 | -4.738171 | 0.0002350 | 3.628990 | 0.0010075 |
Use Euler diagrams to find the similarity. As medians are normally distributed about the zero, one would expect equal numbers of up and down- methylated gene sets. Therefore the t-test based approach could be considered better.
lasig_up <- rownames(subset(lasig,t_median>0))
lasig_dn <- rownames(subset(lasig,t_median<0))
alsig_up <- rownames(subset(alsig,t_median>0))
alsig_dn <- rownames(subset(alsig,t_median<0))
v1 <- list("LA up"=lasig_up, "LA dn"=lasig_dn,
"AL up"=alsig_up,"AL dn"=alsig_dn)
plot(euler(v1),quantities = TRUE)
## Warning in colSums(id & !empty) == 0 | merged_sets: longer object length is not
## a multiple of shorter object length
dmp <- dm[grep("Promo",dm$Regulatory_Feature_Group),]
tic()
dmpagg <- agg(dmp,cores=CORES)
toc()
## 24.577 sec elapsed
head(dmpagg$gme_res_df,20) %>%
kbl(caption="top gmea genes (t-test)") %>%
kable_paper("hover", full_width = F)
nprobes | mean | median | pval | sig | fdr | |
---|---|---|---|---|---|---|
HLA-L | 41 | 1.2196993 | 1.1499539 | 0.0e+00 | 10.773790 | 0.0000002 |
PPT2 | 70 | 0.6609744 | 0.7905891 | 0.0e+00 | 8.681603 | 0.0000237 |
ACTR3C | 19 | -3.0735939 | -3.1395413 | 0.0e+00 | 8.325419 | 0.0000537 |
LRRC61 | 19 | -3.0735939 | -3.1395413 | 0.0e+00 | 8.325419 | 0.0000537 |
PHACTR2 | 16 | 1.1858185 | 1.2549898 | 0.0e+00 | 7.616723 | 0.0002745 |
PTPRCAP | 15 | -0.7129031 | -0.6930239 | 0.0e+00 | 7.494429 | 0.0003638 |
RBBP8 | 16 | 1.0995120 | 1.1812878 | 1.0e-07 | 7.081456 | 0.0009415 |
TRIM2 | 18 | 1.1318700 | 1.1916002 | 1.0e-07 | 6.993039 | 0.0011539 |
ARL4A | 18 | 1.4067409 | 1.4849414 | 2.0e-07 | 6.723592 | 0.0021458 |
FADS2 | 17 | 1.3216546 | 1.3468335 | 2.0e-07 | 6.620172 | 0.0027226 |
HCG4 | 38 | 0.5374925 | 0.4962379 | 3.0e-07 | 6.488716 | 0.0036846 |
EIF4EBP3 | 14 | 1.3046497 | 1.1590657 | 3.0e-07 | 6.460522 | 0.0039314 |
HOXA4 | 14 | -0.5960817 | -0.6274269 | 4.0e-07 | 6.425539 | 0.0042608 |
LRRC49 | 13 | 1.5469306 | 1.6317643 | 5.0e-07 | 6.300802 | 0.0056780 |
THAP10 | 13 | 1.5469306 | 1.6317643 | 5.0e-07 | 6.300802 | 0.0056780 |
INPP5D | 20 | -0.4475761 | -0.3304586 | 6.0e-07 | 6.250234 | 0.0063780 |
TMBIM1 | 17 | 0.7991611 | 0.7955157 | 1.6e-06 | 5.797188 | 0.0181006 |
SLCO4C1 | 11 | 1.1497558 | 1.2119814 | 1.7e-06 | 5.770824 | 0.0192317 |
DDAH2 | 37 | 0.7661586 | 0.8038979 | 1.9e-06 | 5.726782 | 0.0212825 |
L3MBTL | 22 | 0.6137769 | 0.6218255 | 2.1e-06 | 5.667730 | 0.0243801 |
Check whether the distribution is even - it’s not.
hist(dmpagg$gme_res_df$median,breaks=30) ; grid()
1-sample t-test for gene set enrichment.
# need to get the median column before analysis
m <- dmpagg$gme_res_df[,"median",drop=FALSE]
tic()
lapres <- ttenrich(m=m,genesets=gs_symbols,cores=CORES)
toc()
## 2.131 sec elapsed
head(lapres,30) %>%
kbl(caption = "Top significant genesets using LA approach for promoters") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_POST_TRANSLATIONAL_PROTEIN_MODIFICATION | 889 | 0.2192261 | 0.2124958 | 0 | 45.55117 | 0 |
REACTOME_RNA_POLYMERASE_II_TRANSCRIPTION | 895 | 0.2504899 | 0.2242910 | 0 | 39.59338 | 0 |
REACTOME_ADAPTIVE_IMMUNE_SYSTEM | 472 | 0.2424433 | 0.2246615 | 0 | 31.62884 | 0 |
REACTOME_INFECTIOUS_DISEASE | 626 | 0.2082916 | 0.2031278 | 0 | 29.17803 | 0 |
REACTOME_METABOLISM_OF_RNA | 559 | 0.2073418 | 0.2243347 | 0 | 27.96706 | 0 |
REACTOME_CELL_CYCLE | 512 | 0.2175579 | 0.2239982 | 0 | 26.58314 | 0 |
REACTOME_INNATE_IMMUNE_SYSTEM | 607 | 0.1880170 | 0.1896513 | 0 | 25.83740 | 0 |
REACTOME_METABOLISM_OF_LIPIDS | 421 | 0.2358066 | 0.2383694 | 0 | 25.03356 | 0 |
REACTOME_VESICLE_MEDIATED_TRANSPORT | 447 | 0.2273127 | 0.2200743 | 0 | 24.74828 | 0 |
REACTOME_MEMBRANE_TRAFFICKING | 439 | 0.2263624 | 0.2133683 | 0 | 23.91864 | 0 |
REACTOME_CLASS_I_MHC_MEDIATED_ANTIGEN_PROCESSING_PRESENTATION | 278 | 0.2807442 | 0.2822949 | 0 | 23.09209 | 0 |
REACTOME_DEVELOPMENTAL_BIOLOGY | 458 | 0.2028929 | 0.2128727 | 0 | 21.98409 | 0 |
REACTOME_CELL_CYCLE_MITOTIC | 412 | 0.2174393 | 0.2215544 | 0 | 21.13451 | 0 |
REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES_AND_RHOBTB3 | 440 | 0.2160317 | 0.1963506 | 0 | 20.67288 | 0 |
REACTOME_ASPARAGINE_N_LINKED_GLYCOSYLATION | 228 | 0.2940392 | 0.2393459 | 0 | 20.25781 | 0 |
REACTOME_DNA_REPAIR | 241 | 0.2368912 | 0.2447895 | 0 | 18.89879 | 0 |
REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 425 | 0.1965470 | 0.2040146 | 0 | 18.79170 | 0 |
REACTOME_CELLULAR_RESPONSES_TO_STIMULI | 544 | 0.1757627 | 0.2016791 | 0 | 18.76258 | 0 |
REACTOME_ANTIGEN_PROCESSING_UBIQUITINATION_PROTEASOME_DEGRADATION | 232 | 0.2673979 | 0.2685479 | 0 | 18.12039 | 0 |
REACTOME_SARS_COV_INFECTIONS | 293 | 0.2216330 | 0.2032030 | 0 | 17.42404 | 0 |
REACTOME_DISEASES_OF_SIGNAL_TRANSDUCTION_BY_GROWTH_FACTOR_RECEPTORS_AND_SECOND_MESSENGERS | 291 | 0.2154122 | 0.2304174 | 0 | 17.20946 | 0 |
REACTOME_TRANSPORT_OF_SMALL_MOLECULES | 337 | 0.2249619 | 0.2214938 | 0 | 16.23451 | 0 |
REACTOME_INTRA_GOLGI_AND_RETROGRADE_GOLGI_TO_ER_TRAFFIC | 162 | 0.3045838 | 0.2444465 | 0 | 16.00075 | 0 |
REACTOME_TRANSCRIPTIONAL_REGULATION_BY_TP53 | 294 | 0.2097320 | 0.2405244 | 0 | 14.94443 | 0 |
REACTOME_NERVOUS_SYSTEM_DEVELOPMENT | 307 | 0.2015002 | 0.2030527 | 0 | 14.76518 | 0 |
REACTOME_M_PHASE | 289 | 0.2161367 | 0.2094081 | 0 | 14.45597 | 0 |
REACTOME_SIGNALING_BY_RECEPTOR_TYROSINE_KINASES | 284 | 0.1987876 | 0.1908174 | 0 | 13.91930 | 0 |
REACTOME_SARS_COV_2_INFECTION | 211 | 0.2375341 | 0.2276205 | 0 | 13.23978 | 0 |
REACTOME_SIGNALING_BY_INTERLEUKINS | 263 | 0.2002083 | 0.2092101 | 0 | 13.02391 | 0 |
REACTOME_TRANSLATION | 237 | 0.2232197 | 0.2760778 | 0 | 12.99444 | 0 |
head(lapres[order(-abs(lapres$t_median)),],30) %>%
kbl(caption = "Top effect size genesets using LA approach") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_CASPASE_ACTIVATION_VIA_DEPENDENCE_RECEPTORS_IN_THE_ABSENCE_OF_LIGAND | 5 | 0.7318338 | 0.8271229 | 0.0085391 | 2.0685857 | 0.0211663 |
REACTOME_N_GLYCAN_TRIMMING_AND_ELONGATION_IN_THE_CIS_GOLGI | 5 | 0.9146009 | 0.8269001 | 0.0273749 | 1.5626474 | 0.0534594 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PG | 5 | 0.7504015 | 0.7785339 | 0.0145652 | 1.8366847 | 0.0323376 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PC | 8 | 0.6835749 | 0.7304901 | 0.0021125 | 2.6752076 | 0.0070469 |
REACTOME_SURFACTANT_METABOLISM | 7 | 0.8413523 | 0.7231251 | 0.0522554 | 1.2818688 | 0.0877028 |
REACTOME_MIRO_GTPASE_CYCLE | 7 | 0.5071042 | 0.7061679 | 0.0465231 | 1.3323318 | 0.0798814 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PE | 8 | 0.6235195 | 0.6890713 | 0.0052091 | 2.2832406 | 0.0141979 |
REACTOME_LYSINE_CATABOLISM | 6 | 0.5913141 | 0.6879470 | 0.0211007 | 1.6757022 | 0.0435946 |
REACTOME_SIGNALING_BY_RNF43_MUTANTS | 6 | 0.4263419 | 0.6844331 | 0.0932973 | 1.0301311 | 0.1415909 |
REACTOME_REPRESSION_OF_WNT_TARGET_GENES | 8 | 0.7244184 | 0.6768602 | 0.0013612 | 2.8660691 | 0.0050989 |
REACTOME_ENDOSOMAL_VACUOLAR_PATHWAY | 9 | 0.5374188 | 0.6674260 | 0.0183857 | 1.7355189 | 0.0391971 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PS | 6 | 0.6208751 | 0.6607948 | 0.0387429 | 1.4118079 | 0.0692241 |
REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION | 7 | 0.4283549 | 0.6542443 | 0.0518673 | 1.2851068 | 0.0872697 |
REACTOME_TRANSPORT_OF_CONNEXONS_TO_THE_PLASMA_MEMBRANE | 7 | 0.6518170 | 0.6406297 | 0.0430049 | 1.3664818 | 0.0748956 |
REACTOME_GAP_JUNCTION_ASSEMBLY | 9 | 0.4920937 | 0.6406297 | 0.0571510 | 1.2429763 | 0.0941517 |
REACTOME_BINDING_OF_TCF_LEF_CTNNB1_TO_TARGET_GENE_PROMOTERS | 5 | 0.4720585 | 0.6268754 | 0.1373005 | 0.8623279 | 0.1917916 |
REACTOME_DISPLACEMENT_OF_DNA_GLYCOSYLASE_BY_APEX1 | 7 | 0.5337495 | 0.6173662 | 0.0020024 | 2.6984593 | 0.0067807 |
REACTOME_THE_ACTIVATION_OF_ARYLSULFATASES | 6 | 0.4371261 | 0.6171619 | 0.1305487 | 0.8842273 | 0.1850025 |
REACTOME_POLB_DEPENDENT_LONG_PATCH_BASE_EXCISION_REPAIR | 5 | 0.4053687 | 0.6162257 | 0.0717882 | 1.1439472 | 0.1146047 |
REACTOME_GABA_SYNTHESIS_RELEASE_REUPTAKE_AND_DEGRADATION | 7 | 0.5097912 | 0.6159512 | 0.0193727 | 1.7128095 | 0.0407191 |
REACTOME_FORMATION_OF_TUBULIN_FOLDING_INTERMEDIATES_BY_CCT_TRIC | 14 | 0.4878429 | 0.6024602 | 0.0047240 | 2.3256928 | 0.0131976 |
REACTOME_MET_ACTIVATES_PTK2_SIGNALING | 5 | 0.3841710 | 0.5813410 | 0.0729923 | 1.1367228 | 0.1162502 |
REACTOME_SIGNALING_BY_MRAS_COMPLEX_MUTANTS | 6 | 0.2486581 | 0.5776049 | 0.3784228 | 0.4220227 | 0.4408905 |
REACTOME_VITAMIN_D_CALCIFEROL_METABOLISM | 5 | 0.5239579 | 0.5724994 | 0.0026219 | 2.5813856 | 0.0083913 |
REACTOME_FBXW7_MUTANTS_AND_NOTCH1_IN_CANCER | 5 | 0.5705174 | 0.5721604 | 0.0138233 | 1.8593875 | 0.0311025 |
REACTOME_SEROTONIN_NEUROTRANSMITTER_RELEASE_CYCLE | 8 | 0.5282004 | 0.5654912 | 0.0088668 | 2.0522322 | 0.0216189 |
REACTOME_NOREPINEPHRINE_NEUROTRANSMITTER_RELEASE_CYCLE | 8 | 0.5282004 | 0.5654912 | 0.0088668 | 2.0522322 | 0.0216189 |
REACTOME_ACETYLCHOLINE_NEUROTRANSMITTER_RELEASE_CYCLE | 8 | 0.5282004 | 0.5654912 | 0.0088668 | 2.0522322 | 0.0216189 |
REACTOME_RHO_GTPASES_ACTIVATE_IQGAPS | 17 | 0.4616774 | 0.5642107 | 0.0026889 | 2.5704178 | 0.0085650 |
REACTOME_INACTIVATION_OF_CDC42_AND_RAC1 | 5 | 0.3647595 | 0.5642107 | 0.1265019 | 0.8979029 | 0.1806593 |
lapsig <- subset(lapres,fdr<0.05)
nrow(lapsig)
## [1] 670
head(lapsig[order(-abs(lapsig$t_median)),],30) %>%
kbl(caption = "Top effect size genesets after FDR filtering using LA approach for promoters") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_CASPASE_ACTIVATION_VIA_DEPENDENCE_RECEPTORS_IN_THE_ABSENCE_OF_LIGAND | 5 | 0.7318338 | 0.8271229 | 0.0085391 | 2.068586 | 0.0211663 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PG | 5 | 0.7504015 | 0.7785339 | 0.0145652 | 1.836685 | 0.0323376 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PC | 8 | 0.6835749 | 0.7304901 | 0.0021125 | 2.675208 | 0.0070469 |
REACTOME_ACYL_CHAIN_REMODELLING_OF_PE | 8 | 0.6235195 | 0.6890713 | 0.0052091 | 2.283241 | 0.0141979 |
REACTOME_LYSINE_CATABOLISM | 6 | 0.5913141 | 0.6879470 | 0.0211007 | 1.675702 | 0.0435946 |
REACTOME_REPRESSION_OF_WNT_TARGET_GENES | 8 | 0.7244184 | 0.6768602 | 0.0013612 | 2.866069 | 0.0050989 |
REACTOME_ENDOSOMAL_VACUOLAR_PATHWAY | 9 | 0.5374188 | 0.6674260 | 0.0183857 | 1.735519 | 0.0391971 |
REACTOME_DISPLACEMENT_OF_DNA_GLYCOSYLASE_BY_APEX1 | 7 | 0.5337495 | 0.6173662 | 0.0020024 | 2.698459 | 0.0067807 |
REACTOME_GABA_SYNTHESIS_RELEASE_REUPTAKE_AND_DEGRADATION | 7 | 0.5097912 | 0.6159512 | 0.0193727 | 1.712810 | 0.0407191 |
REACTOME_FORMATION_OF_TUBULIN_FOLDING_INTERMEDIATES_BY_CCT_TRIC | 14 | 0.4878429 | 0.6024602 | 0.0047240 | 2.325693 | 0.0131976 |
REACTOME_VITAMIN_D_CALCIFEROL_METABOLISM | 5 | 0.5239579 | 0.5724994 | 0.0026219 | 2.581386 | 0.0083913 |
REACTOME_FBXW7_MUTANTS_AND_NOTCH1_IN_CANCER | 5 | 0.5705174 | 0.5721604 | 0.0138233 | 1.859387 | 0.0311025 |
REACTOME_SEROTONIN_NEUROTRANSMITTER_RELEASE_CYCLE | 8 | 0.5282004 | 0.5654912 | 0.0088668 | 2.052232 | 0.0216189 |
REACTOME_NOREPINEPHRINE_NEUROTRANSMITTER_RELEASE_CYCLE | 8 | 0.5282004 | 0.5654912 | 0.0088668 | 2.052232 | 0.0216189 |
REACTOME_ACETYLCHOLINE_NEUROTRANSMITTER_RELEASE_CYCLE | 8 | 0.5282004 | 0.5654912 | 0.0088668 | 2.052232 | 0.0216189 |
REACTOME_RHO_GTPASES_ACTIVATE_IQGAPS | 17 | 0.4616774 | 0.5642107 | 0.0026889 | 2.570418 | 0.0085650 |
REACTOME_ATF6_ATF6_ALPHA_ACTIVATES_CHAPERONE_GENES | 8 | 0.5132293 | 0.5578594 | 0.0043986 | 2.356688 | 0.0125768 |
REACTOME_SYNDECAN_INTERACTIONS | 11 | 0.3332609 | 0.5460082 | 0.0091493 | 2.038610 | 0.0222269 |
REACTOME_MODULATION_BY_MTB_OF_HOST_IMMUNE_SYSTEM | 6 | 0.5446679 | 0.5430733 | 0.0028789 | 2.540780 | 0.0090200 |
REACTOME_RECYCLING_OF_EIF2_GDP | 7 | 0.4121148 | 0.5299023 | 0.0159237 | 1.797957 | 0.0346088 |
REACTOME_ROBO_RECEPTORS_BIND_AKAP5 | 5 | 0.4806726 | 0.5292089 | 0.0005936 | 3.226519 | 0.0025270 |
REACTOME_LGI_ADAM_INTERACTIONS | 6 | 0.5414660 | 0.5263101 | 0.0003472 | 3.459465 | 0.0016221 |
REACTOME_G_BETA_GAMMA_SIGNALLING_THROUGH_CDC42 | 11 | 0.4641181 | 0.5224699 | 0.0230709 | 1.636936 | 0.0467342 |
REACTOME_SYNTHESIS_OF_GDP_MANNOSE | 5 | 0.4179452 | 0.5072262 | 0.0084197 | 2.074701 | 0.0209478 |
REACTOME_UPTAKE_AND_ACTIONS_OF_BACTERIAL_TOXINS | 17 | 0.4296322 | 0.5064394 | 0.0002512 | 3.600032 | 0.0012521 |
REACTOME_REGULATION_OF_TP53_ACTIVITY_THROUGH_METHYLATION | 17 | 0.5109669 | 0.5033617 | 0.0000305 | 4.515951 | 0.0002086 |
REACTOME_GLUCAGON_TYPE_LIGAND_RECEPTORS | 12 | 0.4663371 | 0.4952442 | 0.0177017 | 1.751986 | 0.0378595 |
REACTOME_CLASS_B_2_SECRETIN_FAMILY_RECEPTORS | 27 | 0.4470810 | 0.4888232 | 0.0001597 | 3.796724 | 0.0008464 |
REACTOME_PREGNENOLONE_BIOSYNTHESIS | 6 | 0.4448512 | 0.4875549 | 0.0005238 | 3.280837 | 0.0022880 |
REACTOME_SIGNALING_BY_HIPPO | 12 | 0.5362793 | 0.4807863 | 0.0072681 | 2.138582 | 0.0186714 |
plot(lapres$t_median,-log10(lapres$pval),pch=19,col="gray")
grid()
points(lapsig$t_median,-log10(lapsig$pval),pch=19,col="red")
Determine the median of probe values.
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=CORES)
myann$gnl <- gnl
myannp <- myann[grep("Promot",myann$Regulatory_Feature_Group),]
mxp <- mval[rownames(mval) %in% rownames(myannp),]
mymed <- function(g) {
probes <- rownames(myannp[grep(g,myannp$gnl),])
rows <- which(rownames(mxp) %in% probes)
if ( length(rows) > 1 ) {
b <- mxp[rows,]
med <- apply(b,2,median)
med <- matrix(med,nrow=1)
colnames(med) <- colnames(b)
rownames(med) <- g
return(med)
}
}
medp <- mclapply(gn,mymed,mc.cores=CORES)
medp <- medp[lapply(medp,length)>0]
medfp <- do.call(rbind,medp)
Then do a limma test on median gene methylation. See if it is more robust than other approaches.
fit.reduced <- lmFit(medfp,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) patient10 patient11 patient12 patient13 patient14 patient15
## Down 11705 15 17 7 16 30 10
## NotSig 274 12020 12036 12042 12034 12022 12047
## Up 82 26 8 12 11 9 4
## patient16 patient17 patient18 patient19 patient2 patient20 patient21
## Down 7 9 3 25 14 10 7
## NotSig 12020 12044 12046 12027 12041 12022 12050
## Up 34 8 12 9 6 29 4
## patient22 patient23 patient24 patient25 patient26 patient27 patient28
## Down 16 12 15 17 26 16 12
## NotSig 12021 12020 12038 12028 11988 12041 12018
## Up 24 29 8 16 47 4 31
## patient29 patient3 patient30 patient31 patient32 patient33 patient34
## Down 3 6 70 7 5 8 10
## NotSig 12045 12045 10881 12050 12051 12047 12042
## Up 13 10 1110 4 5 6 9
## patient35 patient36 patient37 patient4 patient5 patient6 patient7
## Down 9 12 6 13 8 6 11
## NotSig 12045 12039 12049 12046 12048 12050 12044
## Up 7 10 6 2 5 5 6
## patient8 patient9 tumor
## Down 11 17 1055
## NotSig 12045 11969 10444
## Up 5 75 562
maggp <- topTable(fit.reduced,coef=ncol(design), number = Inf)
head(maggp, 30) %>%
kbl(caption="limma gene methylation results") %>%
kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
LOXL1-AS1 | -0.5939962 | -3.7471045 | -12.366365 | 0 | 0 | 23.89199 |
ABLIM1 | -1.4504158 | -2.4768535 | -11.200776 | 0 | 0 | 21.02416 |
PBX4 | -0.7107596 | -3.5234467 | -11.135895 | 0 | 0 | 20.85894 |
TSTD1 | -1.4751443 | -1.5323147 | -11.025084 | 0 | 0 | 20.57540 |
GNA15 | -1.2539062 | -1.4438563 | -10.929064 | 0 | 0 | 20.32829 |
SYTL1 | -1.3615645 | -1.4552688 | -10.891609 | 0 | 0 | 20.23155 |
TENC1 | -1.3665461 | -2.0769729 | -10.857454 | 0 | 0 | 20.14316 |
MIR19A | -1.6429500 | -2.2032455 | -10.612799 | 0 | 0 | 19.50517 |
SORL1 | -1.0151585 | -1.8089608 | -10.282892 | 0 | 0 | 18.63146 |
TRABD | -0.7179044 | -2.4430419 | -10.020261 | 0 | 0 | 17.92494 |
LOXL1 | -0.4213093 | -3.9370436 | -9.983033 | 0 | 0 | 17.82400 |
LCP1 | -1.3607259 | -1.2499719 | -9.919727 | 0 | 0 | 17.65192 |
ACSM3 | -1.1291094 | -2.2920575 | -9.878576 | 0 | 0 | 17.53976 |
GCET2 | -1.4780623 | -1.4693851 | -9.736988 | 0 | 0 | 17.15205 |
SNORD93 | -1.6788788 | -1.1641308 | -9.640003 | 0 | 0 | 16.88487 |
FAM220A | -0.5932807 | -3.7728531 | -9.620629 | 0 | 0 | 16.83134 |
C10orf91 | -0.9873395 | -1.1371952 | -9.591525 | 0 | 0 | 16.75083 |
ZNF217 | -1.3870595 | -1.9606183 | -9.522256 | 0 | 0 | 16.55874 |
NIPSNAP1 | -0.2888664 | -4.4906983 | -9.310865 | 0 | 0 | 15.96850 |
PTPN6 | -0.7200932 | -0.6610194 | -9.289363 | 0 | 0 | 15.90812 |
C9orf139 | -1.0891547 | -1.2111025 | -9.286968 | 0 | 0 | 15.90139 |
C11orf85 | 1.5959062 | -1.5467507 | 9.283910 | 0 | 0 | 15.89280 |
LOC147804 | 1.0002546 | -2.6718547 | 9.251600 | 0 | 0 | 15.80193 |
AARS2 | -0.5597709 | -3.6818079 | -9.196769 | 0 | 0 | 15.64740 |
MAMSTR | -1.0106156 | -1.2732252 | -9.175558 | 0 | 0 | 15.58752 |
KDM3B | -0.5515647 | -2.4009106 | -9.146899 | 0 | 0 | 15.50651 |
MIR18A | -0.8901480 | -2.9376592 | -9.105571 | 0 | 0 | 15.38950 |
TTC7A | -0.8195367 | -3.5623031 | -9.093974 | 0 | 0 | 15.35662 |
TMA16 | -0.5019002 | -4.0438383 | -9.026364 | 0 | 0 | 15.16460 |
GJD3 | -0.9279393 | -2.3364473 | -8.988876 | 0 | 0 | 15.05788 |
1-sample t-test for enrichment promoter.
m <- as.data.frame(maggp$t)
rownames(m) <- rownames(maggp)
colnames(m) <- "t"
alpres <- ttenrich(m=m,genesets=gs_symbols,cores=CORES)
alpsig <- subset(alpres,fdr<0.05)
nrow(alpsig)
## [1] 24
nrow(subset(alpsig,t_median>0))
## [1] 1
head(alpsig[order(-alpsig$t_median),],20) %>%
kbl(caption="hypermethylated pathways") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_MECP2_REGULATES_TRANSCRIPTION_OF_NEURONAL_LIGANDS | 5 | 0.7045782 | 0.7462844 | 0.0001991 | 3.700992 | 0.0278301 |
REACTOME_ADAPTIVE_IMMUNE_SYSTEM | 491 | -0.3010736 | -0.0773177 | 0.0007545 | 3.122322 | 0.0439515 |
REACTOME_INNATE_IMMUNE_SYSTEM | 657 | -0.4293379 | -0.0939349 | 0.0000002 | 6.644933 | 0.0003166 |
REACTOME_MEMBRANE_TRAFFICKING | 465 | -0.3932135 | -0.1261098 | 0.0000417 | 4.379528 | 0.0083345 |
REACTOME_VESICLE_MEDIATED_TRANSPORT | 478 | -0.3864050 | -0.1294140 | 0.0000375 | 4.426248 | 0.0083345 |
REACTOME_INFECTIOUS_DISEASE | 673 | -0.2520992 | -0.1422107 | 0.0003469 | 3.459740 | 0.0344548 |
REACTOME_METABOLISM_OF_LIPIDS | 449 | -0.3278862 | -0.1478980 | 0.0006754 | 3.170468 | 0.0429157 |
REACTOME_CELL_CYCLE | 522 | -0.2475204 | -0.1585426 | 0.0004095 | 3.387779 | 0.0344548 |
REACTOME_CELLULAR_RESPONSES_TO_STIMULI | 574 | -0.2803735 | -0.1884255 | 0.0001293 | 3.888402 | 0.0216421 |
REACTOME_METABOLISM_OF_RNA | 585 | -0.2131226 | -0.2069913 | 0.0003853 | 3.414188 | 0.0344548 |
REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES_AND_RHOBTB3 | 472 | -0.3405012 | -0.2110760 | 0.0005150 | 3.288233 | 0.0385376 |
REACTOME_CELL_CYCLE_CHECKPOINTS | 222 | -0.4340729 | -0.2222850 | 0.0002193 | 3.658991 | 0.0278691 |
REACTOME_CELL_CYCLE_MITOTIC | 420 | -0.2779576 | -0.2222850 | 0.0004190 | 3.377809 | 0.0344548 |
REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES | 250 | -0.5165409 | -0.3554563 | 0.0000144 | 4.841322 | 0.0050365 |
REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE | 196 | -0.4348812 | -0.3827065 | 0.0003288 | 3.483129 | 0.0344548 |
REACTOME_M_PHASE | 294 | -0.4093829 | -0.3909721 | 0.0000133 | 4.874976 | 0.0050365 |
REACTOME_RHO_GTPASE_EFFECTORS | 206 | -0.4731060 | -0.4079576 | 0.0005268 | 3.278341 | 0.0385376 |
REACTOME_TRANSLATION | 251 | -0.3972671 | -0.4259736 | 0.0000364 | 4.439401 | 0.0083345 |
REACTOME_SEPARATION_OF_SISTER_CHROMATIDS | 153 | -0.4872543 | -0.4463882 | 0.0005513 | 3.258593 | 0.0385376 |
REACTOME_MITOTIC_PROMETAPHASE | 162 | -0.5997910 | -0.5355054 | 0.0000075 | 5.126529 | 0.0050365 |
nrow(subset(alpsig,t_median<0))
## [1] 23
head(alpsig[order(alpsig$t_median),],20) %>%
kbl(caption="hypomethylated pathways") %>%
kable_paper("hover", full_width = F)
n_genes | t_mean | t_median | pval | logp | fdr | |
---|---|---|---|---|---|---|
REACTOME_ABORTIVE_ELONGATION_OF_HIV_1_TRANSCRIPT_IN_THE_ABSENCE_OF_TAT | 22 | -0.7704556 | -0.9428629 | 0.0003980 | 3.400158 | 0.0344548 |
REACTOME_MRNA_SPLICING_MINOR_PATHWAY | 46 | -0.7379727 | -0.7350914 | 0.0005955 | 3.225146 | 0.0396408 |
REACTOME_HCMV_EARLY_EVENTS | 57 | -0.6508005 | -0.6502009 | 0.0001393 | 3.855965 | 0.0216421 |
REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION | 101 | -0.6162418 | -0.5373447 | 0.0007138 | 3.146408 | 0.0433882 |
REACTOME_MITOTIC_PROMETAPHASE | 162 | -0.5997910 | -0.5355054 | 0.0000075 | 5.126529 | 0.0050365 |
REACTOME_SEPARATION_OF_SISTER_CHROMATIDS | 153 | -0.4872543 | -0.4463882 | 0.0005513 | 3.258593 | 0.0385376 |
REACTOME_TRANSLATION | 251 | -0.3972671 | -0.4259736 | 0.0000364 | 4.439401 | 0.0083345 |
REACTOME_RHO_GTPASE_EFFECTORS | 206 | -0.4731060 | -0.4079576 | 0.0005268 | 3.278341 | 0.0385376 |
REACTOME_M_PHASE | 294 | -0.4093829 | -0.3909721 | 0.0000133 | 4.874976 | 0.0050365 |
REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE | 196 | -0.4348812 | -0.3827065 | 0.0003288 | 3.483129 | 0.0344548 |
REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES | 250 | -0.5165409 | -0.3554563 | 0.0000144 | 4.841322 | 0.0050365 |
REACTOME_CELL_CYCLE_CHECKPOINTS | 222 | -0.4340729 | -0.2222850 | 0.0002193 | 3.658991 | 0.0278691 |
REACTOME_CELL_CYCLE_MITOTIC | 420 | -0.2779576 | -0.2222850 | 0.0004190 | 3.377809 | 0.0344548 |
REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES_AND_RHOBTB3 | 472 | -0.3405012 | -0.2110760 | 0.0005150 | 3.288233 | 0.0385376 |
REACTOME_METABOLISM_OF_RNA | 585 | -0.2131226 | -0.2069913 | 0.0003853 | 3.414188 | 0.0344548 |
REACTOME_CELLULAR_RESPONSES_TO_STIMULI | 574 | -0.2803735 | -0.1884255 | 0.0001293 | 3.888402 | 0.0216421 |
REACTOME_CELL_CYCLE | 522 | -0.2475204 | -0.1585426 | 0.0004095 | 3.387779 | 0.0344548 |
REACTOME_METABOLISM_OF_LIPIDS | 449 | -0.3278862 | -0.1478980 | 0.0006754 | 3.170468 | 0.0429157 |
REACTOME_INFECTIOUS_DISEASE | 673 | -0.2520992 | -0.1422107 | 0.0003469 | 3.459740 | 0.0344548 |
REACTOME_VESICLE_MEDIATED_TRANSPORT | 478 | -0.3864050 | -0.1294140 | 0.0000375 | 4.426248 | 0.0083345 |
Use Euler diagrams to find the similarity. As medians are normally distributed about the zero, one would expect equal numbers of up and down- methylated gene sets. Therefore the t-test based approach could be considered better.
lapsig_up <- rownames(subset(lapsig,t_median>0))
lapsig_dn <- rownames(subset(lapsig,t_median<0))
alpsig_up <- rownames(subset(alpsig,t_median>0))
alpsig_dn <- rownames(subset(alpsig,t_median<0))
v1 <- list("LA up"=lapsig_up, "LA dn"=lapsig_dn,
"AL up"=alpsig_up,"AL dn"=alpsig_dn)
plot(euler(v1),quantities = TRUE)
## Warning in colSums(id & !empty) == 0 | merged_sets: longer object length is not
## a multiple of shorter object length
save.image("GSE158422_gmea.Rdata")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] beeswarm_0.4.0
## [2] kableExtra_1.3.4
## [3] mitch_1.12.0
## [4] tictoc_1.2
## [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [6] IlluminaHumanMethylation450kmanifest_0.4.0
## [7] minfi_1.46.0
## [8] bumphunter_1.42.0
## [9] locfit_1.5-9.8
## [10] iterators_1.0.14
## [11] foreach_1.5.2
## [12] Biostrings_2.68.1
## [13] XVector_0.40.0
## [14] SummarizedExperiment_1.30.2
## [15] Biobase_2.60.0
## [16] MatrixGenerics_1.12.3
## [17] matrixStats_1.0.0
## [18] GenomicRanges_1.52.0
## [19] GenomeInfoDb_1.36.3
## [20] IRanges_2.34.1
## [21] S4Vectors_0.38.1
## [22] BiocGenerics_0.46.0
## [23] eulerr_7.0.0
## [24] limma_3.56.2
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.1 later_1.3.1
## [3] BiocIO_1.10.0 bitops_1.0-7
## [5] filelock_1.0.2 tibble_3.2.1
## [7] polyclip_1.10-4 preprocessCore_1.62.1
## [9] XML_3.99-0.14 lifecycle_1.0.3
## [11] lattice_0.21-8 MASS_7.3-60
## [13] base64_2.0.1 scrime_1.3.5
## [15] magrittr_2.0.3 sass_0.4.7
## [17] rmarkdown_2.24 jquerylib_0.1.4
## [19] yaml_2.3.7 httpuv_1.6.11
## [21] doRNG_1.8.6 askpass_1.2.0
## [23] DBI_1.1.3 RColorBrewer_1.1-3
## [25] abind_1.4-5 zlibbioc_1.46.0
## [27] rvest_1.0.3 quadprog_1.5-8
## [29] purrr_1.0.2 RCurl_1.98-1.12
## [31] rappdirs_0.3.3 GenomeInfoDbData_1.2.10
## [33] genefilter_1.82.1 annotate_1.78.0
## [35] svglite_2.1.1 DelayedMatrixStats_1.22.6
## [37] codetools_0.2-19 DelayedArray_0.26.7
## [39] xml2_1.3.5 tidyselect_1.2.0
## [41] beanplot_1.3.1 BiocFileCache_2.8.0
## [43] webshot_0.5.5 illuminaio_0.42.0
## [45] GenomicAlignments_1.36.0 jsonlite_1.8.7
## [47] multtest_2.56.0 ellipsis_0.3.2
## [49] survival_3.5-7 systemfonts_1.0.4
## [51] polylabelr_0.2.0 tools_4.3.1
## [53] progress_1.2.2 Rcpp_1.0.11
## [55] glue_1.6.2 gridExtra_2.3
## [57] xfun_0.40 dplyr_1.1.3
## [59] HDF5Array_1.28.1 fastmap_1.1.1
## [61] GGally_2.1.2 rhdf5filters_1.12.1
## [63] fansi_1.0.4 openssl_2.1.0
## [65] caTools_1.18.2 digest_0.6.33
## [67] R6_2.5.1 mime_0.12
## [69] colorspace_2.1-0 gtools_3.9.4
## [71] biomaRt_2.56.1 RSQLite_2.3.1
## [73] utf8_1.2.3 tidyr_1.3.0
## [75] generics_0.1.3 data.table_1.14.8
## [77] rtracklayer_1.60.1 prettyunits_1.1.1
## [79] httr_1.4.7 htmlwidgets_1.6.2
## [81] S4Arrays_1.0.6 pkgconfig_2.0.3
## [83] gtable_0.3.4 blob_1.2.4
## [85] siggenes_1.74.0 htmltools_0.5.6
## [87] echarts4r_0.4.5 scales_1.2.1
## [89] png_0.1-8 knitr_1.44
## [91] rstudioapi_0.15.0 reshape2_1.4.4
## [93] tzdb_0.4.0 rjson_0.2.21
## [95] nlme_3.1-163 curl_5.0.2
## [97] cachem_1.0.8 rhdf5_2.44.0
## [99] stringr_1.5.0 KernSmooth_2.23-22
## [101] AnnotationDbi_1.62.2 restfulr_0.0.15
## [103] GEOquery_2.68.0 pillar_1.9.0
## [105] grid_4.3.1 reshape_0.8.9
## [107] vctrs_0.6.3 gplots_3.1.3
## [109] promises_1.2.1 dbplyr_2.3.3
## [111] xtable_1.8-4 evaluate_0.21
## [113] readr_2.1.4 GenomicFeatures_1.52.2
## [115] cli_3.6.1 compiler_4.3.1
## [117] Rsamtools_2.16.0 rlang_1.1.1
## [119] crayon_1.5.2 rngtools_1.5.2
## [121] nor1mix_1.3-0 mclust_6.0.0
## [123] plyr_1.8.8 stringi_1.7.12
## [125] viridisLite_0.4.2 BiocParallel_1.34.2
## [127] munsell_0.5.0 Matrix_1.6-1
## [129] hms_1.1.3 sparseMatrixStats_1.12.2
## [131] bit64_4.0.5 ggplot2_3.4.3
## [133] Rhdf5lib_1.22.1 KEGGREST_1.40.0
## [135] shiny_1.7.5 highr_0.10
## [137] memoise_2.0.1 bslib_0.5.1
## [139] bit_4.0.5