Introduction

Previously, Mandhri and I analysed the B-PROOF 450K data, trying to understand whether vitamin supplementation caused changes in gene methylation. We used Limma and some basic analyses, which showed no specific probes with FDR<0.05, nor any DMRs.

In this analysis we will use the principle of Gene Set Enrichment Analysis, applying it to many probes belonging to genes. If the probes are trending in concert, then we can make some judgement about the enrichment of those probes. The statistical test used is the Wilcox test, which can be applied to competitive or self contained test types.

library("parallel")
library("dplyr")
library("kableExtra")
library("eulerr")
library("mitch")
library("tictoc")

CORES= detectCores()

Reactome gene sets

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

Functions

# Gene methylation enrichment analysis
calc_sc <- function(dm) {
  gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=CORES)
  dm$UCSC_RefGene_Name <- gnl
  l <- mclapply(1:nrow(dm), function(i) {
    a <- dm[i,]
    len <- length(a[[1]][[1]])
    tvals <- as.numeric(rep(a[2],len))
    genes <- a[[1]][[1]]
    data.frame(genes,tvals)
  },mc.cores=CORES)
  df <- do.call(rbind,l)
  gme_res <- mclapply( 1:length(gn), function(i) {
    g <- gn[i]
    tstats <- df[which(df$genes==g),"tvals"]
    myn <- length(tstats)
    mymean <- mean(tstats)
    mymedian <- median(tstats)
    wtselfcont <- wilcox.test(tstats)
    res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,"median"=mymedian,
      "p-value(sc)"=wtselfcont$p.value)
  } , mc.cores=CORES )
  gme_res_df <- do.call(rbind, gme_res)
  rownames(gme_res_df) <- gme_res_df[,1]
  gme_res_df <- gme_res_df[,-1]
  tmp <- apply(gme_res_df,2,as.numeric)
  rownames(tmp) <- rownames(gme_res_df)
  gme_res_df <- as.data.frame(tmp)
  gme_res_df$sig <- -log10(gme_res_df[,4])
  gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
  gme_res_df$`fdr(sc)` <- p.adjust(gme_res_df$`p-value(sc)`)
  out <- list("df"=df,"gme_res_df"=gme_res_df)
  return(out)
}


gmea_volc <- function(res) {
  sig <- subset(res,`fdr(sc)` < 0.05)
  plot(res$median , -log10(res$`p-value(sc)`) ,
    xlab="effect size (mean t-stat)", ylab="-log10(p-value)",
    pch=19, cex=0.5, col="gray",main="self contained test")
  grid()
  points(sig$median , -log10(sig$`p-value(sc)`) ,
    pch=19, cex=0.5, col="red")
}


gmea_barplot <- function(res) {
  par(mfrow=c(1,2))
  n=50
  gs <- head(rownames(res),50)
  tstats <- lapply(gs, function(g) {
    df[which(df$genes==g),"tvals"]
  })
  names(tstats) <- gs
  tstats <- tstats[order(unlist(lapply(tstats,median)))]
  boxplot(tstats,horizontal=TRUE,las=1,
    main="smallest p-val(selfcont)",cex.axis=0.6,
    xlab="t-statistic")
  grid()
  sig <- subset(res,`fdr(sc)` < 0.05)
  gs <- head(rownames(sig[order(-abs(sig$median)),]),n)
  if ( length(gs) >2 ) {
    tstats <- lapply(gs, function(g) {
      df[which(df$genes==g),"tvals"]
    })
    names(tstats) <- gs
    tstats <- tstats[order(unlist(lapply(tstats,median)))]
    boxplot(tstats,horizontal=TRUE,las=1,
      main="biggest effect size(median)",cex.axis=0.6,
      xlab="t-statistic")
    grid()
  } else {
    plot(1)
    mtext("too few significant genes found")
  }
  par(mfrow=c(1,1))
}

DM1 - post trial supplement versus placebo

Whole gene analysis

dm <- read.table("dma1a.tsv")
head(dm,50) %>% kbl(caption = "Top significant genes with limma") %>% kable_paper("hover", full_width = F)
Top significant genes with limma
Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC AveExpr t P.Value adj.P.Val B
cg11922164 cg11922164 SYT15;SYT15 -0.3241503 3.0811458 -5.467839 0.0000004 0.1762222 2.8609369
cg17055959 cg17055959 RB1 Promoter_Associated -0.2589536 -3.5513962 -4.489903 0.0000214 0.9999898 0.6028356
cg09095400 cg09095400 MCF2L;MCF2L -0.2864056 3.0671992 -4.464955 0.0000235 0.9999898 0.5477069
cg00079169 cg00079169 THOP1 -0.2955543 3.1975846 -4.437042 0.0000262 0.9999898 0.4862035
cg22907174 cg22907174 Unclassified_Cell_type_specific -0.2797225 -3.7484900 -4.185434 0.0000670 0.9999898 -0.0593122
cg00485312 cg00485312 RADIL 0.4204328 3.4730548 4.174616 0.0000697 0.9999898 -0.0823910
cg14356440 cg14356440 MGAT5 -0.3103486 3.2447286 -4.052258 0.0001088 0.9999898 -0.3411438
cg02007493 cg02007493 NUDT14 Unclassified -0.2179231 -3.7401126 -3.965567 0.0001483 0.9999898 -0.5218415
cg26973953 cg26973953 GNPTG -0.3541682 5.9713884 -3.958323 0.0001522 0.9999898 -0.5368395
cg15693572 cg15693572 -0.4938634 0.7629543 -3.919761 0.0001745 0.9999898 -0.6164053
cg16895973 cg16895973 TOLLIP 0.3297518 2.6982157 3.903079 0.0001850 0.9999898 -0.6506847
cg20451722 cg20451722 MX1 Promoter_Associated -0.1698344 -3.3514466 -3.900823 0.0001865 0.9999898 -0.6553134
cg26966630 cg26966630 CBLN2 0.4062536 -2.9670074 3.891735 0.0001926 0.9999898 -0.6739459
cg03852670 cg03852670 SLC10A4 Unclassified_Cell_type_specific -0.4430002 -0.7967887 -3.881409 0.0001997 0.9999898 -0.6950842
cg08998375 cg08998375 TBL3 -0.1877786 2.5459320 -3.855284 0.0002188 0.9999898 -0.7484145
cg15672304 cg15672304 GPX5;GPX5 -0.3336515 3.1181097 -3.837698 0.0002327 0.9999898 -0.7841908
cg20089935 cg20089935 ANKLE2 -0.2915339 4.0051127 -3.811572 0.0002548 0.9999898 -0.8371613
cg15828427 cg15828427 FBXW9 Unclassified -0.2583253 0.5795931 -3.806959 0.0002589 0.9999898 -0.8464900
cg08267701 cg08267701 TOMM40;TOMM40;TOMM40 Promoter_Associated -0.1743045 -3.1175045 -3.799373 0.0002658 0.9999898 -0.8618177
cg05392577 cg05392577 0.3437963 2.9223743 3.775844 0.0002884 0.9999898 -0.9092403
cg01142579 cg01142579 -0.3248540 3.3432889 -3.765804 0.0002985 0.9999898 -0.9294210
cg20954533 cg20954533 MAP1B -0.4628496 -0.6760910 -3.756705 0.0003080 0.9999898 -0.9476804
cg05021312 cg05021312 CBFA2T2 Promoter_Associated -0.2952828 2.5083399 -3.754228 0.0003107 0.9999898 -0.9526472
cg17396852 cg17396852 PIK3CG -0.3209891 3.6737748 -3.749569 0.0003157 0.9999898 -0.9619841
cg23712359 cg23712359 ZEB1;ZEB1;ZEB1;ZEB1;ZEB1 Promoter_Associated -0.2953219 -3.3975280 -3.742234 0.0003237 0.9999898 -0.9766652
cg09887667 cg09887667 NEDD4 -0.3996089 -3.2162534 -3.711208 0.0003600 0.9999898 -1.0385751
cg21697812 cg21697812 C11orf91 Promoter_Associated -0.3423683 0.5214687 -3.709752 0.0003618 0.9999898 -1.0414727
cg21478490 cg21478490 PROP1;PROP1 -0.3961129 1.1593620 -3.704598 0.0003682 0.9999898 -1.0517233
cg07162820 cg07162820 NFATC4;NFATC4 Unclassified -0.2672138 -1.0400681 -3.696270 0.0003788 0.9999898 -1.0682666
cg16728323 cg16728323 RAB20 Unclassified_Cell_type_specific -0.2539616 3.0628003 -3.678793 0.0004021 0.9999898 -1.1029115
cg17479060 cg17479060 -0.2769604 4.3430224 -3.676850 0.0004047 0.9999898 -1.1067565
cg23666491 cg23666491 -0.2117727 2.6281734 -3.668545 0.0004163 0.9999898 -1.1231771
cg05616472 cg05616472 EHMT1;C9orf37;EHMT1 Promoter_Associated -0.2821533 -2.0788718 -3.658848 0.0004302 0.9999898 -1.1423197
cg02852873 cg02852873 CNDP2 Promoter_Associated -0.1398984 -2.0133284 -3.657861 0.0004316 0.9999898 -1.1442663
cg02779707 cg02779707 TAP2;TAP2 Promoter_Associated -0.2886455 -3.3284846 -3.656625 0.0004334 0.9999898 -1.1467037
cg08836615 cg08836615 0.2845805 3.5093175 3.646502 0.0004485 0.9999898 -1.1666452
cg08837481 cg08837481 -0.2400255 3.3494209 -3.636667 0.0004637 0.9999898 -1.1859854
cg22304399 cg22304399 CACNA1A;CACNA1A Unclassified -0.2754963 -3.1766244 -3.632407 0.0004704 0.9999898 -1.1943525
cg08391356 cg08391356 Promoter_Associated -0.1729776 -3.0377128 -3.626525 0.0004798 0.9999898 -1.2058949
cg10645648 cg10645648 HLA-DQA2 0.7902770 2.1075292 3.610488 0.0005063 0.9999898 -1.2373048
cg08844849 cg08844849 SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10 -0.2764699 1.6759984 -3.609514 0.0005080 0.9999898 -1.2392086
cg18034329 cg18034329 RABEP1;RABEP1 Promoter_Associated -0.2081747 -3.1771846 -3.609420 0.0005082 0.9999898 -1.2393918
cg24181389 cg24181389 RPTOR;RPTOR Gene_Associated_Cell_type_specific -0.2960408 3.1936879 -3.605892 0.0005142 0.9999898 -1.2462881
cg15840079 cg15840079 TMEM132A;TMEM132A Unclassified -0.2210544 -2.1865179 -3.602825 0.0005195 0.9999898 -1.2522797
cg14686845 cg14686845 AGPAT5 Promoter_Associated -0.1491522 -3.1939740 -3.599622 0.0005251 0.9999898 -1.2585348
cg16258224 cg16258224 -0.3368918 2.2019338 -3.598709 0.0005267 0.9999898 -1.2603163
cg24571822 cg24571822 CUBN -0.2529204 -0.0429503 -3.598077 0.0005279 0.9999898 -1.2615487
cg01448562 cg01448562 -0.7166198 3.3952247 -3.592024 0.0005387 0.9999898 -1.2733538
cg21696208 cg21696208 MTFR1;MTFR1 Promoter_Associated -0.3830576 -2.9811457 -3.590917 0.0005407 0.9999898 -1.2755117
cg16066221 cg16066221 CCDC55 -0.1847661 3.3401955 -3.589510 0.0005432 0.9999898 -1.2782531
dm <- dm[,c("UCSC_RefGene_Name","t")]
hist(dm$t,breaks=seq(from=-10,to=10,by=1))

tic() ; gmea <- calc_sc(dm) ; time2 <- toc()
## 41.776 sec elapsed
df <- gmea[[1]]
res <- gmea[[2]]
write.table(res,file="gmeawg_dm1.tsv")
head(res,50) %>% kbl(caption = "Top significant genes with GMEA") %>% kable_paper("hover", full_width = F)
Top significant genes with GMEA
nprobes mean median p-value(sc) sig fdr(sc)
PCDHGA3 295 -0.7017497 -0.7444529 0e+00 25.235081 0.0000000
PCDHGA1 317 -0.6696821 -0.7108065 0e+00 25.184842 0.0000000
PCDHGA2 309 -0.6784529 -0.7113643 0e+00 25.051893 0.0000000
PCDHGB1 277 -0.7306268 -0.7648181 0e+00 24.888921 0.0000000
PCDHGA4 263 -0.7399222 -0.7805508 0e+00 23.847952 0.0000000
PCDHGB2 249 -0.7580196 -0.7805508 0e+00 23.310643 0.0000000
PCDHGA5 238 -0.7697027 -0.8120064 0e+00 22.465199 0.0000000
PCDHGB3 223 -0.7728903 -0.8105310 0e+00 21.153268 0.0000000
PCDHGA6 214 -0.7711420 -0.7850676 0e+00 20.069420 0.0000000
PCDHGA7 199 -0.7801077 -0.8105310 0e+00 18.267132 0.0000000
PCDHGB4 187 -0.7776558 -0.8105310 0e+00 16.634832 0.0000000
BRD2 166 -0.6702827 -0.6824132 0e+00 15.458711 0.0000000
PCDHGA8 171 -0.7915790 -0.8407872 0e+00 15.303684 0.0000000
PCDHGA9 144 -0.8577123 -0.8981900 0e+00 14.514405 0.0000000
TAPBP 148 -0.6818321 -0.6569560 0e+00 14.240174 0.0000000
PCDHGB6 139 -0.8533928 -0.8896805 0e+00 13.682237 0.0000000
PCDHGB5 156 -0.7970750 -0.8525708 0e+00 13.571377 0.0000000
PCDHGA10 129 -0.8126353 -0.8643544 0e+00 12.130833 0.0000000
NPAT 46 -0.9628576 -1.0425066 0e+00 10.976976 0.0000002
PCDHGB7 115 -0.7895743 -0.7805508 0e+00 10.300729 0.0000010
RNF39 86 -0.7788504 -0.7559190 0e+00 9.866275 0.0000027
KIAA1949 101 -0.6762457 -0.6361989 0e+00 9.719954 0.0000038
SGCE 88 -0.7888273 -0.7654908 0e+00 9.612636 0.0000049
RGL2 117 -0.5363788 -0.4756625 0e+00 8.624712 0.0000476
PCDHGA11 103 -0.7490053 -0.7596284 0e+00 8.544984 0.0000572
TAP1 100 -0.6159567 -0.5669570 0e+00 8.417318 0.0000768
PEG10 105 -0.6341071 -0.6657271 0e+00 8.384136 0.0000829
BAT4 73 -0.7966377 -0.8913803 0e+00 8.349961 0.0000896
PPT2 101 -0.5546419 -0.6200746 0e+00 8.327285 0.0000944
NBR2 49 -0.9728316 -1.0024802 0e+00 8.211310 0.0001233
TUBB 85 -0.6571760 -0.6208794 0e+00 8.204873 0.0001252
BRCA1 49 -0.9331738 -1.0000759 0e+00 7.976026 0.0002120
PSMB8 83 -0.6346065 -0.6118727 0e+00 7.726497 0.0003766
PFDN6 57 -0.8240819 -0.9209890 0e+00 7.357167 0.0008814
PSMB9 79 -0.6794080 -0.7169306 0e+00 7.301557 0.0010018
PRR3 74 -0.6640534 -0.6289730 1e-07 7.289274 0.0010305
GNMT 35 -0.7815110 -0.6911977 1e-07 7.063879 0.0017314
KILLIN 61 -0.7716617 -0.8054342 1e-07 7.058546 0.0017527
ZBTB22 77 -0.5499240 -0.4921742 1e-07 6.938364 0.0023114
HSPA1A 35 -0.9175739 -1.0721469 1e-07 6.859356 0.0027725
ATM 52 -0.7676261 -0.8333011 2e-07 6.647544 0.0045150
PBLD 23 -0.8300796 -0.5721446 2e-07 6.622660 0.0047810
LOC220930 25 -0.7068837 -0.6044628 3e-07 6.525750 0.0059760
TNXB 531 -0.1966839 -0.2114342 5e-07 6.344778 0.0090648
ANAPC13 29 -0.8779355 -0.9637643 5e-07 6.292119 0.0102328
VARS 102 -0.4198120 -0.3325725 5e-07 6.280618 0.0105069
BAT2 161 -0.3583810 -0.4181412 6e-07 6.259314 0.0110346
PTEN 59 -0.7268898 -0.8005707 6e-07 6.208860 0.0123934
PPP1R2P1 27 0.8317927 0.8893044 6e-07 6.193311 0.0128445
TUBG1 23 -0.7046680 -0.6052688 7e-07 6.145539 0.0143373
gmea_volc(res)

gmea_barplot(res)

dmscore <- data.frame( res$median * res$sig)
rownames(dmscore) <- rownames(res)
colnames(dmscore) <- "metric"
mres <- mitch_calc(x=dmscore, genesets=genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres$enrichment_result,20) %>% kbl(caption = "Top enriched gene sets with GMEA-Mitch") %>% kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch
set setSize pANOVA s.dist p.adjustANOVA
1162 SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs 11 0.0006817 -0.5913323 0.0023875
1163 SLBP independent Processing of Histone Pre-mRNAs 10 0.0012759 -0.5881498 0.0040958
1123 Resolution of D-Loop Structures 26 0.0000007 -0.5605327 0.0000073
898 Pexophagy 11 0.0013388 -0.5583872 0.0042321
1124 Resolution of D-loop Structures through Holliday Junction Intermediates 25 0.0000013 -0.5581285 0.0000117
56 Activation of the phototransduction cascade 11 0.0014072 0.5558885 0.0044192
395 Expression and translocation of olfactory receptors 331 0.0000000 0.5434341 0.0000000
238 Creation of C4 and C2 activators 14 0.0004401 0.5423983 0.0016158
284 Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function 20 0.0000272 -0.5417954 0.0001439
285 Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function 20 0.0000272 -0.5417954 0.0001439
286 Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function 20 0.0000272 -0.5417954 0.0001439
287 Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA2/RAD51/RAD51C binding function 20 0.0000272 -0.5417954 0.0001439
580 Impaired BRCA2 binding to PALB2 20 0.0000272 -0.5417954 0.0001439
224 Condensation of Prometaphase Chromosomes 11 0.0019424 -0.5394793 0.0057892
841 Olfactory Signaling Pathway 338 0.0000000 0.5383073 0.0000000
1148 SARS-CoV-2 modulates autophagy 11 0.0023314 -0.5299960 0.0068017
1125 Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA) 22 0.0000179 -0.5280943 0.0001020
432 Fatty acids 15 0.0004555 0.5226522 0.0016605
1026 RNA Polymerase II Transcription Termination 50 0.0000000 -0.5142666 0.0000000
581 Impaired BRCA2 binding to RAD51 31 0.0000008 -0.5119085 0.0000078
#mitch_report(mres,outfile="gmeawg_mitch_dm1.html",overwrite=TRUE)

Promoter

dm <- read.table("dma1a.tsv")
dm <- dm[grep("Promoter_Associated",dm$Regulatory_Feature_Group),]
dm <- dm[,c("UCSC_RefGene_Name","t")]
head(dm,50) %>% kbl(caption = "Top significant promoters with limma") %>% kable_paper("hover", full_width = F)
Top significant promoters with limma
UCSC_RefGene_Name t
cg17055959 RB1 -4.489903
cg20451722 MX1 -3.900823
cg08267701 TOMM40;TOMM40;TOMM40 -3.799373
cg05021312 CBFA2T2 -3.754228
cg23712359 ZEB1;ZEB1;ZEB1;ZEB1;ZEB1 -3.742234
cg21697812 C11orf91 -3.709752
cg05616472 EHMT1;C9orf37;EHMT1 -3.658848
cg02852873 CNDP2 -3.657861
cg02779707 TAP2;TAP2 -3.656625
cg08391356 -3.626525
cg18034329 RABEP1;RABEP1 -3.609420
cg14686845 AGPAT5 -3.599622
cg21696208 MTFR1;MTFR1 -3.590917
cg21413009 DAPK2 -3.534316
cg11692772 KLRAQ1;KLRAQ1;KLRAQ1;KLRAQ1 -3.516666
cg24353236 NR1H3;ACP2;ACP2 -3.493813
cg06870750 SETDB2;CAB39L -3.490587
cg09479632 KIAA0226;KIAA0226 -3.435143
cg00945719 TIMM50 -3.434537
cg26118737 NVL;NVL -3.433108
cg25329685 ENTPD4;ENTPD4 -3.431244
cg18564855 MEIG1 -3.406530
cg20451369 LOC219347;LOC219347;LOC219347;LOC219347;LOC219347;C10orf57 -3.399582
cg22994198 ADAMTSL5 3.399172
cg00719400 LOC100130093;LOC100130093 -3.392125
cg05407396 BAT3;BAT3;BAT3;BAT3 -3.386990
cg22838378 STARD5 -3.377938
cg02097636 PAFAH1B3;PRR19;PRR19;PAFAH1B3;PAFAH1B3 -3.374483
cg19598293 -3.369366
cg06175825 -3.366198
cg25719132 SNRPG -3.346044
cg07212963 HMGCL;HMGCL -3.346040
cg08141245 TMEM11;TMEM11 -3.338267
cg06916012 GMPPA;GMPPA;GMPPA -3.329166
cg20100633 ZNF443 -3.320740
cg10154826 FAM8A1 -3.320108
cg12873692 HSPA2 -3.286050
cg13342195 HEATR3 -3.284627
cg26056831 C10orf140;C10orf140 -3.276076
cg12686344 TTC15;TTC15 -3.273293
cg17512779 CNOT7;VPS37A;VPS37A;CNOT7 -3.264150
cg14501914 ODF2;ODF2 -3.250655
cg08539350 MIR548G;C3orf26 -3.242583
cg15670290 HNRNPUL2;TTC9C -3.240179
cg12325285 IER3 -3.237497
cg20510213 ATP6V1G2;NFKBIL1;NFKBIL1;NFKBIL1;ATP6V1G2;NFKBIL1;NFKBIL1;NFKBIL1 -3.234176
cg25743286 GNL1 -3.232446
cg18130437 CCDC107;RMRP -3.227743
cg19540034 APH1A;APH1A;APH1A;APH1A -3.226581
cg13868654 ITGAE;GSG2 -3.223767
hist(dm$t,breaks=seq(from=-10,to=10,by=1))

tic() ; gmea <- calc_sc(dm) ; time2 <- toc()
## 12.614 sec elapsed
df <- gmea[[1]]
res <- gmea[[2]]
write.table(res ,file="gmeapr_dm1.tsv")
head(res,50) %>% kbl(caption = "Top significant genes with GMEA") %>% kable_paper("hover", full_width = F)
Top significant genes with GMEA
nprobes mean median p-value(sc) sig fdr(sc)
PSMB8 46 -0.8920168 -0.8004817 0.00e+00 9.154392 0.0000072
PSMB9 49 -0.9442436 -1.0225348 0.00e+00 8.752556 0.0000182
PPT2 73 -0.7091599 -0.6795781 0.00e+00 8.743195 0.0000186
PEG10 61 -0.8521794 -0.7920898 0.00e+00 8.113649 0.0000794
SGCE 58 -0.8968128 -0.8030180 0.00e+00 8.056013 0.0000906
KIAA1949 67 -0.7462169 -0.7345707 0.00e+00 7.953541 0.0001147
BAT4 40 -0.9527453 -1.0254627 0.00e+00 7.920494 0.0001238
CSNK2B 40 -0.9527453 -1.0254627 0.00e+00 7.920494 0.0001238
DAXX 38 -0.9996226 -0.9173185 0.00e+00 7.897810 0.0001304
ATM 30 -0.9626480 -0.9689806 0.00e+00 7.331930 0.0004799
NPAT 30 -0.9626480 -0.9689806 0.00e+00 7.331930 0.0004799
TAP1 52 -0.9113773 -0.9586524 1.00e-07 7.299025 0.0005176
NOTCH4 45 -0.7454883 -0.8175342 1.00e-07 7.056377 0.0009049
TAPBP 72 -0.6849255 -0.7672558 1.00e-07 6.990366 0.0010533
MICA 40 -0.8072197 -0.8320503 1.00e-07 6.984348 0.0010679
BAT2 35 -0.9096305 -0.8428743 2.00e-07 6.793325 0.0016577
STK19 49 -0.6725285 -0.7144080 2.00e-07 6.780722 0.0017064
DOM3Z 49 -0.6725285 -0.7144080 2.00e-07 6.780722 0.0017064
KILLIN 54 -0.7896278 -0.8901758 3.00e-07 6.482415 0.0033907
NEU1 44 -0.6989037 -0.6230866 5.00e-07 6.300708 0.0051518
PTEN 53 -0.7795262 -0.8214457 5.00e-07 6.285658 0.0053330
EHMT2 35 -0.8908227 -0.8849120 6.00e-07 6.238114 0.0059494
BAT3 53 -0.6837927 -0.6418732 7.00e-07 6.126606 0.0076902
BRD2 57 -0.6926935 -0.6771136 9.00e-07 6.023318 0.0097540
FAM134C 22 -0.7299019 -0.6096295 1.00e-06 6.020600 0.0098143
TUBG1 22 -0.7299019 -0.6096295 1.00e-06 6.020600 0.0098143
HSPA1A 30 -0.9573633 -1.1134720 1.20e-06 5.923690 0.0122654
HSPA1L 30 -0.9573633 -1.1134720 1.20e-06 5.923690 0.0122654
GPSM3 51 -0.7092293 -0.8175342 1.40e-06 5.848739 0.0145730
MTIF3 22 -0.8300263 -0.7679785 1.40e-06 5.844509 0.0147142
DHX16 38 -0.7526592 -0.9248024 1.70e-06 5.770506 0.0174461
MORG1 28 -0.7920815 -0.8506950 1.90e-06 5.724689 0.0193853
GALK2 25 -0.8568160 -0.9211098 2.60e-06 5.591251 0.0263553
MSH5 38 -0.6132056 -0.6479561 3.40e-06 5.467260 0.0350604
PSMA1 19 -0.9993563 -0.9956671 3.80e-06 5.418540 0.0392189
SELI 20 -0.8987079 -0.7928658 5.70e-06 5.242449 0.0588226
GPR113 20 -0.8987079 -0.7928658 5.70e-06 5.242449 0.0588226
IER3 42 -0.6933107 -0.7453727 6.00e-06 5.222925 0.0615154
TUBB 26 -0.8143572 -0.7599730 6.20e-06 5.209779 0.0633996
AARSD1 24 -0.8310468 -0.8427885 6.60e-06 5.183327 0.0673747
GDAP2 18 -0.9019455 -0.9849057 7.60e-06 5.117510 0.0783920
WDR3 18 -0.9019455 -0.9849057 7.60e-06 5.117510 0.0783920
RGL2 56 -0.6374767 -0.6708599 7.70e-06 5.115137 0.0788062
ZBTB12 33 -0.6941017 -0.6952731 8.10e-06 5.092856 0.0829467
NRM 21 -0.7630976 -0.6442921 9.50e-06 5.020600 0.0979519
RAB1B 58 -0.4589958 -0.5377857 1.31e-05 4.883748 0.1342215
TAF6L 20 -0.8954676 -0.8936077 1.34e-05 4.874472 0.1371059
LOC100130987 35 -0.5679060 -0.5663418 1.45e-05 4.837297 0.1493444
ZFP91 18 -1.0099018 -1.0208478 1.53e-05 4.816480 0.1566620
LPXN 18 -0.9985143 -1.0208478 1.53e-05 4.816480 0.1566620
gmea_volc(res)

gmea_barplot(res)

dmscore <- data.frame( res$median * res$sig)
rownames(dmscore) <- rownames(res)
colnames(dmscore) <- "metric"
mres <- mitch_calc(x=dmscore, genesets=genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres$enrichment_result,20) %>%  kbl(caption = "Top enriched gene sets with GMEA-Mitch (promoter only)") %>% kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch (promoter only)
set setSize pANOVA s.dist p.adjustANOVA
1160 Voltage gated Potassium channels 10 0.0003426 0.6538574 0.0240198
65 Assembly of collagen fibrils and other multimeric structures 10 0.0007788 0.6136050 0.0442061
735 Purine catabolism 10 0.0082403 -0.4825133 0.1314042
614 Negative regulation of NMDA receptor-mediated neuronal transmission 12 0.0053568 0.4642984 0.1089239
162 Condensation of Prometaphase Chromosomes 10 0.0136197 -0.4505774 0.1633855
528 Meiotic recombination 19 0.0006768 -0.4504611 0.0403380
1183 p75NTR signals via NF-kB 11 0.0140100 -0.4278656 0.1637247
976 Signaling by Leptin 10 0.0208042 0.4221640 0.1907582
944 Signaling by BMP 14 0.0084768 0.4063961 0.1314042
426 IKK complex recruitment mediated by RIP1 17 0.0042314 -0.4007746 0.0987379
251 Downregulation of ERBB2 signaling 17 0.0062324 -0.3832555 0.1154544
713 Potassium Channels 26 0.0008254 0.3790157 0.0447208
119 Caspase activation via Death Receptors in the presence of ligand 10 0.0403496 -0.3744396 0.2780154
817 Ras activation upon Ca2+ influx through NMDA receptor 10 0.0408761 0.3734595 0.2797189
724 Processing of Intronless Pre-mRNAs 15 0.0134417 -0.3686861 0.1633855
399 HDMs demethylate histones 15 0.0173311 -0.3549320 0.1796408
139 Chaperone Mediated Autophagy 12 0.0365918 -0.3485393 0.2611825
391 Golgi Cisternae Pericentriolar Stack Reorganization 14 0.0248432 -0.3464157 0.2115218
434 Impaired BRCA2 binding to RAD51 30 0.0010361 -0.3462032 0.0536992
1188 tRNA Aminoacylation 23 0.0040971 -0.3458871 0.0979021
#mitch_report(mres,outfile="gmeapr_mitch_dm1.html",overwrite=TRUE)

DM3 effect of homocysteine

Whole gene analysis

dm <- read.table("dma3a.tsv")
head(dm,50) %>% kbl(caption = "Top significant genes with limma") %>% kable_paper("hover", full_width = F)
Top significant genes with limma
Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC AveExpr t P.Value adj.P.Val B
cg04905210 cg04905210 POLR3D -0.2917142 -2.8247113 -5.390393 2.00e-07 0.0957990 5.941825
cg09338148 cg09338148 Unclassified -0.2931592 -1.1714089 -5.121719 8.00e-07 0.1687298 4.907067
cg04247967 cg04247967 -0.2054821 3.1801350 -4.965418 1.60e-06 0.2293803 4.322061
cg06458106 cg06458106 -0.1905124 1.6705818 -4.806972 3.30e-06 0.2433469 3.742284
cg26425904 cg26425904 OCA2 Unclassified -0.2544221 -4.0153857 -4.740360 4.40e-06 0.2433469 3.502627
cg09762242 cg09762242 SIPA1;SIPA1 Promoter_Associated -0.2101898 -2.5136127 -4.733232 4.60e-06 0.2433469 3.477128
cg19590707 cg19590707 SLC16A3;SLC16A3;SLC16A3 Promoter_Associated -0.2482548 -1.2509036 -4.722777 4.80e-06 0.2433469 3.439780
cg10746622 cg10746622 PRSSL1 -0.1991931 1.5625330 -4.693210 5.40e-06 0.2433469 3.334483
cg11257888 cg11257888 RCAN3 0.2795356 0.4871632 4.691739 5.50e-06 0.2433469 3.329258
cg03622371 cg03622371 P2RY6;P2RY6;P2RY6;P2RY6 Unclassified -0.2424592 -1.6765190 -4.679696 5.80e-06 0.2433469 3.286518
cg16971827 cg16971827 CBL Gene_Associated_Cell_type_specific 0.3035900 1.7707651 4.648039 6.60e-06 0.2467538 3.174563
cg20997993 cg20997993 Unclassified -0.2429900 1.9524246 -4.621663 7.40e-06 0.2467538 3.081715
cg08469255 cg08469255 DDR1;DDR1 Unclassified 0.2398657 1.4933506 4.615574 7.60e-06 0.2467538 3.060337
cg27557428 cg27557428 Unclassified -0.2803875 -2.1681390 -4.585121 8.70e-06 0.2610062 2.953738
cg00493755 cg00493755 LYRM5;CASC1;CASC1;LYRM5;CASC1 Promoter_Associated -0.2475869 -4.0864529 -4.539873 1.05e-05 0.2649257 2.796327
cg16879574 cg16879574 NHLRC1 Promoter_Associated -0.2473504 -1.5752205 -4.527103 1.11e-05 0.2649257 2.752116
cg15742245 cg15742245 CD177 -0.2543110 1.9039638 -4.523418 1.12e-05 0.2649257 2.739376
cg23248150 cg23248150 TLL1 -0.2464022 -1.5240685 -4.522435 1.13e-05 0.2649257 2.735979
cg22948808 cg22948808 DGKZ -0.2909225 -0.3489572 -4.503895 1.22e-05 0.2656216 2.672009
cg02845204 cg02845204 KRTAP5-9 -0.3070580 -0.4149132 -4.496832 1.26e-05 0.2656216 2.647692
cg03946955 cg03946955 LMNA;LMNA;LMNA Unclassified_Cell_type_specific -0.1824392 -2.3433644 -4.480178 1.35e-05 0.2690173 2.590467
cg15516314 cg15516314 RTL1;MIR431;MIR433 0.2392133 2.9600363 4.465319 1.44e-05 0.2690173 2.539550
cg22537604 cg22537604 CD177 -0.3193055 1.0749273 -4.460505 1.46e-05 0.2690173 2.523080
cg01345727 cg01345727 NEK3;NEK3;NEK3;NEK3 Promoter_Associated_Cell_type_specific -0.2329252 1.6256861 -4.446244 1.55e-05 0.2736441 2.474372
cg19284277 cg19284277 SLC16A3;SLC16A3;SLC16A3 -0.2463026 -1.8395228 -4.434600 1.63e-05 0.2749055 2.434691
cg03032053 cg03032053 MICALL2 Unclassified_Cell_type_specific -0.1577248 0.3915575 -4.417976 1.75e-05 0.2749055 2.378177
cg23659250 cg23659250 BRD1;LOC90834 -0.2598981 0.6604919 -4.413930 1.78e-05 0.2749055 2.364445
cg14204064 cg14204064 ZNF8 Promoter_Associated -0.2727535 -3.0330794 -4.400132 1.88e-05 0.2749055 2.317694
cg00772000 cg00772000 NHLRC1 Promoter_Associated -0.2612511 -1.8626859 -4.395918 1.92e-05 0.2749055 2.303438
cg18817487 cg18817487 HAL -0.3791862 -1.7673383 -4.381199 2.04e-05 0.2749055 2.253729
cg16223079 cg16223079 PPCDC Unclassified_Cell_type_specific -0.2580476 -1.8276656 -4.380895 2.04e-05 0.2749055 2.252702
cg05305327 cg05305327 CBFA2T2;CBFA2T2;CBFA2T2 0.2044785 2.4023989 4.375843 2.08e-05 0.2749055 2.235670
cg26954174 cg26954174 NOD2 Unclassified_Cell_type_specific -0.3098398 -1.8856005 -4.351680 2.30e-05 0.2944766 2.154421
cg16659510 cg16659510 BANP;BANP Promoter_Associated_Cell_type_specific -0.2237425 2.5640167 -4.336119 2.45e-05 0.2994994 2.102280
cg08521073 cg08521073 HRK -0.2342466 -2.1812465 -4.332839 2.49e-05 0.2994994 2.091309
cg05788125 cg05788125 -0.2164090 0.5929679 -4.318979 2.63e-05 0.2994994 2.045016
cg15657641 cg15657641 -0.3045296 -1.6478040 -4.308557 2.75e-05 0.2994994 2.010284
cg00138407 cg00138407 KLHL18 -0.2310495 1.3743589 -4.299105 2.85e-05 0.2994994 1.978839
cg08533162 cg08533162 ERI3 -0.2838418 -0.7553821 -4.282412 3.05e-05 0.2994994 1.923439
cg00822241 cg00822241 FUT2;FUT2 -0.2413571 -1.0170752 -4.260359 3.34e-05 0.2994994 1.850510
cg24568579 cg24568579 Unclassified -0.2454344 -1.2262927 -4.255112 3.41e-05 0.2994994 1.833200
cg27392804 cg27392804 -0.2126822 -2.0892490 -4.248151 3.51e-05 0.2994994 1.810262
cg01063280 cg01063280 -0.2659171 1.0708141 -4.246661 3.53e-05 0.2994994 1.805356
cg00504782 cg00504782 -0.1861498 0.1335070 -4.233364 3.72e-05 0.2994994 1.761634
cg19203575 cg19203575 ZNF323;ZKSCAN3;ZNF323 -0.1781090 -0.8128462 -4.220555 3.92e-05 0.2994994 1.719618
cg06625077 cg06625077 CTRL Gene_Associated_Cell_type_specific 0.1262006 2.8187132 4.209985 4.09e-05 0.2994994 1.685023
cg11866943 cg11866943 STAB1 Unclassified_Cell_type_specific -0.2082625 -1.2756607 -4.207151 4.14e-05 0.2994994 1.675757
cg26923863 cg26923863 CTBP1;CTBP1 -0.3035588 -0.9720327 -4.195757 4.33e-05 0.2994994 1.638563
cg18963859 cg18963859 -0.2795682 0.7305054 -4.189956 4.43e-05 0.2994994 1.619656
cg19409163 cg19409163 -0.1835966 -0.0363520 -4.179232 4.63e-05 0.2994994 1.584758
dm <- dm[,c("UCSC_RefGene_Name","t")]
hist(dm$t,breaks=seq(from=-10,to=10,by=1))

tic() ; gmea <- calc_sc(dm) ; time2 <- toc()
## 42.999 sec elapsed
df <- gmea[[1]]
res <- gmea[[2]]
write.table(res,file="gmeawg_dm3.tsv")
head(res,50) %>% kbl(caption = "Top significant genes with GMEA") %>% kable_paper("hover", full_width = F)
Top significant genes with GMEA
nprobes mean median p-value(sc) sig fdr(sc)
TNXB 531 0.3429080 0.4078607 0e+00 14.041475 0.0000000
PCDHA1 162 -0.7077910 -0.6245086 0e+00 13.496797 0.0000000
NNAT 49 -0.9419415 -0.9392397 0e+00 13.303312 0.0000000
PCDHA2 149 -0.7188634 -0.6267169 0e+00 12.485679 0.0000000
PCDHA3 141 -0.7189282 -0.6267169 0e+00 11.583810 0.0000001
KCNQ1DN 39 -1.6144397 -1.7080108 0e+00 11.439140 0.0000001
TAP1 100 0.8373978 0.8170222 0e+00 10.577873 0.0000005
NCOR2 212 -0.6647509 -0.5447866 0e+00 10.345470 0.0000009
PCDHGA1 317 -0.4141393 -0.4153141 0e+00 10.238108 0.0000012
PCDHA4 131 -0.6779619 -0.6026401 0e+00 10.042257 0.0000018
PCDHGA2 309 -0.4165786 -0.4153141 0e+00 9.987553 0.0000021
PCDHGA3 295 -0.4180524 -0.3901466 0e+00 9.576696 0.0000053
MESTIT1 59 -0.7596278 -0.7939385 0e+00 9.544056 0.0000057
PCDHA5 122 -0.6998042 -0.6245086 0e+00 9.542358 0.0000058
NKX6-2 36 -1.3952706 -1.4319593 0e+00 9.536050 0.0000058
SOX2OT 85 -0.7468762 -0.8104998 0e+00 9.479982 0.0000066
PCDHA7 103 -0.7321981 -0.6267169 0e+00 9.375369 0.0000085
C11orf21 36 -1.7668062 -1.9463963 0e+00 9.257296 0.0000111
PITX2 64 -1.0007742 -0.9208371 0e+00 9.163509 0.0000138
PCDHA6 114 -0.6916765 -0.6245086 0e+00 8.749552 0.0000357
TSPAN32 42 -1.6254336 -1.9412022 0e+00 8.645175 0.0000454
MEST 85 -0.6582939 -0.7858506 0e+00 8.607705 0.0000495
ASCL2 49 -0.8873371 -0.9068647 0e+00 8.538922 0.0000580
WT1 60 -1.0380025 -1.0581139 0e+00 8.537528 0.0000582
PCDHGB1 277 -0.4040374 -0.3512600 0e+00 8.169027 0.0001360
PCDHA8 92 -0.7250898 -0.6311017 0e+00 8.158124 0.0001394
KIAA1949 101 0.7427567 0.6964985 0e+00 8.107412 0.0001567
PCDHA10 81 -0.7546498 -0.6842488 0e+00 7.961957 0.0002190
PCDHA9 84 -0.7351768 -0.6598677 0e+00 7.948914 0.0002257
PSMB8 83 0.8313260 0.6488706 0e+00 7.945020 0.0002277
PRDM13 41 -0.9251076 -0.9595314 0e+00 7.870850 0.0002701
GNA12 84 -0.9539754 -0.7963257 0e+00 7.622375 0.0004787
PCDHGA4 263 -0.4030756 -0.3512600 0e+00 7.620039 0.0004812
MAD1L1 684 -0.2827612 -0.2513758 0e+00 7.532317 0.0005889
SFRP2 44 -1.1033457 -1.2056608 0e+00 7.518236 0.0006083
SOX1 27 -1.2956875 -1.5392701 0e+00 7.349659 0.0008967
ZIC1 40 -1.0223609 -1.1114647 1e-07 7.295516 0.0010157
SPON2 30 -1.0371460 -1.0318217 1e-07 7.096401 0.0016064
TLX1 36 -0.7858743 -0.8711780 1e-07 7.094355 0.0016139
PPP1R2P1 27 -1.2635175 -1.2835267 1e-07 6.826780 0.0029884
TBX15 57 -0.8784010 -0.9985338 2e-07 6.710128 0.0039091
HLA-E 64 0.9883267 0.9471868 2e-07 6.621543 0.0047933
SOX2 31 -0.9356689 -0.9861379 4e-07 6.380592 0.0083477
HLA-J 60 -0.9474161 -0.9835159 4e-07 6.363271 0.0086869
TFAP2A 77 -0.6823000 -0.6389762 4e-07 6.359493 0.0087624
PCDHGB2 249 -0.3779342 -0.2994303 5e-07 6.334383 0.0092835
TBX2 42 -0.9192182 -0.9768360 5e-07 6.283615 0.0104341
NR2E1 55 -0.7936621 -0.8098274 6e-07 6.236670 0.0116246
DMRTA2 32 -0.8190626 -0.8833632 6e-07 6.231215 0.0117710
LOC100128811 24 -0.9214760 -1.0762458 6e-07 6.224720 0.0119478
gmea_volc(res)

gmea_barplot(res)

dmscore <- data.frame( res$median * res$sig)
rownames(dmscore) <- rownames(res)
colnames(dmscore) <- "metric"
mres <- mitch_calc(x=dmscore, genesets=genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres$enrichment_result,20) %>% kbl(caption = "Top enriched gene sets with GMEA-Mitch") %>% kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch
set setSize pANOVA s.dist p.adjustANOVA
797 Negative regulation of activity of TFAP2 (AP-2) family transcription factors 10 0.0001209 -0.7018024 0.0009285
1099 Regulation of commissural axon pathfinding by SLIT and ROBO 10 0.0003908 -0.6474208 0.0025714
87 Apoptotic cleavage of cell adhesion proteins 11 0.0002096 -0.6453083 0.0014958
1418 Transcriptional regulation of testis differentiation 12 0.0002348 -0.6130531 0.0016520
1386 Thyroxine biosynthesis 10 0.0034244 -0.5343657 0.0159178
879 POU5F1 (OCT4), SOX2, NANOG repress genes related to differentiation 10 0.0055806 -0.5060048 0.0228640
367 ERBB2 Activates PTK6 Signaling 13 0.0018315 -0.4990366 0.0092526
796 Negative regulation of TCF-dependent signaling by WNT ligand antagonists 15 0.0009549 -0.4925046 0.0053957
64 Adenylate cyclase activating pathway 10 0.0081256 -0.4832503 0.0312683
29 Acetylcholine Neurotransmitter Release Cycle 16 0.0009281 -0.4780294 0.0052637
479 GABA synthesis, release, reuptake and degradation 19 0.0003437 -0.4742558 0.0022913
923 Platelet sensitization by LDL 17 0.0008191 0.4686399 0.0047701
275 Defective B3GALTL causes PpS 34 0.0000029 -0.4632690 0.0000521
1360 TP53 regulates transcription of additional cell cycle genes whose exact role in the p53 pathway remain uncertain 19 0.0004924 0.4616661 0.0031316
1167 SRP-dependent cotranslational protein targeting to membrane 105 0.0000000 0.4519886 0.0000000
1162 SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs 11 0.0094503 0.4518113 0.0354189
1340 TFAP2 (AP-2) family regulates transcription of growth factors and their receptors 12 0.0074999 -0.4456644 0.0295334
837 O-glycosylation of TSR domain-containing proteins 35 0.0000051 -0.4453420 0.0000803
168 Carnitine metabolism 11 0.0106185 -0.4447950 0.0388601
385 Erythrocytes take up carbon dioxide and release oxygen 12 0.0079199 -0.4426103 0.0306439
#mitch_report(mres,outfile="gmeawg_mitch_dm3.html",overwrite=TRUE)

Promoter

dm <- read.table("dma1a.tsv")
dm <- dm[grep("Promoter_Associated",dm$Regulatory_Feature_Group),]
dm <- dm[,c("UCSC_RefGene_Name","t")]
head(dm,50) %>% kbl(caption = "Top significant promoters with limma") %>% kable_paper("hover", full_width = F)
Top significant promoters with limma
UCSC_RefGene_Name t
cg17055959 RB1 -4.489903
cg20451722 MX1 -3.900823
cg08267701 TOMM40;TOMM40;TOMM40 -3.799373
cg05021312 CBFA2T2 -3.754228
cg23712359 ZEB1;ZEB1;ZEB1;ZEB1;ZEB1 -3.742234
cg21697812 C11orf91 -3.709752
cg05616472 EHMT1;C9orf37;EHMT1 -3.658848
cg02852873 CNDP2 -3.657861
cg02779707 TAP2;TAP2 -3.656625
cg08391356 -3.626525
cg18034329 RABEP1;RABEP1 -3.609420
cg14686845 AGPAT5 -3.599622
cg21696208 MTFR1;MTFR1 -3.590917
cg21413009 DAPK2 -3.534316
cg11692772 KLRAQ1;KLRAQ1;KLRAQ1;KLRAQ1 -3.516666
cg24353236 NR1H3;ACP2;ACP2 -3.493813
cg06870750 SETDB2;CAB39L -3.490587
cg09479632 KIAA0226;KIAA0226 -3.435143
cg00945719 TIMM50 -3.434537
cg26118737 NVL;NVL -3.433108
cg25329685 ENTPD4;ENTPD4 -3.431244
cg18564855 MEIG1 -3.406530
cg20451369 LOC219347;LOC219347;LOC219347;LOC219347;LOC219347;C10orf57 -3.399582
cg22994198 ADAMTSL5 3.399172
cg00719400 LOC100130093;LOC100130093 -3.392125
cg05407396 BAT3;BAT3;BAT3;BAT3 -3.386990
cg22838378 STARD5 -3.377938
cg02097636 PAFAH1B3;PRR19;PRR19;PAFAH1B3;PAFAH1B3 -3.374483
cg19598293 -3.369366
cg06175825 -3.366198
cg25719132 SNRPG -3.346044
cg07212963 HMGCL;HMGCL -3.346040
cg08141245 TMEM11;TMEM11 -3.338267
cg06916012 GMPPA;GMPPA;GMPPA -3.329166
cg20100633 ZNF443 -3.320740
cg10154826 FAM8A1 -3.320108
cg12873692 HSPA2 -3.286050
cg13342195 HEATR3 -3.284627
cg26056831 C10orf140;C10orf140 -3.276076
cg12686344 TTC15;TTC15 -3.273293
cg17512779 CNOT7;VPS37A;VPS37A;CNOT7 -3.264150
cg14501914 ODF2;ODF2 -3.250655
cg08539350 MIR548G;C3orf26 -3.242583
cg15670290 HNRNPUL2;TTC9C -3.240179
cg12325285 IER3 -3.237497
cg20510213 ATP6V1G2;NFKBIL1;NFKBIL1;NFKBIL1;ATP6V1G2;NFKBIL1;NFKBIL1;NFKBIL1 -3.234176
cg25743286 GNL1 -3.232446
cg18130437 CCDC107;RMRP -3.227743
cg19540034 APH1A;APH1A;APH1A;APH1A -3.226581
cg13868654 ITGAE;GSG2 -3.223767
hist(dm$t,breaks=seq(from=-10,to=10,by=1))

tic() ; gmea <- calc_sc(dm) ; time2 <- toc()
## 15.463 sec elapsed
df <- gmea[[1]]
res <- gmea[[2]]
write.table(res ,file="gmeapr_dm3.tsv")
head(res,50) %>% kbl(caption = "Top significant genes with GMEA") %>% kable_paper("hover", full_width = F)
Top significant genes with GMEA
nprobes mean median p-value(sc) sig fdr(sc)
PSMB8 46 -0.8920168 -0.8004817 0.00e+00 9.154392 0.0000072
PSMB9 49 -0.9442436 -1.0225348 0.00e+00 8.752556 0.0000182
PPT2 73 -0.7091599 -0.6795781 0.00e+00 8.743195 0.0000186
PEG10 61 -0.8521794 -0.7920898 0.00e+00 8.113649 0.0000794
SGCE 58 -0.8968128 -0.8030180 0.00e+00 8.056013 0.0000906
KIAA1949 67 -0.7462169 -0.7345707 0.00e+00 7.953541 0.0001147
BAT4 40 -0.9527453 -1.0254627 0.00e+00 7.920494 0.0001238
CSNK2B 40 -0.9527453 -1.0254627 0.00e+00 7.920494 0.0001238
DAXX 38 -0.9996226 -0.9173185 0.00e+00 7.897810 0.0001304
ATM 30 -0.9626480 -0.9689806 0.00e+00 7.331930 0.0004799
NPAT 30 -0.9626480 -0.9689806 0.00e+00 7.331930 0.0004799
TAP1 52 -0.9113773 -0.9586524 1.00e-07 7.299025 0.0005176
NOTCH4 45 -0.7454883 -0.8175342 1.00e-07 7.056377 0.0009049
TAPBP 72 -0.6849255 -0.7672558 1.00e-07 6.990366 0.0010533
MICA 40 -0.8072197 -0.8320503 1.00e-07 6.984348 0.0010679
BAT2 35 -0.9096305 -0.8428743 2.00e-07 6.793325 0.0016577
STK19 49 -0.6725285 -0.7144080 2.00e-07 6.780722 0.0017064
DOM3Z 49 -0.6725285 -0.7144080 2.00e-07 6.780722 0.0017064
KILLIN 54 -0.7896278 -0.8901758 3.00e-07 6.482415 0.0033907
NEU1 44 -0.6989037 -0.6230866 5.00e-07 6.300708 0.0051518
PTEN 53 -0.7795262 -0.8214457 5.00e-07 6.285658 0.0053330
EHMT2 35 -0.8908227 -0.8849120 6.00e-07 6.238114 0.0059494
BAT3 53 -0.6837927 -0.6418732 7.00e-07 6.126606 0.0076902
BRD2 57 -0.6926935 -0.6771136 9.00e-07 6.023318 0.0097540
FAM134C 22 -0.7299019 -0.6096295 1.00e-06 6.020600 0.0098143
TUBG1 22 -0.7299019 -0.6096295 1.00e-06 6.020600 0.0098143
HSPA1A 30 -0.9573633 -1.1134720 1.20e-06 5.923690 0.0122654
HSPA1L 30 -0.9573633 -1.1134720 1.20e-06 5.923690 0.0122654
GPSM3 51 -0.7092293 -0.8175342 1.40e-06 5.848739 0.0145730
MTIF3 22 -0.8300263 -0.7679785 1.40e-06 5.844509 0.0147142
DHX16 38 -0.7526592 -0.9248024 1.70e-06 5.770506 0.0174461
MORG1 28 -0.7920815 -0.8506950 1.90e-06 5.724689 0.0193853
GALK2 25 -0.8568160 -0.9211098 2.60e-06 5.591251 0.0263553
MSH5 38 -0.6132056 -0.6479561 3.40e-06 5.467260 0.0350604
PSMA1 19 -0.9993563 -0.9956671 3.80e-06 5.418540 0.0392189
SELI 20 -0.8987079 -0.7928658 5.70e-06 5.242449 0.0588226
GPR113 20 -0.8987079 -0.7928658 5.70e-06 5.242449 0.0588226
IER3 42 -0.6933107 -0.7453727 6.00e-06 5.222925 0.0615154
TUBB 26 -0.8143572 -0.7599730 6.20e-06 5.209779 0.0633996
AARSD1 24 -0.8310468 -0.8427885 6.60e-06 5.183327 0.0673747
GDAP2 18 -0.9019455 -0.9849057 7.60e-06 5.117510 0.0783920
WDR3 18 -0.9019455 -0.9849057 7.60e-06 5.117510 0.0783920
RGL2 56 -0.6374767 -0.6708599 7.70e-06 5.115137 0.0788062
ZBTB12 33 -0.6941017 -0.6952731 8.10e-06 5.092856 0.0829467
NRM 21 -0.7630976 -0.6442921 9.50e-06 5.020600 0.0979519
RAB1B 58 -0.4589958 -0.5377857 1.31e-05 4.883748 0.1342215
TAF6L 20 -0.8954676 -0.8936077 1.34e-05 4.874472 0.1371059
LOC100130987 35 -0.5679060 -0.5663418 1.45e-05 4.837297 0.1493444
ZFP91 18 -1.0099018 -1.0208478 1.53e-05 4.816480 0.1566620
LPXN 18 -0.9985143 -1.0208478 1.53e-05 4.816480 0.1566620
gmea_volc(res)

gmea_barplot(res)

dmscore <- data.frame( res$median * res$sig)
rownames(dmscore) <- rownames(res)
colnames(dmscore) <- "metric"
mres <- mitch_calc(x=dmscore, genesets=genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres$enrichment_result,20) %>%  kbl(caption = "Top enriched gene sets with GMEA-Mitch (promoter only)") %>% kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch (promoter only)
set setSize pANOVA s.dist p.adjustANOVA
1160 Voltage gated Potassium channels 10 0.0003426 0.6538574 0.0240198
65 Assembly of collagen fibrils and other multimeric structures 10 0.0007788 0.6136050 0.0442061
735 Purine catabolism 10 0.0082403 -0.4825133 0.1314042
614 Negative regulation of NMDA receptor-mediated neuronal transmission 12 0.0053568 0.4642984 0.1089239
162 Condensation of Prometaphase Chromosomes 10 0.0136197 -0.4505774 0.1633855
528 Meiotic recombination 19 0.0006768 -0.4504611 0.0403380
1183 p75NTR signals via NF-kB 11 0.0140100 -0.4278656 0.1637247
976 Signaling by Leptin 10 0.0208042 0.4221640 0.1907582
944 Signaling by BMP 14 0.0084768 0.4063961 0.1314042
426 IKK complex recruitment mediated by RIP1 17 0.0042314 -0.4007746 0.0987379
251 Downregulation of ERBB2 signaling 17 0.0062324 -0.3832555 0.1154544
713 Potassium Channels 26 0.0008254 0.3790157 0.0447208
119 Caspase activation via Death Receptors in the presence of ligand 10 0.0403496 -0.3744396 0.2780154
817 Ras activation upon Ca2+ influx through NMDA receptor 10 0.0408761 0.3734595 0.2797189
724 Processing of Intronless Pre-mRNAs 15 0.0134417 -0.3686861 0.1633855
399 HDMs demethylate histones 15 0.0173311 -0.3549320 0.1796408
139 Chaperone Mediated Autophagy 12 0.0365918 -0.3485393 0.2611825
391 Golgi Cisternae Pericentriolar Stack Reorganization 14 0.0248432 -0.3464157 0.2115218
434 Impaired BRCA2 binding to RAD51 30 0.0010361 -0.3462032 0.0536992
1188 tRNA Aminoacylation 23 0.0040971 -0.3458871 0.0979021
#mitch_report(mres,outfile="gmeapr_mitch_dm3.html",overwrite=TRUE)

Session Information

For reproducibility.

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] pkgload_1.2.4    GGally_2.1.2     ggplot2_3.3.6    reshape2_1.4.4  
##  [5] beeswarm_0.4.0   gplots_3.1.3     gtools_3.9.2     tibble_3.1.7    
##  [9] echarts4r_0.4.3  tictoc_1.0.1     mitch_1.4.1      eulerr_6.1.1    
## [13] kableExtra_1.3.4 dplyr_1.0.9     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.8.3       svglite_2.1.0      rprojroot_2.0.3    assertthat_0.2.1  
##  [5] digest_0.6.29      utf8_1.2.2         mime_0.12          R6_2.5.1          
##  [9] plyr_1.8.7         evaluate_0.15      highr_0.9          httr_1.4.3        
## [13] pillar_1.7.0       rlang_1.0.2        rstudioapi_0.13    jquerylib_0.1.4   
## [17] rmarkdown_2.14     desc_1.4.1         webshot_0.5.3      stringr_1.4.0     
## [21] htmlwidgets_1.5.4  munsell_0.5.0      shiny_1.7.1        compiler_4.2.0    
## [25] httpuv_1.6.5       xfun_0.30          pkgconfig_2.0.3    systemfonts_1.0.4 
## [29] htmltools_0.5.2    tidyselect_1.1.2   gridExtra_2.3      reshape_0.8.9     
## [33] fansi_1.0.3        viridisLite_0.4.0  withr_2.5.0        crayon_1.5.1      
## [37] later_1.3.0        brio_1.1.3         bitops_1.0-7       MASS_7.3-57       
## [41] grid_4.2.0         jsonlite_1.8.0     xtable_1.8-4       gtable_0.3.0      
## [45] lifecycle_1.0.1    DBI_1.1.2          magrittr_2.0.3     scales_1.2.0      
## [49] KernSmooth_2.23-20 cli_3.3.0          stringi_1.7.6      promises_1.2.0.1  
## [53] testthat_3.1.4     xml2_1.3.3         bslib_0.3.1        ellipsis_0.3.2    
## [57] generics_0.1.2     vctrs_0.4.1        RColorBrewer_1.1-3 tools_4.2.0       
## [61] glue_1.6.2         purrr_0.3.4        fastmap_1.1.0      yaml_2.3.5        
## [65] colorspace_2.0-3   caTools_1.18.2     rvest_1.0.2        knitr_1.39        
## [69] sass_0.4.1