Introduction

Previously, Mandhri and I analysed the B-PROOF 450K data, trying to understand whether vitamin supplementation caused changes in gene methylation. We used Limma and some basic analyses, which showed no specific probes with FDR<0.05, nor any DMRs.

In this analysis we will use the principle of Gene Set Enrichment Analysis, applying it to many probes belonging to genes. If the probes are trending in concert, then we can make some judgement about the enrichment of those probes. The statistical test used is the CAMERA test, which is a competitive test that attempts to account for correlation between genes in a set, or in this case probes belonging to a gene.

library("parallel")
library("dplyr")
library("kableExtra")
library("eulerr")
library("mitch")
library("limma")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("tictoc")

Get array annotation data

CORES=detectCores()

ann450k <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)
myann <- data.frame(ann450k[,c("UCSC_RefGene_Name","Regulatory_Feature_Group")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)

# make a list of probes that belong to a gene
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=CORES)
myann$UCSC_RefGene_Name <- gnl

gnl <- gnl[which(lapply(gnl,length)>0)]

if ( ! file.exists("sets.Rds") ) {

  l <- mclapply(1:nrow(myann), function(i) {
    a <- myann[i,]
    len <- length(a[[1]][[1]])
    probe <- rep(rownames(a),len)
    genes <- a[[1]][[1]]
    data.frame(genes,probe)
  },mc.cores=CORES)

  df <- do.call(rbind,l)

  sets <- mclapply(X=gn, FUN=function(g) {
    df[which(df$genes == g),2]
  } , mc.cores=CORES/2)

  names(sets) <- gn

  saveRDS(object=sets,file="sets.Rds")

} else {

  sets <- readRDS("sets.Rds")

}

DM3 - association of Hcy with gene methylation

It is thought that high plasma homocysteine can inhibit DNA methylation. Let’s see whether that is the case, and which genes are most affected. This analysis is conducted at the whole gene level as well as on the level of promoters.

Read in and inspect the data

dm <- read.table("dma3a.tsv.gz")
dm <- dm[,4:9]
dm <- merge(myann,dm,by=0)
rownames(dm) <- dm[,1]
dm[,1] = NULL

# testing
#dm <- head(dm,40000)

head(dm,50) %>%
  kbl(caption = "Top significant genes with limma") %>%
  kable_paper("hover", full_width = F)
Top significant genes with limma
UCSC_RefGene_Name Regulatory_Feature_Group logFC AveExpr t P.Value adj.P.Val B
cg00000029 RBL2 Promoter_Associated -0.0446959 0.4522209 -0.8638867 0.3888420 0.9098372 -5.265779
cg00000108 C3orf35 -0.0122602 3.6607954 -0.1654468 0.8687848 0.9886500 -5.576980
cg00000109 FNDC3B -0.0378447 2.0296336 -0.5100334 0.6106758 0.9574373 -5.476065
cg00000165 -0.0277057 -1.0289408 -0.5477061 0.5845972 0.9528277 -5.458804
cg00000236 VDAC3 -0.0308775 1.8879879 -0.6273277 0.5312685 0.9421655 -5.418295
cg00000289 ACTN1 -0.0347776 0.2694825 -0.5508856 0.5824203 0.9524415 -5.457291
cg00000292 ATP2A1 0.0078848 2.8508680 0.1712753 0.8642068 0.9883139 -5.576129
cg00000321 SFRP1 -0.0006141 -0.8769170 -0.0091726 0.9926919 0.9994500 -5.588821
cg00000363 -0.0207565 -0.2718208 -0.3794289 0.7048330 0.9708901 -5.526411
cg00000622 NIPA2 Promoter_Associated 0.0333418 -4.8495004 0.7574773 0.4497908 0.9250804 -5.340328
cg00000658 MAN1B1 0.0226951 2.0750918 0.7385729 0.4611636 0.9276129 -5.352556
cg00000714 TSEN34 -0.0767442 -1.5409438 -2.5167547 0.0127504 0.4684043 -2.895740
cg00000721 LRRC16A Unclassified_Cell_type_specific -0.0031114 2.7846882 -0.0385133 0.9693227 0.9977628 -5.588214
cg00000734 CNBP Unclassified -0.0224123 -3.0864719 -0.8730215 0.3838572 0.9082510 -5.258927
cg00000769 DDX55 Promoter_Associated 0.0044715 -3.1463101 0.1951579 0.8454975 0.9862198 -5.572331
cg00000884 TLR2 -0.0355966 2.4344668 -0.6542490 0.5138171 0.9388729 -5.403363
cg00000905 FAM81A Unclassified 0.0269914 -2.1171455 0.5264592 0.5992411 0.9555164 -5.468689
cg00000924 KCNQ1 , KCNQ1OT1 0.0076655 0.4463612 0.2133270 0.8313222 0.9846206 -5.569112
cg00000948 -0.0512202 2.8596325 -1.1837137 0.2381445 0.8535327 -4.983571
cg00000957 NPHP4 Unclassified_Cell_type_specific 0.0113656 2.7283928 0.3477238 0.7284688 0.9734455 -5.536407
cg00001099 PSKH2 -0.0540892 2.2699920 -0.7493275 0.4546739 0.9262433 -5.345637
cg00001245 MRPS25 Promoter_Associated 0.0127411 -4.2195473 0.3178478 0.7509823 0.9762343 -5.545030
cg00001249 0.0878332 2.2493861 1.3410880 0.1816436 0.8210138 -4.812929
cg00001261 0.0441601 -0.2075480 0.9709593 0.3329193 0.8918583 -5.180991
cg00001269 0.0390867 2.9907317 1.0911248 0.2767298 0.8718875 -5.074204
cg00001349 MAEL 0.0094591 1.9174464 0.1451613 0.8847519 0.9899344 -5.579714
cg00001364 PROX1 -0.0859751 2.3099396 -1.4045238 0.1619493 0.8064409 -4.738269
cg00001446 ELOVL1 0.0361831 2.9141672 0.8905659 0.3743945 0.9051405 -5.245568
cg00001510 LILRA6 0.0045678 -0.0996494 0.1465904 0.8836254 0.9898532 -5.579533
cg00001534 FAF1 -0.1186347 3.3033444 -1.5710033 0.1180025 0.7613262 -4.526384
cg00001582 LOC283050, ZMIZ1 0.0248102 -2.9750721 0.9292188 0.3540658 0.8980126 -5.215208
cg00001583 NR5A2 -0.0441798 -2.8591736 -0.9330761 0.3520765 0.8973466 -5.212108
cg00001594 ROCK2 0.0042517 -4.7593997 0.0853653 0.9320693 0.9944972 -5.585695
cg00001687 CDK10 0.0015161 5.0163253 0.0361013 0.9712431 0.9978621 -5.588292
cg00001747 Unclassified_Cell_type_specific -0.1126870 -2.7076297 -1.5885362 0.1139845 0.7562820 -4.502731
cg00001791 TMEM182 0.0182578 2.9978670 0.2936189 0.7693998 0.9781258 -5.551455
cg00001793 ETV6 -0.1098128 1.0746323 -2.0295079 0.0439333 0.6192038 -3.825126
cg00001809 0.0525778 1.7725728 1.1597328 0.2477500 0.8587519 -5.007741
cg00001854 DNAJA2 0.0114339 2.8722017 0.2049001 0.8378902 0.9853220 -5.570641
cg00001930 0.0632018 2.1698165 0.8056091 0.4215702 0.9186297 -5.307809
cg00002028 PINK1 Promoter_Associated 0.0295245 -3.5036247 0.8499858 0.3965035 0.9114511 -5.276068
cg00002033 LRFN1 0.0542461 1.7907903 0.4372262 0.6624906 0.9647231 -5.505950
cg00002080 RWDD2B Unclassified_Cell_type_specific 0.0795838 2.1256473 1.4263346 0.1555674 0.8004050 -4.711822
cg00002116 MRPL12 Promoter_Associated -0.0045132 -5.0826553 -0.0905479 0.9279562 0.9940693 -5.585300
cg00002145 COL6A3 0.0863898 2.7662213 1.9901870 0.0481398 0.6319901 -3.891945
cg00002190 -0.0809549 2.2105512 -1.6860704 0.0935784 0.7287342 -4.366527
cg00002224 C8orf31 Unclassified_Cell_type_specific 0.0994866 0.1264213 1.3473757 0.1796150 0.8195515 -4.805679
cg00002236 RTTN Promoter_Associated -0.0189864 -3.5918761 -0.6113878 0.5417426 0.9447402 -5.426842
cg00002406 CD2BP2 Promoter_Associated 0.0073673 -4.1952587 0.2710988 0.7866371 0.9798899 -5.556971
cg00002426 SLMAP 0.0357775 3.4533187 0.6786596 0.4982571 0.9355991 -5.389284
# trim down the input dataset
dm <- dm[,c("UCSC_RefGene_Name","t")]

# histogram of t values
hist(dm$t,breaks=seq(from=-6,to=6,by=1))

Whole gene analysis

# set cores to used for parallel execution
CORES= detectCores()

calc_sc <- function(dm) {
  gn <- unique(unlist(strsplit(unlist( dm$UCSC_RefGene_Name) ,", ")))
  gnl <- strsplit( unlist(dm$UCSC_RefGene_Name) ,";")
  gnl <- mclapply(gnl,unique,mc.cores=CORES)

  l <- mclapply(1:nrow(dm), function(i) {
    a <- dm[i,]
    len <- length(a[[1]][[1]])
    tvals <- as.numeric(rep(a[2],len))
    genes <- a[[1]][[1]]
    data.frame(genes,tvals)
  },mc.cores=CORES)

  df <- do.call(rbind,l)
  gme_res <- mclapply( 1:length(gn), function(i) {
    g <- gn[i]
    tstats <- df[which(df$genes==g),"tvals"] 
    myn <- length(tstats)
    mymean <- mean(tstats)
    mymedian <- median(tstats)
    wtselfcont <- wilcox.test(tstats)
    res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,"median"=mymedian,
      "p-value(sc)"=wtselfcont$p.value)
  } , mc.cores=CORES )
  gme_res_df <- do.call(rbind, gme_res)
  rownames(gme_res_df) <- gme_res_df[,1]
  gme_res_df <- gme_res_df[,-1]
  tmp <- apply(gme_res_df,2,as.numeric)
  rownames(tmp) <- rownames(gme_res_df)
  gme_res_df <- as.data.frame(tmp)
  gme_res_df$sig <- -log10(gme_res_df[,4])
  gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
  gme_res_df$`fdr(sc)` <- p.adjust(gme_res_df$`p-value(sc)`)
  out <- list("df"=df,"gme_res_df"=gme_res_df)
  return(out)
}

tic()
gme_res_wholegene <- calc_sc(dm)
time2 <- toc() #38 44 41 40 44 41 40
## 51.175 sec elapsed
time2
## $tic
##  elapsed 
## 3554.186 
## 
## $toc
##  elapsed 
## 3605.361 
## 
## $msg
## logical(0)
df <- gme_res_wholegene[[1]]
res <- gme_res_wholegene[[2]]
write.table(res,file="gmea_wholegene.tsv")

Table of top results.

head(res,50) %>%
  kbl(caption = "Top significant genes with GMEA") %>%
  kable_paper("hover", full_width = F)
Top significant genes with GMEA
nprobes mean median p-value(sc) sig fdr(sc)
TNXB 531 0.3429080 0.4078607 0e+00 14.041475 0.0000000
PCDHA1 162 -0.7077910 -0.6245086 0e+00 13.496797 0.0000000
NNAT 49 -0.9419415 -0.9392397 0e+00 13.303312 0.0000000
PCDHA2 149 -0.7188634 -0.6267169 0e+00 12.485679 0.0000000
PCDHA3 141 -0.7189282 -0.6267169 0e+00 11.583810 0.0000001
KCNQ1DN 39 -1.6144397 -1.7080108 0e+00 11.439140 0.0000001
TAP1 100 0.8373978 0.8170222 0e+00 10.577873 0.0000005
NCOR2 212 -0.6647509 -0.5447866 0e+00 10.345470 0.0000009
PCDHGA1 317 -0.4141393 -0.4153141 0e+00 10.238108 0.0000012
PCDHA4 131 -0.6779619 -0.6026401 0e+00 10.042257 0.0000018
PCDHGA2 309 -0.4165786 -0.4153141 0e+00 9.987553 0.0000021
PCDHGA3 295 -0.4180524 -0.3901466 0e+00 9.576696 0.0000053
MESTIT1 59 -0.7596278 -0.7939385 0e+00 9.544056 0.0000057
PCDHA5 122 -0.6998042 -0.6245086 0e+00 9.542358 0.0000058
NKX6-2 36 -1.3952706 -1.4319593 0e+00 9.536050 0.0000058
SOX2OT 85 -0.7468762 -0.8104998 0e+00 9.479982 0.0000066
PCDHA7 103 -0.7321981 -0.6267169 0e+00 9.375369 0.0000085
C11orf21 36 -1.7668062 -1.9463963 0e+00 9.257296 0.0000111
PITX2 64 -1.0007742 -0.9208371 0e+00 9.163509 0.0000138
PCDHA6 114 -0.6916765 -0.6245086 0e+00 8.749552 0.0000357
TSPAN32 42 -1.6254336 -1.9412022 0e+00 8.645175 0.0000454
MEST 85 -0.6582939 -0.7858506 0e+00 8.607705 0.0000495
ASCL2 49 -0.8873371 -0.9068647 0e+00 8.538922 0.0000580
WT1 60 -1.0380025 -1.0581139 0e+00 8.537528 0.0000582
PCDHGB1 277 -0.4040374 -0.3512600 0e+00 8.169027 0.0001360
PCDHA8 92 -0.7250898 -0.6311017 0e+00 8.158124 0.0001394
KIAA1949 101 0.7427567 0.6964985 0e+00 8.107412 0.0001567
PCDHA10 81 -0.7546498 -0.6842488 0e+00 7.961957 0.0002190
PCDHA9 84 -0.7351768 -0.6598677 0e+00 7.948914 0.0002257
PSMB8 83 0.8313260 0.6488706 0e+00 7.945020 0.0002277
PRDM13 41 -0.9251076 -0.9595314 0e+00 7.870850 0.0002701
GNA12 84 -0.9539754 -0.7963257 0e+00 7.622375 0.0004787
PCDHGA4 263 -0.4030756 -0.3512600 0e+00 7.620039 0.0004812
MAD1L1 684 -0.2827612 -0.2513758 0e+00 7.532317 0.0005889
SFRP2 44 -1.1033457 -1.2056608 0e+00 7.518236 0.0006083
SOX1 27 -1.2956875 -1.5392701 0e+00 7.349659 0.0008967
ZIC1 40 -1.0223609 -1.1114647 1e-07 7.295516 0.0010157
SPON2 30 -1.0371460 -1.0318217 1e-07 7.096401 0.0016064
TLX1 36 -0.7858743 -0.8711780 1e-07 7.094355 0.0016139
PPP1R2P1 27 -1.2635175 -1.2835267 1e-07 6.826780 0.0029884
TBX15 57 -0.8784010 -0.9985338 2e-07 6.710128 0.0039091
HLA-E 64 0.9883267 0.9471868 2e-07 6.621543 0.0047933
SOX2 31 -0.9356689 -0.9861379 4e-07 6.380592 0.0083477
HLA-J 60 -0.9474161 -0.9835159 4e-07 6.363271 0.0086869
TFAP2A 77 -0.6823000 -0.6389762 4e-07 6.359493 0.0087624
PCDHGB2 249 -0.3779342 -0.2994303 5e-07 6.334383 0.0092835
TBX2 42 -0.9192182 -0.9768360 5e-07 6.283615 0.0104341
NR2E1 55 -0.7936621 -0.8098274 6e-07 6.236670 0.0116246
DMRTA2 32 -0.8190626 -0.8833632 6e-07 6.231215 0.0117710
LOC100128811 24 -0.9214760 -1.0762458 6e-07 6.224720 0.0119478

Volcano plots

# volcano selfcont
sig <- subset(res,`fdr(sc)` < 0.05)
plot(res$median , -log10(res$`p-value(sc)`) ,
  xlab="effect size (mean t-stat)", ylab="-log10(p-value)",
  pch=19, cex=0.5, col="gray",main="self contained test")
grid()
points(sig$median , -log10(sig$`p-value(sc)`) ,
  pch=19, cex=0.5, col="red")

Boxplots

Boxplots smallest pvalue.

par(mfrow=c(1,2))
n=50
# self contained

gs <- head(rownames(res),50)
tstats <- lapply(gs, function(g) {
  df[which(df$genes==g),"tvals"]
})
names(tstats) <- gs

tstats <- tstats[order(unlist(lapply(tstats,median)))]
boxplot(tstats,horizontal=TRUE,las=1,
  main="smallest p-val(selfcont)",cex.axis=0.6,
  xlab="t-statistic")
grid()

n=50
# effect size median
sig <- subset(res,`fdr(sc)` < 0.05)
gs <- head(rownames(sig[order(-abs(sig$median)),]),n)

if ( length(gs) >2 ) {
  tstats <- lapply(gs, function(g) {
    df[which(df$genes==g),"tvals"]
  })
  names(tstats) <- gs
  tstats <- tstats[order(unlist(lapply(tstats,median)))]
  boxplot(tstats,horizontal=TRUE,las=1,
    main="biggest effect size(median)",cex.axis=0.6,
    xlab="t-statistic")
  grid()
} else {
  plot(1)
  mtext("too few significant genes found")
}

Mitch

dmscore <- data.frame( res$median * res$sig)

rownames(dmscore) <- rownames(res)

colnames(dmscore) <- "metric"

if ( ! file.exists("ReactomePathways.gmt") ) {
  download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip",
    destfile="ReactomePathways.gmt.zip")
  unzip("ReactomePathways.gmt.zip")
}
file.info("ReactomePathways.gmt")
##                        size isdir mode               mtime               ctime
## ReactomePathways.gmt 897680 FALSE  664 2022-05-11 13:54:42 2022-05-11 13:54:42
##                                    atime  uid  gid uname grname
## ReactomePathways.gmt 2022-05-11 13:55:35 1001 1003   mdz    mdz
genesets <- gmt_import("ReactomePathways.gmt")
length(genesets)
## [1] 2546
mres <- mitch_calc(x=dmscore, genesets=genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres$enrichment_result,20) %>%
  kbl(caption = "Top enriched gene sets with GMEA-Mitch") %>%
  kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch
set setSize pANOVA s.dist p.adjustANOVA
797 Negative regulation of activity of TFAP2 (AP-2) family transcription factors 10 0.0001209 -0.7018024 0.0009285
1099 Regulation of commissural axon pathfinding by SLIT and ROBO 10 0.0003908 -0.6474208 0.0025714
87 Apoptotic cleavage of cell adhesion proteins 11 0.0002096 -0.6453083 0.0014958
1418 Transcriptional regulation of testis differentiation 12 0.0002348 -0.6130531 0.0016520
1386 Thyroxine biosynthesis 10 0.0034244 -0.5343657 0.0159178
879 POU5F1 (OCT4), SOX2, NANOG repress genes related to differentiation 10 0.0055806 -0.5060048 0.0228640
367 ERBB2 Activates PTK6 Signaling 13 0.0018315 -0.4990366 0.0092526
796 Negative regulation of TCF-dependent signaling by WNT ligand antagonists 15 0.0009549 -0.4925046 0.0053957
64 Adenylate cyclase activating pathway 10 0.0081256 -0.4832503 0.0312683
29 Acetylcholine Neurotransmitter Release Cycle 16 0.0009281 -0.4780294 0.0052637
479 GABA synthesis, release, reuptake and degradation 19 0.0003437 -0.4742558 0.0022913
923 Platelet sensitization by LDL 17 0.0008191 0.4686399 0.0047701
275 Defective B3GALTL causes PpS 34 0.0000029 -0.4632690 0.0000521
1360 TP53 regulates transcription of additional cell cycle genes whose exact role in the p53 pathway remain uncertain 19 0.0004924 0.4616661 0.0031316
1167 SRP-dependent cotranslational protein targeting to membrane 105 0.0000000 0.4519886 0.0000000
1162 SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs 11 0.0094503 0.4518113 0.0354189
1340 TFAP2 (AP-2) family regulates transcription of growth factors and their receptors 12 0.0074999 -0.4456644 0.0295334
837 O-glycosylation of TSR domain-containing proteins 35 0.0000051 -0.4453420 0.0000803
168 Carnitine metabolism 11 0.0106185 -0.4447950 0.0388601
385 Erythrocytes take up carbon dioxide and release oxygen 12 0.0079199 -0.4426103 0.0306439
mitch_report(mres,outfile="dma3a_mitch.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmphV0LWD/dma3a_mitch.rds ".
## 
## 
## processing file: mitch.Rmd
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |..                                                                    |   3%
##    inline R code fragments
## 
## 
  |                                                                            
  |....                                                                  |   6%
## label: checklibraries (with options) 
## List of 1
##  $ echo: logi FALSE
## 
## 
  |                                                                            
  |......                                                                |   9%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........                                                              |  12%
## label: peek (with options) 
## List of 1
##  $ echo: logi FALSE
## 
## 
  |                                                                            
  |..........                                                            |  15%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............                                                          |  18%
## label: metrics (with options) 
## List of 1
##  $ echo: logi FALSE
## 
## 
  |                                                                            
  |..............                                                        |  21%
##   ordinary text without R code
## 
## 
  |                                                                            
  |................                                                      |  24%
## label: scatterplot (with options) 
## List of 5
##  $ echo      : logi FALSE
##  $ fig.height: num 6
##  $ fig.width : num 6.5
##  $ message   : logi FALSE
##  $ warning   : logi FALSE
## 
  |                                                                            
  |...................                                                   |  26%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.....................                                                 |  29%
## label: contourplot (with options) 
## List of 5
##  $ echo      : logi FALSE
##  $ fig.height: num 6
##  $ fig.width : num 6.5
##  $ warning   : logi FALSE
##  $ message   : logi FALSE
## Contour plot does not apply to unidimensional analysis.
## 
  |                                                                            
  |.......................                                               |  32%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.........................                                             |  35%
## label: input_geneset_metrics1 (with options) 
## List of 2
##  $ results: chr "asis"
##  $ echo   : logi FALSE
## 
## 
  |                                                                            
  |...........................                                           |  38%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.............................                                         |  41%
## label: input_geneset_metrics2 (with options) 
## List of 5
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
## 
  |                                                                            
  |...............................                                       |  44%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.................................                                     |  47%
## label: input_geneset_metrics3 (with options) 
## List of 5
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ message   : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
## 
  |                                                                            
  |...................................                                   |  50%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.....................................                                 |  53%
## label: echart1d (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.......................................                               |  56%
## label: echart2d (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.........................................                             |  59%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...........................................                           |  62%
## label: heatmap (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 10
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.............................................                         |  65%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...............................................                       |  68%
## label: effectsize (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.................................................                     |  71%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...................................................                   |  74%
## label: results_table (with options) 
## List of 2
##  $ results: chr "asis"
##  $ echo   : logi FALSE
## 
## 
  |                                                                            
  |......................................................                |  76%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........................................................              |  79%
## label: results_table_complete (with options) 
## List of 2
##  $ results: chr "asis"
##  $ echo   : logi FALSE
## 
## 
  |                                                                            
  |..........................................................            |  82%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............................................................          |  85%
## label: detailed_geneset_reports1d (with options) 
## List of 7
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 6
##  $ fig.width : num 6
##  $ out.width : chr "80%"
##  $ comment   : logi NA
##  $ message   : logi FALSE
## 
  |                                                                            
  |..............................................................        |  88%
##   ordinary text without R code
## 
## 
  |                                                                            
  |................................................................      |  91%
## label: detailed_geneset_reports2d (with options) 
## List of 7
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 5
##  $ fig.width : num 6
##  $ out.width : chr "80%"
##  $ comment   : logi NA
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |..................................................................    |  94%
##   ordinary text without R code
## 
## 
  |                                                                            
  |....................................................................  |  97%
## label: session_info (with options) 
## List of 3
##  $ include: logi TRUE
##  $ echo   : logi TRUE
##  $ results: chr "markup"
## 
## 
  |                                                                            
  |......................................................................| 100%
##   ordinary text without R code
## output file: /mnt/data/mdz/projects/gmea/mitch.knit.md
## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/gmea/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmphV0LWD/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --standalone --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmphV0LWD/rmarkdown-str3441fa6b40d25f.html
## 
## Output created: /tmp/RtmphV0LWD/mitch_report.html
## [1] TRUE

Using CAMERA

stat <- dm$t
names(stat) <- rownames(dm)

# slow
sets2 <- head(sets,1000)
tic()
cres <- cameraPR(statistic=stat, index=sets2, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
toc()
## 33.653 sec elapsed
# fast
CORES=detectCores()
#sets <- head(sets,100000)
sets_split <- split(sets,1:CORES)
## Warning in split.default(sets, 1:CORES): data length is not a multiple of split
## variable
tic()
cres <- mclapply(X=sets_split, function(mysets) {
  cameraPR(statistic=stat, index=mysets, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE)
}, mc.cores=CORES)
cres <- do.call(rbind,cres)
rownames(cres) <- sapply(strsplit(rownames(cres),"\\."),"[[",2)
cres <- cres[order(cres$PValue),]
g <- rownames(cres)[1]
m <- mclapply(rownames(cres) , function(g) {
  probes <- sets[[g]]
  scores <- stat[which(names(stat) %in% probes)]
  mymedian <- median(scores)
  mymean <- mean(scores)
  c(mymean,mymedian)
},mc.cores=CORES)
mdf <- do.call(rbind,m)
colnames(mdf) <- c("mean","median")
cres <- cbind(mdf,cres)
toc()
## 104.206 sec elapsed
head(subset(cres,Direction=="Up"),25)
##               mean    median NGenes Direction       PValue          FDR
## PTPRCAP  2.3244582 2.4199633     16        Up 2.971405e-16 3.732084e-13
## LTA      1.9127107 2.1742753     24        Up 9.103873e-16 1.144357e-12
## POU2AF1  2.3400616 2.8811809     12        Up 4.108978e-13 5.177312e-10
## HLA-E    0.9883267 0.9471868     64        Up 6.711529e-10 4.214840e-07
## UBASH3A  2.1508739 2.4913830     10        Up 7.137722e-10 8.893602e-07
## TAP1     0.8373978 0.8170222    100        Up 1.770983e-09 1.488403e-06
## IL32     2.1626345 2.1155364      9        Up 3.539798e-09 2.215914e-06
## CD3D     2.2705304 2.5050647      8        Up 4.655631e-09 5.833505e-06
## IFITM1   1.6211476 1.8124585     16        Up 6.419055e-09 4.031166e-06
## PSMB8    0.8313260 0.6488706     83        Up 1.231922e-08 3.868234e-06
## CXCR5    1.3747828 1.5721151     22        Up 1.284890e-08 5.362276e-06
## HLA-F    0.9382476 0.9727570     56        Up 1.611764e-08 1.009770e-05
## DGKA     1.2742041 1.1275025     25        Up 2.378672e-08 9.966635e-06
## KIAA1949 0.7427567 0.6964985    101        Up 5.797326e-08 2.429080e-05
## BCL11B   0.8447215 1.1065618     61        Up 1.279820e-07 3.905623e-05
## GPR81    1.5466288 1.3820297     14        Up 1.593058e-07 6.653674e-05
## DIABLO   1.3096804 1.4524589     18        Up 5.685340e-07 1.190131e-04
## SNORD34  1.6297553 1.4291833     11        Up 7.563096e-07 1.578166e-04
## SNORD31  1.7803007 1.9604996      9        Up 9.213007e-07 4.984571e-04
## LIME1    1.3134401 1.7264438     17        Up 9.851513e-07 6.201527e-04
## LCK      1.0847201 1.5988245     26        Up 1.011981e-06 1.590075e-04
## PRSS22   1.2096414 1.2322848     20        Up 1.189635e-06 4.984571e-04
## SNORD22  2.1334924 2.1363205      6        Up 1.390755e-06 5.827262e-04
## ISG20    1.5104566 1.1300646     12        Up 1.678216e-06 5.290576e-04
## SEPT1    1.6389022 1.7726724     10        Up 1.910060e-06 5.091599e-04
head(subset(cres,Direction=="Down"),25)
##                    mean     median NGenes Direction       PValue          FDR
## C11orf21     -1.7668062 -1.9463963     36      Down 8.428364e-15 1.055231e-11
## TSPAN32      -1.6254336 -1.9412022     42      Down 6.104176e-14 7.691262e-11
## MPO          -2.4948914 -2.6484633     13      Down 1.841064e-13 2.312376e-10
## KCNQ1DN      -1.6144397 -1.7080108     39      Down 3.924695e-13 4.933342e-10
## PRTN3        -1.9416063 -2.5713679     19      Down 2.669346e-11 3.366045e-08
## MIR145       -2.5680735 -2.7594034      9      Down 1.303212e-10 8.216749e-08
## ELANE        -1.9193572 -2.0514902     17      Down 3.376629e-10 4.227540e-07
## AZU1         -2.3303366 -2.3559866     10      Down 1.107618e-09 4.637228e-07
## NKX6-2       -1.3952706 -1.4319593     36      Down 1.657257e-09 1.032471e-06
## CD177        -2.6754771 -3.7016583      7      Down 2.368184e-09 1.488403e-06
## CEBPE        -2.1853301 -2.8379658     11      Down 2.725895e-09 3.423724e-06
## OXT          -2.0521117 -2.2532375     12      Down 7.114327e-09 4.453569e-06
## SLC16A3      -1.2912785 -0.9007149     38      Down 1.571977e-08 6.528943e-06
## LOC646627    -2.5340767 -2.4228463      7      Down 1.679106e-08 2.107277e-05
## LOC100131496 -2.4936562 -2.3802536      7      Down 2.883213e-08 1.810658e-05
## MS4A3        -2.2133875 -2.3290819      9      Down 3.750388e-08 9.420974e-06
## GPR97        -2.4545992 -2.2893246      7      Down 4.821967e-08 2.429080e-05
## SLFN13       -1.6680322 -1.5960605     17      Down 6.246367e-08 2.615146e-05
## S100A9       -1.7520050 -2.3862889     15      Down 6.563207e-08 1.373898e-05
## FAM124B      -1.8078312 -1.7546957     13      Down 1.553549e-07 3.905623e-05
## SPI1         -1.5737683 -1.8428044     18      Down 1.880414e-07 5.904499e-05
## REC8         -1.6097189 -1.8430645     17      Down 1.888226e-07 3.388016e-05
## GNA12        -0.9539754 -0.7963257     84      Down 1.891490e-07 1.110827e-04
## WT1          -1.0380025 -1.0581139     60      Down 2.108660e-07 6.600105e-05
## TNFRSF6B     -2.3207431 -3.3172884      7      Down 2.644825e-07 1.110827e-04
head(cres,50) %>%
  kbl(caption = "Top significant genes with CAMERA") %>%
  kable_paper("hover", full_width = F)
Top significant genes with CAMERA
mean median NGenes Direction PValue FDR
PTPRCAP 2.3244582 2.4199633 16 Up 0e+00 0.0000000
LTA 1.9127107 2.1742753 24 Up 0e+00 0.0000000
C11orf21 -1.7668062 -1.9463963 36 Down 0e+00 0.0000000
TSPAN32 -1.6254336 -1.9412022 42 Down 0e+00 0.0000000
MPO -2.4948914 -2.6484633 13 Down 0e+00 0.0000000
KCNQ1DN -1.6144397 -1.7080108 39 Down 0e+00 0.0000000
POU2AF1 2.3400616 2.8811809 12 Up 0e+00 0.0000000
PRTN3 -1.9416063 -2.5713679 19 Down 0e+00 0.0000000
MIR145 -2.5680735 -2.7594034 9 Down 0e+00 0.0000001
ELANE -1.9193572 -2.0514902 17 Down 0e+00 0.0000004
HLA-E 0.9883267 0.9471868 64 Up 0e+00 0.0000004
UBASH3A 2.1508739 2.4913830 10 Up 0e+00 0.0000009
AZU1 -2.3303366 -2.3559866 10 Down 0e+00 0.0000005
NKX6-2 -1.3952706 -1.4319593 36 Down 0e+00 0.0000010
TAP1 0.8373978 0.8170222 100 Up 0e+00 0.0000015
CD177 -2.6754771 -3.7016583 7 Down 0e+00 0.0000015
CEBPE -2.1853301 -2.8379658 11 Down 0e+00 0.0000034
IL32 2.1626345 2.1155364 9 Up 0e+00 0.0000022
CD3D 2.2705304 2.5050647 8 Up 0e+00 0.0000058
IFITM1 1.6211476 1.8124585 16 Up 0e+00 0.0000040
OXT -2.0521117 -2.2532375 12 Down 0e+00 0.0000045
PSMB8 0.8313260 0.6488706 83 Up 0e+00 0.0000039
CXCR5 1.3747828 1.5721151 22 Up 0e+00 0.0000054
SLC16A3 -1.2912785 -0.9007149 38 Down 0e+00 0.0000065
HLA-F 0.9382476 0.9727570 56 Up 0e+00 0.0000101
LOC646627 -2.5340767 -2.4228463 7 Down 0e+00 0.0000211
DGKA 1.2742041 1.1275025 25 Up 0e+00 0.0000100
LOC100131496 -2.4936562 -2.3802536 7 Down 0e+00 0.0000181
MS4A3 -2.2133875 -2.3290819 9 Down 0e+00 0.0000094
GPR97 -2.4545992 -2.2893246 7 Down 0e+00 0.0000243
KIAA1949 0.7427567 0.6964985 101 Up 1e-07 0.0000243
SLFN13 -1.6680322 -1.5960605 17 Down 1e-07 0.0000262
S100A9 -1.7520050 -2.3862889 15 Down 1e-07 0.0000137
BCL11B 0.8447215 1.1065618 61 Up 1e-07 0.0000391
FAM124B -1.8078312 -1.7546957 13 Down 2e-07 0.0000391
GPR81 1.5466288 1.3820297 14 Up 2e-07 0.0000665
SPI1 -1.5737683 -1.8428044 18 Down 2e-07 0.0000590
REC8 -1.6097189 -1.8430645 17 Down 2e-07 0.0000339
GNA12 -0.9539754 -0.7963257 84 Down 2e-07 0.0001111
WT1 -1.0380025 -1.0581139 60 Down 2e-07 0.0000660
TNFRSF6B -2.3207431 -3.3172884 7 Down 3e-07 0.0001111
PITX2 -1.0007742 -0.9208371 64 Down 4e-07 0.0000779
PSIMCT-1 -1.8716010 -1.8122459 11 Down 4e-07 0.0004985
SFRP2 -1.1033457 -1.2056608 44 Down 5e-07 0.0003373
TACR3 -1.7300623 -1.9615634 13 Down 6e-07 0.0001190
DIABLO 1.3096804 1.4524589 18 Up 6e-07 0.0001190
HORMAD2 -1.7271886 -2.0776542 13 Down 6e-07 0.0001450
SOX1 -1.2956875 -1.5392701 27 Down 6e-07 0.0001099
MGC12982 -1.5745583 -1.6173178 16 Down 7e-07 0.0006202
SNORD34 1.6297553 1.4291833 11 Up 8e-07 0.0001578

CAMERA + pathway analysis

There’s no ES and the p-value is biased, so not sure about this

Promoter analysis only

dm <- read.table("dma3a.tsv.gz")
dm <- dm[,4:9]
dm <- merge(myann,dm,by=0)
rownames(dm) <- dm[,1]
dm[,1] = NULL
dm <- dm[grep("Promoter_Associated",dm$Regulatory_Feature_Group),]

head(dm,50) %>%
  kbl(caption = "Top significant genes with limma") %>%
  kable_paper("hover", full_width = F)
Top significant genes with limma
UCSC_RefGene_Name Regulatory_Feature_Group logFC AveExpr t P.Value adj.P.Val B
cg00000029 RBL2 Promoter_Associated -0.0446959 0.4522209 -0.8638867 0.3888420 0.9098372 -5.265779
cg00000622 NIPA2 Promoter_Associated 0.0333418 -4.8495004 0.7574773 0.4497908 0.9250804 -5.340328
cg00000769 DDX55 Promoter_Associated 0.0044715 -3.1463101 0.1951579 0.8454975 0.9862198 -5.572331
cg00001245 MRPS25 Promoter_Associated 0.0127411 -4.2195473 0.3178478 0.7509823 0.9762343 -5.545030
cg00002028 PINK1 Promoter_Associated 0.0295245 -3.5036247 0.8499858 0.3965035 0.9114511 -5.276068
cg00002116 MRPL12 Promoter_Associated -0.0045132 -5.0826553 -0.0905479 0.9279562 0.9940693 -5.585300
cg00002236 RTTN Promoter_Associated -0.0189864 -3.5918761 -0.6113878 0.5417426 0.9447402 -5.426842
cg00002406 CD2BP2 Promoter_Associated 0.0073673 -4.1952587 0.2710988 0.7866371 0.9798899 -5.556971
cg00002660 SMARCC2 Promoter_Associated -0.0276799 -3.4149124 -0.9195186 0.3591001 0.8995507 -5.222947
cg00002930 NFKBIL1 , ATP6V1G2 Promoter_Associated 0.0431087 -2.5246196 0.9921947 0.3224833 0.8886832 -5.163013
cg00003173 CHCHD4, TMEM43 Promoter_Associated 0.0440248 -3.1942015 1.2228554 0.2230416 0.8453216 -4.943077
cg00003202 RFX5 Promoter_Associated 0.0246642 -4.4899392 0.4925218 0.6229727 0.9593472 -5.483671
cg00003784 CCDC45, DDX5 Promoter_Associated 0.0271078 -3.7722693 0.8931927 0.3729904 0.9046774 -5.243545
cg00004072 ZFP36 Promoter_Associated 0.0532912 -3.1321117 1.4801258 0.1406519 0.7861540 -4.644902
cg00004082 SLC2A9 Promoter_Associated -0.0567563 -2.1842139 -0.9558817 0.3404610 0.8938321 -5.193522
cg00004207 SFRS7 Promoter_Associated_Cell_type_specific -0.0362644 -3.0355991 -0.8113672 0.4182657 0.9177356 -5.303786
cg00005010 MT1F Promoter_Associated_Cell_type_specific 0.0077148 -4.0442169 0.2134724 0.8312090 0.9845704 -5.569085
cg00005543 TCTE3 , C6orf70 Promoter_Associated -0.0049202 -4.2181795 -0.1388422 0.8897358 0.9905448 -5.580493
cg00006032 GPHN Promoter_Associated -0.0762156 -3.1976445 -1.3023458 0.1945231 0.8284346 -4.856869
cg00006122 C12orf44 Promoter_Associated 0.0572183 -3.0480148 2.0292594 0.0439589 0.6192964 -3.825553
cg00006884 CCDC126 Promoter_Associated 0.0081832 -3.4410951 0.2750624 0.7835953 0.9796542 -5.556032
cg00007226 PACS2 Promoter_Associated 0.0187771 -2.7369800 0.6756715 0.5001482 0.9360383 -5.391035
cg00007269 ZNF77 Promoter_Associated 0.0243405 -3.2605321 0.9476296 0.3446351 0.8950468 -5.200298
cg00007898 DSTYK Promoter_Associated -0.0487729 -4.7627008 -0.9935158 0.3218412 0.8884407 -5.161882
cg00008004 Promoter_Associated 0.0530924 -3.1300562 1.2249733 0.2222445 0.8448815 -4.940849
cg00008033 ZNF613 Promoter_Associated -0.0926061 -2.3343986 -1.8015212 0.0733558 0.6948466 -4.195215
cg00008188 C14orf181 Promoter_Associated 0.0310443 -3.3313383 1.0703318 0.2859557 0.8754919 -5.093560
cg00008387 TMEM188 Promoter_Associated 0.0187394 -2.6875103 0.7807688 0.4360017 0.9222170 -5.324840
cg00008665 C3orf39 Promoter_Associated 0.0274437 2.6784492 0.6050298 0.5459492 0.9453510 -5.430190
cg00008671 FAM190B Promoter_Associated 0.0367837 -0.8166720 0.9195016 0.3591090 0.8995507 -5.222960
cg00008713 IMPA2 Promoter_Associated 0.0547502 -2.6401426 1.6624517 0.0982265 0.7348906 -4.400228
cg00008823 NRD1 Promoter_Associated 0.0392469 -4.1361594 1.3306030 0.1850646 0.8228002 -4.824945
cg00008839 OAZ1 Promoter_Associated -0.0109090 -5.3145920 -0.2359452 0.8137533 0.9829590 -5.564703
cg00009167 KIAA1324, C1orf194 Promoter_Associated 0.0203360 -2.6887142 0.7767049 0.4383897 0.9227737 -5.327576
cg00009214 DERA Promoter_Associated_Cell_type_specific 0.0165604 -2.8461860 0.7015914 0.4838734 0.9326330 -5.375591
cg00009407 TTC8 Promoter_Associated 0.0546805 -3.7141576 1.8150757 0.0712380 0.6896927 -4.174389
cg00009412 DDX18 Promoter_Associated 0.0556711 -3.8699150 1.5351918 0.1265570 0.7705896 -4.573905
cg00009970 MEX3C Promoter_Associated 0.0057340 -3.9394691 0.1630548 0.8706649 0.9887727 -5.577321
cg00010046 MGC23284, MVD Promoter_Associated 0.0578509 -3.2839645 1.7147500 0.0881753 0.7201932 -4.324988
cg00010168 MMS19, UBTD1 Promoter_Associated 0.0215936 -2.4492618 0.7670533 0.4440916 0.9238443 -5.334016
cg00010266 MFSD3 Promoter_Associated -0.0110287 -3.5919353 -0.3064669 0.7596164 0.9770348 -5.548111
cg00010659 TMEM14C Promoter_Associated -0.0108691 -3.4079584 -0.3903382 0.6967649 0.9700739 -5.522770
cg00010853 KIAA1949 Promoter_Associated 0.0616603 -0.2660541 1.0482193 0.2959945 0.8797502 -5.113741
cg00010932 METTL5 Promoter_Associated -0.0433476 1.7565091 -0.7089306 0.4793184 0.9317887 -5.371113
cg00010947 HELQ , MRPS18C Promoter_Associated 0.0352093 -3.4402376 0.6786797 0.4982445 0.9355984 -5.389272
cg00011122 C16orf75 Promoter_Associated -0.0340731 -2.7978920 -0.5123938 0.6090266 0.9571802 -5.475019
cg00011284 RBL2 Promoter_Associated -0.0490232 -3.3631727 -1.2791249 0.2025600 0.8332300 -4.882602
cg00011578 PPP5C Promoter_Associated 0.0318908 -2.3697313 1.3798035 0.1694215 0.8125546 -4.767763
cg00011994 FLOT1, IER3 Promoter_Associated -0.0011729 -3.7442208 -0.0386883 0.9691833 0.9977478 -5.588208
cg00012036 H3F3B Promoter_Associated 0.0349406 -2.9522732 1.2434522 0.2153776 0.8415437 -4.921250
# trim down the input dataset
dm <- dm[,c("UCSC_RefGene_Name","t")]

# histogram of t values
hist(dm$t,breaks=seq(from=-6,to=6,by=1))

# set cores to used for parallel execution

tic()
gme_res_promoter <- calc_sc(dm)
time2 <- toc() #15.7 20.2 20.3 17.5 17.9 17.7 20.6 
## 15.222 sec elapsed
time2
## $tic
## elapsed 
## 3780.97 
## 
## $toc
##  elapsed 
## 3796.192 
## 
## $msg
## logical(0)
df <- gme_res_promoter[[1]]
res <- gme_res_promoter[[2]] 
write.table(res ,file="gmea_promo.tsv")

Table of top results.

head(res,50) %>%
  kbl(caption = "Top significant genes with GMEA") %>%
  kable_paper("hover", full_width = F)
Top significant genes with GMEA
nprobes mean median p-value(sc) sig fdr(sc)
PSMB9 49 0.5761512 0.5821938 0.0000026 5.576796 0.0273321
LTA 17 2.3851459 2.5326143 0.0000153 4.816480 0.1573792
ZNF331 22 -0.8964193 -0.9539272 0.0000205 4.688161 0.2114577
HLA-F 37 0.8824152 0.8890888 0.0000264 4.579069 0.2718154
TAP1 52 0.5228433 0.5670995 0.0000274 4.561618 0.2829321
PHTF2 29 0.6846189 0.5747849 0.0000318 4.497636 0.3278101
TMEM60 29 0.6846189 0.5747849 0.0000318 4.497636 0.3278101
KDM6B 19 0.9253189 0.5649357 0.0000381 4.418540 0.3932190
C20orf94 15 0.6004834 0.6045686 0.0000610 4.214420 0.6290894
KIAA1949 67 0.5198463 0.5612521 0.0000614 4.211571 0.6331675
SF3A2 17 1.0063875 0.6782074 0.0000763 4.117510 0.7862091
TUBB 26 0.8749918 0.9736874 0.0000816 4.088475 0.8404865
PSMB8 46 0.5721948 0.4699794 0.0001023 3.989915 1.0000000
KIF5B 14 0.7902940 0.5348532 0.0001221 3.913390 1.0000000
SRP68 14 0.9111961 0.8667275 0.0001221 3.913390 1.0000000
PLEKHJ1 16 1.0268987 0.8825641 0.0001526 3.816480 1.0000000
HSPE1 23 0.6167638 0.5507955 0.0001814 3.741275 1.0000000
HSPD1 23 0.6167638 0.5507955 0.0001814 3.741275 1.0000000
TMEM134 19 0.5859688 0.5494877 0.0002098 3.678177 1.0000000
HSPA1B 23 0.6806965 0.5842477 0.0002148 3.667935 1.0000000
IQCH 13 0.8418442 0.6822960 0.0002441 3.612360 1.0000000
AAGAB 13 0.8418442 0.6822960 0.0002441 3.612360 1.0000000
RWDD1 13 0.8209741 0.6082937 0.0002441 3.612360 1.0000000
SSH3 13 -1.5301230 -1.4511893 0.0002441 3.612360 1.0000000
STAG3L4 13 0.7675809 0.8131921 0.0002441 3.612360 1.0000000
PMS2L4 13 0.7675809 0.8131921 0.0002441 3.612360 1.0000000
PTPRCAP 13 2.3916698 2.4635616 0.0002441 3.612360 1.0000000
BCL11B 14 1.3268440 1.3321196 0.0002441 3.612360 1.0000000
GALK2 25 0.7274296 0.7982154 0.0002498 3.602402 1.0000000
PEX10 22 0.7323587 0.8632916 0.0002556 3.592465 1.0000000
TRIM27 55 0.4703276 0.4519905 0.0003200 3.494916 1.0000000
C5orf36 20 0.7279515 0.8111289 0.0003223 3.491683 1.0000000
ANKRD32 20 0.7279515 0.8111289 0.0003223 3.491683 1.0000000
IRAK1BP1 14 0.7353866 0.7967438 0.0003662 3.436269 1.0000000
C5orf35 14 1.1025095 0.9671847 0.0003662 3.436269 1.0000000
PRKRA 15 0.7994224 0.7985942 0.0004272 3.369322 1.0000000
DFNB59 15 0.7994224 0.7985942 0.0004272 3.369322 1.0000000
ZBTB9 35 0.5068120 0.4382240 0.0004435 3.353107 1.0000000
HIST1H2AK 12 0.6486225 0.6487854 0.0004883 3.311330 1.0000000
ZC3H12D 12 1.0249054 0.8920786 0.0004883 3.311330 1.0000000
TBC1D10C 12 1.2114500 0.9331465 0.0004883 3.311330 1.0000000
TMEM128 12 1.0851858 0.8574542 0.0004883 3.311330 1.0000000
LYSMD1 12 0.8175382 0.6964782 0.0004883 3.311330 1.0000000
SCNM1 12 0.8175382 0.6964782 0.0004883 3.311330 1.0000000
PPM1M 13 1.3734122 1.2332757 0.0004883 3.311330 1.0000000
OBFC2A 12 0.4234877 0.3051854 0.0004883 3.311330 1.0000000
NME1-NME2 25 0.6867271 0.7243284 0.0004895 3.310271 1.0000000
CDC7 16 0.8879637 0.8115182 0.0005798 3.236696 1.0000000
ARSG 16 0.7496755 0.8700503 0.0005798 3.236696 1.0000000
UVRAG 15 0.8486591 0.9494920 0.0006104 3.214420 1.0000000

Volcano plots

# volcano selfcont
sig <- subset(res,`fdr(sc)` < 0.05)
plot(res$median , -log10(res$`p-value(sc)`) ,
  xlab="effect size (mean t-stat)", ylab="-log10(p-value)",
  pch=19, cex=0.5, col="gray",main="self contained test")
grid()
points(sig$median , -log10(sig$`p-value(sc)`) ,
  pch=19, cex=0.5, col="red")

Boxplots

Boxplots smallest pvalue.

par(mfrow=c(1,2))
n=50
# self contained

gs <- head(rownames(res),50)
tstats <- lapply(gs, function(g) {
  df[which(df$genes==g),"tvals"]
})
names(tstats) <- gs

tstats <- tstats[order(unlist(lapply(tstats,median)))]
boxplot(tstats,horizontal=TRUE,las=1,
  main="smallest p-val(selfcont)",cex.axis=0.6,
  xlab="t-statistic")
grid()

n=50
# effect size median
sig <- subset(res,`fdr(sc)` < 0.05)
gs <- head(rownames(sig[order(-abs(sig$median)),]),n)

if ( length(gs) >2 ) {
  tstats <- lapply(gs, function(g) {
    df[which(df$genes==g),"tvals"]
  })
  names(tstats) <- gs
  tstats <- tstats[order(unlist(lapply(tstats,median)))]
  boxplot(tstats,horizontal=TRUE,las=1,
    main="biggest effect size(median)",cex.axis=0.6,
    xlab="t-statistic")
  grid()
} else {
  plot(1)
  mtext("too few significant genes found")
}

Mitch

dmscore <- data.frame( res$median * res$sig)

rownames(dmscore) <- rownames(res)

colnames(dmscore) <- "metric"

genesets <- gmt_import("ReactomePathways.gmt")

mres <- mitch_calc(x=dmscore, genesets=genesets,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres$enrichment_result,20) %>%
  kbl(caption = "Top enriched gene sets with GMEA-Mitch (promoter only)") %>%
  kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch (promoter only)
set setSize pANOVA s.dist p.adjustANOVA
829 Regulation of FZD by ubiquitination 12 0.0004435 -0.5854282 0.0293696
812 RUNX2 regulates osteoblast differentiation 13 0.0002702 -0.5832176 0.0230084
379 Glutamate Neurotransmitter Release Cycle 11 0.0022989 -0.5306853 0.0830377
65 Assembly of collagen fibrils and other multimeric structures 10 0.0046010 -0.5173411 0.1166405
526 Maturation of nucleoprotein 11 0.0047419 0.4916096 0.1166405
1160 Voltage gated Potassium channels 10 0.0076020 -0.4873265 0.1372969
625 Neurotransmitter release cycle 18 0.0003600 -0.4856269 0.0266115
864 Regulation of pyruvate dehydrogenase (PDH) complex 11 0.0069827 -0.4696146 0.1328140
250 Dopamine Neurotransmitter Release Cycle 10 0.0142609 -0.4474236 0.1868015
508 Long-term potentiation 11 0.0104895 -0.4455287 0.1615337
672 PKA activation 10 0.0174194 -0.4341194 0.2171792
1001 Signaling by Retinoic Acid 17 0.0020036 -0.4327568 0.0770417
673 PKA-mediated phosphorylation of CREB 11 0.0133879 -0.4305654 0.1793073
614 Negative regulation of NMDA receptor-mediated neuronal transmission 12 0.0105702 -0.4261380 0.1615337
811 RUNX2 regulates bone development 17 0.0025294 -0.4229719 0.0861428
384 Glycogen storage diseases 10 0.0219436 -0.4183503 0.2356464
817 Ras activation upon Ca2+ influx through NMDA receptor 10 0.0272862 -0.4030277 0.2450697
1053 TNF receptor superfamily (TNFSF) members mediating non-canonical NF-kB pathway 10 0.0296084 0.3971664 0.2468059
1034 Synthesis of Leukotrienes (LT) and Eoxins (EX) 10 0.0334008 -0.3883940 0.2602207
835 Regulation of KIT signaling 10 0.0350228 0.3849005 0.2642229
mitch_report(mres,outfile="dma3a_mitch_promo.html",overwrite=TRUE)
## Note: overwriting existing report
## Dataset saved as " /tmp/RtmphV0LWD/dma3a_mitch_promo.rds ".
## 
## 
## processing file: mitch.Rmd
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |..                                                                    |   3%
##    inline R code fragments
## 
## 
  |                                                                            
  |....                                                                  |   6%
## label: checklibraries (with options) 
## List of 1
##  $ echo: logi FALSE
## 
## 
  |                                                                            
  |......                                                                |   9%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........                                                              |  12%
## label: peek (with options) 
## List of 1
##  $ echo: logi FALSE
## 
## 
  |                                                                            
  |..........                                                            |  15%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............                                                          |  18%
## label: metrics (with options) 
## List of 1
##  $ echo: logi FALSE
## 
## 
  |                                                                            
  |..............                                                        |  21%
##   ordinary text without R code
## 
## 
  |                                                                            
  |................                                                      |  24%
## label: scatterplot (with options) 
## List of 5
##  $ echo      : logi FALSE
##  $ fig.height: num 6
##  $ fig.width : num 6.5
##  $ message   : logi FALSE
##  $ warning   : logi FALSE
## 
  |                                                                            
  |...................                                                   |  26%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.....................                                                 |  29%
## label: contourplot (with options) 
## List of 5
##  $ echo      : logi FALSE
##  $ fig.height: num 6
##  $ fig.width : num 6.5
##  $ warning   : logi FALSE
##  $ message   : logi FALSE
## Contour plot does not apply to unidimensional analysis.
## 
  |                                                                            
  |.......................                                               |  32%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.........................                                             |  35%
## label: input_geneset_metrics1 (with options) 
## List of 2
##  $ results: chr "asis"
##  $ echo   : logi FALSE
## 
## 
  |                                                                            
  |...........................                                           |  38%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.............................                                         |  41%
## label: input_geneset_metrics2 (with options) 
## List of 5
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
## 
  |                                                                            
  |...............................                                       |  44%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.................................                                     |  47%
## label: input_geneset_metrics3 (with options) 
## List of 5
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ message   : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
## 
  |                                                                            
  |...................................                                   |  50%
##   ordinary text without R code
## 
## 
  |                                                                            
  |.....................................                                 |  53%
## label: echart1d (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.......................................                               |  56%
## label: echart2d (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.........................................                             |  59%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...........................................                           |  62%
## label: heatmap (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 10
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.............................................                         |  65%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...............................................                       |  68%
## label: effectsize (with options) 
## List of 6
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 7
##  $ fig.width : num 7
##  $ fig.show  : chr "all"
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |.................................................                     |  71%
##   ordinary text without R code
## 
## 
  |                                                                            
  |...................................................                   |  74%
## label: results_table (with options) 
## List of 2
##  $ results: chr "asis"
##  $ echo   : logi FALSE
## 
## 
  |                                                                            
  |......................................................                |  76%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........................................................              |  79%
## label: results_table_complete (with options) 
## List of 2
##  $ results: chr "asis"
##  $ echo   : logi FALSE
## 
## 
  |                                                                            
  |..........................................................            |  82%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............................................................          |  85%
## label: detailed_geneset_reports1d (with options) 
## List of 7
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 6
##  $ fig.width : num 6
##  $ out.width : chr "80%"
##  $ comment   : logi NA
##  $ message   : logi FALSE
## 
  |                                                                            
  |..............................................................        |  88%
##   ordinary text without R code
## 
## 
  |                                                                            
  |................................................................      |  91%
## label: detailed_geneset_reports2d (with options) 
## List of 7
##  $ results   : chr "asis"
##  $ echo      : logi FALSE
##  $ fig.height: num 5
##  $ fig.width : num 6
##  $ out.width : chr "80%"
##  $ comment   : logi NA
##  $ message   : logi FALSE
## 
## 
  |                                                                            
  |..................................................................    |  94%
##   ordinary text without R code
## 
## 
  |                                                                            
  |....................................................................  |  97%
## label: session_info (with options) 
## List of 3
##  $ include: logi TRUE
##  $ echo   : logi TRUE
##  $ results: chr "markup"
## 
## 
  |                                                                            
  |......................................................................| 100%
##   ordinary text without R code
## output file: /mnt/data/mdz/projects/gmea/mitch.knit.md
## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/gmea/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmphV0LWD/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --standalone --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmphV0LWD/rmarkdown-str3441fa75898b7.html
## 
## Output created: /tmp/RtmphV0LWD/mitch_report.html
## [1] TRUE

Session Information

For reproducibility.

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pkgload_1.3.0                                     
##  [2] GGally_2.1.2                                      
##  [3] ggplot2_3.3.6                                     
##  [4] reshape2_1.4.4                                    
##  [5] beeswarm_0.4.0                                    
##  [6] gplots_3.1.3                                      
##  [7] gtools_3.9.3                                      
##  [8] tibble_3.1.7                                      
##  [9] echarts4r_0.4.4                                   
## [10] tictoc_1.0.1                                      
## [11] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [12] IlluminaHumanMethylation450kmanifest_0.4.0        
## [13] minfi_1.42.0                                      
## [14] bumphunter_1.38.0                                 
## [15] locfit_1.5-9.6                                    
## [16] iterators_1.0.14                                  
## [17] foreach_1.5.2                                     
## [18] Biostrings_2.64.0                                 
## [19] XVector_0.36.0                                    
## [20] SummarizedExperiment_1.26.1                       
## [21] Biobase_2.56.0                                    
## [22] MatrixGenerics_1.8.1                              
## [23] matrixStats_0.62.0                                
## [24] GenomicRanges_1.48.0                              
## [25] GenomeInfoDb_1.32.2                               
## [26] IRanges_2.30.0                                    
## [27] S4Vectors_0.34.0                                  
## [28] BiocGenerics_0.42.0                               
## [29] limma_3.52.2                                      
## [30] mitch_1.8.0                                       
## [31] eulerr_6.1.1                                      
## [32] kableExtra_1.3.4                                  
## [33] dplyr_1.0.9                                       
## 
## loaded via a namespace (and not attached):
##   [1] BiocFileCache_2.4.0       systemfonts_1.0.4        
##   [3] plyr_1.8.7                splines_4.2.0            
##   [5] BiocParallel_1.30.3       digest_0.6.29            
##   [7] htmltools_0.5.3           fansi_1.0.3              
##   [9] magrittr_2.0.3            memoise_2.0.1            
##  [11] tzdb_0.3.0                readr_2.1.2              
##  [13] annotate_1.74.0           svglite_2.1.0            
##  [15] askpass_1.1               siggenes_1.70.0          
##  [17] prettyunits_1.1.1         colorspace_2.0-3         
##  [19] blob_1.2.3                rvest_1.0.2              
##  [21] rappdirs_0.3.3            xfun_0.31                
##  [23] crayon_1.5.1              RCurl_1.98-1.7           
##  [25] jsonlite_1.8.0            genefilter_1.78.0        
##  [27] GEOquery_2.64.2           survival_3.3-1           
##  [29] glue_1.6.2                gtable_0.3.0             
##  [31] zlibbioc_1.42.0           webshot_0.5.3            
##  [33] DelayedArray_0.22.0       Rhdf5lib_1.18.2          
##  [35] HDF5Array_1.24.1          scales_1.2.0             
##  [37] DBI_1.1.3                 rngtools_1.5.2           
##  [39] Rcpp_1.0.9                viridisLite_0.4.0        
##  [41] xtable_1.8-4              progress_1.2.2           
##  [43] bit_4.0.4                 mclust_5.4.10            
##  [45] preprocessCore_1.58.0     htmlwidgets_1.5.4        
##  [47] httr_1.4.3                RColorBrewer_1.1-3       
##  [49] ellipsis_0.3.2            pkgconfig_2.0.3          
##  [51] reshape_0.8.9             XML_3.99-0.10            
##  [53] sass_0.4.2                dbplyr_2.2.1             
##  [55] utf8_1.2.2                tidyselect_1.1.2         
##  [57] rlang_1.0.4               later_1.3.0              
##  [59] AnnotationDbi_1.58.0      munsell_0.5.0            
##  [61] tools_4.2.0               cachem_1.0.6             
##  [63] cli_3.3.0                 generics_0.1.3           
##  [65] RSQLite_2.2.15            evaluate_0.15            
##  [67] stringr_1.4.0             fastmap_1.1.0            
##  [69] yaml_2.3.5                knitr_1.39               
##  [71] bit64_4.0.5               beanplot_1.3.1           
##  [73] scrime_1.3.5              caTools_1.18.2           
##  [75] purrr_0.3.4               KEGGREST_1.36.3          
##  [77] nlme_3.1-158              doRNG_1.8.2              
##  [79] sparseMatrixStats_1.8.0   mime_0.12                
##  [81] nor1mix_1.3-0             xml2_1.3.3               
##  [83] biomaRt_2.52.0            compiler_4.2.0           
##  [85] rstudioapi_0.13           filelock_1.0.2           
##  [87] curl_4.3.2                png_0.1-7                
##  [89] bslib_0.4.0               stringi_1.7.8            
##  [91] highr_0.9                 GenomicFeatures_1.48.3   
##  [93] lattice_0.20-45           Matrix_1.4-1             
##  [95] multtest_2.52.0           vctrs_0.4.1              
##  [97] pillar_1.8.0              lifecycle_1.0.1          
##  [99] rhdf5filters_1.8.0        jquerylib_0.1.4          
## [101] data.table_1.14.2         bitops_1.0-7             
## [103] httpuv_1.6.5              rtracklayer_1.56.1       
## [105] R6_2.5.1                  BiocIO_1.6.0             
## [107] promises_1.2.0.1          KernSmooth_2.23-20       
## [109] gridExtra_2.3             codetools_0.2-18         
## [111] MASS_7.3-58               assertthat_0.2.1         
## [113] rhdf5_2.40.0              openssl_2.0.2            
## [115] rjson_0.2.21              withr_2.5.0              
## [117] GenomicAlignments_1.32.0  Rsamtools_2.12.0         
## [119] GenomeInfoDbData_1.2.8    hms_1.1.1                
## [121] quadprog_1.5-8            grid_4.2.0               
## [123] tidyr_1.2.0               base64_2.0               
## [125] rmarkdown_2.14            DelayedMatrixStats_1.18.0
## [127] illuminaio_0.38.0         shiny_1.7.2              
## [129] restfulr_0.0.15