Introduction
Previously, Mandhri and I analysed the B-PROOF 450K data, trying to understand whether vitamin supplementation caused changes in gene methylation. We used Limma and some basic analyses, which showed no specific probes with FDR<0.05, nor any DMRs.
In this analysis we will use the principle of Gene Set Enrichment Analysis, applying it to many probes belonging to genes. If the probes are trending in concert, then we can make some judgement about the enrichment of those probes. The statistical test used is the Wilcox test, which can be applied to competitive or self contained test types.
library("parallel")
library("dplyr")
library("kableExtra")
library("eulerr")
library("mitch")
library("tictoc")
CORES= detectCores()
Reactome gene sets
#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")
Functions
# Gene methylation enrichment analysis
calc_sc <- function(dm) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=CORES)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a[2],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=CORES)
df <- do.call(rbind,l)
gme_res <- mclapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
wtselfcont <- wilcox.test(tstats)
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,"median"=mymedian,
"p-value(sc)"=wtselfcont$p.value)
} , mc.cores=CORES )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,4])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$`fdr(sc)` <- p.adjust(gme_res_df$`p-value(sc)`)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
gmea_volc <- function(res) {
sig <- subset(res,`fdr(sc)` < 0.05)
plot(res$median , -log10(res$`p-value(sc)`) ,
xlab="effect size (mean t-stat)", ylab="-log10(p-value)",
pch=19, cex=0.5, col="gray",main="self contained test")
grid()
points(sig$median , -log10(sig$`p-value(sc)`) ,
pch=19, cex=0.5, col="red")
}
gmea_barplot <- function(res) {
par(mfrow=c(1,2))
n=50
gs <- head(rownames(res),50)
tstats <- lapply(gs, function(g) {
df[which(df$genes==g),"tvals"]
})
names(tstats) <- gs
tstats <- tstats[order(unlist(lapply(tstats,median)))]
boxplot(tstats,horizontal=TRUE,las=1,
main="smallest p-val(selfcont)",cex.axis=0.6,
xlab="t-statistic")
grid()
sig <- subset(res,`fdr(sc)` < 0.05)
gs <- head(rownames(sig[order(-abs(sig$median)),]),n)
if ( length(gs) >2 ) {
tstats <- lapply(gs, function(g) {
df[which(df$genes==g),"tvals"]
})
names(tstats) <- gs
tstats <- tstats[order(unlist(lapply(tstats,median)))]
boxplot(tstats,horizontal=TRUE,las=1,
main="biggest effect size(median)",cex.axis=0.6,
xlab="t-statistic")
grid()
} else {
plot(1)
mtext("too few significant genes found")
}
par(mfrow=c(1,1))
}
DM1 - post trial supplement versus placebo
Whole gene analysis
dm <- read.table("dma1a.tsv")
head(dm,50) %>% kbl(caption = "Top significant genes with limma") %>% kable_paper("hover", full_width = F)
Top significant genes with limma
|
Row.names
|
UCSC_RefGene_Name
|
Regulatory_Feature_Group
|
logFC
|
AveExpr
|
t
|
P.Value
|
adj.P.Val
|
B
|
cg11922164
|
cg11922164
|
SYT15;SYT15
|
|
-0.3241503
|
3.0811458
|
-5.467839
|
0.0000004
|
0.1762222
|
2.8609369
|
cg17055959
|
cg17055959
|
RB1
|
Promoter_Associated
|
-0.2589536
|
-3.5513962
|
-4.489903
|
0.0000214
|
0.9999898
|
0.6028356
|
cg09095400
|
cg09095400
|
MCF2L;MCF2L
|
|
-0.2864056
|
3.0671992
|
-4.464955
|
0.0000235
|
0.9999898
|
0.5477069
|
cg00079169
|
cg00079169
|
THOP1
|
|
-0.2955543
|
3.1975846
|
-4.437042
|
0.0000262
|
0.9999898
|
0.4862035
|
cg22907174
|
cg22907174
|
|
Unclassified_Cell_type_specific
|
-0.2797225
|
-3.7484900
|
-4.185434
|
0.0000670
|
0.9999898
|
-0.0593122
|
cg00485312
|
cg00485312
|
RADIL
|
|
0.4204328
|
3.4730548
|
4.174616
|
0.0000697
|
0.9999898
|
-0.0823910
|
cg14356440
|
cg14356440
|
MGAT5
|
|
-0.3103486
|
3.2447286
|
-4.052258
|
0.0001088
|
0.9999898
|
-0.3411438
|
cg02007493
|
cg02007493
|
NUDT14
|
Unclassified
|
-0.2179231
|
-3.7401126
|
-3.965567
|
0.0001483
|
0.9999898
|
-0.5218415
|
cg26973953
|
cg26973953
|
GNPTG
|
|
-0.3541682
|
5.9713884
|
-3.958323
|
0.0001522
|
0.9999898
|
-0.5368395
|
cg15693572
|
cg15693572
|
|
|
-0.4938634
|
0.7629543
|
-3.919761
|
0.0001745
|
0.9999898
|
-0.6164053
|
cg16895973
|
cg16895973
|
TOLLIP
|
|
0.3297518
|
2.6982157
|
3.903079
|
0.0001850
|
0.9999898
|
-0.6506847
|
cg20451722
|
cg20451722
|
MX1
|
Promoter_Associated
|
-0.1698344
|
-3.3514466
|
-3.900823
|
0.0001865
|
0.9999898
|
-0.6553134
|
cg26966630
|
cg26966630
|
CBLN2
|
|
0.4062536
|
-2.9670074
|
3.891735
|
0.0001926
|
0.9999898
|
-0.6739459
|
cg03852670
|
cg03852670
|
SLC10A4
|
Unclassified_Cell_type_specific
|
-0.4430002
|
-0.7967887
|
-3.881409
|
0.0001997
|
0.9999898
|
-0.6950842
|
cg08998375
|
cg08998375
|
TBL3
|
|
-0.1877786
|
2.5459320
|
-3.855284
|
0.0002188
|
0.9999898
|
-0.7484145
|
cg15672304
|
cg15672304
|
GPX5;GPX5
|
|
-0.3336515
|
3.1181097
|
-3.837698
|
0.0002327
|
0.9999898
|
-0.7841908
|
cg20089935
|
cg20089935
|
ANKLE2
|
|
-0.2915339
|
4.0051127
|
-3.811572
|
0.0002548
|
0.9999898
|
-0.8371613
|
cg15828427
|
cg15828427
|
FBXW9
|
Unclassified
|
-0.2583253
|
0.5795931
|
-3.806959
|
0.0002589
|
0.9999898
|
-0.8464900
|
cg08267701
|
cg08267701
|
TOMM40;TOMM40;TOMM40
|
Promoter_Associated
|
-0.1743045
|
-3.1175045
|
-3.799373
|
0.0002658
|
0.9999898
|
-0.8618177
|
cg05392577
|
cg05392577
|
|
|
0.3437963
|
2.9223743
|
3.775844
|
0.0002884
|
0.9999898
|
-0.9092403
|
cg01142579
|
cg01142579
|
|
|
-0.3248540
|
3.3432889
|
-3.765804
|
0.0002985
|
0.9999898
|
-0.9294210
|
cg20954533
|
cg20954533
|
MAP1B
|
|
-0.4628496
|
-0.6760910
|
-3.756705
|
0.0003080
|
0.9999898
|
-0.9476804
|
cg05021312
|
cg05021312
|
CBFA2T2
|
Promoter_Associated
|
-0.2952828
|
2.5083399
|
-3.754228
|
0.0003107
|
0.9999898
|
-0.9526472
|
cg17396852
|
cg17396852
|
PIK3CG
|
|
-0.3209891
|
3.6737748
|
-3.749569
|
0.0003157
|
0.9999898
|
-0.9619841
|
cg23712359
|
cg23712359
|
ZEB1;ZEB1;ZEB1;ZEB1;ZEB1
|
Promoter_Associated
|
-0.2953219
|
-3.3975280
|
-3.742234
|
0.0003237
|
0.9999898
|
-0.9766652
|
cg09887667
|
cg09887667
|
NEDD4
|
|
-0.3996089
|
-3.2162534
|
-3.711208
|
0.0003600
|
0.9999898
|
-1.0385751
|
cg21697812
|
cg21697812
|
C11orf91
|
Promoter_Associated
|
-0.3423683
|
0.5214687
|
-3.709752
|
0.0003618
|
0.9999898
|
-1.0414727
|
cg21478490
|
cg21478490
|
PROP1;PROP1
|
|
-0.3961129
|
1.1593620
|
-3.704598
|
0.0003682
|
0.9999898
|
-1.0517233
|
cg07162820
|
cg07162820
|
NFATC4;NFATC4
|
Unclassified
|
-0.2672138
|
-1.0400681
|
-3.696270
|
0.0003788
|
0.9999898
|
-1.0682666
|
cg16728323
|
cg16728323
|
RAB20
|
Unclassified_Cell_type_specific
|
-0.2539616
|
3.0628003
|
-3.678793
|
0.0004021
|
0.9999898
|
-1.1029115
|
cg17479060
|
cg17479060
|
|
|
-0.2769604
|
4.3430224
|
-3.676850
|
0.0004047
|
0.9999898
|
-1.1067565
|
cg23666491
|
cg23666491
|
|
|
-0.2117727
|
2.6281734
|
-3.668545
|
0.0004163
|
0.9999898
|
-1.1231771
|
cg05616472
|
cg05616472
|
EHMT1;C9orf37;EHMT1
|
Promoter_Associated
|
-0.2821533
|
-2.0788718
|
-3.658848
|
0.0004302
|
0.9999898
|
-1.1423197
|
cg02852873
|
cg02852873
|
CNDP2
|
Promoter_Associated
|
-0.1398984
|
-2.0133284
|
-3.657861
|
0.0004316
|
0.9999898
|
-1.1442663
|
cg02779707
|
cg02779707
|
TAP2;TAP2
|
Promoter_Associated
|
-0.2886455
|
-3.3284846
|
-3.656625
|
0.0004334
|
0.9999898
|
-1.1467037
|
cg08836615
|
cg08836615
|
|
|
0.2845805
|
3.5093175
|
3.646502
|
0.0004485
|
0.9999898
|
-1.1666452
|
cg08837481
|
cg08837481
|
|
|
-0.2400255
|
3.3494209
|
-3.636667
|
0.0004637
|
0.9999898
|
-1.1859854
|
cg22304399
|
cg22304399
|
CACNA1A;CACNA1A
|
Unclassified
|
-0.2754963
|
-3.1766244
|
-3.632407
|
0.0004704
|
0.9999898
|
-1.1943525
|
cg08391356
|
cg08391356
|
|
Promoter_Associated
|
-0.1729776
|
-3.0377128
|
-3.626525
|
0.0004798
|
0.9999898
|
-1.2058949
|
cg10645648
|
cg10645648
|
HLA-DQA2
|
|
0.7902770
|
2.1075292
|
3.610488
|
0.0005063
|
0.9999898
|
-1.2373048
|
cg08844849
|
cg08844849
|
SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10;SIGLEC10
|
|
-0.2764699
|
1.6759984
|
-3.609514
|
0.0005080
|
0.9999898
|
-1.2392086
|
cg18034329
|
cg18034329
|
RABEP1;RABEP1
|
Promoter_Associated
|
-0.2081747
|
-3.1771846
|
-3.609420
|
0.0005082
|
0.9999898
|
-1.2393918
|
cg24181389
|
cg24181389
|
RPTOR;RPTOR
|
Gene_Associated_Cell_type_specific
|
-0.2960408
|
3.1936879
|
-3.605892
|
0.0005142
|
0.9999898
|
-1.2462881
|
cg15840079
|
cg15840079
|
TMEM132A;TMEM132A
|
Unclassified
|
-0.2210544
|
-2.1865179
|
-3.602825
|
0.0005195
|
0.9999898
|
-1.2522797
|
cg14686845
|
cg14686845
|
AGPAT5
|
Promoter_Associated
|
-0.1491522
|
-3.1939740
|
-3.599622
|
0.0005251
|
0.9999898
|
-1.2585348
|
cg16258224
|
cg16258224
|
|
|
-0.3368918
|
2.2019338
|
-3.598709
|
0.0005267
|
0.9999898
|
-1.2603163
|
cg24571822
|
cg24571822
|
CUBN
|
|
-0.2529204
|
-0.0429503
|
-3.598077
|
0.0005279
|
0.9999898
|
-1.2615487
|
cg01448562
|
cg01448562
|
|
|
-0.7166198
|
3.3952247
|
-3.592024
|
0.0005387
|
0.9999898
|
-1.2733538
|
cg21696208
|
cg21696208
|
MTFR1;MTFR1
|
Promoter_Associated
|
-0.3830576
|
-2.9811457
|
-3.590917
|
0.0005407
|
0.9999898
|
-1.2755117
|
cg16066221
|
cg16066221
|
CCDC55
|
|
-0.1847661
|
3.3401955
|
-3.589510
|
0.0005432
|
0.9999898
|
-1.2782531
|
dm <- dm[,c("UCSC_RefGene_Name","t")]
hist(dm$t,breaks=seq(from=-10,to=10,by=1))
tic() ; gmea <- calc_sc(dm) ; time2 <- toc()
## 41.776 sec elapsed
df <- gmea[[1]]
res <- gmea[[2]]
write.table(res,file="gmeawg_dm1.tsv")
head(res,50) %>% kbl(caption = "Top significant genes with GMEA") %>% kable_paper("hover", full_width = F)
Top significant genes with GMEA
|
nprobes
|
mean
|
median
|
p-value(sc)
|
sig
|
fdr(sc)
|
PCDHGA3
|
295
|
-0.7017497
|
-0.7444529
|
0e+00
|
25.235081
|
0.0000000
|
PCDHGA1
|
317
|
-0.6696821
|
-0.7108065
|
0e+00
|
25.184842
|
0.0000000
|
PCDHGA2
|
309
|
-0.6784529
|
-0.7113643
|
0e+00
|
25.051893
|
0.0000000
|
PCDHGB1
|
277
|
-0.7306268
|
-0.7648181
|
0e+00
|
24.888921
|
0.0000000
|
PCDHGA4
|
263
|
-0.7399222
|
-0.7805508
|
0e+00
|
23.847952
|
0.0000000
|
PCDHGB2
|
249
|
-0.7580196
|
-0.7805508
|
0e+00
|
23.310643
|
0.0000000
|
PCDHGA5
|
238
|
-0.7697027
|
-0.8120064
|
0e+00
|
22.465199
|
0.0000000
|
PCDHGB3
|
223
|
-0.7728903
|
-0.8105310
|
0e+00
|
21.153268
|
0.0000000
|
PCDHGA6
|
214
|
-0.7711420
|
-0.7850676
|
0e+00
|
20.069420
|
0.0000000
|
PCDHGA7
|
199
|
-0.7801077
|
-0.8105310
|
0e+00
|
18.267132
|
0.0000000
|
PCDHGB4
|
187
|
-0.7776558
|
-0.8105310
|
0e+00
|
16.634832
|
0.0000000
|
BRD2
|
166
|
-0.6702827
|
-0.6824132
|
0e+00
|
15.458711
|
0.0000000
|
PCDHGA8
|
171
|
-0.7915790
|
-0.8407872
|
0e+00
|
15.303684
|
0.0000000
|
PCDHGA9
|
144
|
-0.8577123
|
-0.8981900
|
0e+00
|
14.514405
|
0.0000000
|
TAPBP
|
148
|
-0.6818321
|
-0.6569560
|
0e+00
|
14.240174
|
0.0000000
|
PCDHGB6
|
139
|
-0.8533928
|
-0.8896805
|
0e+00
|
13.682237
|
0.0000000
|
PCDHGB5
|
156
|
-0.7970750
|
-0.8525708
|
0e+00
|
13.571377
|
0.0000000
|
PCDHGA10
|
129
|
-0.8126353
|
-0.8643544
|
0e+00
|
12.130833
|
0.0000000
|
NPAT
|
46
|
-0.9628576
|
-1.0425066
|
0e+00
|
10.976976
|
0.0000002
|
PCDHGB7
|
115
|
-0.7895743
|
-0.7805508
|
0e+00
|
10.300729
|
0.0000010
|
RNF39
|
86
|
-0.7788504
|
-0.7559190
|
0e+00
|
9.866275
|
0.0000027
|
KIAA1949
|
101
|
-0.6762457
|
-0.6361989
|
0e+00
|
9.719954
|
0.0000038
|
SGCE
|
88
|
-0.7888273
|
-0.7654908
|
0e+00
|
9.612636
|
0.0000049
|
RGL2
|
117
|
-0.5363788
|
-0.4756625
|
0e+00
|
8.624712
|
0.0000476
|
PCDHGA11
|
103
|
-0.7490053
|
-0.7596284
|
0e+00
|
8.544984
|
0.0000572
|
TAP1
|
100
|
-0.6159567
|
-0.5669570
|
0e+00
|
8.417318
|
0.0000768
|
PEG10
|
105
|
-0.6341071
|
-0.6657271
|
0e+00
|
8.384136
|
0.0000829
|
BAT4
|
73
|
-0.7966377
|
-0.8913803
|
0e+00
|
8.349961
|
0.0000896
|
PPT2
|
101
|
-0.5546419
|
-0.6200746
|
0e+00
|
8.327285
|
0.0000944
|
NBR2
|
49
|
-0.9728316
|
-1.0024802
|
0e+00
|
8.211310
|
0.0001233
|
TUBB
|
85
|
-0.6571760
|
-0.6208794
|
0e+00
|
8.204873
|
0.0001252
|
BRCA1
|
49
|
-0.9331738
|
-1.0000759
|
0e+00
|
7.976026
|
0.0002120
|
PSMB8
|
83
|
-0.6346065
|
-0.6118727
|
0e+00
|
7.726497
|
0.0003766
|
PFDN6
|
57
|
-0.8240819
|
-0.9209890
|
0e+00
|
7.357167
|
0.0008814
|
PSMB9
|
79
|
-0.6794080
|
-0.7169306
|
0e+00
|
7.301557
|
0.0010018
|
PRR3
|
74
|
-0.6640534
|
-0.6289730
|
1e-07
|
7.289274
|
0.0010305
|
GNMT
|
35
|
-0.7815110
|
-0.6911977
|
1e-07
|
7.063879
|
0.0017314
|
KILLIN
|
61
|
-0.7716617
|
-0.8054342
|
1e-07
|
7.058546
|
0.0017527
|
ZBTB22
|
77
|
-0.5499240
|
-0.4921742
|
1e-07
|
6.938364
|
0.0023114
|
HSPA1A
|
35
|
-0.9175739
|
-1.0721469
|
1e-07
|
6.859356
|
0.0027725
|
ATM
|
52
|
-0.7676261
|
-0.8333011
|
2e-07
|
6.647544
|
0.0045150
|
PBLD
|
23
|
-0.8300796
|
-0.5721446
|
2e-07
|
6.622660
|
0.0047810
|
LOC220930
|
25
|
-0.7068837
|
-0.6044628
|
3e-07
|
6.525750
|
0.0059760
|
TNXB
|
531
|
-0.1966839
|
-0.2114342
|
5e-07
|
6.344778
|
0.0090648
|
ANAPC13
|
29
|
-0.8779355
|
-0.9637643
|
5e-07
|
6.292119
|
0.0102328
|
VARS
|
102
|
-0.4198120
|
-0.3325725
|
5e-07
|
6.280618
|
0.0105069
|
BAT2
|
161
|
-0.3583810
|
-0.4181412
|
6e-07
|
6.259314
|
0.0110346
|
PTEN
|
59
|
-0.7268898
|
-0.8005707
|
6e-07
|
6.208860
|
0.0123934
|
PPP1R2P1
|
27
|
0.8317927
|
0.8893044
|
6e-07
|
6.193311
|
0.0128445
|
TUBG1
|
23
|
-0.7046680
|
-0.6052688
|
7e-07
|
6.145539
|
0.0143373
|
gmea_volc(res)
gmea_barplot(res)
dmscore <- data.frame( res$median * res$sig)
rownames(dmscore) <- rownames(res)
colnames(dmscore) <- "metric"
mres <- mitch_calc(x=dmscore, genesets=genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres$enrichment_result,20) %>% kbl(caption = "Top enriched gene sets with GMEA-Mitch") %>% kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
1162
|
SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs
|
11
|
0.0006817
|
-0.5913323
|
0.0023875
|
1163
|
SLBP independent Processing of Histone Pre-mRNAs
|
10
|
0.0012759
|
-0.5881498
|
0.0040958
|
1123
|
Resolution of D-Loop Structures
|
26
|
0.0000007
|
-0.5605327
|
0.0000073
|
898
|
Pexophagy
|
11
|
0.0013388
|
-0.5583872
|
0.0042321
|
1124
|
Resolution of D-loop Structures through Holliday Junction Intermediates
|
25
|
0.0000013
|
-0.5581285
|
0.0000117
|
56
|
Activation of the phototransduction cascade
|
11
|
0.0014072
|
0.5558885
|
0.0044192
|
395
|
Expression and translocation of olfactory receptors
|
331
|
0.0000000
|
0.5434341
|
0.0000000
|
238
|
Creation of C4 and C2 activators
|
14
|
0.0004401
|
0.5423983
|
0.0016158
|
284
|
Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function
|
20
|
0.0000272
|
-0.5417954
|
0.0001439
|
285
|
Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function
|
20
|
0.0000272
|
-0.5417954
|
0.0001439
|
286
|
Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function
|
20
|
0.0000272
|
-0.5417954
|
0.0001439
|
287
|
Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA2/RAD51/RAD51C binding function
|
20
|
0.0000272
|
-0.5417954
|
0.0001439
|
580
|
Impaired BRCA2 binding to PALB2
|
20
|
0.0000272
|
-0.5417954
|
0.0001439
|
224
|
Condensation of Prometaphase Chromosomes
|
11
|
0.0019424
|
-0.5394793
|
0.0057892
|
841
|
Olfactory Signaling Pathway
|
338
|
0.0000000
|
0.5383073
|
0.0000000
|
1148
|
SARS-CoV-2 modulates autophagy
|
11
|
0.0023314
|
-0.5299960
|
0.0068017
|
1125
|
Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)
|
22
|
0.0000179
|
-0.5280943
|
0.0001020
|
432
|
Fatty acids
|
15
|
0.0004555
|
0.5226522
|
0.0016605
|
1026
|
RNA Polymerase II Transcription Termination
|
50
|
0.0000000
|
-0.5142666
|
0.0000000
|
581
|
Impaired BRCA2 binding to RAD51
|
31
|
0.0000008
|
-0.5119085
|
0.0000078
|
#mitch_report(mres,outfile="gmeawg_mitch_dm1.html",overwrite=TRUE)
DM3 effect of homocysteine
Whole gene analysis
dm <- read.table("dma3a.tsv")
head(dm,50) %>% kbl(caption = "Top significant genes with limma") %>% kable_paper("hover", full_width = F)
Top significant genes with limma
|
Row.names
|
UCSC_RefGene_Name
|
Regulatory_Feature_Group
|
logFC
|
AveExpr
|
t
|
P.Value
|
adj.P.Val
|
B
|
cg04905210
|
cg04905210
|
POLR3D
|
|
-0.2917142
|
-2.8247113
|
-5.390393
|
2.00e-07
|
0.0957990
|
5.941825
|
cg09338148
|
cg09338148
|
|
Unclassified
|
-0.2931592
|
-1.1714089
|
-5.121719
|
8.00e-07
|
0.1687298
|
4.907067
|
cg04247967
|
cg04247967
|
|
|
-0.2054821
|
3.1801350
|
-4.965418
|
1.60e-06
|
0.2293803
|
4.322061
|
cg06458106
|
cg06458106
|
|
|
-0.1905124
|
1.6705818
|
-4.806972
|
3.30e-06
|
0.2433469
|
3.742284
|
cg26425904
|
cg26425904
|
OCA2
|
Unclassified
|
-0.2544221
|
-4.0153857
|
-4.740360
|
4.40e-06
|
0.2433469
|
3.502627
|
cg09762242
|
cg09762242
|
SIPA1;SIPA1
|
Promoter_Associated
|
-0.2101898
|
-2.5136127
|
-4.733232
|
4.60e-06
|
0.2433469
|
3.477128
|
cg19590707
|
cg19590707
|
SLC16A3;SLC16A3;SLC16A3
|
Promoter_Associated
|
-0.2482548
|
-1.2509036
|
-4.722777
|
4.80e-06
|
0.2433469
|
3.439780
|
cg10746622
|
cg10746622
|
PRSSL1
|
|
-0.1991931
|
1.5625330
|
-4.693210
|
5.40e-06
|
0.2433469
|
3.334483
|
cg11257888
|
cg11257888
|
RCAN3
|
|
0.2795356
|
0.4871632
|
4.691739
|
5.50e-06
|
0.2433469
|
3.329258
|
cg03622371
|
cg03622371
|
P2RY6;P2RY6;P2RY6;P2RY6
|
Unclassified
|
-0.2424592
|
-1.6765190
|
-4.679696
|
5.80e-06
|
0.2433469
|
3.286518
|
cg16971827
|
cg16971827
|
CBL
|
Gene_Associated_Cell_type_specific
|
0.3035900
|
1.7707651
|
4.648039
|
6.60e-06
|
0.2467538
|
3.174563
|
cg20997993
|
cg20997993
|
|
Unclassified
|
-0.2429900
|
1.9524246
|
-4.621663
|
7.40e-06
|
0.2467538
|
3.081715
|
cg08469255
|
cg08469255
|
DDR1;DDR1
|
Unclassified
|
0.2398657
|
1.4933506
|
4.615574
|
7.60e-06
|
0.2467538
|
3.060337
|
cg27557428
|
cg27557428
|
|
Unclassified
|
-0.2803875
|
-2.1681390
|
-4.585121
|
8.70e-06
|
0.2610062
|
2.953738
|
cg00493755
|
cg00493755
|
LYRM5;CASC1;CASC1;LYRM5;CASC1
|
Promoter_Associated
|
-0.2475869
|
-4.0864529
|
-4.539873
|
1.05e-05
|
0.2649257
|
2.796327
|
cg16879574
|
cg16879574
|
NHLRC1
|
Promoter_Associated
|
-0.2473504
|
-1.5752205
|
-4.527103
|
1.11e-05
|
0.2649257
|
2.752116
|
cg15742245
|
cg15742245
|
CD177
|
|
-0.2543110
|
1.9039638
|
-4.523418
|
1.12e-05
|
0.2649257
|
2.739376
|
cg23248150
|
cg23248150
|
TLL1
|
|
-0.2464022
|
-1.5240685
|
-4.522435
|
1.13e-05
|
0.2649257
|
2.735979
|
cg22948808
|
cg22948808
|
DGKZ
|
|
-0.2909225
|
-0.3489572
|
-4.503895
|
1.22e-05
|
0.2656216
|
2.672009
|
cg02845204
|
cg02845204
|
KRTAP5-9
|
|
-0.3070580
|
-0.4149132
|
-4.496832
|
1.26e-05
|
0.2656216
|
2.647692
|
cg03946955
|
cg03946955
|
LMNA;LMNA;LMNA
|
Unclassified_Cell_type_specific
|
-0.1824392
|
-2.3433644
|
-4.480178
|
1.35e-05
|
0.2690173
|
2.590467
|
cg15516314
|
cg15516314
|
RTL1;MIR431;MIR433
|
|
0.2392133
|
2.9600363
|
4.465319
|
1.44e-05
|
0.2690173
|
2.539550
|
cg22537604
|
cg22537604
|
CD177
|
|
-0.3193055
|
1.0749273
|
-4.460505
|
1.46e-05
|
0.2690173
|
2.523080
|
cg01345727
|
cg01345727
|
NEK3;NEK3;NEK3;NEK3
|
Promoter_Associated_Cell_type_specific
|
-0.2329252
|
1.6256861
|
-4.446244
|
1.55e-05
|
0.2736441
|
2.474372
|
cg19284277
|
cg19284277
|
SLC16A3;SLC16A3;SLC16A3
|
|
-0.2463026
|
-1.8395228
|
-4.434600
|
1.63e-05
|
0.2749055
|
2.434691
|
cg03032053
|
cg03032053
|
MICALL2
|
Unclassified_Cell_type_specific
|
-0.1577248
|
0.3915575
|
-4.417976
|
1.75e-05
|
0.2749055
|
2.378177
|
cg23659250
|
cg23659250
|
BRD1;LOC90834
|
|
-0.2598981
|
0.6604919
|
-4.413930
|
1.78e-05
|
0.2749055
|
2.364445
|
cg14204064
|
cg14204064
|
ZNF8
|
Promoter_Associated
|
-0.2727535
|
-3.0330794
|
-4.400132
|
1.88e-05
|
0.2749055
|
2.317694
|
cg00772000
|
cg00772000
|
NHLRC1
|
Promoter_Associated
|
-0.2612511
|
-1.8626859
|
-4.395918
|
1.92e-05
|
0.2749055
|
2.303438
|
cg18817487
|
cg18817487
|
HAL
|
|
-0.3791862
|
-1.7673383
|
-4.381199
|
2.04e-05
|
0.2749055
|
2.253729
|
cg16223079
|
cg16223079
|
PPCDC
|
Unclassified_Cell_type_specific
|
-0.2580476
|
-1.8276656
|
-4.380895
|
2.04e-05
|
0.2749055
|
2.252702
|
cg05305327
|
cg05305327
|
CBFA2T2;CBFA2T2;CBFA2T2
|
|
0.2044785
|
2.4023989
|
4.375843
|
2.08e-05
|
0.2749055
|
2.235670
|
cg26954174
|
cg26954174
|
NOD2
|
Unclassified_Cell_type_specific
|
-0.3098398
|
-1.8856005
|
-4.351680
|
2.30e-05
|
0.2944766
|
2.154421
|
cg16659510
|
cg16659510
|
BANP;BANP
|
Promoter_Associated_Cell_type_specific
|
-0.2237425
|
2.5640167
|
-4.336119
|
2.45e-05
|
0.2994994
|
2.102280
|
cg08521073
|
cg08521073
|
HRK
|
|
-0.2342466
|
-2.1812465
|
-4.332839
|
2.49e-05
|
0.2994994
|
2.091309
|
cg05788125
|
cg05788125
|
|
|
-0.2164090
|
0.5929679
|
-4.318979
|
2.63e-05
|
0.2994994
|
2.045016
|
cg15657641
|
cg15657641
|
|
|
-0.3045296
|
-1.6478040
|
-4.308557
|
2.75e-05
|
0.2994994
|
2.010284
|
cg00138407
|
cg00138407
|
KLHL18
|
|
-0.2310495
|
1.3743589
|
-4.299105
|
2.85e-05
|
0.2994994
|
1.978839
|
cg08533162
|
cg08533162
|
ERI3
|
|
-0.2838418
|
-0.7553821
|
-4.282412
|
3.05e-05
|
0.2994994
|
1.923439
|
cg00822241
|
cg00822241
|
FUT2;FUT2
|
|
-0.2413571
|
-1.0170752
|
-4.260359
|
3.34e-05
|
0.2994994
|
1.850510
|
cg24568579
|
cg24568579
|
|
Unclassified
|
-0.2454344
|
-1.2262927
|
-4.255112
|
3.41e-05
|
0.2994994
|
1.833200
|
cg27392804
|
cg27392804
|
|
|
-0.2126822
|
-2.0892490
|
-4.248151
|
3.51e-05
|
0.2994994
|
1.810262
|
cg01063280
|
cg01063280
|
|
|
-0.2659171
|
1.0708141
|
-4.246661
|
3.53e-05
|
0.2994994
|
1.805356
|
cg00504782
|
cg00504782
|
|
|
-0.1861498
|
0.1335070
|
-4.233364
|
3.72e-05
|
0.2994994
|
1.761634
|
cg19203575
|
cg19203575
|
ZNF323;ZKSCAN3;ZNF323
|
|
-0.1781090
|
-0.8128462
|
-4.220555
|
3.92e-05
|
0.2994994
|
1.719618
|
cg06625077
|
cg06625077
|
CTRL
|
Gene_Associated_Cell_type_specific
|
0.1262006
|
2.8187132
|
4.209985
|
4.09e-05
|
0.2994994
|
1.685023
|
cg11866943
|
cg11866943
|
STAB1
|
Unclassified_Cell_type_specific
|
-0.2082625
|
-1.2756607
|
-4.207151
|
4.14e-05
|
0.2994994
|
1.675757
|
cg26923863
|
cg26923863
|
CTBP1;CTBP1
|
|
-0.3035588
|
-0.9720327
|
-4.195757
|
4.33e-05
|
0.2994994
|
1.638563
|
cg18963859
|
cg18963859
|
|
|
-0.2795682
|
0.7305054
|
-4.189956
|
4.43e-05
|
0.2994994
|
1.619656
|
cg19409163
|
cg19409163
|
|
|
-0.1835966
|
-0.0363520
|
-4.179232
|
4.63e-05
|
0.2994994
|
1.584758
|
dm <- dm[,c("UCSC_RefGene_Name","t")]
hist(dm$t,breaks=seq(from=-10,to=10,by=1))
tic() ; gmea <- calc_sc(dm) ; time2 <- toc()
## 42.999 sec elapsed
df <- gmea[[1]]
res <- gmea[[2]]
write.table(res,file="gmeawg_dm3.tsv")
head(res,50) %>% kbl(caption = "Top significant genes with GMEA") %>% kable_paper("hover", full_width = F)
Top significant genes with GMEA
|
nprobes
|
mean
|
median
|
p-value(sc)
|
sig
|
fdr(sc)
|
TNXB
|
531
|
0.3429080
|
0.4078607
|
0e+00
|
14.041475
|
0.0000000
|
PCDHA1
|
162
|
-0.7077910
|
-0.6245086
|
0e+00
|
13.496797
|
0.0000000
|
NNAT
|
49
|
-0.9419415
|
-0.9392397
|
0e+00
|
13.303312
|
0.0000000
|
PCDHA2
|
149
|
-0.7188634
|
-0.6267169
|
0e+00
|
12.485679
|
0.0000000
|
PCDHA3
|
141
|
-0.7189282
|
-0.6267169
|
0e+00
|
11.583810
|
0.0000001
|
KCNQ1DN
|
39
|
-1.6144397
|
-1.7080108
|
0e+00
|
11.439140
|
0.0000001
|
TAP1
|
100
|
0.8373978
|
0.8170222
|
0e+00
|
10.577873
|
0.0000005
|
NCOR2
|
212
|
-0.6647509
|
-0.5447866
|
0e+00
|
10.345470
|
0.0000009
|
PCDHGA1
|
317
|
-0.4141393
|
-0.4153141
|
0e+00
|
10.238108
|
0.0000012
|
PCDHA4
|
131
|
-0.6779619
|
-0.6026401
|
0e+00
|
10.042257
|
0.0000018
|
PCDHGA2
|
309
|
-0.4165786
|
-0.4153141
|
0e+00
|
9.987553
|
0.0000021
|
PCDHGA3
|
295
|
-0.4180524
|
-0.3901466
|
0e+00
|
9.576696
|
0.0000053
|
MESTIT1
|
59
|
-0.7596278
|
-0.7939385
|
0e+00
|
9.544056
|
0.0000057
|
PCDHA5
|
122
|
-0.6998042
|
-0.6245086
|
0e+00
|
9.542358
|
0.0000058
|
NKX6-2
|
36
|
-1.3952706
|
-1.4319593
|
0e+00
|
9.536050
|
0.0000058
|
SOX2OT
|
85
|
-0.7468762
|
-0.8104998
|
0e+00
|
9.479982
|
0.0000066
|
PCDHA7
|
103
|
-0.7321981
|
-0.6267169
|
0e+00
|
9.375369
|
0.0000085
|
C11orf21
|
36
|
-1.7668062
|
-1.9463963
|
0e+00
|
9.257296
|
0.0000111
|
PITX2
|
64
|
-1.0007742
|
-0.9208371
|
0e+00
|
9.163509
|
0.0000138
|
PCDHA6
|
114
|
-0.6916765
|
-0.6245086
|
0e+00
|
8.749552
|
0.0000357
|
TSPAN32
|
42
|
-1.6254336
|
-1.9412022
|
0e+00
|
8.645175
|
0.0000454
|
MEST
|
85
|
-0.6582939
|
-0.7858506
|
0e+00
|
8.607705
|
0.0000495
|
ASCL2
|
49
|
-0.8873371
|
-0.9068647
|
0e+00
|
8.538922
|
0.0000580
|
WT1
|
60
|
-1.0380025
|
-1.0581139
|
0e+00
|
8.537528
|
0.0000582
|
PCDHGB1
|
277
|
-0.4040374
|
-0.3512600
|
0e+00
|
8.169027
|
0.0001360
|
PCDHA8
|
92
|
-0.7250898
|
-0.6311017
|
0e+00
|
8.158124
|
0.0001394
|
KIAA1949
|
101
|
0.7427567
|
0.6964985
|
0e+00
|
8.107412
|
0.0001567
|
PCDHA10
|
81
|
-0.7546498
|
-0.6842488
|
0e+00
|
7.961957
|
0.0002190
|
PCDHA9
|
84
|
-0.7351768
|
-0.6598677
|
0e+00
|
7.948914
|
0.0002257
|
PSMB8
|
83
|
0.8313260
|
0.6488706
|
0e+00
|
7.945020
|
0.0002277
|
PRDM13
|
41
|
-0.9251076
|
-0.9595314
|
0e+00
|
7.870850
|
0.0002701
|
GNA12
|
84
|
-0.9539754
|
-0.7963257
|
0e+00
|
7.622375
|
0.0004787
|
PCDHGA4
|
263
|
-0.4030756
|
-0.3512600
|
0e+00
|
7.620039
|
0.0004812
|
MAD1L1
|
684
|
-0.2827612
|
-0.2513758
|
0e+00
|
7.532317
|
0.0005889
|
SFRP2
|
44
|
-1.1033457
|
-1.2056608
|
0e+00
|
7.518236
|
0.0006083
|
SOX1
|
27
|
-1.2956875
|
-1.5392701
|
0e+00
|
7.349659
|
0.0008967
|
ZIC1
|
40
|
-1.0223609
|
-1.1114647
|
1e-07
|
7.295516
|
0.0010157
|
SPON2
|
30
|
-1.0371460
|
-1.0318217
|
1e-07
|
7.096401
|
0.0016064
|
TLX1
|
36
|
-0.7858743
|
-0.8711780
|
1e-07
|
7.094355
|
0.0016139
|
PPP1R2P1
|
27
|
-1.2635175
|
-1.2835267
|
1e-07
|
6.826780
|
0.0029884
|
TBX15
|
57
|
-0.8784010
|
-0.9985338
|
2e-07
|
6.710128
|
0.0039091
|
HLA-E
|
64
|
0.9883267
|
0.9471868
|
2e-07
|
6.621543
|
0.0047933
|
SOX2
|
31
|
-0.9356689
|
-0.9861379
|
4e-07
|
6.380592
|
0.0083477
|
HLA-J
|
60
|
-0.9474161
|
-0.9835159
|
4e-07
|
6.363271
|
0.0086869
|
TFAP2A
|
77
|
-0.6823000
|
-0.6389762
|
4e-07
|
6.359493
|
0.0087624
|
PCDHGB2
|
249
|
-0.3779342
|
-0.2994303
|
5e-07
|
6.334383
|
0.0092835
|
TBX2
|
42
|
-0.9192182
|
-0.9768360
|
5e-07
|
6.283615
|
0.0104341
|
NR2E1
|
55
|
-0.7936621
|
-0.8098274
|
6e-07
|
6.236670
|
0.0116246
|
DMRTA2
|
32
|
-0.8190626
|
-0.8833632
|
6e-07
|
6.231215
|
0.0117710
|
LOC100128811
|
24
|
-0.9214760
|
-1.0762458
|
6e-07
|
6.224720
|
0.0119478
|
gmea_volc(res)
gmea_barplot(res)
dmscore <- data.frame( res$median * res$sig)
rownames(dmscore) <- rownames(res)
colnames(dmscore) <- "metric"
mres <- mitch_calc(x=dmscore, genesets=genesets, priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres$enrichment_result,20) %>% kbl(caption = "Top enriched gene sets with GMEA-Mitch") %>% kable_paper("hover", full_width = F)
Top enriched gene sets with GMEA-Mitch
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
797
|
Negative regulation of activity of TFAP2 (AP-2) family transcription factors
|
10
|
0.0001209
|
-0.7018024
|
0.0009285
|
1099
|
Regulation of commissural axon pathfinding by SLIT and ROBO
|
10
|
0.0003908
|
-0.6474208
|
0.0025714
|
87
|
Apoptotic cleavage of cell adhesion proteins
|
11
|
0.0002096
|
-0.6453083
|
0.0014958
|
1418
|
Transcriptional regulation of testis differentiation
|
12
|
0.0002348
|
-0.6130531
|
0.0016520
|
1386
|
Thyroxine biosynthesis
|
10
|
0.0034244
|
-0.5343657
|
0.0159178
|
879
|
POU5F1 (OCT4), SOX2, NANOG repress genes related to differentiation
|
10
|
0.0055806
|
-0.5060048
|
0.0228640
|
367
|
ERBB2 Activates PTK6 Signaling
|
13
|
0.0018315
|
-0.4990366
|
0.0092526
|
796
|
Negative regulation of TCF-dependent signaling by WNT ligand antagonists
|
15
|
0.0009549
|
-0.4925046
|
0.0053957
|
64
|
Adenylate cyclase activating pathway
|
10
|
0.0081256
|
-0.4832503
|
0.0312683
|
29
|
Acetylcholine Neurotransmitter Release Cycle
|
16
|
0.0009281
|
-0.4780294
|
0.0052637
|
479
|
GABA synthesis, release, reuptake and degradation
|
19
|
0.0003437
|
-0.4742558
|
0.0022913
|
923
|
Platelet sensitization by LDL
|
17
|
0.0008191
|
0.4686399
|
0.0047701
|
275
|
Defective B3GALTL causes PpS
|
34
|
0.0000029
|
-0.4632690
|
0.0000521
|
1360
|
TP53 regulates transcription of additional cell cycle genes whose exact role in the p53 pathway remain uncertain
|
19
|
0.0004924
|
0.4616661
|
0.0031316
|
1167
|
SRP-dependent cotranslational protein targeting to membrane
|
105
|
0.0000000
|
0.4519886
|
0.0000000
|
1162
|
SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs
|
11
|
0.0094503
|
0.4518113
|
0.0354189
|
1340
|
TFAP2 (AP-2) family regulates transcription of growth factors and their receptors
|
12
|
0.0074999
|
-0.4456644
|
0.0295334
|
837
|
O-glycosylation of TSR domain-containing proteins
|
35
|
0.0000051
|
-0.4453420
|
0.0000803
|
168
|
Carnitine metabolism
|
11
|
0.0106185
|
-0.4447950
|
0.0388601
|
385
|
Erythrocytes take up carbon dioxide and release oxygen
|
12
|
0.0079199
|
-0.4426103
|
0.0306439
|
#mitch_report(mres,outfile="gmeawg_mitch_dm3.html",overwrite=TRUE)