Source: https://github.com/markziemann/TODO
Introduction
Here we establish the methylation profile of colorectal cancer versus healthy tissue in order to understand whether this signature can be observed in the blood of individuals who will soon be diagnosed with the disease.
suppressPackageStartupMessages({
library("mitch")
library("HGNChelper")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("kableExtra")
library("vioplot")
library("beeswarm")
library("RhpcBLASctl")
library("eulerr")
library("gplots")
})
RhpcBLASctl::blas_set_num_threads(1)
Load cancer data
dm <- readRDS("GSE199057_limma.rds")
head(dm)
## logFC AveExpr t P.Value adj.P.Val B
## cg18430990 2.258776 -1.647987 28.17418 3.225054e-63 2.504322e-57 132.9933
## cg16306898 3.444725 -2.803218 26.80747 2.061958e-60 8.005768e-55 126.6744
## cg16601494 3.973390 -2.205109 26.51770 8.338578e-60 2.158360e-54 125.3060
## cg17301223 2.883374 -1.007534 26.12827 5.536985e-59 1.074896e-53 123.4511
## cg03241244 3.747524 -1.779332 25.27802 3.676459e-57 5.709695e-52 119.3366
## cg16300300 2.629925 -1.230023 24.80718 3.896248e-56 5.042531e-51 117.0195
# Fetch the EPIC array annotation and keep the gene and CpG-island context
# columns as a plain data frame.
anno_cols <- c("UCSC_RefGene_Name", "UCSC_RefGene_Group",
  "Islands_Name", "Relation_to_Island")
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[, anno_cols])
head(myann)
## UCSC_RefGene_Name UCSC_RefGene_Group Islands_Name
## cg18478105 YTHDF1 TSS200 chr20:61846843-61848103
## cg09835024 EIF2S3 TSS1500 chrX:24072558-24073135
## cg14361672 PKN3 TSS1500 chr9:131464843-131465830
## cg01763666 CCDC57 Body
## cg12950382 INF2;INF2 Body;Body
## cg02115394 CDC16;CDC16 TSS200;TSS200 chr13:115000148-115000874
## Relation_to_Island
## cg18478105 Island
## cg09835024 Island
## cg14361672 N_Shore
## cg01763666 OpenSea
## cg12950382 OpenSea
## cg02115394 Island
# Build a long probe-to-gene table with one row per (gene, probe) pair.
# The annotation stores several gene symbols per probe separated by ";",
# so split them, drop duplicates within a probe, and stack the result.
gp <- myann[, "UCSC_RefGene_Name", drop = FALSE]
gp2 <- lapply(
  setNames(strsplit(gp$UCSC_RefGene_Name, ";"), rownames(gp)),
  unique
)
gt1 <- setNames(stack(gp2), c("gene", "probe"))
# stack() returns the probe IDs as a factor; keep them as plain strings.
gt1$probe <- as.character(gt1$probe)
dim(gt1)
## [1] 684970 2
head(gt1)
## gene probe
## 1 YTHDF1 cg18478105
## 2 EIF2S3 cg09835024
## 3 PKN3 cg14361672
## 4 CCDC57 cg01763666
## 5 INF2 cg12950382
## 6 CDC16 cg02115394
Load gene sets
# Import the gene set libraries from their GMT files.
gs1 <- mitch::gmt_import("../../ReactomePathways_2024-12-13.gmt")
gs2 <- mitch::gmt_import("../../c5.go.v2024.1.Hs.symbols.gmt")
# Show the c5 GO set names with spaces instead of underscores.
names(gs2) <- gsub("_", " ", names(gs2), fixed = TRUE)
# Combined collection passed to mitch_calc() below.
gs <- c(gs1, gs2)
gs3 <- mitch::gmt_import("../../go_2024-11.gmt")
# Keep only the sets with more than five members.
gs3 <- gs3[lengths(gs3) > 5]
length(gs3)
## [1] 6929
Update deprecated gene symbols
new.hgnc.table <- getCurrentHumanMap()
## Using the already downloaded hgnc_complete_set.txt file.
fix <- checkGeneSymbols(gt1$gene,map=new.hgnc.table)
## Warning in checkGeneSymbols(gt1$gene, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(gt1$gene, map = new.hgnc.table): x contains
## non-approved gene symbols
# Rows whose supplied symbol differs from the suggested one; subset() leaves
# out rows where the comparison evaluates to NA.
fix2 <- subset(fix, x != Suggested.Symbol)
# Number of distinct input symbols that would be changed.
length(unique(fix2$x))
## [1] 3300
# Overwrite the gene column with the suggested symbol for every row. This
# relies on `fix` having one row per row of gt1, in the same order.
# NOTE(review): rows for which checkGeneSymbols() offered no suggestion
# presumably receive NA here and may be dropped downstream -- confirm that
# this is intended.
gt1$gene <- fix$Suggested.Symbol
head(gt1)
## gene probe
## 1 YTHDF1 cg18478105
## 2 EIF2S3 cg09835024
## 3 PKN3 cg14361672
## 4 CCDC57 cg01763666
## 5 INF2 cg12950382
## 6 CDC16 cg02115394
Run mitch (cancer v normal)
y <- mitch_import(dm,DEtype="limma",geneTable=gt1)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 776521
## Note: no. genes in output = 22227
## Warning in mitch_import(dm, DEtype = "limma", geneTable = gt1): Warning: less than half of the input genes are also in the
## output
head(y)
## x
## A1BG 1.256876
## A1BG-AS1 -1.418138
## A1CF 2.954785
## A2M -4.988589
## A2M-AS1 -2.232500
## A2ML1 -3.565671
y <- y[order(y$x),,drop=FALSE]
head(y)
## x
## APOOP5 -15.82582
## OR5J2 -15.37975
## FAM133CP -15.35387
## CDRT15P3 -15.14757
## MIR622 -14.64271
## OR56B4 -14.55324
tail(y)
## x
## MIR552 12.41409
## HOXA2 12.67896
## SNX29P1 13.01832
## MIR4487 13.75684
## ZNF793-AS1 13.84461
## MIR4730 16.07294
res <- mitch_calc(y,gs,priority="effect",cores=12,minsetsize=5)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mres <- res$enrichment_result
head(mres,30) %>% kbl() %>% kable_paper("hover", full_width = F)
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
11171
|
GOMF HEMOGLOBIN ALPHA BINDING
|
5
|
0.0006314
|
-0.8824948
|
0.0096044
|
4475
|
GOBP MESENCHYME MIGRATION
|
5
|
0.0011311
|
0.8406624
|
0.0152103
|
3609
|
GOBP FOREBRAIN NEURON FATE COMMITMENT
|
7
|
0.0001198
|
0.8394239
|
0.0025607
|
2260
|
GOBP ANTIGEN PROCESSING AND PRESENTATION ENDOGENOUS LIPID ANTIGEN VIA MHC CLASS IB
|
5
|
0.0017977
|
-0.8060841
|
0.0218120
|
11356
|
GOMF LIPID ANTIGEN BINDING
|
5
|
0.0017977
|
-0.8060841
|
0.0218120
|
11617
|
GOMF OLFACTORY RECEPTOR ACTIVITY
|
353
|
0.0000000
|
-0.7823111
|
0.0000000
|
12216
|
GOMF TYPE I INTERFERON RECEPTOR BINDING
|
14
|
0.0000004
|
-0.7818909
|
0.0000199
|
11615
|
GOMF ODORANT BINDING
|
101
|
0.0000000
|
-0.7812483
|
0.0000000
|
540
|
Expression and translocation of olfactory receptors
|
354
|
0.0000000
|
-0.7719287
|
0.0000000
|
5612
|
GOBP NORADRENERGIC NEURON DIFFERENTIATION
|
10
|
0.0000256
|
0.7685646
|
0.0007626
|
10817
|
GOMF CCR6 CHEMOKINE RECEPTOR BINDING
|
7
|
0.0004560
|
-0.7649865
|
0.0074237
|
2555
|
GOBP CARDIAC SEPTUM CELL DIFFERENTIATION
|
5
|
0.0031015
|
0.7636756
|
0.0328625
|
5511
|
GOBP NEURAL PLATE REGIONALIZATION
|
6
|
0.0012181
|
0.7624619
|
0.0160982
|
1147
|
Olfactory Signaling Pathway
|
361
|
0.0000000
|
-0.7617404
|
0.0000000
|
11258
|
GOMF INHIBITORY MHC CLASS I RECEPTOR ACTIVITY
|
9
|
0.0000888
|
-0.7543133
|
0.0020565
|
8274
|
GOBP REGULATION OF SKELETAL MUSCLE TISSUE REGENERATION
|
8
|
0.0002684
|
0.7438904
|
0.0049144
|
2875
|
GOBP CEREBRAL CORTEX REGIONALIZATION
|
7
|
0.0007489
|
0.7356821
|
0.0109187
|
8877
|
GOBP SENSORY PERCEPTION OF SMELL
|
379
|
0.0000000
|
-0.7322834
|
0.0000000
|
6699
|
GOBP POSITIVE REGULATION OF SKELETAL MUSCLE TISSUE REGENERATION
|
5
|
0.0049515
|
0.7256413
|
0.0471092
|
590
|
Ficolins bind to repetitive carbohydrate structures on the target cell surface
|
5
|
0.0062444
|
-0.7061471
|
0.0554551
|
8896
|
GOBP SERINE PHOSPHORYLATION OF STAT PROTEIN
|
20
|
0.0000000
|
-0.7055523
|
0.0000030
|
9030
|
GOBP SPINAL CORD OLIGODENDROCYTE CELL DIFFERENTIATION
|
5
|
0.0066247
|
0.7011070
|
0.0579121
|
160
|
Beta defensins
|
28
|
0.0000000
|
-0.7008617
|
0.0000000
|
5510
|
GOBP NEURAL PLATE PATTERN SPECIFICATION
|
8
|
0.0006486
|
0.6962285
|
0.0098291
|
1494
|
Regulation of gene expression in early pancreatic precursor cells
|
8
|
0.0007008
|
0.6919078
|
0.0104528
|
2134
|
GOBP ADENYLATE CYCLASE INHIBITING SEROTONIN RECEPTOR SIGNALING PATHWAY
|
7
|
0.0015597
|
0.6903690
|
0.0195992
|
428
|
Developmental Cell Lineages
|
5
|
0.0078873
|
0.6860589
|
0.0657684
|
429
|
Developmental Lineage of Pancreatic Acinar Cells
|
5
|
0.0078873
|
0.6860589
|
0.0657684
|
11199
|
GOMF HISTONE H3K36 TRIMETHYLTRANSFERASE ACTIVITY
|
5
|
0.0081572
|
0.6831248
|
0.0671535
|
10689
|
GOMF ARYL SULFOTRANSFERASE ACTIVITY
|
7
|
0.0018058
|
0.6810081
|
0.0218268
|
# Bar chart of the leading significant enrichments (p.adjustANOVA < 0.05):
# up to 15 gene sets per direction, taken in the order mitch_calc() returned
# them.
updf <- head(subset(mres, p.adjustANOVA < 0.05 & s.dist > 0), 15)
dndf <- head(subset(mres, p.adjustANOVA < 0.05 & s.dist < 0), 15)
up <- setNames(updf$s.dist, updf$set)
dn <- setNames(dndf$s.dist, dndf$set)
top <- sort(c(up, dn))
if (length(top) > 0) {
  # Wide left margin so the long gene set names fit. par() returns the
  # previous settings, which are restored once the plot is drawn.
  op <- par(mar = c(5.1, 28.1, 4.1, 2.1))
  barplot(top, horiz = TRUE, las = 1, xlab = "ES")
  par(op)
} else {
  # barplot() cannot draw an empty vector; skip the plot instead of aborting.
  message("No gene sets with p.adjustANOVA < 0.05; barplot skipped")
}
# All significant sets per direction, kept for the pathway intersections.
alldn <- subset(mres, p.adjustANOVA < 0.05 & s.dist < 0)$set
allup <- subset(mres, p.adjustANOVA < 0.05 & s.dist > 0)$set
Get incidence data
# Load the incidence table and keep the column(s) whose name matches
# "colorectal_cancer"; drop = FALSE keeps the result a table.
incid <- readRDS("../../multi_ewas_incid.rds")
incid2 <- incid[, grepl("colorectal_cancer", colnames(incid)), drop = FALSE]
head(incid2)
## colorectal_cancer
## A1BG 0.0020387679
## A1BG-AS1 0.0029418062
## A1CF -0.0015391277
## A2M -0.0004250785
## A2M-AS1 0.0057066000
## A2ML1 0.0000405520
ires <- mitch_calc(incid2,genesets=gs,minsetsize=5,cores=12,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
ires <- ires$enrichment_result
head(ires,30) %>% kbl(caption="incidence") %>% kable_paper("hover", full_width = F)
incidence
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
10546
|
GOMF 3 KETO STEROL REDUCTASE ACTIVITY
|
5
|
0.0030439
|
-0.7651668
|
0.1618577
|
5502
|
GOBP NEURAL FOLD FORMATION
|
7
|
0.0005572
|
-0.7532727
|
0.0556352
|
9577
|
GOCC ATP BINDING CASSETTE ABC TRANSPORTER COMPLEX
|
6
|
0.0015604
|
0.7456328
|
0.1079425
|
7047
|
GOBP PROTEIN O LINKED FUCOSYLATION
|
5
|
0.0049818
|
0.7251341
|
0.2117199
|
9546
|
GOCC ALPHA BETA T CELL RECEPTOR COMPLEX
|
5
|
0.0056256
|
-0.7149714
|
0.2239424
|
7661
|
GOBP REGULATION OF GLUCOSYLCERAMIDE CATABOLIC PROCESS
|
5
|
0.0056341
|
0.7148441
|
0.2239424
|
10646
|
GOMF AMINOACYL TRNA HYDROLASE ACTIVITY
|
5
|
0.0069067
|
-0.6975366
|
0.2526645
|
6469
|
GOBP POSITIVE REGULATION OF MITOTIC SISTER CHROMATID SEGREGATION
|
6
|
0.0033116
|
-0.6923776
|
0.1687671
|
3619
|
GOBP FORMATION OF TRANSLATION PREINITIATION COMPLEX
|
6
|
0.0036689
|
-0.6848552
|
0.1802443
|
6374
|
GOBP POSITIVE REGULATION OF INTRINSIC APOPTOTIC SIGNALING PATHWAY BY P53 CLASS MEDIATOR
|
7
|
0.0021552
|
-0.6695584
|
0.1350517
|
9497
|
GOBP VOLUNTARY MUSCULOSKELETAL MOVEMENT
|
7
|
0.0022130
|
0.6678312
|
0.1360538
|
11172
|
GOMF HEXOKINASE ACTIVITY
|
5
|
0.0100326
|
0.6648487
|
0.2969173
|
9181
|
GOBP THELARCHE
|
5
|
0.0118466
|
-0.6498682
|
0.3283293
|
5929
|
GOBP PH REDUCTION
|
6
|
0.0065930
|
0.6404103
|
0.2476292
|
9769
|
GOCC CYTOPLASMIC SIDE OF ROUGH ENDOPLASMIC RETICULUM MEMBRANE
|
5
|
0.0137392
|
-0.6362694
|
0.3485038
|
10172
|
GOCC OUTER DENSE FIBER
|
6
|
0.0078073
|
0.6271079
|
0.2710223
|
10914
|
GOMF CYTOSKELETON NUCLEAR MEMBRANE ANCHOR ACTIVITY
|
5
|
0.0157290
|
0.6236524
|
0.3646299
|
9843
|
GOCC EQUATORIAL MICROTUBULE ORGANIZING CENTER
|
5
|
0.0159001
|
0.6226343
|
0.3663884
|
6368
|
GOBP POSITIVE REGULATION OF INTRACELLULAR LIPID TRANSPORT
|
5
|
0.0159554
|
0.6223071
|
0.3669752
|
570
|
FLT3 signaling through SRC family kinases
|
6
|
0.0084289
|
-0.6210021
|
0.2801000
|
581
|
FasL/ CD95L signaling
|
5
|
0.0177403
|
-0.6122534
|
0.3847555
|
7466
|
GOBP REGULATION OF CHRONIC INFLAMMATORY RESPONSE
|
8
|
0.0030838
|
-0.6041411
|
0.1618577
|
10781
|
GOMF CARBOHYDRATE PROTON SYMPORTER ACTIVITY
|
7
|
0.0058062
|
0.6020390
|
0.2292967
|
10020
|
GOCC MANNOSYLTRANSFERASE COMPLEX
|
5
|
0.0200787
|
-0.6003454
|
0.4022942
|
2008
|
Zinc efflux and compartmentalization by the SLC30 family
|
5
|
0.0214272
|
-0.5940187
|
0.4131383
|
3717
|
GOBP GLUCOSYLCERAMIDE CATABOLIC PROCESS
|
8
|
0.0036435
|
0.5935724
|
0.1797158
|
2260
|
GOBP ANTIGEN PROCESSING AND PRESENTATION ENDOGENOUS LIPID ANTIGEN VIA MHC CLASS IB
|
5
|
0.0216555
|
-0.5929825
|
0.4144367
|
11348
|
GOMF LIPID ANTIGEN BINDING
|
5
|
0.0216555
|
-0.5929825
|
0.4144367
|
9960
|
GOCC INNER DYNEIN ARM
|
5
|
0.0228792
|
0.5875829
|
0.4253280
|
8029
|
GOBP REGULATION OF OOCYTE MATURATION
|
7
|
0.0072085
|
0.5864416
|
0.2587551
|
# Bar chart of the leading significant enrichments (p.adjustANOVA < 0.05) in
# the incidence profile: up to 15 gene sets per direction, taken in the order
# mitch_calc() returned them.
updf <- head(subset(ires, p.adjustANOVA < 0.05 & s.dist > 0), 15)
dndf <- head(subset(ires, p.adjustANOVA < 0.05 & s.dist < 0), 15)
up <- setNames(updf$s.dist, updf$set)
dn <- setNames(dndf$s.dist, dndf$set)
top <- sort(c(up, dn))
if (length(top) > 0) {
  # Wide left margin so the long gene set names fit. par() returns the
  # previous settings, which are restored once the plot is drawn.
  op <- par(mar = c(5.1, 28.1, 4.1, 2.1))
  barplot(top, horiz = TRUE, las = 1, xlab = "ES")
  par(op)
} else {
  # barplot() cannot draw an empty vector; skip the plot instead of aborting.
  message("No gene sets with p.adjustANOVA < 0.05; barplot skipped")
}
# All significant sets per direction, kept for the pathway intersections.
idn <- subset(ires, p.adjustANOVA < 0.05 & s.dist < 0)$set
iup <- subset(ires, p.adjustANOVA < 0.05 & s.dist > 0)$set
Intersect sig pw
l <- list("cup"=allup,"cdn"=alldn,"idn"=idn,"iup"=iup)
plot(euler(l),quantities = TRUE)
intersect(allup,iup)
## character(0)
intersect(alldn,idn)
## [1] "Beta defensins"
## [2] "Response of EIF2AK4 (GCN2) to amino acid deficiency"
Cancer specific genes
# Build signature gene sets of increasing size from the ranked table y, which
# was sorted in ascending order of score earlier: "downN" holds the N
# lowest-scoring genes and "upN" the N highest-scoring genes.
# The list is filled in the order down100, up100, down200, up200, ...
set_sizes <- c(100L, 200L, 500L, 1000L, 2000L, 5000L)
cancer_genes <- list()
for (n in set_sizes) {
  cancer_genes[[paste0("down", n)]] <- rownames(head(y, n))
  cancer_genes[[paste0("up", n)]] <- rownames(tail(y, n))
}
# Test these signatures for enrichment in the incidence profile.
ocres <- mitch_calc(incid2, genesets = cancer_genes, minsetsize = 5,
  cores = 12, priority = "effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
# Show the enrichment of the signature gene sets in the incidence profile.
ocres$enrichment_result %>%
  kbl(caption = "cancer gene signature detected before diagnosis") %>%
  kable_paper("hover", full_width = FALSE)
cancer gene signature detected before diagnosis
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
1
|
down100
|
98
|
0.0000059
|
-0.2647631
|
0.0000118
|
3
|
down200
|
192
|
0.0000001
|
-0.2273312
|
0.0000001
|
2
|
up100
|
95
|
0.0018016
|
0.1852703
|
0.0021619
|
5
|
down500
|
479
|
0.0000000
|
-0.1726888
|
0.0000000
|
7
|
down1000
|
962
|
0.0000000
|
-0.1133745
|
0.0000000
|
12
|
up5000
|
4918
|
0.0000000
|
0.1095730
|
0.0000000
|
4
|
up200
|
192
|
0.0269012
|
0.0926112
|
0.0278072
|
10
|
up2000
|
1946
|
0.0000000
|
0.0752149
|
0.0000001
|
8
|
up1000
|
967
|
0.0011691
|
0.0616378
|
0.0015588
|
6
|
up500
|
476
|
0.0278072
|
0.0588582
|
0.0278072
|
9
|
down2000
|
1930
|
0.0005617
|
-0.0474581
|
0.0008789
|
11
|
down5000
|
4877
|
0.0005859
|
-0.0322139
|
0.0008789
|
Two dimensional analysis
# Join the cancer scores with the incidence scores on gene symbol (the row
# names), then move the merged key column back into the row names.
m <- merge(y, incid2, by = "row.names")
rownames(m) <- m[["Row.names"]]
m[["Row.names"]] <- NULL
colnames(m) <- c("cancer", "incidence")
head(m)
## cancer incidence
## A1BG 1.256876 0.0020387679
## A1BG-AS1 -1.418138 0.0029418062
## A1CF 2.954785 -0.0015391277
## A2M -4.988589 -0.0004250785
## A2M-AS1 -2.232500 0.0057066000
## A2ML1 -3.565671 0.0000405520
m2 <- mitch_calc(m,genesets=gs,minsetsize=5,cores=12,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
# Add BH-adjusted p-values for each contrast separately, then keep the gene
# sets that reach FDR < 0.05 in at least one of the two contrasts.
m2res <- m2$enrichment_result
m2res[["FDR.cancer"]] <- p.adjust(m2res[["p.cancer"]], method = "BH")
m2res[["FDR.incidence"]] <- p.adjust(m2res[["p.incidence"]], method = "BH")
# which() leaves out rows where the comparison is NA.
m2resf <- m2res[which(m2res$FDR.cancer < 0.05 | m2res$FDR.incidence < 0.05), ]
dim(m2res)
## [1] 12277 12
dim(m2resf)
## [1] 1418 12
m2resf <- subset(m2resf,p.adjustMANOVA < 0.05)
dim(m2resf)
## [1] 1161 12
head(m2resf,30) %>% kbl(caption="Top hits in 2D analysis") %>% kable_paper("hover", full_width = F)
Top hits in 2D analysis
|
set
|
setSize
|
pMANOVA
|
s.cancer
|
s.incidence
|
p.cancer
|
p.incidence
|
s.dist
|
SD
|
p.adjustMANOVA
|
FDR.cancer
|
FDR.incidence
|
2260
|
GOBP ANTIGEN PROCESSING AND PRESENTATION ENDOGENOUS LIPID ANTIGEN VIA MHC CLASS IB
|
5
|
0.0008678
|
-0.8080942
|
-0.5935210
|
0.0017507
|
0.0215366
|
1.0026382
|
0.1517261
|
0.0122735
|
0.0215802
|
0.4109459
|
11343
|
GOMF LIPID ANTIGEN BINDING
|
5
|
0.0008678
|
-0.8080942
|
-0.5935210
|
0.0017507
|
0.0215366
|
1.0026382
|
0.1517261
|
0.0122735
|
0.0215802
|
0.4109459
|
3608
|
GOBP FOREBRAIN NEURON FATE COMMITMENT
|
7
|
0.0001105
|
0.8410743
|
-0.3375966
|
0.0001162
|
0.1219190
|
0.9062987
|
0.8334461
|
0.0023636
|
0.0025195
|
0.7066229
|
11159
|
GOMF HEMOGLOBIN ALPHA BINDING
|
5
|
0.0027991
|
-0.8849113
|
-0.0366747
|
0.0006100
|
0.8870671
|
0.8856709
|
0.5997938
|
0.0305108
|
0.0093968
|
0.9873547
|
4473
|
GOBP MESENCHYME MIGRATION
|
5
|
0.0028310
|
0.8423689
|
-0.2072638
|
0.0011050
|
0.4222073
|
0.8674928
|
0.7422024
|
0.0307300
|
0.0150909
|
0.8974098
|
160
|
Beta defensins
|
26
|
0.0000000
|
-0.6935426
|
-0.4157082
|
0.0000000
|
0.0002429
|
0.8085880
|
0.1964586
|
0.0000000
|
0.0000001
|
0.0271574
|
11604
|
GOMF OLFACTORY RECEPTOR ACTIVITY
|
338
|
0.0000000
|
-0.7880687
|
-0.0975139
|
0.0000000
|
0.0020610
|
0.7940788
|
0.4882960
|
0.0000000
|
0.0000000
|
0.1304279
|
12202
|
GOMF TYPE I INTERFERON RECEPTOR BINDING
|
14
|
0.0000014
|
-0.7836668
|
0.1104098
|
0.0000004
|
0.4744410
|
0.7914063
|
0.6322076
|
0.0000509
|
0.0000192
|
0.9092831
|
5505
|
GOBP NEURAL PLATE REGIONALIZATION
|
6
|
0.0043234
|
0.7644643
|
0.1990327
|
0.0011823
|
0.3985201
|
0.7899492
|
0.3998206
|
0.0424966
|
0.0159162
|
0.8946374
|
11602
|
GOMF ODORANT BINDING
|
99
|
0.0000000
|
-0.7802393
|
-0.1055901
|
0.0000000
|
0.0694382
|
0.7873516
|
0.4770490
|
0.0000000
|
0.0000000
|
0.6080546
|
540
|
Expression and translocation of olfactory receptors
|
339
|
0.0000000
|
-0.7772028
|
-0.0929688
|
0.0000000
|
0.0032618
|
0.7827435
|
0.4838265
|
0.0000000
|
0.0000000
|
0.1677343
|
2134
|
GOBP ADENYLATE CYCLASE INHIBITING SEROTONIN RECEPTOR SIGNALING PATHWAY
|
7
|
0.0025777
|
0.6911052
|
0.3501776
|
0.0015417
|
0.1086226
|
0.7747586
|
0.2410722
|
0.0287430
|
0.0195137
|
0.6855090
|
10679
|
GOMF ARYL SULFOTRANSFERASE ACTIVITY
|
7
|
0.0026396
|
0.6826831
|
0.3646361
|
0.0017593
|
0.0947881
|
0.7739610
|
0.2248932
|
0.0292641
|
0.0216205
|
0.6597018
|
5606
|
GOBP NORADRENERGIC NEURON DIFFERENTIATION
|
10
|
0.0001158
|
0.7696057
|
-0.0531124
|
0.0000250
|
0.7711842
|
0.7714362
|
0.5817496
|
0.0024424
|
0.0007481
|
0.9723824
|
1147
|
Olfactory Signaling Pathway
|
346
|
0.0000000
|
-0.7665025
|
-0.0833837
|
0.0000000
|
0.0076934
|
0.7710246
|
0.4830379
|
0.0000000
|
0.0000000
|
0.2651502
|
11245
|
GOMF INHIBITORY MHC CLASS I RECEPTOR ACTIVITY
|
9
|
0.0003971
|
-0.7576984
|
-0.1335128
|
0.0000826
|
0.4879489
|
0.7693715
|
0.4413658
|
0.0065084
|
0.0019497
|
0.9150066
|
8265
|
GOBP REGULATION OF SKELETAL MUSCLE TISSUE REGENERATION
|
8
|
0.0011493
|
0.7453454
|
-0.0379096
|
0.0002611
|
0.8527006
|
0.7463089
|
0.5538449
|
0.0153541
|
0.0047988
|
0.9821274
|
8868
|
GOBP SENSORY PERCEPTION OF SMELL
|
364
|
0.0000000
|
-0.7356773
|
-0.0943359
|
0.0000000
|
0.0019912
|
0.7417010
|
0.4534969
|
0.0000000
|
0.0000000
|
0.1293446
|
2875
|
GOBP CEREBRAL CORTEX REGIONALIZATION
|
7
|
0.0032663
|
0.7374531
|
0.0164727
|
0.0007271
|
0.9398398
|
0.7376371
|
0.5098101
|
0.0342150
|
0.0107165
|
0.9934062
|
3534
|
GOBP FAS SIGNALING PATHWAY
|
8
|
0.0010780
|
0.6384275
|
-0.3544652
|
0.0017645
|
0.0825353
|
0.7302296
|
0.7020811
|
0.0145761
|
0.0216239
|
0.6392363
|
11191
|
GOMF HISTONE H3K4 TRIMETHYLTRANSFERASE ACTIVITY
|
10
|
0.0006348
|
0.5595382
|
0.4616283
|
0.0021827
|
0.0114745
|
0.7253852
|
0.0692327
|
0.0095865
|
0.0253285
|
0.3206668
|
5504
|
GOBP NEURAL PLATE PATTERN SPECIFICATION
|
8
|
0.0023720
|
0.6980469
|
0.1793488
|
0.0006277
|
0.3797139
|
0.7207187
|
0.3667749
|
0.0267905
|
0.0096328
|
0.8853886
|
411
|
Defensins
|
32
|
0.0000000
|
-0.6417100
|
-0.3077475
|
0.0000000
|
0.0025848
|
0.7116883
|
0.2361471
|
0.0000000
|
0.0000000
|
0.1489825
|
1494
|
Regulation of gene expression in early pancreatic precursor cells
|
8
|
0.0018825
|
0.6928219
|
-0.1560874
|
0.0006894
|
0.4445746
|
0.7101870
|
0.6002696
|
0.0225041
|
0.0103980
|
0.9005912
|
8887
|
GOBP SERINE PHOSPHORYLATION OF STAT PROTEIN
|
20
|
0.0000003
|
-0.7080952
|
-0.0272076
|
0.0000000
|
0.8331676
|
0.7086177
|
0.4814602
|
0.0000129
|
0.0000027
|
0.9781253
|
6359
|
GOBP POSITIVE REGULATION OF INTERLEUKIN 23 PRODUCTION
|
7
|
0.0042604
|
-0.6541573
|
0.2545745
|
0.0027231
|
0.2434640
|
0.7019472
|
0.6425704
|
0.0419786
|
0.0300102
|
0.8163502
|
1192
|
POU5F1 (OCT4), SOX2, NANOG activate genes related to proliferation
|
10
|
0.0014855
|
0.6494524
|
-0.0637550
|
0.0003755
|
0.7270131
|
0.6525742
|
0.5043138
|
0.0189185
|
0.0063859
|
0.9664638
|
8864
|
GOBP SENSORY PERCEPTION OF CHEMICAL STIMULUS
|
439
|
0.0000000
|
-0.6407528
|
-0.0622601
|
0.0000000
|
0.0253052
|
0.6437705
|
0.4090561
|
0.0000000
|
0.0000000
|
0.4429419
|
11213
|
GOMF HMG BOX DOMAIN BINDING
|
13
|
0.0002294
|
0.6091455
|
-0.1972530
|
0.0001427
|
0.2181620
|
0.6402867
|
0.5702098
|
0.0042217
|
0.0029599
|
0.7969063
|
11751
|
GOMF PHOSPHATIDYLINOSITOL TRANSFER ACTIVITY
|
10
|
0.0029493
|
0.6027382
|
0.2026652
|
0.0009641
|
0.2671102
|
0.6358982
|
0.2828943
|
0.0317624
|
0.0134322
|
0.8333688
|
# Heatmap of the two enrichment scores (s.cancer, s.incidence) for the first
# 30 filtered gene sets of the 2D analysis.
top2d <- head(m2resf, 30)
# head() avoids the all-NA rows that m2resf[1:30, ] would add when fewer than
# 30 sets remain; the score columns are selected by name, not by position.
mx <- as.matrix(top2d[, c("s.cancer", "s.incidence")])
rownames(mx) <- top2d$set
colfunc <- colorRampPalette(c("blue", "white", "red"))
# cexCol is a numeric expansion factor, so pass a number rather than a
# string; `margins` is spelled out instead of relying on partial matching.
heatmap.2(mx, trace = "none", scale = "none", margins = c(5, 25),
  cexCol = 0.9, col = colfunc)
# Write the full HTML report for the 2D analysis.
mitch_report(res = m2, outfile = "2dmitch.html", overwrite = TRUE)
## Dataset saved as " /tmp/RtmpUUu8R3/2dmitch.rds ".
##
##
## processing file: mitch.Rmd
## 1/34
## 2/34 [checklibraries]
## 3/34
## 4/34 [peek]
## 5/34
## 6/34 [metrics]
## 7/34
## 8/34 [scatterplot]
## 9/34
## 10/34 [contourplot]
## 11/34
## 12/34 [input_geneset_metrics1]
## 13/34
## 14/34 [input_geneset_metrics2]
## 15/34
## 16/34 [input_geneset_metrics3]
## 17/34
## 18/34 [echart1d]
## 19/34 [echart2d]
## 20/34
## 21/34 [heatmap]
## 22/34
## 23/34 [effectsize]
## 24/34
## 25/34 [results_table]
## 26/34
## 27/34 [results_table_complete]
## 28/34
## 29/34 [detailed_geneset_reports1d]
## 30/34
## 31/34 [detailed_geneset_reports2d]
## 32/34
## 33/34 [session_info]
## 34/34
## output file: /home/mark.ziemann@domain.internal.burnet.edu.au/projects/cancer_biomarkers/colorectal/GSE199057/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mark.ziemann@domain.internal.burnet.edu.au/projects/cancer_biomarkers/colorectal/GSE199057/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpUUu8R3/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpUUu8R3/rmarkdown-str3241b0546b83e5.html
##
## Output created: /tmp/RtmpUUu8R3/mitch_report.html
## [1] TRUE
A few top genes
# Take the ranked profile from the 2D analysis and append the per-gene row
# median as a third column. The appended column has no name, so
# as.data.frame() labels it V3. Sort ascending on that column.
ranks <- m2$ranked_profile
mrank <- as.data.frame(cbind(ranks, rowMedians(ranks)))
mrank <- mrank[order(mrank[["V3"]]), ]
head(mrank, 50) %>%
  kbl(caption = "common hypomethylated genes") %>%
  kable_paper("hover", full_width = FALSE)
common hypomethylated genes
|
cancer
|
incidence
|
V3
|
MIR3616
|
-10022
|
-10781
|
-10401.5
|
MIR622
|
-10028
|
-10771
|
-10399.5
|
IFNA8
|
-10003
|
-10773
|
-10388.0
|
SPART-AS1
|
-10024
|
-10711
|
-10367.5
|
MIR873
|
-9969
|
-10739
|
-10354.0
|
OR10T2
|
-9998
|
-10706
|
-10352.0
|
LINC01289
|
-9940
|
-10763
|
-10351.5
|
CCT8L2
|
-9872
|
-10783
|
-10327.5
|
MIR4535
|
-10008
|
-10636
|
-10322.0
|
MIR218-2
|
-9932
|
-10698
|
-10315.0
|
OR5A2
|
-9958
|
-10661
|
-10309.5
|
LINC01524
|
-9949
|
-10644
|
-10296.5
|
OR2M7
|
-10009
|
-10563
|
-10286.0
|
OR4A47
|
-9954
|
-10617
|
-10285.5
|
LINC00326
|
-9864
|
-10686
|
-10275.0
|
CDRT15P3
|
-10029
|
-10513
|
-10271.0
|
MIR7515
|
-10017
|
-10508
|
-10262.5
|
ANKRD62P1-PARP4P3
|
-9796
|
-10718
|
-10257.0
|
LINC01142
|
-9986
|
-10527
|
-10256.5
|
LINC01493
|
-10010
|
-10503
|
-10256.5
|
MIR588
|
-9716
|
-10779
|
-10247.5
|
FAM133CP
|
-10030
|
-10442
|
-10236.0
|
GABRG3-AS1
|
-9991
|
-10474
|
-10232.5
|
CD1E
|
-9824
|
-10624
|
-10224.0
|
LINC00701
|
-9693
|
-10742
|
-10217.5
|
MIR548A3
|
-9957
|
-10477
|
-10217.0
|
TRIM49D2
|
-9741
|
-10687
|
-10214.0
|
MIR6075
|
-9667
|
-10756
|
-10211.5
|
CHRFAM7A
|
-9734
|
-10681
|
-10207.5
|
LINC01467
|
-9845
|
-10570
|
-10207.5
|
MIR5571
|
-9643
|
-10766
|
-10204.5
|
UBQLNL
|
-9774
|
-10623
|
-10198.5
|
IFNA16
|
-10014
|
-10376
|
-10195.0
|
OR10AG1
|
-9802
|
-10579
|
-10190.5
|
LCE1F
|
-10023
|
-10342
|
-10182.5
|
MIR4643
|
-9644
|
-10719
|
-10181.5
|
DMBT1L1
|
-9886
|
-10445
|
-10165.5
|
LCE2D
|
-9903
|
-10426
|
-10164.5
|
OR2M1P
|
-9788
|
-10541
|
-10164.5
|
BLID
|
-9878
|
-10443
|
-10160.5
|
LINC01050
|
-9680
|
-10635
|
-10157.5
|
MIR520F
|
-9679
|
-10622
|
-10150.5
|
APCS
|
-9590
|
-10699
|
-10144.5
|
KRTAP21-2
|
-9922
|
-10364
|
-10143.0
|
DEFB118
|
-9973
|
-10312
|
-10142.5
|
LINC01241
|
-9982
|
-10297
|
-10139.5
|
PPP3R2
|
-9767
|
-10509
|
-10138.0
|
SNORD115-39
|
-9671
|
-10602
|
-10136.5
|
SLC9A9-AS1
|
-9935
|
-10330
|
-10132.5
|
MIR5584
|
-9510
|
-10749
|
-10129.5
|
tail(mrank,50) %>% kbl(caption="common hypermethylated genes") %>% kable_paper("hover", full_width = F)
common hypermethylated genes
|
cancer
|
incidence
|
V3
|
EPN2-IT1
|
11054
|
11100
|
11077.0
|
MIR3115
|
11559
|
10622
|
11090.5
|
MOB2
|
11640
|
10543
|
11091.5
|
UBE2Q1-AS1
|
11451
|
10746
|
11098.5
|
SNORA71A
|
11388
|
10818
|
11103.0
|
MIR629
|
11329
|
10895
|
11112.0
|
IL11RA
|
11521
|
10704
|
11112.5
|
MIR602
|
11594
|
10639
|
11116.5
|
QRFP
|
11319
|
10925
|
11122.0
|
CIMIP2B
|
11658
|
10601
|
11129.5
|
MIR1827
|
11173
|
11091
|
11132.0
|
MIR6784
|
11235
|
11030
|
11132.5
|
MIR106B
|
11526
|
10753
|
11139.5
|
SNORA74B
|
11690
|
10605
|
11147.5
|
RNF216-IT1
|
11491
|
10813
|
11152.0
|
SNORD114-30
|
11360
|
10958
|
11159.0
|
ZSWIM8-AS1
|
11489
|
10832
|
11160.5
|
MIR6851
|
11281
|
11066
|
11173.5
|
MIR4691
|
11351
|
10997
|
11174.0
|
AGER
|
11731
|
10638
|
11184.5
|
MIR6807
|
11586
|
10786
|
11186.0
|
MIR8089
|
11404
|
10976
|
11190.0
|
MIR765
|
11590
|
10794
|
11192.0
|
MIR6509
|
11333
|
11067
|
11200.0
|
ZBED10P
|
11774
|
10648
|
11211.0
|
LINC01504
|
11635
|
10801
|
11218.0
|
NPTN-IT1
|
11601
|
10879
|
11240.0
|
MUSTN1
|
11436
|
11064
|
11250.0
|
MIR6882
|
11768
|
10734
|
11251.0
|
MIR6856
|
11571
|
10944
|
11257.5
|
MIR6720
|
11871
|
10662
|
11266.5
|
SNORD7
|
11461
|
11081
|
11271.0
|
UCN
|
11733
|
10809
|
11271.0
|
MIR6775
|
11471
|
11115
|
11293.0
|
MIR199A2
|
11843
|
10752
|
11297.5
|
LGALS7
|
11524
|
11074
|
11299.0
|
MIR326
|
11878
|
10732
|
11305.0
|
MIR197
|
11751
|
10860
|
11305.5
|
MIR4647
|
11510
|
11134
|
11322.0
|
MIR5692B
|
11868
|
10778
|
11323.0
|
MIR6751
|
11778
|
10920
|
11349.0
|
MIR8085
|
11851
|
10871
|
11361.0
|
MIR4304
|
11633
|
11110
|
11371.5
|
MIR6823
|
11854
|
10896
|
11375.0
|
MIR4718
|
11624
|
11129
|
11376.5
|
MIR28
|
11829
|
10956
|
11392.5
|
MIR4505
|
11738
|
11072
|
11405.0
|
MIR6736
|
11703
|
11116
|
11409.5
|
LCAT
|
11883
|
10943
|
11413.0
|
MIR1271
|
11863
|
11048
|
11455.5
|
Run mitch (healthy control v normal (CRC diagnosed))
dm <- readRDS("GSE199057_2_limma.rds")
y <- mitch_import(dm,DEtype="limma",geneTable=gt1)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 776521
## Note: no. genes in output = 22227
## Warning in mitch_import(dm, DEtype = "limma", geneTable = gt1): Warning: less than half of the input genes are also in the
## output
head(y)
## x
## A1BG -0.1624750
## A1BG-AS1 -0.3989024
## A1CF 0.9616946
## A2M -0.7927841
## A2M-AS1 -0.6770865
## A2ML1 1.4917204
y <- y[order(y$x),,drop=FALSE]
head(y)
## x
## MIR199A2 -9.514949
## MIR3619 -8.915660
## LINC00942 -8.816906
## MAB21L2 -8.624217
## MIR8085 -8.390826
## MIR5006 -8.389382
tail(y)
## x
## MIR548AQ 8.499405
## ZNF676 8.553380
## MIR4531 9.020449
## MIR550B1 9.596144
## MIR548AL 9.652990
## UNC5B-AS1 9.738375
res <- mitch_calc(y,gs,priority="effect",cores=12,minsetsize=5)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mres <- res$enrichment_result
head(mres,30) %>% kbl() %>% kable_paper("hover", full_width = F)
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
1852
|
Tachykinin receptors bind tachykinins
|
5
|
0.0009366
|
-0.8543965
|
0.0411617
|
4229
|
GOBP LOBAR BRONCHUS EPITHELIUM DEVELOPMENT
|
5
|
0.0014257
|
0.8235442
|
0.0540350
|
9148
|
GOBP TACHYKININ RECEPTOR SIGNALING PATHWAY
|
8
|
0.0000643
|
-0.8157883
|
0.0059908
|
2281
|
GOBP AORTA SMOOTH MUSCLE TISSUE MORPHOGENESIS
|
6
|
0.0005679
|
-0.8123997
|
0.0293256
|
7494
|
GOBP REGULATION OF CORTICOTROPIN SECRETION
|
5
|
0.0016606
|
-0.8120961
|
0.0588183
|
11080
|
GOMF GALACTOSIDE BINDING
|
5
|
0.0017026
|
0.8102061
|
0.0595628
|
10817
|
GOMF CCR6 CHEMOKINE RECEPTOR BINDING
|
7
|
0.0003397
|
0.7819210
|
0.0195152
|
4228
|
GOBP LOBAR BRONCHUS DEVELOPMENT
|
6
|
0.0009886
|
0.7763977
|
0.0427852
|
7660
|
GOBP REGULATION OF GLUCOCORTICOID RECEPTOR SIGNALING PATHWAY
|
7
|
0.0008842
|
-0.7256397
|
0.0398075
|
4475
|
GOBP MESENCHYME MIGRATION
|
5
|
0.0055922
|
-0.7154712
|
0.1236969
|
9186
|
GOBP THELARCHE
|
5
|
0.0058851
|
0.7111691
|
0.1277978
|
11609
|
GOMF N ACETYLLACTOSAMINIDE BETA 1 3 N ACETYLGLUCOSAMINYLTRANSFERASE ACTIVITY
|
6
|
0.0025940
|
0.7100341
|
0.0792276
|
6277
|
GOBP POSITIVE REGULATION OF FEAR RESPONSE
|
7
|
0.0011895
|
-0.7073936
|
0.0484930
|
4995
|
GOBP NEGATIVE REGULATION OF EPINEPHRINE SECRETION
|
5
|
0.0065346
|
-0.7022770
|
0.1343094
|
3019
|
GOBP CORTISOL SECRETION
|
5
|
0.0065636
|
-0.7018990
|
0.1346800
|
2905
|
GOBP CHONDROBLAST DIFFERENTIATION
|
5
|
0.0069105
|
-0.6974890
|
0.1387849
|
11171
|
GOMF HEMOGLOBIN ALPHA BINDING
|
5
|
0.0071338
|
0.6947529
|
0.1423473
|
3406
|
GOBP EPINEPHRINE SECRETION
|
7
|
0.0014907
|
-0.6932365
|
0.0548490
|
9127
|
GOBP SYNAPTIC TRANSMISSION GLYCINERGIC
|
5
|
0.0086810
|
-0.6776708
|
0.1562197
|
10548
|
GOMF 3 CHLOROALLYL ALDEHYDE DEHYDROGENASE ACTIVITY
|
5
|
0.0114118
|
0.6532625
|
0.1806855
|
5692
|
GOBP N ACYLETHANOLAMINE METABOLIC PROCESS
|
5
|
0.0120197
|
0.6485465
|
0.1858298
|
6148
|
GOBP POSITIVE REGULATION OF CEREBELLAR GRANULE CELL PRECURSOR PROLIFERATION
|
6
|
0.0059761
|
-0.6480356
|
0.1290899
|
2555
|
GOBP CARDIAC SEPTUM CELL DIFFERENTIATION
|
5
|
0.0120888
|
-0.6480245
|
0.1863091
|
10490
|
GOCC TROPONIN COMPLEX
|
9
|
0.0008734
|
0.6406417
|
0.0394660
|
6103
|
GOBP POSITIVE REGULATION OF CD8 POSITIVE ALPHA BETA T CELL ACTIVATION
|
8
|
0.0017721
|
0.6381700
|
0.0605841
|
12216
|
GOMF TYPE I INTERFERON RECEPTOR BINDING
|
14
|
0.0000356
|
0.6380562
|
0.0037719
|
5367
|
GOBP NEGATIVE REGULATION OF SERINE TYPE PEPTIDASE ACTIVITY
|
5
|
0.0136803
|
0.6366664
|
0.2006172
|
9520
|
GOBP XENOBIOTIC GLUCURONIDATION
|
7
|
0.0036313
|
0.6347692
|
0.0978776
|
10113
|
GOCC NEUROFIBRILLARY TANGLE
|
5
|
0.0142661
|
-0.6327783
|
0.2042418
|
6889
|
GOBP PROGESTERONE BIOSYNTHETIC PROCESS
|
6
|
0.0073085
|
0.6323298
|
0.1442457
|
# Bar chart of the leading significant enrichments (p.adjustANOVA < 0.05):
# up to 15 gene sets per direction, taken in the order mitch_calc() returned
# them.
updf <- head(subset(mres, p.adjustANOVA < 0.05 & s.dist > 0), 15)
dndf <- head(subset(mres, p.adjustANOVA < 0.05 & s.dist < 0), 15)
up <- setNames(updf$s.dist, updf$set)
dn <- setNames(dndf$s.dist, dndf$set)
top <- sort(c(up, dn))
if (length(top) > 0) {
  # Wide left margin so the long gene set names fit. par() returns the
  # previous settings, which are restored once the plot is drawn.
  op <- par(mar = c(5.1, 28.1, 4.1, 2.1))
  barplot(top, horiz = TRUE, las = 1, xlab = "ES")
  par(op)
} else {
  # barplot() cannot draw an empty vector; skip the plot instead of aborting.
  message("No gene sets with p.adjustANOVA < 0.05; barplot skipped")
}
# All significant sets per direction, kept for the pathway intersections.
alldn <- subset(mres, p.adjustANOVA < 0.05 & s.dist < 0)$set
allup <- subset(mres, p.adjustANOVA < 0.05 & s.dist > 0)$set
Intersect sig pw
l <- list("cup"=allup,"cdn"=alldn,"idn"=idn,"iup"=iup)
plot(euler(l),quantities = TRUE)
intersect(allup,iup)
## character(0)
intersect(alldn,idn)
## [1] "GOCC CHROMOSOMAL REGION"
## [2] "GOCC NUCLEAR PROTEIN CONTAINING COMPLEX"
## [3] "GOCC CATALYTIC COMPLEX"
Cancer specific genes
# Build signature gene sets of increasing size from the ranked table y, which
# was sorted in ascending order of score earlier: "downN" holds the N
# lowest-scoring genes and "upN" the N highest-scoring genes.
# The list is filled in the order down100, up100, down200, up200, ...
set_sizes <- c(100L, 200L, 500L, 1000L, 2000L, 5000L)
cancer_genes <- list()
for (n in set_sizes) {
  cancer_genes[[paste0("down", n)]] <- rownames(head(y, n))
  cancer_genes[[paste0("up", n)]] <- rownames(tail(y, n))
}
# Test these signatures for enrichment in the incidence profile.
ocres <- mitch_calc(incid2, genesets = cancer_genes, minsetsize = 5,
  cores = 12, priority = "effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
# Show the enrichment of the signature gene sets in the incidence profile.
ocres$enrichment_result %>%
  kbl(caption = "cancer gene signature detected before diagnosis") %>%
  kable_paper("hover", full_width = FALSE)
cancer gene signature detected before diagnosis
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
2
|
up100
|
94
|
0.0000006
|
-0.2974194
|
0.0000012
|
4
|
up200
|
190
|
0.0000025
|
-0.1980727
|
0.0000043
|
7
|
down1000
|
969
|
0.0000000
|
0.1700285
|
0.0000000
|
3
|
down200
|
186
|
0.0000726
|
0.1686555
|
0.0000968
|
1
|
down100
|
92
|
0.0054919
|
0.1674811
|
0.0059912
|
5
|
down500
|
475
|
0.0000000
|
0.1591991
|
0.0000000
|
9
|
down2000
|
1951
|
0.0000000
|
0.1556511
|
0.0000000
|
11
|
down5000
|
4910
|
0.0000000
|
0.1496950
|
0.0000000
|
12
|
up5000
|
4908
|
0.0000000
|
-0.0703598
|
0.0000000
|
6
|
up500
|
486
|
0.0141215
|
-0.0649932
|
0.0141215
|
8
|
up1000
|
976
|
0.0009764
|
-0.0623285
|
0.0011717
|
10
|
up2000
|
1957
|
0.0000220
|
-0.0580115
|
0.0000330
|
Two-dimensional analysis
# Join the two profiles on their row names; only rows present in both `y`
# and `incid2` are kept (merge's default inner join).
m <- merge(y, incid2, by = "row.names")

# merge() returns the keys in a leading "Row.names" column: move them back
# to the row names and drop the column.
rownames(m) <- m[["Row.names"]]
m[["Row.names"]] <- NULL

# One column per contrast.
names(m) <- c("cancer", "incidence")
head(m)
## cancer incidence
## A1BG -0.1624750 0.0020387679
## A1BG-AS1 -0.3989024 0.0029418062
## A1CF 0.9616946 -0.0015391277
## A2M -0.7927841 -0.0004250785
## A2M-AS1 -0.6770865 0.0057066000
## A2ML1 1.4917204 0.0000405520
# Joint enrichment analysis of both contrasts (columns of `m`) against the
# gene sets in `gs`, with results ranked by effect size.
m2 <- mitch_calc(
  m,
  genesets = gs,
  minsetsize = 5,
  cores = 12,
  priority = "effect"
)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
m2res <- m2[["enrichment_result"]]

# Benjamini-Hochberg adjustment of the per-contrast p-values.
m2res[["FDR.cancer"]] <- p.adjust(m2res[["p.cancer"]], method = "BH")
m2res[["FDR.incidence"]] <- p.adjust(m2res[["p.incidence"]], method = "BH")

# Keep sets significant in at least one contrast; which() drops rows where
# the condition is NA, as subset() would.
sig_either <- m2res[["FDR.cancer"]] < 0.05 | m2res[["FDR.incidence"]] < 0.05
m2resf <- m2res[which(sig_either), , drop = FALSE]
dim(m2res)
## [1] 12277 12
dim(m2resf)
## [1] 415 12

# Additionally require significance in the multivariate (MANOVA) test.
m2resf <- m2resf[which(m2resf[["p.adjustMANOVA"]] < 0.05), , drop = FALSE]
dim(m2resf)
## [1] 362 12
# Render the first 30 filtered gene sets as an HTML table.
# `FALSE` is spelled out because the shorthand `F` can be reassigned.
head(m2resf, 30) %>%
  kbl(caption = "Top hits in 2D analysis") %>%
  kable_paper("hover", full_width = FALSE)
Top hits in 2D analysis
|
set
|
setSize
|
pMANOVA
|
s.cancer
|
s.incidence
|
p.cancer
|
p.incidence
|
s.dist
|
SD
|
p.adjustMANOVA
|
FDR.cancer
|
FDR.incidence
|
2281
|
GOBP AORTA SMOOTH MUSCLE TISSUE MORPHOGENESIS
|
6
|
0.0013838
|
-0.8141845
|
-0.1643852
|
0.0005521
|
0.4856175
|
0.8306135
|
0.4594774
|
0.0414377
|
0.0284820
|
0.9141557
|
9139
|
GOBP TACHYKININ RECEPTOR SIGNALING PATHWAY
|
8
|
0.0003243
|
-0.8173314
|
0.1352104
|
0.0000623
|
0.5078190
|
0.8284398
|
0.6735487
|
0.0139254
|
0.0059307
|
0.9200781
|
10480
|
GOCC TROPONIN COMPLEX
|
9
|
0.0000307
|
0.6420694
|
0.5181957
|
0.0008504
|
0.0070995
|
0.8250939
|
0.0875919
|
0.0021324
|
0.0391036
|
0.2557285
|
11191
|
GOMF HISTONE H3K4 TRIMETHYLTRANSFERASE ACTIVITY
|
10
|
0.0003623
|
-0.6118383
|
0.4616283
|
0.0008062
|
0.0114745
|
0.7664508
|
0.7590555
|
0.0151291
|
0.0377786
|
0.3206668
|
3406
|
GOBP EPINEPHRINE TRANSPORT
|
11
|
0.0002087
|
-0.6111708
|
-0.2999946
|
0.0004476
|
0.0849129
|
0.6808278
|
0.2200348
|
0.0102470
|
0.0245216
|
0.6410204
|
1553
|
SARS-CoV-1 modulates host translation machinery
|
33
|
0.0000000
|
0.4164995
|
-0.5362220
|
0.0000345
|
0.0000001
|
0.6789741
|
0.6736758
|
0.0000003
|
0.0036507
|
0.0000258
|
613
|
Formation of the anterior neural plate
|
10
|
0.0009364
|
-0.6247809
|
-0.1979007
|
0.0006227
|
0.2785190
|
0.6553746
|
0.3018499
|
0.0309032
|
0.0314584
|
0.8363468
|
12202
|
GOMF TYPE I INTERFERON RECEPTOR BINDING
|
14
|
0.0000901
|
0.6393750
|
0.1104098
|
0.0000343
|
0.4744410
|
0.6488380
|
0.3740349
|
0.0054198
|
0.0036507
|
0.9092831
|
9773
|
GOCC CYTOSOLIC SMALL RIBOSOMAL SUBUNIT
|
38
|
0.0000000
|
0.3545104
|
-0.5071718
|
0.0001554
|
0.0000001
|
0.6187898
|
0.6093013
|
0.0000006
|
0.0114746
|
0.0000174
|
8887
|
GOBP SERINE PHOSPHORYLATION OF STAT PROTEIN
|
20
|
0.0000122
|
0.6125377
|
-0.0272076
|
0.0000021
|
0.8331676
|
0.6131416
|
0.4523682
|
0.0009818
|
0.0004447
|
0.9781253
|
1749
|
Specification of the neural plate border
|
16
|
0.0000672
|
-0.5890966
|
-0.1604013
|
0.0000450
|
0.2666376
|
0.6105435
|
0.3031334
|
0.0041640
|
0.0045236
|
0.8333688
|
2522
|
GOBP CARBON DIOXIDE TRANSPORT
|
15
|
0.0005142
|
0.5802803
|
-0.0706350
|
0.0000995
|
0.6357525
|
0.5845635
|
0.4602666
|
0.0195454
|
0.0082555
|
0.9462818
|
1889
|
Trafficking of GluR2-containing AMPA receptors
|
15
|
0.0010699
|
-0.4908233
|
0.3074786
|
0.0009961
|
0.0392167
|
0.5791810
|
0.5644847
|
0.0345671
|
0.0426981
|
0.5186298
|
1985
|
Viral mRNA Translation
|
83
|
0.0000000
|
0.2765060
|
-0.4916169
|
0.0000133
|
0.0000000
|
0.5640414
|
0.5431449
|
0.0000000
|
0.0018008
|
0.0000000
|
1212
|
Peptide chain elongation
|
83
|
0.0000000
|
0.2764850
|
-0.4892403
|
0.0000133
|
0.0000000
|
0.5619609
|
0.5414495
|
0.0000000
|
0.0018008
|
0.0000000
|
537
|
Eukaryotic Translation Termination
|
86
|
0.0000000
|
0.2908287
|
-0.4760722
|
0.0000031
|
0.0000000
|
0.5578763
|
0.5422808
|
0.0000000
|
0.0006167
|
0.0000000
|
12069
|
GOMF STRUCTURAL CONSTITUENT OF SKIN EPIDERMIS
|
36
|
0.0000000
|
0.4415354
|
0.3291830
|
0.0000045
|
0.0006299
|
0.5507404
|
0.0794451
|
0.0000018
|
0.0008293
|
0.0589553
|
389
|
Defective GALNT3 causes HFTC
|
18
|
0.0002455
|
0.5389072
|
0.0684299
|
0.0000753
|
0.6152305
|
0.5432344
|
0.3326776
|
0.0114605
|
0.0067447
|
0.9434817
|
535
|
Eukaryotic Translation Elongation
|
87
|
0.0000000
|
0.2634266
|
-0.4736059
|
0.0000216
|
0.0000000
|
0.5419373
|
0.5211606
|
0.0000000
|
0.0025713
|
0.0000000
|
1559
|
SARS-CoV-2 modulates host translation machinery
|
45
|
0.0000000
|
0.3147740
|
-0.4364706
|
0.0002583
|
0.0000004
|
0.5381350
|
0.5312101
|
0.0000039
|
0.0164821
|
0.0000901
|
619
|
Formation of the ternary complex, and subsequently, the 43S complex
|
45
|
0.0000000
|
0.2389816
|
-0.4742749
|
0.0055380
|
0.0000000
|
0.5310827
|
0.5043485
|
0.0000044
|
0.1230485
|
0.0000110
|
1613
|
Selenocysteine synthesis
|
86
|
0.0000000
|
0.2599420
|
-0.4581841
|
0.0000308
|
0.0000000
|
0.5267850
|
0.5077918
|
0.0000000
|
0.0034659
|
0.0000000
|
601
|
Formation of a pool of free 40S subunits
|
93
|
0.0000000
|
0.2185428
|
-0.4741562
|
0.0002695
|
0.0000000
|
0.5220967
|
0.4898121
|
0.0000000
|
0.0167954
|
0.0000000
|
1121
|
Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
|
88
|
0.0000000
|
0.2687791
|
-0.4392909
|
0.0000131
|
0.0000000
|
0.5149939
|
0.5006811
|
0.0000000
|
0.0018008
|
0.0000000
|
7382
|
GOBP REGULATION OF CD8 POSITIVE ALPHA BETA T CELL ACTIVATION
|
20
|
0.0006601
|
0.4697425
|
-0.2078714
|
0.0002755
|
0.1075319
|
0.5136814
|
0.4791454
|
0.0234883
|
0.0170837
|
0.6808506
|
9769
|
GOCC CYTOSOLIC LARGE RIBOSOMAL SUBUNIT
|
51
|
0.0000000
|
0.1824002
|
-0.4788838
|
0.0242250
|
0.0000000
|
0.5124446
|
0.4675984
|
0.0000013
|
0.2638957
|
0.0000011
|
2473
|
GOBP C21 STEROID HORMONE BIOSYNTHETIC PROCESS
|
20
|
0.0002081
|
0.4758241
|
0.1798146
|
0.0002292
|
0.1638758
|
0.5086668
|
0.2093103
|
0.0102470
|
0.0150505
|
0.7556604
|
1531
|
Response of EIF2AK4 (GCN2) to amino acid deficiency
|
94
|
0.0000000
|
0.2483127
|
-0.4416287
|
0.0000316
|
0.0000000
|
0.5066509
|
0.4878623
|
0.0000000
|
0.0035061
|
0.0000000
|
878
|
L13a-mediated translational silencing of Ceruloplasmin expression
|
102
|
0.0000000
|
0.2139250
|
-0.4582322
|
0.0001886
|
0.0000000
|
0.5057081
|
0.4752869
|
0.0000000
|
0.0132307
|
0.0000000
|
669
|
GTP hydrolysis and joining of the 60S ribosomal subunit
|
103
|
0.0000000
|
0.2127330
|
-0.4587180
|
0.0001907
|
0.0000000
|
0.5056456
|
0.4747875
|
0.0000000
|
0.0132992
|
0.0000000
|
# Heatmap of the per-contrast enrichment scores for the first 30 filtered
# gene sets. head() is used instead of 1:30 so that fewer than 30 surviving
# sets do not produce NA rows and a row-name length mismatch; the score
# columns are selected by name rather than by position.
top_hits <- head(m2resf, 30)
mx <- as.matrix(top_hits[, c("s.cancer", "s.incidence")])
rownames(mx) <- top_hits$set

# Diverging palette: blue (negative) - white (zero) - red (positive).
colfunc <- colorRampPalette(c("blue", "white", "red"))

# `margins` is spelled out in full (previously partially matched via `mar`)
# and `cexCol` is passed as a number rather than a string.
heatmap.2(
  mx,
  trace = "none",
  scale = "none",
  margins = c(5, 25),
  cexCol = 0.9,
  col = colfunc
)

# Write the full HTML report for the two-contrast analysis.
mitch_report(res = m2, outfile = "2dmitch2.html", overwrite = TRUE)
## Dataset saved as " /tmp/RtmpUUu8R3/2dmitch2.rds ".
##
##
## processing file: mitch.Rmd
## 1/34
## 2/34 [checklibraries]
## 3/34
## 4/34 [peek]
## 5/34
## 6/34 [metrics]
## 7/34
## 8/34 [scatterplot]
## 9/34
## 10/34 [contourplot]
## 11/34
## 12/34 [input_geneset_metrics1]
## 13/34
## 14/34 [input_geneset_metrics2]
## 15/34
## 16/34 [input_geneset_metrics3]
## 17/34
## 18/34 [echart1d]
## 19/34 [echart2d]
## 20/34
## 21/34 [heatmap]
## 22/34
## 23/34 [effectsize]
## 24/34
## 25/34 [results_table]
## 26/34
## 27/34 [results_table_complete]
## 28/34
## 29/34 [detailed_geneset_reports1d]
## 30/34
## 31/34 [detailed_geneset_reports2d]
## 32/34
## 33/34 [session_info]
## 34/34
## output file: /home/mark.ziemann@domain.internal.burnet.edu.au/projects/cancer_biomarkers/colorectal/GSE199057/mitch.knit.md
## /usr/bin/pandoc +RTS -K512m -RTS /home/mark.ziemann@domain.internal.burnet.edu.au/projects/cancer_biomarkers/colorectal/GSE199057/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpUUu8R3/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpUUu8R3/rmarkdown-str3241b042acef56.html
##
## Output created: /tmp/RtmpUUu8R3/mitch_report.html
## [1] TRUE