Let’s take a look at how pathway methylation associates with gene expression.
Partition methylation analysis between promoter and body.
Will need to do this at the annotation level.
Compare to GSAmeth.
suppressPackageStartupMessages({
library("DESeq2")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("tictoc")
library("limma")
library("mitch")
library("kableExtra")
library("beeswarm")
library("missMethyl")
library("HGNChelper")
library("gplots")
})
CORES=16
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","UCSC_RefGene_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
design <- readRDS("GSE158422_design.rds")
mval <- readRDS("GSE158422_mx.rds")
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
dm <- topTable(fit.reduced,coef=ncol(design), number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
dm <- runlimma(mval=mval,design=design,myann=myann)
## Coefficients not estimable: patientpat32
## Warning: Partial NA coefficients for 839473 probe(s)
## Warning in .ebayes(fit = fit, proportion = proportion, stdev.coef.lim =
## stdev.coef.lim, : Estimation of var.prior failed - set to default value
TFlink version 1.0 downloaded 29th Nov 2023.
gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")
gs_symbols <- gmt_import("TFLink_Homo_sapiens_interactions_All_GMT_proteinName_v1.0.gmt")
## Warning in readLines(gmtfile): incomplete final line found on
## 'TFLink_Homo_sapiens_interactions_All_GMT_proteinName_v1.0.gmt'
From GEO https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE158420 GSE158420_counts.txt.gz
x <- read.table("GSE158420_counts.txt.gz",row.names=NULL)
dim(x)
## [1] 18747 75
x <- aggregate(. ~ row.names,x,sum)
dim(x)
## [1] 18745 75
#RUFKM
head(x)
## row.names X10N X10T X11N X11T X12N X12T X14N X14T X15N X15T X16N X16T X17N
## 1 1-Dec 0 31 0 1 0 1 0 0 0 0 0 0 0
## 2 1-Mar 431 461 478 98 862 635 489 499 543 608 707 498 700
## 3 1-Sep 160 131 280 131 132 383 381 124 114 217 258 886 302
## 4 10-Mar 286 217 130 58 2 18 19 39 417 68 12 90 9
## 5 10-Sep 3541 4311 2174 3675 2856 1864 3306 1546 3507 2483 3568 1294 3658
## 6 11-Mar 0 40 6 1 0 0 0 0 0 1 0 0 0
## X17T X18N X18T X19N X19T X20N X20T X21N X21T X22N X22T X23N X23T X24N X24T
## 1 1 0 79 0 18 0 1 0 1 0 0 0 1 0 2
## 2 411 607 446 608 463 361 488 615 350 673 1838 297 262 762 529
## 3 162 133 279 138 88 239 533 212 53 258 126 197 229 137 236
## 4 122 123 313 70 127 1481 1010 97 7 94 21 588 694 92 24
## 5 542 3337 1050 4527 1865 2656 1314 4449 1060 3196 400 3499 1709 3280 3528
## 6 2 1 38 0 18 0 0 1 3 0 3 1 2 0 5
## X25N X25T X26N X26T X27N X27T X28N X28T X29N X29T X2N X2T X30N X30T X31N
## 1 1 0 0 3 0 2 0 2 0 0 0 0 0 0 0
## 2 534 860 451 1110 798 380 572 336 289 514 575 257 464 483 535
## 3 132 66 98 56 573 190 211 75 79 205 151 147 183 144 212
## 4 87 23 126 368 192 114 441 5 2242 19 23 80 616 24 589
## 5 3463 2832 2893 3445 2442 3703 3250 5465 2028 1971 3502 2354 2605 934 3291
## 6 4 3 0 3 0 4 0 0 2 1 0 0 1 3 1
## X31T X32N X32T X33N X33T X34N X34T X35N X35T X36N X36T X37N X37T X38N X38T
## 1 2 0 0 0 0 1 0 0 1 0 1 0 0 0 4
## 2 478 311 280 630 437 713 334 643 789 286 339 488 320 427 507
## 3 144 302 1086 231 363 185 290 125 199 61 355 59 121 92 203
## 4 28 48 27 4 11 956 13 640 63 654 19 1234 2076 380 185
## 5 4226 3135 1416 3072 1702 3356 2515 3034 2532 3678 502 1512 1324 2684 2542
## 6 5 0 0 0 1 1 0 0 1 1 6 0 6 0 9
## X39N X39T X3N X3T X4N X4T X5N X5T X6N X6T X7N X7T X8N X8T X9N
## 1 0 1 1 3 0 2 0 8 2 0 1 1 17 2 0
## 2 695 458 628 287 305 255 801 364 626 320 638 412 264 436 504
## 3 329 174 120 114 119 59 144 44 110 184 181 327 101 217 191
## 4 43 135 11 63 323 33 78 63 28 6 323 16 185 168 76
## 5 1770 1446 2921 1688 2692 2447 3367 4396 3830 2841 2623 3671 1614 2102 3507
## 6 1 2 0 10 2 3 0 1 0 0 2 1 7 9 0
## X9T
## 1 2
## 2 317
## 3 48
## 4 7
## 5 2482
## 6 5
There are some gene name errors, so I will use HGNChelper to repair them.
human=x$row.names
fix <- checkGeneSymbols(human)
## Maps last updated on: Thu Oct 24 12:31:05 2019
## Warning in checkGeneSymbols(human): Human gene symbols should be all upper-case
## except for the 'orf' in open reading frames. The case of some letters was
## corrected.
## Warning in checkGeneSymbols(human): x contains non-approved gene symbols
x$gene <- fix$Suggested.Symbol
x$row.names=NULL
x <- aggregate(. ~ gene,x,sum)
dim(x)
## [1] 18712 75
row.names(x)=x$gene
x$gene=NULL
head(x)
## X10N X10T X11N X11T X12N X12T X14N X14T X15N X15T X16N
## A1BG 120 217 148 39 73 87 146 95 88 69 72
## A1CF 8 29 22 2 0 1 4 0 2 3 1
## A2M 89330 6096 39181 3282 126977 89739 108730 40523 160543 179385 155740
## A2ML1 16 1307 50 2729 26 19 31 393 20 16 21
## A3GALT2 5 49 21 3 2 0 1 5 1 4 5
## A4GALT 304 1138 350 1439 219 149 178 809 446 150 300
## X16T X17N X17T X18N X18T X19N X19T X20N X20T X21N X21T
## A1BG 121 63 30 113 279 43 137 152 129 56 19
## A1CF 0 3 9 4 104 0 46 6 3 1 1
## A2M 39899 129176 16554 109697 15992 125658 3504 106485 59452 194434 10592
## A2ML1 17 15 114 13 591 10 5830 16 38 15 29
## A3GALT2 4 2 3 13 76 3 53 17 5 14 6
## A4GALT 249 203 286 296 787 209 1814 335 554 220 311
## X22N X22T X23N X23T X24N X24T X25N X25T X26N X26T X27N
## A1BG 92 12 106 85 68 101 107 15 50 46 110
## A1CF 1 10 2 5 3 1 3 7 2 5 1
## A2M 127707 7544 129002 107685 84494 8974 125875 3304 133114 23805 124283
## A2ML1 13 146 13 24 16 298 96 2730 7 257 9
## A3GALT2 1 6 4 1 8 11 17 0 4 13 9
## A4GALT 248 115 291 567 177 728 248 536 284 445 235
## X27T X28N X28T X29N X29T X2N X2T X30N X30T X31N X31T
## A1BG 155 143 64 204 125 82 117 97 127 115 39
## A1CF 1 2 4 3 4 0 0 2 6 1 1
## A2M 16776 163693 15056 56772 32450 110818 57261 242936 22835 157038 5604
## A2ML1 3891 74 67 19 50 8 11 8 28 31 93
## A3GALT2 16 6 1 11 6 6 6 19 1 10 8
## A4GALT 1059 586 3534 808 461 280 1505 723 236 442 481
## X32N X32T X33N X33T X34N X34T X35N X35T X36N X36T X37N
## A1BG 82 105 116 104 119 63 101 111 58 90 107
## A1CF 0 0 3 2 3 1 7 4 1 3 4
## A2M 158632 11656 64662 92680 122607 84313 190327 145504 148968 7674 38901
## A2ML1 6 130 42 9 17 31 22 33 9 33 24
## A3GALT2 14 3 6 10 10 12 8 14 1 47 5
## A4GALT 299 94 292 564 345 694 452 521 454 322 656
## X37T X38N X38T X39N X39T X3N X3T X4N X4T X5N X5T X6N
## A1BG 57 57 133 105 125 67 94 99 291 87 59 68
## A1CF 9 1 15 2 5 2 18 1 6 3 10 0
## A2M 65047 82116 123542 127464 91470 99241 9283 117914 5648 89583 2462 97346
## A2ML1 58 12 61 13 26 13 3833 57 499 37 1108 5
## A3GALT2 1 14 30 2 10 12 21 18 11 8 3 2
## A4GALT 949 588 934 209 473 359 1134 257 711 235 627 126
## X6T X7N X7T X8N X8T X9N X9T
## A1BG 27 108 86 194 84 77 8
## A1CF 0 2 6 35 9 1 0
## A2M 9188 98328 29242 15384 26414 121946 4447
## A2ML1 632 8 329 158 1072 13 2879
## A3GALT2 1 16 9 57 9 11 3
## A4GALT 1176 225 2035 498 491 160 1470
Now do DESeq2
patient <- head(as.character(unlist(lapply(1:ncol(x),function(i) {c(i,i)}))),ncol(x))
tumor <- rep(c(0,1),ncol(x)/2)
ss <- data.frame(patient,tumor)
dds <- DESeqDataSetFromMatrix(countData = x , colData=ss, design = ~ patient + tumor)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste("mRNA", SIG , "DEGs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3,
xlab="log2 base mean",
ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
,xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- rep(c(0,1),ncol(x)/2)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
RNA expression pathway enrichment with mitch.
rna <- mitch_import(dge, DEtype="deseq2")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 18712
## Note: no. genes in output = 18712
## Note: estimated proportion of input genes in output = 1
res <- mitch_calc(rna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mres <- res$enrichment_result
msig <- subset(mres,p.adjustANOVA<0.05)
head(subset(mres,p.adjustANOVA<0.05 & s.dist>0),20) %>%
kbl(caption="mRNA upregulated pathways") %>%
kable_styling("hover",full_width=FALSE)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
1249 | CENPT | 14 | 0.0004175 | 0.5446491 | 0.0010170 |
880 | HSD17B8 | 543 | 0.0000000 | 0.4753891 | 0.0000000 |
535 | LHX3 | 29 | 0.0000384 | 0.4416303 | 0.0001058 |
360 | SF1 | 14 | 0.0057786 | 0.4260356 | 0.0116316 |
1228 | ASXL2 | 38 | 0.0000227 | 0.3970910 | 0.0000645 |
374 | HSF4 | 16 | 0.0060925 | 0.3960407 | 0.0122240 |
1231 | MTHFD1 | 18 | 0.0242061 | 0.3068572 | 0.0437316 |
1216 | ZNF532 | 56 | 0.0000738 | 0.3062160 | 0.0001967 |
1157 | MCRS1 | 35 | 0.0126803 | 0.2434803 | 0.0241273 |
1134 | EPC1 | 329 | 0.0000000 | 0.1838819 | 0.0000000 |
1105 | NPAT | 220 | 0.0000056 | 0.1778055 | 0.0000170 |
1170 | GLYR1 | 95 | 0.0037518 | 0.1721129 | 0.0077386 |
1044 | CREB3L2 | 120 | 0.0012443 | 0.1706970 | 0.0027969 |
1032 | RBP2 | 1391 | 0.0000000 | 0.1650522 | 0.0000000 |
1101 | RIMBP2 | 1391 | 0.0000000 | 0.1650522 | 0.0000000 |
264 | KAT5 | 763 | 0.0000000 | 0.1636448 | 0.0000000 |
1195 | PTPRA | 100 | 0.0096656 | 0.1497930 | 0.0187327 |
947 | SNAPC1 | 1652 | 0.0000000 | 0.1478488 | 0.0000000 |
740 | TAF2 | 670 | 0.0000000 | 0.1387190 | 0.0000000 |
1058 | TASOR | 239 | 0.0003407 | 0.1346330 | 0.0008364 |
head(subset(mres,p.adjustANOVA<0.05 & s.dist<0),20) %>%
kbl(caption="mRNA downregulated pathways") %>%
kable_styling("hover",full_width=FALSE)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
177 | RFXAP | 13 | 0.0000002 | -0.8366295 | 0.0000006 |
133 | IRF8 | 14 | 0.0020492 | -0.4758677 | 0.0044159 |
409 | CREB5 | 12 | 0.0098821 | -0.4301171 | 0.0190932 |
458 | FOXD3 | 38 | 0.0028613 | -0.2795849 | 0.0059906 |
1052 | ZNF157 | 62 | 0.0029943 | -0.2180007 | 0.0062584 |
258 | NR1H4 | 363 | 0.0000000 | -0.2075237 | 0.0000000 |
944 | FOXJ2 | 519 | 0.0000000 | -0.1967947 | 0.0000000 |
1130 | ZNF165 | 73 | 0.0053910 | -0.1883998 | 0.0109040 |
1160 | ZNF37A | 121 | 0.0004662 | -0.1842558 | 0.0011290 |
1155 | PRMT5 | 68 | 0.0090503 | -0.1830774 | 0.0177046 |
444 | NR0B1 | 311 | 0.0000002 | -0.1729673 | 0.0000006 |
2 | PPARG | 15211 | 0.0000000 | -0.1627127 | 0.0000000 |
1210 | ZNF354B | 174 | 0.0002615 | -0.1605223 | 0.0006470 |
214 | HIC1 | 3712 | 0.0000000 | -0.1576092 | 0.0000000 |
24 | SOX10 | 243 | 0.0000243 | -0.1573508 | 0.0000686 |
22 | CEBPB | 15761 | 0.0000000 | -0.1537553 | 0.0000000 |
79 | CEBPA | 15247 | 0.0000000 | -0.1489676 | 0.0000000 |
13 | NR3C1 | 15401 | 0.0000000 | -0.1489010 | 0.0000000 |
52 | SPI1 | 15673 | 0.0000000 | -0.1475166 | 0.0000000 |
16 | RELA | 16478 | 0.0000000 | -0.1462964 | 0.0000000 |
Use Euler diagrams to find the similarity. As medians are normally distributed about the zero, one would expect equal numbers of up and down- methylated gene sets.
Use all probes on the chip.
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","UCSC_RefGene_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
gp2 <- lapply(gp2,unique)
gt <- stack(gp2)
colnames(gt) <- c("gene","probe")
gt$probe <- as.character(gt$probe)
dim(gt)
## [1] 684970 2
str(gt)
## 'data.frame': 684970 obs. of 2 variables:
## $ gene : chr "YTHDF1" "EIF2S3" "PKN3" "CCDC57" ...
## $ probe: chr "cg18478105" "cg09835024" "cg14361672" "cg01763666" ...
Here we will look at parallel analysis of gene body and promoter.
The approach here is to generate two mapping tables of probes to genes based on the annotation which is split into gene body and promoter (TSS).
gene body=gb
gene promoter (TSS) = gp
# partition body and promoter data(probes)
dmg <- strsplit(dm$UCSC_RefGene_Name,";")
body <- lapply(1:nrow(dm),function(i) {
a <- dm[i,"UCSC_RefGene_Group"]
a <- unlist(strsplit(a,";"))
dmg[[i]][a=="Body"]
})
names(body) <- rownames(dm)
body <- lapply(body,unique)
gb <- stack(body)
colnames(gb) <- c("gene","probe")
gb$probe <- as.character(gb$probe)
dim(gb)
## [1] 368271 2
str(gb)
## 'data.frame': 368271 obs. of 2 variables:
## $ gene : chr "DMRTA2" "DMRTA2" "DMRTA2" "SRPK2" ...
## $ probe: chr "cg09792881" "cg10512745" "cg16732616" "cg17627973" ...
tss <- lapply(1:nrow(dm),function(i) {
a <- dm[i,"UCSC_RefGene_Group"]
a <- unlist(strsplit(a,";"))
dmg[[i]][grep("TSS",a)]
})
names(tss) <- rownames(dm)
tss <- lapply(tss,unique)
gp <- stack(tss)
colnames(gp) <- c("gene","probe")
gp$probe <- as.character(gp$probe)
dim(gp)
## [1] 199765 2
str(gp)
## 'data.frame': 199765 obs. of 2 variables:
## $ gene : chr "GALK2" "MIR4716" "LZTS2" "CCNB1" ...
## $ probe: chr "cg02655980" "cg02655980" "cg27014672" "cg17160384" ...
# import for promoters
dmp <- dm[which(rownames(dm) %in% gp$probe),]
mp <- mitch_import(x=dmp,DEtype="limma",geneTable=gp)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 188962
## Note: no. genes in output = 24990
## Note: estimated proportion of input genes in output = 0.132
dim(dmp)
## [1] 188962 10
dim(mp)
## [1] 24990 1
# import bodies
dmb <- dm[which(rownames(dm) %in% gb$probe),]
mb <- mitch_import(x=dmb,DEtype="limma",geneTable=gb)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 351261
## Note: no. genes in output = 22858
## Note: estimated proportion of input genes in output = 0.0651
dim(dmb)
## [1] 351261 10
dim(mb)
## [1] 22858 1
# promoter
dmp_res <- mitch_calc(mp, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
dmp_res_up <- subset(dmp_res$enrichment_result,s.dist>0 & p.adjustANOVA < 0.05)
dmp_res_dn <- subset(dmp_res$enrichment_result,s.dist<0 & p.adjustANOVA < 0.05)
nrow(dmp_res_up)
## [1] 1096
nrow(dmp_res_dn)
## [1] 2
head(dmp_res_up,20)
## set setSize pANOVA s.dist p.adjustANOVA
## 1214 WRN 10 0.0125157161 0.4560208 0.0146485441
## 1224 NFE2L3 10 0.0197334909 0.4257326 0.0227974710
## 1197 ZNF488 26 0.0004991068 0.3943494 0.0006207796
## 240 VHL 20 0.0025352850 0.3899039 0.0030708394
## 298 FOXF2 15 0.0154650038 0.3610517 0.0179993061
## 701 KDM4A 12739 0.0000000000 0.3352152 0.0000000000
## 678 KDM2B 12936 0.0000000000 0.3228333 0.0000000000
## 667 RBBP5 13236 0.0000000000 0.3222435 0.0000000000
## 438 JARID2 7334 0.0000000000 0.3211671 0.0000000000
## 272 E2F6 13221 0.0000000000 0.3061079 0.0000000000
## 1 NANOG 12720 0.0000000000 0.3055690 0.0000000000
## 655 CHD1 12405 0.0000000000 0.3035609 0.0000000000
## 605 CNOT3 12871 0.0000000000 0.3013356 0.0000000000
## 185 HDAC2 14261 0.0000000000 0.3002746 0.0000000000
## 166 SMAD2 12935 0.0000000000 0.2966657 0.0000000000
## 91 KDM5B 14147 0.0000000000 0.2964276 0.0000000000
## 537 HEXIM1 12596 0.0000000000 0.2940421 0.0000000000
## 15 SP1 15079 0.0000000000 0.2937628 0.0000000000
## 251 SIN3A 13921 0.0000000000 0.2911591 0.0000000000
## 1109 HOXA13 40 0.0015439650 0.2892585 0.0018847229
head(dmp_res_dn,20)
## set setSize pANOVA s.dist p.adjustANOVA
## 261 IRF7 11 0.0018020966 -0.5433983 0.002191265
## 177 RFXAP 13 0.0009665156 -0.5285447 0.001189119
# body
dmb_res <- mitch_calc(mb, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
dmb_res_up <- subset(dmb_res$enrichment_result,s.dist>0 & p.adjustANOVA < 0.05)
dmb_res_dn <- subset(dmb_res$enrichment_result,s.dist<0 & p.adjustANOVA < 0.05)
nrow(dmb_res_up)
## [1] 1104
nrow(dmb_res_dn)
## [1] 2
head(dmb_res_up,20)
## set setSize pANOVA s.dist p.adjustANOVA
## 481 MDM2 11 1.399258e-02 0.4278859 1.625071e-02
## 1245 ZNF626 10 2.881592e-02 0.3991947 3.306527e-02
## 1226 MTHFD1 16 3.278283e-02 0.3082315 3.751344e-02
## 1178 THRAP3 19 2.155527e-02 0.3045184 2.489454e-02
## 1180 ZSCAN23 63 5.263558e-05 0.2944986 6.572849e-05
## 1230 HJURP 27 1.030733e-02 0.2852019 1.208345e-02
## 239 VHL 20 4.243846e-02 0.2620851 4.785859e-02
## 741 NELFA 9874 6.710675e-254 0.2591079 8.354790e-251
## 620 TAF3 9390 2.481772e-245 0.2565158 1.544903e-242
## 1134 TOX4 166 1.196774e-08 0.2562869 1.624846e-08
## 180 SMAD5 9329 9.762570e-238 0.2528270 2.430880e-235
## 462 NONO 10190 1.342811e-241 0.2519876 5.006923e-239
## 723 RBFOX2 10707 1.608650e-241 0.2509637 5.006923e-239
## 366 SP2 11451 1.094671e-236 0.2479817 2.271442e-234
## 1193 ZNF488 24 3.766334e-02 0.2450841 4.274463e-02
## 629 NELFE 11876 2.200783e-230 0.2448910 3.424968e-228
## 265 BCL3 11207 1.803718e-230 0.2447951 3.208041e-228
## 101 NFATC1 10409 5.243262e-228 0.2444305 6.527861e-226
## 423 HEY1 11220 1.040968e-229 0.2443908 1.440006e-227
## 661 INTS13 10021 6.722100e-226 0.2442023 6.437703e-224
head(dmb_res_dn,20)
## set setSize pANOVA s.dist p.adjustANOVA
## 133 IRF8 13 0.043199940 -0.3238379 0.048673236
## 1092 IGF1R 62 0.003163636 -0.2166905 0.003772726
# combined methylation
mm <- merge(mp,mb,by=0)
rownames(mm) <- mm$Row.names
mm$Row.names=NULL
colnames(mm) <- c("prom","body")
mm_res <- mitch_calc(mm, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mm_res_sig <- subset(mm_res$enrichment_result,p.adjustMANOVA < 0.01)
nrow(mm_res_sig)
## [1] 1054
head(mm_res_sig,20)
## set setSize pMANOVA s.prom s.body p.prom
## 177 RFXAP 13 1.155166e-03 -0.5872123 -0.2083208 2.459919e-04
## 1193 ZNF488 24 8.386077e-03 0.3324487 0.2363996 4.810521e-03
## 697 KDM4A 12259 0.000000e+00 0.2853565 0.2119217 5.985053e-294
## 663 RBBP5 12727 0.000000e+00 0.2708900 0.2180890 1.060644e-260
## 674 KDM2B 12473 0.000000e+00 0.2719977 0.2130008 5.641974e-265
## 651 CHD1 11941 2.964394e-322 0.2511634 0.2242708 3.358349e-228
## 270 E2F6 12698 5.346705e-309 0.2528606 0.2163478 7.706541e-227
## 166 SMAD2 12475 2.357748e-303 0.2414823 0.2238640 4.681071e-208
## 366 SP2 11373 1.061470e-304 0.2218890 0.2396866 4.821727e-179
## 608 ARID2 12074 5.042920e-297 0.2309310 0.2279607 5.237239e-192
## 1 NANOG 12212 3.317703e-298 0.2534945 0.2018511 3.621864e-231
## 585 BRD2 12592 7.059627e-287 0.2302465 0.2239248 2.647589e-188
## 91 KDM5B 13539 5.372265e-276 0.2415016 0.2108621 1.138512e-199
## 316 SP4 10892 1.134995e-292 0.2253548 0.2273366 3.658137e-185
## 601 CNOT3 12398 4.560167e-286 0.2497455 0.1971713 3.252513e-223
## 1180 ZSCAN23 63 3.437407e-04 0.1309716 0.2859484 7.219143e-02
## 185 HDAC2 13691 4.768326e-259 0.2444876 0.1920416 4.097242e-203
## 371 PHF8 11958 3.115510e-272 0.2194878 0.2198621 1.014276e-173
## 559 CTCFL 11818 3.255899e-273 0.2311264 0.2072968 2.806551e-193
## 250 SIN3A 13338 2.758976e-260 0.2359881 0.2008569 2.181508e-192
## p.body s.dist SD p.adjustMANOVA
## 177 1.934217e-01 0.6230696 0.2679167400 1.399247e-03
## 1193 4.498612e-02 0.4079300 0.0679169764 9.897799e-03
## 697 7.937861e-161 0.3554422 0.0519262502 0.000000e+00
## 663 4.366002e-168 0.3477703 0.0373359203 0.000000e+00
## 674 1.477317e-161 0.3454737 0.0417170931 0.000000e+00
## 651 1.731807e-181 0.3367202 0.0190159209 9.219265e-320
## 270 1.476572e-165 0.3327835 0.0258184380 1.330260e-306
## 166 1.510355e-178 0.3292853 0.0124580618 4.190055e-301
## 366 3.154828e-209 0.3266258 0.0125847935 2.200780e-302
## 608 4.677414e-187 0.3244922 0.0021002859 6.970436e-295
## 1 4.957409e-146 0.3240422 0.0365173880 5.159029e-296
## 585 5.008647e-178 0.3211787 0.0044701188 7.983796e-285
## 91 5.942560e-152 0.3206023 0.0216653625 5.140844e-274
## 316 1.836731e-188 0.3201042 0.0014013617 1.411934e-290
## 601 1.113447e-138 0.3181970 0.0371755896 4.727373e-284
## 1180 8.640653e-05 0.3145156 0.1095851229 4.237993e-04
## 185 4.413642e-125 0.3108925 0.0370848806 2.579042e-257
## 371 2.566498e-174 0.3106674 0.0002646685 2.583796e-270
## 559 2.892999e-155 0.3104696 0.0168500536 2.893099e-271
## 250 3.671072e-139 0.3098933 0.0248414989 1.634365e-258
table(sign(mm_res_sig$s.prom) == sign(mm_res_sig$s.body) )
##
## FALSE TRUE
## 4 1050
mm_res_sig[sign(mm_res_sig$s.prom) != sign(mm_res_sig$s.body),]
## set setSize pMANOVA s.prom s.body p.prom
## 1092 IGF1R 61 4.448704e-04 0.122068201 -0.2195118 0.09915972
## 1006 HOXA2 2102 7.525052e-26 -0.006954174 0.1346245 0.59974715
## 1181 DLX2 214 4.997303e-03 -0.031155186 0.1116661 0.43218521
## 950 BHLHE22 1032 7.035224e-09 -0.015110472 0.1031525 0.41193907
## p.body s.dist SD p.adjustMANOVA
## 1092 3.022394e-03 0.2511694 0.24153350 5.457779e-04
## 1006 2.674499e-24 0.1348040 0.10011124 1.291195e-25
## 1181 4.870658e-03 0.1159308 0.10098987 5.931913e-03
## 950 2.105805e-08 0.1042533 0.08362452 9.627963e-09
# three way
mmrna <- merge(mm,rna,by=0)
rownames(mmrna) <- mmrna$Row.names
mmrna$Row.names = NULL
colnames(mmrna) <- c("prom","body","rna")
mmrnares <- mitch_calc(mmrna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mmrnares_sig <- subset(mmrnares$enrichment_result, p.adjustMANOVA < 0.05)
head(mmrnares_sig,20)
## set setSize pMANOVA s.prom s.body s.rna
## 177 RFXAP 13 2.953806e-10 -0.6528248 -0.2374402 -0.82782991
## 42 ERG 14672 1.113936e-182 0.4863765 0.3577331 -0.13001774
## 269 EP300 14829 3.972829e-147 0.4773192 0.3564608 -0.15262678
## 15 SP1 14181 1.786214e-267 0.4802300 0.3743091 -0.08391186
## 133 IRF8 13 2.508094e-03 -0.2136083 -0.3793063 -0.42881605
## 93 MAX 14403 2.622820e-220 0.4641287 0.3729867 -0.11610274
## 21 MYC 14715 1.692321e-144 0.4218486 0.3727288 -0.10445967
## 11 YY1 14086 9.555399e-240 0.4228830 0.3580224 -0.12283906
## 71 POU5F1 13961 2.892494e-258 0.4443434 0.3443150 -0.04446454
## 17 RUNX1 14680 7.462646e-140 0.4186383 0.3345439 -0.13649067
## 330 SMARCA4 14653 3.131440e-141 0.3885634 0.3590508 -0.14432625
## 14 AR 14933 1.316972e-98 0.4190605 0.3293937 -0.10978549
## 165 CTCF 15218 1.336216e-55 0.4122217 0.3430179 -0.08361652
## 664 BRD4 15092 4.883414e-74 0.4089720 0.3376228 -0.11194095
## 107 TRPS1 14306 4.539320e-187 0.4122884 0.3403176 -0.07309498
## 91 KDM5B 13315 4.272586e-304 0.3805958 0.3664182 -0.06975344
## 196 FOXA2 14724 1.194148e-123 0.4105297 0.3140799 -0.11393594
## 37 FLI1 14078 9.438875e-206 0.3861481 0.3374338 -0.12132261
## 185 HDAC2 13464 1.316176e-280 0.4044546 0.3277201 -0.07250439
## 255 KMT2A 14041 3.225283e-200 0.3800293 0.3254542 -0.12752779
## p.prom p.body p.rna s.dist SD p.adjustMANOVA
## 177 4.513830e-05 1.380500e-01 2.217989e-07 1.0806759 0.3032412 4.388964e-10
## 42 6.201382e-140 3.008630e-76 2.858979e-11 0.6176080 0.3251645 1.474188e-181
## 269 5.626950e-111 2.143671e-61 9.573710e-13 0.6149739 0.3343174 2.792203e-146
## 15 8.690332e-200 1.281502e-120 1.833936e-07 0.6146294 0.2998449 1.481367e-265
## 133 1.814761e-01 1.781773e-02 7.308879e-03 0.6110523 0.1127101 3.095307e-03
## 93 9.847666e-160 2.703185e-102 2.355299e-11 0.6066419 0.3120320 7.587878e-219
## 21 1.416550e-98 1.361530e-76 1.840565e-07 0.5725337 0.2907238 1.131853e-143
## 11 3.973626e-164 2.230585e-118 3.731959e-15 0.5675382 0.2981183 4.571891e-238
## 71 1.526213e-195 5.845100e-116 3.211642e-03 0.5638891 0.2582274 1.999035e-256
## 17 1.947167e-100 1.973023e-64 3.947331e-12 0.5529985 0.2991972 4.688652e-139
## 330 2.061108e-89 2.426151e-76 1.039839e-13 0.5483877 0.2995082 2.007995e-140
## 14 3.615822e-74 1.284736e-45 1.712011e-06 0.5442102 0.2830184 5.184536e-98
## 165 1.047065e-41 9.177799e-29 5.619632e-03 0.5427520 0.2685336 3.399289e-55
## 664 3.074220e-54 7.960560e-37 1.947578e-05 0.5420129 0.2824148 1.499992e-73
## 107 3.735314e-135 5.873052e-92 1.346868e-05 0.5395745 0.2619436 6.722517e-186
## 91 1.349855e-189 7.864202e-176 9.400265e-08 0.5328987 0.2560147 7.592995e-302
## 196 3.195495e-93 6.380803e-55 1.480742e-08 0.5293035 0.2791548 5.942079e-123
## 37 9.121981e-138 1.602391e-104 6.855524e-15 0.5269641 0.2799872 2.096779e-204
## 185 5.336679e-203 3.062454e-133 7.025785e-08 0.5255862 0.2561112 1.364436e-278
## 255 6.422479e-135 5.275665e-99 1.283732e-16 0.5163391 0.2786232 6.368654e-199
table(sign(mmrnares_sig$s.prom) == sign(mmrnares_sig$s.body))
##
## FALSE TRUE
## 56 1013
table(sign(mmrnares_sig$s.prom) == sign(mmrnares_sig$s.rna))
##
## FALSE TRUE
## 701 368
table(sign(mmrnares_sig$s.body) == sign(mmrnares_sig$s.rna))
##
## FALSE TRUE
## 701 368
mtop <- head(mmrnares_sig,30)
mtop <- mtop[,c("set","s.prom","s.body","s.rna")]
rownames(mtop) <- gsub("REACTOME_","",mtop$set)
rownames(mtop) <- gsub("_"," ",rownames(mtop))
mtop$set=NULL
colnames(mtop) <- c("promoter","body","RNA")
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(mtop) , trace="none",margins = c(6,25), col=colfunc(25), cexRow=.6, cexCol=1)
# rna vs promoter
mprna <- merge(mp,rna,by=0)
rownames(mprna) <- mprna$Row.names
mprna$Row.names = NULL
colnames(mprna) <- c("prom","rna")
mprnares <- mitch_calc(mprna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mprnares_sig <- subset(mprnares$enrichment_result, p.adjustMANOVA < 0.05)
head(mprnares_sig,20)
## set setSize pMANOVA s.prom s.rna p.prom
## 177 RFXAP 13 1.854082e-10 -0.6073334 -0.831907020 1.445399e-04
## 261 IRF7 11 2.363767e-03 -0.6018513 -0.008236169 5.315952e-04
## 271 EP300 15672 3.434136e-264 0.5736062 -0.148303991 1.132827e-256
## 42 ERG 15483 8.558112e-288 0.5608239 -0.124535496 5.232420e-283
## 15 SP1 14791 0.000000e+00 0.5573057 -0.093585815 0.000000e+00
## 93 MAX 15085 0.000000e+00 0.5489568 -0.117343447 0.000000e+00
## 165 CTCF 16266 1.004809e-121 0.5400668 -0.086559595 2.920189e-122
## 667 BRD4 16059 3.518865e-158 0.5363258 -0.106154144 3.258640e-157
## 17 RUNX1 15456 5.251988e-252 0.5224803 -0.128872889 5.782818e-246
## 21 MYC 15514 1.964655e-241 0.5245171 -0.104242787 3.594213e-239
## 11 YY1 14707 0.000000e+00 0.5137106 -0.127988711 0.000000e+00
## 14 AR 15880 2.575526e-175 0.5140811 -0.120628325 2.998095e-172
## 196 FOXA2 15578 7.358478e-222 0.5117560 -0.111985784 1.038224e-218
## 332 SMARCA4 15416 5.597843e-240 0.5015488 -0.136507906 4.186886e-232
## 107 TRPS1 15025 1.196606e-290 0.5036390 -0.083754396 9.792291e-290
## 71 POU5F1 14644 0.000000e+00 0.5054753 -0.065933249 0.000000e+00
## 715 RAD21 15158 3.487948e-261 0.4857973 -0.139801546 8.646851e-251
## 5 ESR1 16276 2.586595e-100 0.4957708 -0.077420875 4.883132e-101
## 878 HSD17B8 512 1.368324e-84 0.1388505 0.474145419 9.906764e-08
## 37 FLI1 14724 4.752574e-303 0.4757113 -0.126731185 2.616210e-293
## p.rna s.dist SD p.adjustMANOVA
## 177 1.884981e-07 1.0300112 0.1587976 3.209284e-10
## 261 9.621671e-01 0.6019076 0.4197493 3.158438e-03
## 271 1.818680e-18 0.5924679 0.5104676 1.020429e-262
## 42 3.738616e-15 0.5744846 0.4846223 3.814473e-286
## 15 2.283641e-12 0.5651088 0.4602498 0.000000e+00
## 93 1.507517e-16 0.5613583 0.4711455 0.000000e+00
## 165 1.591540e-04 0.5469595 0.4430918 7.693759e-121
## 667 1.278952e-07 0.5467304 0.4543019 3.721647e-157
## 17 2.528792e-16 0.5381393 0.4605762 1.310896e-250
## 21 7.550486e-11 0.5347755 0.4446004 4.301561e-240
## 11 1.742617e-22 0.5294145 0.4537499 0.000000e+00
## 14 5.522880e-11 0.5280441 0.4488073 3.151232e-174
## 196 7.321391e-12 0.5238654 0.4410520 1.350497e-220
## 332 1.609711e-18 0.5197938 0.4511743 1.204501e-238
## 107 2.424772e-09 0.5105556 0.4153498 5.973455e-289
## 71 3.742227e-07 0.5097573 0.4040469 0.000000e+00
## 715 4.429792e-22 0.5055131 0.4423652 9.893088e-260
## 5 7.931646e-04 0.5017795 0.4053077 1.663954e-99
## 878 5.811396e-76 0.4940580 0.2370893 7.869438e-84
## 37 5.986314e-22 0.4923028 0.4259911 2.696005e-301
table(sign(mprnares_sig$s.prom) == sign(mmrnares_sig$s.rna))
## Warning in sign(mprnares_sig$s.prom) == sign(mmrnares_sig$s.rna): longer object
## length is not a multiple of shorter object length
##
## FALSE TRUE
## 708 361
mprnares_sig <- subset(mprnares$enrichment_result, p.adjustMANOVA < 0.05)
mylm <- lm(mprnares$enrichment_result$s.rna ~ mprnares$enrichment_result$s.prom)
summary(mylm)
##
## Call:
## lm(formula = mprnares$enrichment_result$s.rna ~ mprnares$enrichment_result$s.prom)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.91560 -0.03015 0.00003 0.02969 0.50705
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.011211 0.003057 -3.667 0.000256 ***
## mprnares$enrichment_result$s.prom -0.156263 0.018405 -8.490 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07148 on 1246 degrees of freedom
## Multiple R-squared: 0.05469, Adjusted R-squared: 0.05393
## F-statistic: 72.08 on 1 and 1246 DF, p-value: < 2.2e-16
mylm2 <- lm(mprnares_sig$s.rna ~ mprnares_sig$s.pr)
summary(mylm2)
##
## Call:
## lm(formula = mprnares_sig$s.rna ~ mprnares_sig$s.pr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.89914 -0.02877 -0.00015 0.02915 0.50926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.016074 0.003357 -4.789 1.93e-06 ***
## mprnares_sig$s.pr -0.137160 0.018794 -7.298 5.87e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06553 on 1017 degrees of freedom
## Multiple R-squared: 0.04977, Adjusted R-squared: 0.04883
## F-statistic: 53.26 on 1 and 1017 DF, p-value: 5.874e-13
plot(mprnares$enrichment_result$s.prom,mprnares$enrichment_result$s.rna,xlab="promoter",
ylab="RNA", main="S dist",col="gray",pch=19)
abline(v=0,h=0,lty=2,lwd=2)
points(mprnares_sig$s.pr,mprnares_sig$s.rna,col="black",pch=19,cex=1.01)
abline(mylm,col="blue",lty=3,lwd=3)
abline(mylm2,col="red",lty=3,lwd=3)
# rna vs body
mbrna <- merge(mb,rna,by=0)
rownames(mbrna) <- mbrna$Row.names
mbrna$Row.names = NULL
colnames(mbrna) <- c("body","rna")
mbrnares <- mitch_calc(mbrna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mbrnares_sig <- subset(mbrnares$enrichment_result, p.adjustMANOVA < 0.05)
head(mbrnares_sig,20)
## set setSize pMANOVA s.body s.rna p.body
## 177 RFXAP 13 3.841603e-07 -0.2353477 -0.82821163 1.416833e-01
## 133 IRF8 13 1.405371e-03 -0.3762280 -0.42874475 1.879000e-02
## 875 HSD17B8 502 2.324304e-80 0.1229486 0.47799883 2.565505e-06
## 357 SF1 13 3.758007e-02 -0.1522242 0.38493753 3.419546e-01
## 1223 ASXL2 37 1.347171e-04 0.0224851 0.39869908 8.121565e-01
## 1178 THRAP3 17 1.795353e-02 0.3934496 -0.06465052 4.939981e-03
## 93 MAX 14496 5.431456e-115 0.3689987 -0.10804455 5.684632e-109
## 21 MYC 14811 1.532353e-87 0.3705879 -0.09426064 9.946768e-85
## 330 SMARCA4 14755 6.541704e-91 0.3551120 -0.13439763 1.862180e-82
## 269 EP300 14928 1.004801e-75 0.3540605 -0.13513569 1.110976e-68
## 15 SP1 14271 2.233678e-128 0.3683445 -0.07924802 1.322223e-125
## 11 YY1 14174 3.942318e-135 0.3570088 -0.11488315 4.674354e-126
## 42 ERG 14766 1.544408e-89 0.3550983 -0.11831085 1.541742e-83
## 91 KDM5B 13392 3.146443e-186 0.3655321 -0.06573867 1.103747e-183
## 95 ELF1 13528 2.955067e-165 0.3484921 -0.08661465 3.276820e-159
## 46 ETS1 13486 7.634810e-166 0.3370477 -0.12106154 6.137138e-151
## 17 RUNX1 14777 1.318562e-78 0.3343106 -0.12547130 1.386445e-71
## 37 FLI1 14166 1.614331e-121 0.3382579 -0.11371074 1.632756e-112
## 86 JUN 13982 9.089315e-127 0.3223518 -0.13301912 7.329458e-112
## 255 KMT2A 14127 3.664749e-117 0.3256942 -0.12155889 3.708000e-106
## p.rna s.dist SD p.adjustMANOVA
## 177 2.183579e-07 0.8610012 0.41921811 5.326053e-07
## 133 7.312456e-03 0.5704118 0.03713493 1.758479e-03
## 875 1.631980e-75 0.4935577 0.25105842 8.038218e-80
## 357 1.603342e-02 0.4139434 0.37983071 4.401429e-02
## 1223 2.604874e-05 0.3993326 0.26602346 1.743480e-04
## 1178 6.437889e-01 0.3987259 0.32392573 2.128776e-02
## 93 9.386637e-11 0.3844914 0.33732049 2.965861e-114
## 21 6.814148e-07 0.3823879 0.32869758 5.694864e-87
## 330 3.773621e-13 0.3796936 0.34613558 2.545132e-90
## 269 1.930071e-11 0.3789729 0.34591392 3.266260e-75
## 15 3.374955e-07 0.3767730 0.31649567 1.519633e-127
## 11 2.894534e-14 0.3750378 0.33367796 3.146273e-134
## 42 1.788840e-10 0.3742890 0.33475083 5.880085e-89
## 91 2.618396e-07 0.3713964 0.30495451 1.305774e-184
## 95 3.516014e-11 0.3590945 0.30766696 6.131763e-164
## 46 9.822218e-21 0.3581299 0.32393214 1.667603e-164
## 17 1.868945e-11 0.3570807 0.32511490 4.377625e-78
## 37 4.565842e-14 0.3568593 0.31959010 9.435877e-121
## 86 2.160265e-20 0.3487187 0.32199586 5.893853e-126
## 255 3.623004e-16 0.3476396 0.31625567 2.064530e-116
table(sign(mbrnares_sig$s.body) == sign(mbrnares_sig$s.rna))
##
## FALSE TRUE
## 697 370
mbrnares_sig <- subset(mbrnares$enrichment_result, p.adjustMANOVA < 0.05)
mylm <- lm(mbrnares$enrichment_result$s.rna ~ mbrnares$enrichment_result$s.body)
summary(mylm)
##
## Call:
## lm(formula = mbrnares$enrichment_result$s.rna ~ mbrnares$enrichment_result$s.body)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83556 -0.03592 -0.00152 0.03305 0.49388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.007912 0.004094 -1.932 0.05353 .
## mbrnares$enrichment_result$s.body -0.064854 0.023093 -2.808 0.00506 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07263 on 1243 degrees of freedom
## Multiple R-squared: 0.006305, Adjusted R-squared: 0.005506
## F-statistic: 7.887 on 1 and 1243 DF, p-value: 0.005057
mylm2 <- lm(mbrnares_sig$s.rna ~ mbrnares_sig$s.body)
summary(mylm2)
##
## Call:
## lm(formula = mbrnares_sig$s.rna ~ mbrnares_sig$s.body)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83556 -0.03378 -0.00100 0.03080 0.49444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.008279 0.004756 -1.741 0.08204 .
## mbrnares_sig$s.body -0.066392 0.025700 -2.583 0.00992 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06623 on 1065 degrees of freedom
## Multiple R-squared: 0.006227, Adjusted R-squared: 0.005294
## F-statistic: 6.674 on 1 and 1065 DF, p-value: 0.009918
plot(mbrnares$enrichment_result$s.body,mbrnares$enrichment_result$s.rna,xlab="ex-promoter",
ylab="RNA",main="S dist",col="gray",pch=19)
abline(v=0,h=0,lty=2,lwd=2)
points(mbrnares_sig$s.body,mbrnares_sig$s.rna,col="black",pch=19,cex=1.01)
abline(mylm,col="blue",lty=3,lwd=3)
abline(mylm2,col="red",lty=3,lwd=3)
Now look at GSAmeth.
# body
sigup <- rownames(subset(dm,logFC > 0 & adj.P.Val < 0.05))
sigdn <- rownames(subset(dm,logFC < 0 & adj.P.Val < 0.05))
gsaup <- gsameth(sig.cpg=sigup, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = "Body")
rownames(gsaup) <- paste(rownames(gsaup),"UP")
gsadn <- gsameth(sig.cpg=sigdn, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = "Body")
rownames(gsadn) <- paste(rownames(gsadn),"DN")
gsig_up <- subset(gsaup,FDR<0.05)
gsig_dn <- subset(gsadn,FDR<0.05)
gsig <- rbind(gsig_up,gsig_dn)
nrow(gsig)
## [1] 84
gsig_body <- gsig
gsig_body_up <- gsig_up
gsig_body_dn <- gsig_dn
# promoter
TSS = c("TSS200", "TSS1500")
gsaup <- gsameth(sig.cpg=sigup, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = TSS)
rownames(gsaup) <- paste(rownames(gsaup),"UP")
gsadn <- gsameth(sig.cpg=sigdn, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = TSS)
rownames(gsadn) <- paste(rownames(gsadn),"DN")
gsig_up <- subset(gsaup,FDR<0.05)
gsig_dn <- subset(gsadn,FDR<0.05)
gsig <- rbind(gsig_up,gsig_dn)
nrow(gsig)
## [1] 23
gsig_promoter <- gsig
gsig_promoter_up <- gsig_up
gsig_promoter_dn <- gsig_dn
Now make a Euler plot for GSA meth-RNA comparison.
rna_up <- subset(msig,s.dist>0)$set
rna_dn <- subset(msig,s.dist<0)$set
gsig_body_up <- rownames(gsig_body_up)
gsig_body_dn <- rownames(gsig_body_dn)
gsig_promoter_up <- rownames(gsig_promoter_up)
gsig_promoter_dn <- rownames(gsig_promoter_dn)
vg <- list("RNA up"=rna_up,"RNA dn"=rna_dn,
"GSA body up"=gsig_body_up, "GSA body dn"=gsig_body_dn,
"GSA promoter up"=gsig_promoter_up, "GSA prom dn"=gsig_promoter_dn)
plot(euler(vg),quantities=TRUE)
Now make a Euler plot for LAM-RNA comparison.
dmp_res_up <- dmp_res_up$set
dmp_res_dn <- dmp_res_dn$set
dmb_res_up <- dmb_res_up$set
dmb_res_dn <- dmb_res_dn$set
vm <- list("RNA up"=rna_up,"RNA dn"=rna_dn,
"LAM body up"=dmb_res_up, "LAM body dn"=dmb_res_dn,
"LAM promoter up"=dmp_res_up, "LAM prom dn"=dmp_res_dn)
plot(euler(vm),quantities=TRUE)
Although there were some differentially methylated pathways analysed with different methods, they were not concordant with differential expression.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gplots_3.1.3
## [2] HGNChelper_0.8.1
## [3] missMethyl_1.36.0
## [4] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [5] beeswarm_0.4.0
## [6] kableExtra_1.3.4
## [7] mitch_1.15.0
## [8] limma_3.58.1
## [9] tictoc_1.2
## [10] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [11] IlluminaHumanMethylation450kmanifest_0.4.0
## [12] minfi_1.48.0
## [13] bumphunter_1.44.0
## [14] locfit_1.5-9.8
## [15] iterators_1.0.14
## [16] foreach_1.5.2
## [17] Biostrings_2.70.1
## [18] XVector_0.42.0
## [19] eulerr_7.0.0
## [20] DESeq2_1.42.0
## [21] SummarizedExperiment_1.32.0
## [22] Biobase_2.62.0
## [23] MatrixGenerics_1.14.0
## [24] matrixStats_1.1.0
## [25] GenomicRanges_1.54.1
## [26] GenomeInfoDb_1.38.1
## [27] IRanges_2.36.0
## [28] S4Vectors_0.40.1
## [29] BiocGenerics_0.48.1
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.2 later_1.3.1
## [3] BiocIO_1.12.0 bitops_1.0-7
## [5] filelock_1.0.2 BiasedUrn_2.0.11
## [7] tibble_3.2.1 polyclip_1.10-6
## [9] preprocessCore_1.64.0 XML_3.99-0.15
## [11] lifecycle_1.0.4 lattice_0.22-5
## [13] MASS_7.3-60 base64_2.0.1
## [15] scrime_1.3.5 magrittr_2.0.3
## [17] sass_0.4.7 rmarkdown_2.25
## [19] jquerylib_0.1.4 yaml_2.3.7
## [21] httpuv_1.6.12 doRNG_1.8.6
## [23] askpass_1.2.0 DBI_1.1.3
## [25] RColorBrewer_1.1-3 abind_1.4-5
## [27] zlibbioc_1.48.0 rvest_1.0.3
## [29] quadprog_1.5-8 purrr_1.0.2
## [31] RCurl_1.98-1.13 rappdirs_0.3.3
## [33] GenomeInfoDbData_1.2.11 genefilter_1.84.0
## [35] annotate_1.80.0 svglite_2.1.2
## [37] DelayedMatrixStats_1.24.0 codetools_0.2-19
## [39] DelayedArray_0.28.0 xml2_1.3.5
## [41] tidyselect_1.2.0 beanplot_1.3.1
## [43] BiocFileCache_2.10.1 webshot_0.5.5
## [45] illuminaio_0.44.0 GenomicAlignments_1.38.0
## [47] jsonlite_1.8.7 multtest_2.58.0
## [49] ellipsis_0.3.2 survival_3.5-7
## [51] systemfonts_1.0.5 polylabelr_0.2.0
## [53] tools_4.3.2 progress_1.2.2
## [55] Rcpp_1.0.11 glue_1.6.2
## [57] gridExtra_2.3 SparseArray_1.2.2
## [59] xfun_0.41 dplyr_1.1.3
## [61] HDF5Array_1.30.0 fastmap_1.1.1
## [63] GGally_2.1.2 rhdf5filters_1.14.1
## [65] fansi_1.0.5 openssl_2.1.1
## [67] caTools_1.18.2 digest_0.6.33
## [69] R6_2.5.1 mime_0.12
## [71] colorspace_2.1-0 gtools_3.9.4
## [73] biomaRt_2.58.0 RSQLite_2.3.3
## [75] utf8_1.2.4 tidyr_1.3.0
## [77] generics_0.1.3 data.table_1.14.8
## [79] rtracklayer_1.62.0 prettyunits_1.2.0
## [81] httr_1.4.7 htmlwidgets_1.6.2
## [83] S4Arrays_1.2.0 pkgconfig_2.0.3
## [85] gtable_0.3.4 blob_1.2.4
## [87] siggenes_1.76.0 htmltools_0.5.7
## [89] echarts4r_0.4.5 scales_1.2.1
## [91] png_0.1-8 rstudioapi_0.15.0
## [93] knitr_1.45 reshape2_1.4.4
## [95] tzdb_0.4.0 rjson_0.2.21
## [97] nlme_3.1-163 curl_5.1.0
## [99] org.Hs.eg.db_3.18.0 cachem_1.0.8
## [101] rhdf5_2.46.0 stringr_1.5.0
## [103] KernSmooth_2.23-22 AnnotationDbi_1.64.1
## [105] restfulr_0.0.15 GEOquery_2.70.0
## [107] pillar_1.9.0 grid_4.3.2
## [109] reshape_0.8.9 vctrs_0.6.4
## [111] promises_1.2.1 dbplyr_2.4.0
## [113] xtable_1.8-4 evaluate_0.23
## [115] readr_2.1.4 GenomicFeatures_1.54.1
## [117] cli_3.6.1 compiler_4.3.2
## [119] Rsamtools_2.18.0 rlang_1.1.2
## [121] crayon_1.5.2 rngtools_1.5.2
## [123] nor1mix_1.3-0 mclust_6.0.0
## [125] plyr_1.8.9 stringi_1.7.12
## [127] viridisLite_0.4.2 BiocParallel_1.36.0
## [129] munsell_0.5.0 Matrix_1.6-2
## [131] hms_1.1.3 sparseMatrixStats_1.14.0
## [133] bit64_4.0.5 ggplot2_3.4.4
## [135] Rhdf5lib_1.24.0 KEGGREST_1.42.0
## [137] statmod_1.5.0 shiny_1.7.5.1
## [139] highr_0.10 memoise_2.0.1
## [141] bslib_0.5.1 bit_4.0.5