Let’s take a look at how pathway methylation associates with gene expression.
Partition methylation analysis between promoter and body.
Will need to do this at the annotation level.
Compare to GSAmeth.
suppressPackageStartupMessages({
library("DESeq2")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("HGNChelper")
library("tictoc")
library("limma")
library("mitch")
library("kableExtra")
library("beeswarm")
library("missMethyl")
library("HGNChelper")
library("gplots")
library("gridExtra")
library("png")
})
CORES=16
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","UCSC_RefGene_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
design <- readRDS("GSE158422_design.rds")
mval <- readRDS("GSE158422_mx.rds")
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
dm <- topTable(fit.reduced,coef=ncol(design), number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
dm <- runlimma(mval=mval,design=design,myann=myann)
## Coefficients not estimable: patientpat32
## Warning: Partial NA coefficients for 839473 probe(s)
## Warning in .ebayes(fit = fit, proportion = proportion, stdev.coef.lim =
## stdev.coef.lim, : Estimation of var.prior failed - set to default value
Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.
gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")
gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")
From GEO https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE158420 GSE158420_counts.txt.gz
if (!file.exists("GSE158420_counts.txt.gz")) {
download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158420&format=file&file=GSE158420%5Fcounts%2Etxt%2Egz",
destfile="GSE158420_counts.txt.gz")
}
x <- read.table("GSE158420_counts.txt.gz",row.names=NULL)
dim(x)
## [1] 18747 75
x <- aggregate(. ~ row.names,x,sum)
dim(x)
## [1] 18745 75
#RUFKM
head(x)
## row.names X10N X10T X11N X11T X12N X12T X14N X14T X15N X15T X16N X16T X17N
## 1 1-Dec 0 31 0 1 0 1 0 0 0 0 0 0 0
## 2 1-Mar 431 461 478 98 862 635 489 499 543 608 707 498 700
## 3 1-Sep 160 131 280 131 132 383 381 124 114 217 258 886 302
## 4 10-Mar 286 217 130 58 2 18 19 39 417 68 12 90 9
## 5 10-Sep 3541 4311 2174 3675 2856 1864 3306 1546 3507 2483 3568 1294 3658
## 6 11-Mar 0 40 6 1 0 0 0 0 0 1 0 0 0
## X17T X18N X18T X19N X19T X20N X20T X21N X21T X22N X22T X23N X23T X24N X24T
## 1 1 0 79 0 18 0 1 0 1 0 0 0 1 0 2
## 2 411 607 446 608 463 361 488 615 350 673 1838 297 262 762 529
## 3 162 133 279 138 88 239 533 212 53 258 126 197 229 137 236
## 4 122 123 313 70 127 1481 1010 97 7 94 21 588 694 92 24
## 5 542 3337 1050 4527 1865 2656 1314 4449 1060 3196 400 3499 1709 3280 3528
## 6 2 1 38 0 18 0 0 1 3 0 3 1 2 0 5
## X25N X25T X26N X26T X27N X27T X28N X28T X29N X29T X2N X2T X30N X30T X31N
## 1 1 0 0 3 0 2 0 2 0 0 0 0 0 0 0
## 2 534 860 451 1110 798 380 572 336 289 514 575 257 464 483 535
## 3 132 66 98 56 573 190 211 75 79 205 151 147 183 144 212
## 4 87 23 126 368 192 114 441 5 2242 19 23 80 616 24 589
## 5 3463 2832 2893 3445 2442 3703 3250 5465 2028 1971 3502 2354 2605 934 3291
## 6 4 3 0 3 0 4 0 0 2 1 0 0 1 3 1
## X31T X32N X32T X33N X33T X34N X34T X35N X35T X36N X36T X37N X37T X38N X38T
## 1 2 0 0 0 0 1 0 0 1 0 1 0 0 0 4
## 2 478 311 280 630 437 713 334 643 789 286 339 488 320 427 507
## 3 144 302 1086 231 363 185 290 125 199 61 355 59 121 92 203
## 4 28 48 27 4 11 956 13 640 63 654 19 1234 2076 380 185
## 5 4226 3135 1416 3072 1702 3356 2515 3034 2532 3678 502 1512 1324 2684 2542
## 6 5 0 0 0 1 1 0 0 1 1 6 0 6 0 9
## X39N X39T X3N X3T X4N X4T X5N X5T X6N X6T X7N X7T X8N X8T X9N
## 1 0 1 1 3 0 2 0 8 2 0 1 1 17 2 0
## 2 695 458 628 287 305 255 801 364 626 320 638 412 264 436 504
## 3 329 174 120 114 119 59 144 44 110 184 181 327 101 217 191
## 4 43 135 11 63 323 33 78 63 28 6 323 16 185 168 76
## 5 1770 1446 2921 1688 2692 2447 3367 4396 3830 2841 2623 3671 1614 2102 3507
## 6 1 2 0 10 2 3 0 1 0 0 2 1 7 9 0
## X9T
## 1 2
## 2 317
## 3 48
## 4 7
## 5 2482
## 6 5
There are some gene name errors, so I will use HGNChelper to repair them.
human=x$row.names
new.hgnc.table <- readRDS("new.hgnc.table.rds")
fix <- checkGeneSymbols(human,map=new.hgnc.table)
## Warning in checkGeneSymbols(human, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(human, map = new.hgnc.table): x contains
## non-approved gene symbols
x$gene <- fix$Suggested.Symbol
x$row.names=NULL
x <- aggregate(. ~ gene,x,sum)
dim(x)
## [1] 18704 75
row.names(x)=x$gene
x$gene=NULL
head(x)
## X10N X10T X11N X11T X12N X12T X14N X14T X15N X15T X16N
## A1BG 120 217 148 39 73 87 146 95 88 69 72
## A1CF 8 29 22 2 0 1 4 0 2 3 1
## A2M 89330 6096 39181 3282 126977 89739 108730 40523 160543 179385 155740
## A2ML1 16 1307 50 2729 26 19 31 393 20 16 21
## A3GALT2 5 49 21 3 2 0 1 5 1 4 5
## A4GALT 304 1138 350 1439 219 149 178 809 446 150 300
## X16T X17N X17T X18N X18T X19N X19T X20N X20T X21N X21T
## A1BG 121 63 30 113 279 43 137 152 129 56 19
## A1CF 0 3 9 4 104 0 46 6 3 1 1
## A2M 39899 129176 16554 109697 15992 125658 3504 106485 59452 194434 10592
## A2ML1 17 15 114 13 591 10 5830 16 38 15 29
## A3GALT2 4 2 3 13 76 3 53 17 5 14 6
## A4GALT 249 203 286 296 787 209 1814 335 554 220 311
## X22N X22T X23N X23T X24N X24T X25N X25T X26N X26T X27N
## A1BG 92 12 106 85 68 101 107 15 50 46 110
## A1CF 1 10 2 5 3 1 3 7 2 5 1
## A2M 127707 7544 129002 107685 84494 8974 125875 3304 133114 23805 124283
## A2ML1 13 146 13 24 16 298 96 2730 7 257 9
## A3GALT2 1 6 4 1 8 11 17 0 4 13 9
## A4GALT 248 115 291 567 177 728 248 536 284 445 235
## X27T X28N X28T X29N X29T X2N X2T X30N X30T X31N X31T
## A1BG 155 143 64 204 125 82 117 97 127 115 39
## A1CF 1 2 4 3 4 0 0 2 6 1 1
## A2M 16776 163693 15056 56772 32450 110818 57261 242936 22835 157038 5604
## A2ML1 3891 74 67 19 50 8 11 8 28 31 93
## A3GALT2 16 6 1 11 6 6 6 19 1 10 8
## A4GALT 1059 586 3534 808 461 280 1505 723 236 442 481
## X32N X32T X33N X33T X34N X34T X35N X35T X36N X36T X37N
## A1BG 82 105 116 104 119 63 101 111 58 90 107
## A1CF 0 0 3 2 3 1 7 4 1 3 4
## A2M 158632 11656 64662 92680 122607 84313 190327 145504 148968 7674 38901
## A2ML1 6 130 42 9 17 31 22 33 9 33 24
## A3GALT2 14 3 6 10 10 12 8 14 1 47 5
## A4GALT 299 94 292 564 345 694 452 521 454 322 656
## X37T X38N X38T X39N X39T X3N X3T X4N X4T X5N X5T X6N
## A1BG 57 57 133 105 125 67 94 99 291 87 59 68
## A1CF 9 1 15 2 5 2 18 1 6 3 10 0
## A2M 65047 82116 123542 127464 91470 99241 9283 117914 5648 89583 2462 97346
## A2ML1 58 12 61 13 26 13 3833 57 499 37 1108 5
## A3GALT2 1 14 30 2 10 12 21 18 11 8 3 2
## A4GALT 949 588 934 209 473 359 1134 257 711 235 627 126
## X6T X7N X7T X8N X8T X9N X9T
## A1BG 27 108 86 194 84 77 8
## A1CF 0 2 6 35 9 1 0
## A2M 9188 98328 29242 15384 26414 121946 4447
## A2ML1 632 8 329 158 1072 13 2879
## A3GALT2 1 16 9 57 9 11 3
## A4GALT 1176 225 2035 498 491 160 1470
Now do DESeq2
patient <- head(as.character(unlist(lapply(1:ncol(x),function(i) {c(i,i)}))),ncol(x))
tumor <- rep(c(0,1),ncol(x)/2)
ss <- data.frame(patient,tumor)
dds <- DESeqDataSetFromMatrix(countData = x , colData=ss, design = ~ patient + tumor)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste("mRNA", SIG , "DEGs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3,
xlab="log2 base mean",
ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
,xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- rep(c(0,1),ncol(x)/2)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
RNA expression pathway enrichment with mitch.
rna <- mitch_import(dge, DEtype="deseq2")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 18704
## Note: no. genes in output = 18704
## Note: estimated proportion of input genes in output = 1
res <- mitch_calc(rna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mres <- res$enrichment_result
msig <- subset(mres,p.adjustANOVA<0.05)
head(subset(mres,p.adjustANOVA<0.05 & s.dist>0),20) %>%
kbl(caption="mRNA upregulated pathways") %>%
kable_styling("hover",full_width=FALSE)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
216 | REACTOME_UNWINDING_OF_DNA | 12 | 0.0000000 | 0.9476460 | 0.0000001 |
404 | REACTOME_CONDENSATION_OF_PROMETAPHASE_CHROMOSOMES | 11 | 0.0000077 | 0.7785821 | 0.0000507 |
699 | REACTOME_DNA_METHYLATION | 64 | 0.0000000 | 0.7471251 | 0.0000000 |
906 | REACTOME_DNA_STRAND_ELONGATION | 32 | 0.0000000 | 0.7418428 | 0.0000000 |
900 | REACTOME_ACTIVATION_OF_THE_PRE_REPLICATIVE_COMPLEX | 33 | 0.0000000 | 0.7372399 | 0.0000000 |
894 | REACTOME_ASSEMBLY_OF_THE_ORC_COMPLEX_AT_THE_ORIGIN_OF_REPLICATION | 68 | 0.0000000 | 0.7243288 | 0.0000000 |
840 | REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE | 68 | 0.0000000 | 0.7225303 | 0.0000000 |
363 | REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA | 72 | 0.0000000 | 0.7202774 | 0.0000000 |
391 | REACTOME_CONDENSATION_OF_PROPHASE_CHROMOSOMES | 72 | 0.0000000 | 0.7177838 | 0.0000000 |
213 | REACTOME_ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS | 37 | 0.0000000 | 0.7122379 | 0.0000000 |
119 | REACTOME_POLO_LIKE_KINASE_MEDIATED_EVENTS | 16 | 0.0000019 | 0.6881559 | 0.0000135 |
604 | REACTOME_ERCC6_CSB_AND_EHMT2_G9A_POSITIVELY_REGULATE_RRNA_EXPRESSION | 75 | 0.0000000 | 0.6856077 | 0.0000000 |
76 | REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION | 13 | 0.0000259 | 0.6737971 | 0.0001556 |
1145 | REACTOME_MEIOTIC_RECOMBINATION | 85 | 0.0000000 | 0.6716330 | 0.0000000 |
603 | REACTOME_SIRT1_NEGATIVELY_REGULATES_RRNA_EXPRESSION | 67 | 0.0000000 | 0.6674984 | 0.0000000 |
745 | REACTOME_ACTIVATED_PKN1_STIMULATES_TRANSCRIPTION_OF_AR_ANDROGEN_RECEPTOR_REGULATED_GENES_KLK2_AND_KLK3 | 66 | 0.0000000 | 0.6552362 | 0.0000000 |
952 | REACTOME_PURINE_RIBONUCLEOSIDE_MONOPHOSPHATE_BIOSYNTHESIS | 11 | 0.0001688 | 0.6549211 | 0.0008290 |
907 | REACTOME_G1_S_SPECIFIC_TRANSCRIPTION | 29 | 0.0000000 | 0.6457420 | 0.0000000 |
1064 | REACTOME_NUCLEOTIDE_BIOSYNTHESIS | 14 | 0.0000296 | 0.6446061 | 0.0001739 |
972 | REACTOME_CHK1_CHK2_CDS1_MEDIATED_INACTIVATION_OF_CYCLIN_B_CDK1_COMPLEX | 13 | 0.0000580 | 0.6439700 | 0.0003250 |
head(subset(mres,p.adjustANOVA<0.05 & s.dist<0),20) %>%
kbl(caption="mRNA downregulated pathways") %>%
kable_styling("hover",full_width=FALSE)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
817 | REACTOME_DISEASES_ASSOCIATED_WITH_SURFACTANT_METABOLISM | 10 | 0.0000043 | -0.8389466 | 0.0000299 |
386 | REACTOME_MUCOPOLYSACCHARIDOSES | 11 | 0.0000194 | -0.7437856 | 0.0001191 |
53 | REACTOME_ERYTHROCYTES_TAKE_UP_CARBON_DIOXIDE_AND_RELEASE_OXYGEN | 13 | 0.0001161 | -0.6172969 | 0.0005923 |
526 | REACTOME_PD_1_SIGNALING | 21 | 0.0000223 | -0.5344633 | 0.0001360 |
373 | REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION | 12 | 0.0015299 | -0.5282934 | 0.0058305 |
315 | REACTOME_METABOLISM_OF_ANGIOTENSINOGEN_TO_ANGIOTENSINS | 18 | 0.0001492 | -0.5162639 | 0.0007494 |
571 | REACTOME_SEMA4D_INDUCED_CELL_MIGRATION_AND_GROWTH_CONE_COLLAPSE | 20 | 0.0000697 | -0.5136513 | 0.0003794 |
317 | REACTOME_KERATAN_SULFATE_DEGRADATION | 13 | 0.0019869 | -0.4952896 | 0.0073999 |
329 | REACTOME_FCGR_ACTIVATION | 12 | 0.0030055 | -0.4946642 | 0.0106204 |
563 | REACTOME_SEMA4D_IN_SEMAPHORIN_SIGNALING | 24 | 0.0000291 | -0.4928750 | 0.0001720 |
51 | REACTOME_ENDOSOMAL_VACUOLAR_PATHWAY | 11 | 0.0053474 | -0.4849887 | 0.0169333 |
537 | REACTOME_SIGNAL_REGULATORY_PROTEIN_FAMILY_INTERACTIONS | 16 | 0.0007890 | -0.4846550 | 0.0032527 |
1186 | REACTOME_REGULATION_OF_FOXO_TRANSCRIPTIONAL_ACTIVITY_BY_ACETYLATION | 10 | 0.0085022 | -0.4805509 | 0.0251046 |
554 | REACTOME_SEMA3A_PLEXIN_REPULSION_SIGNALING_BY_INHIBITING_INTEGRIN_ADHESION | 14 | 0.0020012 | -0.4769567 | 0.0074321 |
375 | REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA | 12 | 0.0042979 | -0.4760380 | 0.0142647 |
639 | REACTOME_CELL_EXTRACELLULAR_MATRIX_INTERACTIONS | 18 | 0.0006455 | -0.4644611 | 0.0027566 |
687 | REACTOME_UPTAKE_AND_FUNCTION_OF_ANTHRAX_TOXINS | 11 | 0.0108508 | -0.4435792 | 0.0307245 |
23 | REACTOME_NEUROTRANSMITTER_CLEARANCE | 10 | 0.0152658 | -0.4430408 | 0.0410954 |
324 | REACTOME_GENERATION_OF_SECOND_MESSENGER_MOLECULES | 32 | 0.0000182 | -0.4377727 | 0.0001128 |
416 | REACTOME_SIGNALING_BY_LEPTIN | 11 | 0.0119512 | -0.4376804 | 0.0334072 |
Use Euler diagrams to find the similarity. As medians are normally distributed about the zero, one would expect equal numbers of up and down- methylated gene sets.
Use all probes on the chip.
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","UCSC_RefGene_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
gp2 <- lapply(gp2,unique)
gt <- stack(gp2)
colnames(gt) <- c("gene","probe")
gt$probe <- as.character(gt$probe)
dim(gt)
## [1] 684970 2
str(gt)
## 'data.frame': 684970 obs. of 2 variables:
## $ gene : chr "YTHDF1" "EIF2S3" "PKN3" "CCDC57" ...
## $ probe: chr "cg18478105" "cg09835024" "cg14361672" "cg01763666" ...
#new.hgnc.table <- getCurrentHumanMap()
new.hgnc.table <- readRDS("new.hgnc.table.rds")
fix <- checkGeneSymbols(gt$gene,map=new.hgnc.table)
## Warning in checkGeneSymbols(gt$gene, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(gt$gene, map = new.hgnc.table): x contains
## non-approved gene symbols
fix2 <- fix[which(fix$x != fix$Suggested.Symbol),]
length(unique(fix2$x))
## [1] 3253
gt$gene <- fix$Suggested.Symbol
Here we will look at parallel analysis of gene body and promoter.
The approach here is to generate two mapping tables of probes to genes based on the annotation which is split into gene body and promoter (TSS).
gene body=gb
gene promoter (TSS) = gp
# partition body and promoter data(probes)
dmg <- strsplit(dm$UCSC_RefGene_Name,";")
body <- lapply(1:nrow(dm),function(i) {
a <- dm[i,"UCSC_RefGene_Group"]
a <- unlist(strsplit(a,";"))
dmg[[i]][a=="Body"]
})
names(body) <- rownames(dm)
body <- lapply(body,unique)
gb <- stack(body)
colnames(gb) <- c("gene","probe")
gb$probe <- as.character(gb$probe)
dim(gb)
## [1] 368271 2
str(gb)
## 'data.frame': 368271 obs. of 2 variables:
## $ gene : chr "DMRTA2" "DMRTA2" "DMRTA2" "SRPK2" ...
## $ probe: chr "cg09792881" "cg10512745" "cg16732616" "cg17627973" ...
fix <- checkGeneSymbols(gb$gene,map=new.hgnc.table)
## Warning in checkGeneSymbols(gb$gene, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(gb$gene, map = new.hgnc.table): x contains
## non-approved gene symbols
fix2 <- fix[which(fix$x != fix$Suggested.Symbol),]
length(unique(fix2$x))
## [1] 2788
gb$gene <- fix$Suggested.Symbol
tss <- lapply(1:nrow(dm),function(i) {
a <- dm[i,"UCSC_RefGene_Group"]
a <- unlist(strsplit(a,";"))
dmg[[i]][grep("TSS",a)]
})
names(tss) <- rownames(dm)
tss <- lapply(tss,unique)
gp <- stack(tss)
colnames(gp) <- c("gene","probe")
gp$probe <- as.character(gp$probe)
dim(gp)
## [1] 199765 2
str(gp)
## 'data.frame': 199765 obs. of 2 variables:
## $ gene : chr "GALK2" "MIR4716" "LZTS2" "CCNB1" ...
## $ probe: chr "cg02655980" "cg02655980" "cg27014672" "cg17160384" ...
fix <- checkGeneSymbols(gp$gene,map=new.hgnc.table)
## Warning in checkGeneSymbols(gp$gene, map = new.hgnc.table): Human gene symbols
## should be all upper-case except for the 'orf' in open reading frames. The case
## of some letters was corrected.
## Warning in checkGeneSymbols(gp$gene, map = new.hgnc.table): x contains
## non-approved gene symbols
fix2 <- fix[which(fix$x != fix$Suggested.Symbol),]
length(unique(fix2$x))
## [1] 2995
gp$gene <- fix$Suggested.Symbol
# import for promoters
dmp <- dm[which(rownames(dm) %in% gp$probe),]
mp <- mitch_import(x=dmp,DEtype="limma",geneTable=gp)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 188962
## Note: no. genes in output = 21942
## Note: estimated proportion of input genes in output = 0.116
dim(dmp)
## [1] 188962 10
dim(mp)
## [1] 21942 1
# import bodies
dmb <- dm[which(rownames(dm) %in% gb$probe),]
mb <- mitch_import(x=dmb,DEtype="limma",geneTable=gb)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 351261
## Note: no. genes in output = 19908
## Note: estimated proportion of input genes in output = 0.0567
dim(dmb)
## [1] 351261 10
dim(mb)
## [1] 19908 1
# promoter
dmp_res <- mitch_calc(mp, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
dmp_res_up <- subset(dmp_res$enrichment_result,s.dist>0 & p.adjustANOVA < 0.05)
dmp_res_dn <- subset(dmp_res$enrichment_result,s.dist<0 & p.adjustANOVA < 0.05)
nrow(dmp_res_up)
## [1] 209
nrow(dmp_res_dn)
## [1] 32
head(dmp_res_up,20)
## set
## 324 REACTOME_YAP1_AND_WWTR1_TAZ_STIMULATED_GENE_EXPRESSION
## 596 REACTOME_REGULATION_OF_COMMISSURAL_AXON_PATHFINDING_BY_SLIT_AND_ROBO
## 547 REACTOME_CRMPS_IN_SEMA3A_SIGNALING
## 420 REACTOME_POU5F1_OCT4_SOX2_NANOG_REPRESS_GENES_RELATED_TO_DIFFERENTIATION
## 672 REACTOME_O_GLYCOSYLATION_OF_TSR_DOMAIN_CONTAINING_PROTEINS
## 708 REACTOME_PHYSIOLOGICAL_FACTORS
## 780 REACTOME_RESPONSE_TO_METAL_IONS
## 312 REACTOME_DERMATAN_SULFATE_BIOSYNTHESIS
## 744 REACTOME_ACTIVATION_OF_SMO
## 316 REACTOME_CS_DS_DEGRADATION
## 714 REACTOME_CLEC7A_DECTIN_1_INDUCES_NFAT_ACTIVATION
## 383 REACTOME_CROSSLINKING_OF_COLLAGEN_FIBRILS
## 781 REACTOME_METALLOTHIONEINS_BIND_METALS
## 797 REACTOME_LGI_ADAM_INTERACTIONS
## 628 REACTOME_CELL_EXTRACELLULAR_MATRIX_INTERACTIONS
## 646 REACTOME_TRANSCRIPTIONAL_REGULATION_OF_PLURIPOTENT_STEM_CELLS
## 658 REACTOME_REPRESSION_OF_WNT_TARGET_GENES
## 546 REACTOME_SEMA3A_PLEXIN_REPULSION_SIGNALING_BY_INHIBITING_INTEGRIN_ADHESION
## 117 REACTOME_ELASTIC_FIBRE_FORMATION
## 657 REACTOME_REGULATION_OF_FZD_BY_UBIQUITINATION
## setSize pANOVA s.dist p.adjustANOVA
## 324 15 3.302845e-05 0.6189964 6.979874e-04
## 596 10 7.855574e-04 0.6131497 7.474856e-03
## 547 15 6.766113e-05 0.5940834 1.231242e-03
## 420 10 2.183450e-03 0.5595203 1.730685e-02
## 672 38 2.310035e-08 0.5235694 1.570824e-06
## 708 14 7.806307e-04 0.5185217 7.474856e-03
## 780 14 1.553869e-03 0.4884101 1.312156e-02
## 312 10 7.825621e-03 0.4856557 4.339357e-02
## 744 18 4.329686e-04 0.4790488 4.906977e-03
## 316 12 4.396664e-03 0.4748138 2.898209e-02
## 714 11 6.727377e-03 0.4718642 3.932274e-02
## 383 16 1.143863e-03 0.4696023 1.026300e-02
## 781 11 8.407973e-03 0.4588399 4.622596e-02
## 797 14 3.327156e-03 0.4531258 2.414992e-02
## 628 16 1.749662e-03 0.4518779 1.449079e-02
## 646 29 2.568616e-05 0.4514514 6.033913e-04
## 658 14 4.049155e-03 0.4436402 2.739010e-02
## 546 13 6.483050e-03 0.4360331 3.824704e-02
## 117 44 1.691829e-06 0.4169995 7.051109e-05
## 657 21 9.733779e-04 0.4156940 8.856368e-03
head(dmp_res_dn,20)
## set setSize
## 87 REACTOME_BETA_DEFENSINS 37
## 504 REACTOME_OLFACTORY_SIGNALING_PATHWAY 354
## 88 REACTOME_DEFENSINS 46
## 241 REACTOME_DIGESTION_OF_DIETARY_CARBOHYDRATE 10
## 398 REACTOME_INTERACTION_WITH_CUMULUS_CELLS_AND_THE_ZONA_PELLUCIDA 11
## 854 REACTOME_ANTIMICROBIAL_PEPTIDES 89
## 624 REACTOME_TYPE_I_HEMIDESMOSOME_ASSEMBLY 11
## 666 REACTOME_DEFECTIVE_GALNT3_CAUSES_HFTC 16
## 730 REACTOME_DECTIN_2_FAMILY 26
## 1245 REACTOME_SENSORY_PERCEPTION 561
## 667 REACTOME_DEFECTIVE_C1GALT1C1_CAUSES_TNPS 16
## 269 REACTOME_GLUCOCORTICOID_BIOSYNTHESIS 10
## 804 REACTOME_REGULATION_OF_TLR_BY_ENDOGENOUS_LIGAND 21
## 352 REACTOME_CYP2E1_REACTIONS 11
## 1026 REACTOME_DIGESTION 21
## 867 REACTOME_KERATINIZATION 214
## 351 REACTOME_XENOBIOTICS 23
## 39 REACTOME_FERTILIZATION 26
## 712 REACTOME_IRAK4_DEFICIENCY_TLR2_4 17
## 1275 REACTOME_TERMINATION_OF_O_GLYCAN_BIOSYNTHESIS 23
## pANOVA s.dist p.adjustANOVA
## 87 7.777683e-17 -0.7911695 1.256096e-14
## 504 1.380857e-142 -0.7808034 1.784067e-139
## 88 4.416627e-18 -0.7380125 9.510470e-16
## 241 7.084103e-05 -0.7255243 1.253789e-03
## 398 1.217640e-04 -0.6689949 1.966488e-03
## 854 4.219209e-27 -0.6603094 1.817073e-24
## 624 5.870412e-04 -0.5985011 5.972104e-03
## 666 7.635692e-05 -0.5710743 1.333151e-03
## 730 2.147722e-06 -0.5367978 8.671428e-05
## 1245 2.105399e-102 -0.5278147 1.360087e-99
## 667 5.202543e-04 -0.5010091 5.546044e-03
## 269 6.288949e-03 -0.4989513 3.727212e-02
## 804 1.365006e-04 -0.4807501 2.150717e-03
## 352 7.724038e-03 -0.4638308 4.301490e-02
## 1026 5.766381e-04 -0.4338718 5.912828e-03
## 867 1.506617e-26 -0.4224026 4.091945e-24
## 351 8.273545e-04 -0.4026901 7.745957e-03
## 39 5.236976e-04 -0.3929128 5.546044e-03
## 712 7.101908e-03 -0.3770957 4.063119e-02
## 1275 3.461371e-03 -0.3521166 2.470769e-02
# body
dmb_res <- mitch_calc(mb, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
dmb_res_up <- subset(dmb_res$enrichment_result,s.dist>0 & p.adjustANOVA < 0.05)
dmb_res_dn <- subset(dmb_res$enrichment_result,s.dist<0 & p.adjustANOVA < 0.05)
nrow(dmb_res_up)
## [1] 295
nrow(dmb_res_dn)
## [1] 50
head(dmb_res_up,20)
## set
## 333 REACTOME_REGULATION_OF_GENE_EXPRESSION_IN_LATE_STAGE_BRANCHING_MORPHOGENESIS_PANCREATIC_BUD_PRECURSOR_CELLS
## 996 REACTOME_ACTIVATION_OF_THE_TFAP2_AP_2_FAMILY_OF_TRANSCRIPTION_FACTORS
## 274 REACTOME_BETA_CATENIN_PHOSPHORYLATION_CASCADE
## 652 REACTOME_SIGNALING_BY_CTNNB1_PHOSPHO_SITE_MUTANTS
## 1031 REACTOME_RUNX3_REGULATES_P14_ARF
## 314 REACTOME_ERKS_ARE_INACTIVATED
## 45 REACTOME_REGULATION_OF_GENE_EXPRESSION_BY_HYPOXIA_INDUCIBLE_FACTOR
## 1021 REACTOME_RUNX3_REGULATES_NOTCH_SIGNALING
## 636 REACTOME_TRANSCRIPTIONAL_REGULATION_OF_PLURIPOTENT_STEM_CELLS
## 303 REACTOME_METABOLISM_OF_NITRIC_OXIDE_NOS3_ACTIVATION_AND_REGULATION
## 70 REACTOME_MITOCHONDRIAL_IRON_SULFUR_CLUSTER_BIOGENESIS
## 1267 REACTOME_FORMATION_OF_AXIAL_MESODERM
## 50 REACTOME_ENDOSOMAL_VACUOLAR_PATHWAY
## 995 REACTOME_NEGATIVE_REGULATION_OF_ACTIVITY_OF_TFAP2_AP_2_FAMILY_TRANSCRIPTION_FACTORS
## 5 REACTOME_MAPK3_ERK1_ACTIVATION
## 651 REACTOME_SIGNALING_BY_WNT_IN_CANCER
## 28 REACTOME_RAF_INDEPENDENT_MAPK1_3_ACTIVATION
## 1256 REACTOME_GASTRULATION
## 1092 REACTOME_NOTCH3_INTRACELLULAR_DOMAIN_REGULATES_TRANSCRIPTION
## 650 REACTOME_SUMOYLATION_OF_IMMUNE_RESPONSE_PROTEINS
## setSize pANOVA s.dist p.adjustANOVA
## 333 15 1.760683e-05 0.6401347 3.513114e-04
## 996 10 1.029310e-03 0.5993869 7.426149e-03
## 274 14 1.186938e-04 0.5940197 1.612468e-03
## 652 14 1.186938e-04 0.5940197 1.612468e-03
## 1031 10 1.598396e-03 0.5763494 1.041404e-02
## 314 13 3.471569e-04 0.5729580 3.546555e-03
## 45 11 1.077777e-03 0.5692452 7.597780e-03
## 1021 13 8.531177e-04 0.5341543 6.370943e-03
## 636 27 2.198986e-06 0.5262665 6.382058e-05
## 303 15 6.688171e-04 0.5072974 5.641519e-03
## 70 13 1.567803e-03 0.5064318 1.032967e-02
## 1267 14 1.135421e-03 0.5023410 7.795334e-03
## 50 11 4.026145e-03 0.5007836 1.992786e-02
## 995 10 7.626994e-03 0.4872449 3.370128e-02
## 5 10 7.817173e-03 0.4857322 3.410035e-02
## 651 31 2.849988e-06 0.4857081 8.087632e-05
## 28 22 8.197087e-05 0.4850077 1.176144e-03
## 1256 46 2.266532e-08 0.4762732 1.378267e-06
## 1092 24 6.659014e-05 0.4702273 9.887862e-04
## 650 10 1.038197e-02 0.4680169 4.195498e-02
head(dmb_res_dn,20)
## set setSize
## 86 REACTOME_BETA_DEFENSINS 31
## 87 REACTOME_DEFENSINS 39
## 1013 REACTOME_DIGESTION 17
## 572 REACTOME_CLASS_C_3_METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS 14
## 843 REACTOME_ANTIMICROBIAL_PEPTIDES 76
## 474 REACTOME_HORMONE_LIGAND_BINDING_RECEPTORS 11
## 496 REACTOME_OLFACTORY_SIGNALING_PATHWAY 26
## 1042 REACTOME_DIGESTION_AND_ABSORPTION 21
## 167 REACTOME_CREATION_OF_C4_AND_C2_ACTIVATORS 14
## 1260 REACTOME_TERMINATION_OF_O_GLYCAN_BIOSYNTHESIS 22
## 656 REACTOME_DEFECTIVE_GALNT3_CAUSES_HFTC 15
## 305 REACTOME_METABOLISM_OF_ANGIOTENSINOGEN_TO_ANGIOTENSINS 16
## 924 REACTOME_PYRIMIDINE_CATABOLISM 12
## 657 REACTOME_DEFECTIVE_C1GALT1C1_CAUSES_TNPS 15
## 165 REACTOME_COMPLEMENT_CASCADE 57
## 1024 REACTOME_COLLAGEN_CHAIN_TRIMERIZATION 42
## 166 REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT 22
## 719 REACTOME_DECTIN_2_FAMILY 24
## 39 REACTOME_FERTILIZATION 24
## 473 REACTOME_AMINE_LIGAND_BINDING_RECEPTORS 25
## pANOVA s.dist p.adjustANOVA
## 86 1.247101e-12 -0.7363950 2.275069e-10
## 87 8.512572e-15 -0.7175912 2.717639e-12
## 1013 4.344048e-05 -0.5726474 6.848579e-04
## 572 2.434452e-04 -0.5662870 2.727013e-03
## 843 3.076757e-16 -0.5417056 1.309673e-13
## 474 3.111273e-03 -0.5147830 1.656858e-02
## 496 7.990785e-06 -0.5058460 1.889673e-04
## 1042 1.502979e-04 -0.4777637 1.910331e-03
## 167 3.303371e-03 -0.4534820 1.707856e-02
## 1260 2.421178e-04 -0.4520037 2.727013e-03
## 656 2.649011e-03 -0.4482213 1.496809e-02
## 305 2.877044e-03 -0.4303615 1.576818e-02
## 924 1.184516e-02 -0.4195734 4.583717e-02
## 657 5.393751e-03 -0.4149366 2.570082e-02
## 165 1.602835e-07 -0.4012003 7.310070e-06
## 1024 1.187896e-05 -0.3905260 2.528240e-04
## 166 2.010061e-03 -0.3803864 1.246043e-02
## 719 2.970231e-03 -0.3503068 1.600416e-02
## 39 8.299597e-03 -0.3112637 3.554925e-02
## 473 9.621987e-03 -0.2991480 3.925648e-02
# combined methylation
mm <- merge(mp,mb,by=0)
rownames(mm) <- mm$Row.names
mm$Row.names=NULL
colnames(mm) <- c("prom","body")
mm_res <- mitch_calc(mm, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mm_res_sig <- subset(mm_res$enrichment_result,p.adjustMANOVA < 0.01)
nrow(mm_res_sig)
## [1] 170
head(mm_res_sig,20)
## set
## 86 REACTOME_BETA_DEFENSINS
## 87 REACTOME_DEFENSINS
## 842 REACTOME_ANTIMICROBIAL_PEPTIDES
## 393 REACTOME_INTERACTION_WITH_CUMULUS_CELLS_AND_THE_ZONA_PELLUCIDA
## 655 REACTOME_DEFECTIVE_GALNT3_CAUSES_HFTC
## 1012 REACTOME_DIGESTION
## 635 REACTOME_TRANSCRIPTIONAL_REGULATION_OF_PLURIPOTENT_STEM_CELLS
## 656 REACTOME_DEFECTIVE_C1GALT1C1_CAUSES_TNPS
## 320 REACTOME_YAP1_AND_WWTR1_TAZ_STIMULATED_GENE_EXPRESSION
## 333 REACTOME_REGULATION_OF_GENE_EXPRESSION_IN_LATE_STAGE_BRANCHING_MORPHOGENESIS_PANCREATIC_BUD_PRECURSOR_CELLS
## 496 REACTOME_OLFACTORY_SIGNALING_PATHWAY
## 718 REACTOME_DECTIN_2_FAMILY
## 614 REACTOME_TYPE_I_HEMIDESMOSOME_ASSEMBLY
## 274 REACTOME_BETA_CATENIN_PHOSPHORYLATION_CASCADE
## 651 REACTOME_SIGNALING_BY_CTNNB1_PHOSPHO_SITE_MUTANTS
## 571 REACTOME_CLASS_C_3_METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS
## 1259 REACTOME_TERMINATION_OF_O_GLYCAN_BIOSYNTHESIS
## 792 REACTOME_REGULATION_OF_TLR_BY_ENDOGENOUS_LIGAND
## 538 REACTOME_CRMPS_IN_SEMA3A_SIGNALING
## 1041 REACTOME_DIGESTION_AND_ABSORPTION
## setSize pMANOVA s.prom s.body p.prom p.body
## 86 31 3.300848e-20 -0.8199003 -0.7429049702 2.650124e-15 7.901567e-13
## 87 39 8.883135e-23 -0.7605344 -0.7243029546 1.955356e-16 4.789336e-15
## 842 76 1.206368e-31 -0.6966518 -0.5488022318 7.540330e-26 1.261188e-16
## 393 10 1.832106e-04 -0.7191814 -0.4244741079 8.194693e-05 2.010679e-02
## 655 15 2.415676e-05 -0.6191674 -0.4559642560 3.288005e-05 2.230556e-03
## 1012 17 3.335599e-05 -0.4064160 -0.5812544700 3.716909e-03 3.327466e-05
## 635 27 2.621767e-07 0.4488267 0.5223500689 5.409295e-05 2.615712e-06
## 656 15 2.074529e-04 -0.5438903 -0.4222845664 2.648052e-04 4.628986e-03
## 320 15 1.440428e-04 0.6044403 0.3253255750 5.040713e-05 2.914693e-02
## 333 15 9.372179e-05 0.2496398 0.6372817955 9.414343e-02 1.919217e-05
## 496 26 7.183400e-07 -0.4449478 -0.5119381320 8.581508e-05 6.208331e-06
## 718 24 1.622734e-06 -0.5713958 -0.3574596739 1.255981e-06 2.434059e-03
## 614 11 5.766047e-04 -0.6470166 0.0002738795 2.022488e-04 9.987451e-01
## 274 14 6.587244e-04 0.1626058 0.5907542507 2.921714e-01 1.293920e-04
## 651 14 6.587244e-04 0.1626058 0.5907542507 2.921714e-01 1.293920e-04
## 571 14 5.198957e-05 0.2008119 -0.5761041390 1.932992e-01 1.894703e-04
## 1259 22 7.427397e-05 -0.3908510 -0.4606356691 1.505177e-03 1.836393e-04
## 792 21 6.935285e-05 -0.5430432 -0.2407036196 1.642314e-05 5.620400e-02
## 538 15 3.985183e-04 0.5799321 0.0533596564 1.005342e-04 7.205039e-01
## 1041 21 1.837009e-04 -0.3158022 -0.4868071332 1.223554e-02 1.123235e-04
## s.dist SD p.adjustMANOVA
## 86 1.1064105 0.05444392 1.052970e-17
## 87 1.0502511 0.02561948 3.778293e-20
## 842 0.8868526 0.10454543 1.539325e-28
## 393 0.8351049 0.20838954 2.074357e-03
## 655 0.7689419 0.11540202 4.198721e-04
## 1012 0.7092466 0.12362947 5.387627e-04
## 635 0.6886908 0.05198890 9.497212e-06
## 656 0.6885789 0.08598822 2.262478e-03
## 320 0.6864290 0.19736391 1.750462e-03
## 333 0.6844327 0.27410429 1.207970e-03
## 496 0.6782767 0.04736929 2.083186e-05
## 718 0.6739960 0.15127567 4.225732e-05
## 614 0.6470167 0.45770349 5.074121e-03
## 274 0.6127244 0.30274664 5.566439e-03
## 651 0.6127244 0.30274664 5.566439e-03
## 571 0.6100995 0.54936261 7.992614e-04
## 1259 0.6041107 0.04934523 1.023825e-03
## 792 0.5939985 0.21378640 9.943173e-04
## 538 0.5823818 0.37234296 3.739039e-03
## 1041 0.5802691 0.12091872 2.074357e-03
table(sign(mm_res_sig$s.prom) == sign(mm_res_sig$s.body) )
##
## FALSE TRUE
## 34 136
mm_res_sig[sign(mm_res_sig$s.prom) != sign(mm_res_sig$s.body),]
## set
## 614 REACTOME_TYPE_I_HEMIDESMOSOME_ASSEMBLY
## 571 REACTOME_CLASS_C_3_METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS
## 661 REACTOME_O_GLYCOSYLATION_OF_TSR_DOMAIN_CONTAINING_PROTEINS
## 1023 REACTOME_COLLAGEN_CHAIN_TRIMERIZATION
## 153 REACTOME_COLLAGEN_BIOSYNTHESIS_AND_MODIFYING_ENZYMES
## 1263 REACTOME_GABA_RECEPTOR_ACTIVATION
## 84 REACTOME_COLLAGEN_DEGRADATION
## 146 REACTOME_HEPARAN_SULFATE_HEPARIN_HS_GAG_METABOLISM
## 867 REACTOME_RETROGRADE_TRANSPORT_AT_THE_TRANS_GOLGI_NETWORK
## 65 REACTOME_VOLTAGE_GATED_POTASSIUM_CHANNELS
## 423 REACTOME_NON_INTEGRIN_MEMBRANE_ECM_INTERACTIONS
## 64 REACTOME_POTASSIUM_CHANNELS
## 518 REACTOME_DISEASES_ASSOCIATED_WITH_O_GLYCOSYLATION_OF_PROTEINS
## 187 REACTOME_DDX58_IFIH1_MEDIATED_INDUCTION_OF_INTERFERON_ALPHA_BETA
## 660 REACTOME_O_LINKED_GLYCOSYLATION
## 424 REACTOME_ECM_PROTEOGLYCANS
## 118 REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION
## 90 REACTOME_DEGRADATION_OF_THE_EXTRACELLULAR_MATRIX
## 92 REACTOME_COLLAGEN_FORMATION
## 835 REACTOME_PROTEIN_PROTEIN_INTERACTIONS_AT_SYNAPSES
## 1160 REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY
## 138 REACTOME_GLYCOSAMINOGLYCAN_METABOLISM
## 25 REACTOME_NEURONAL_SYSTEM
## 480 REACTOME_DISEASES_OF_GLYCOSYLATION
## 692 REACTOME_CARDIAC_CONDUCTION
## 24 REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES
## 23 REACTOME_NEUROTRANSMITTER_RECEPTORS_AND_POSTSYNAPTIC_SIGNAL_TRANSMISSION
## 91 REACTOME_EXTRACELLULAR_MATRIX_ORGANIZATION
## 465 REACTOME_CLASS_A_1_RHODOPSIN_LIKE_RECEPTORS
## 1120 REACTOME_INTERFERON_SIGNALING
## 654 REACTOME_GPCR_LIGAND_BINDING
## 464 REACTOME_SIGNALING_BY_GPCR
## 61 REACTOME_ADAPTIVE_IMMUNE_SYSTEM
## 60 REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM
## setSize pMANOVA s.prom s.body p.prom p.body
## 614 11 5.766047e-04 -0.647016603 0.0002738795 2.022488e-04 9.987451e-01
## 571 14 5.198957e-05 0.200811916 -0.5761041390 1.932992e-01 1.894703e-04
## 661 38 2.028680e-10 0.507787284 -0.2142659640 6.025372e-08 2.228449e-02
## 1023 42 4.707678e-10 0.300353532 -0.4005242282 7.569915e-04 7.060580e-06
## 153 65 4.534807e-09 0.275616851 -0.2605736174 1.216545e-04 2.801941e-04
## 1263 56 1.094274e-06 0.249841948 -0.2384212601 1.221377e-03 2.028834e-03
## 84 60 1.986603e-06 0.192697322 -0.2654793522 9.841984e-03 3.757127e-04
## 146 49 8.789241e-05 0.304737404 -0.0958166133 2.238530e-04 2.459750e-01
## 867 46 5.026184e-04 -0.024127188 0.3122167646 7.771204e-01 2.485638e-04
## 65 38 8.908525e-04 0.199652317 -0.2238915885 3.320573e-02 1.693247e-02
## 423 55 5.896776e-04 0.257739200 -0.0787948203 9.455547e-04 3.121889e-01
## 64 97 6.241906e-07 0.171633402 -0.2064114825 3.493588e-03 4.437883e-04
## 518 64 8.280622e-05 0.172862193 -0.2045109771 1.678934e-02 4.666655e-03
## 187 65 6.214891e-04 -0.001008118 0.2649538814 9.887878e-01 2.207134e-04
## 660 104 1.264392e-06 0.105433478 -0.2372504186 6.327488e-02 2.914750e-05
## 424 73 1.106346e-04 0.246642444 -0.0777359784 2.690147e-04 2.509064e-01
## 118 85 8.487780e-05 0.046490157 -0.2449657389 4.588719e-01 9.474583e-05
## 90 132 1.013360e-07 0.154021000 -0.1901039563 2.253372e-03 1.629286e-04
## 92 88 3.770097e-05 0.164865474 -0.1710039113 7.524451e-03 5.567411e-03
## 835 75 4.213964e-04 0.127799319 -0.1867889653 5.571976e-02 5.166307e-03
## 1160 90 9.458927e-04 0.004517348 -0.2177703599 9.409712e-01 3.565426e-04
## 138 113 1.723007e-04 0.216918136 -0.0045555581 6.813208e-05 9.333547e-01
## 25 376 8.652653e-14 0.124571781 -0.1556651598 3.421163e-05 2.238991e-07
## 480 129 2.058616e-04 0.172131593 -0.0691451477 7.377559e-04 1.752178e-01
## 692 124 4.665386e-04 0.056971157 -0.1727059593 2.734187e-01 8.983929e-04
## 24 250 1.258330e-07 0.114774102 -0.1346918424 1.790497e-03 2.472665e-04
## 23 189 1.018731e-05 0.099152119 -0.1427289870 1.880413e-02 7.190848e-04
## 91 287 6.830593e-08 0.159138115 -0.0687481271 3.558458e-06 4.526829e-02
## 465 272 1.713479e-06 0.011330444 -0.1713629056 7.479462e-01 1.164429e-06
## 1120 181 9.847892e-04 -0.035149552 0.1410660939 4.149782e-01 1.068140e-03
## 654 371 4.142464e-07 0.072528882 -0.1218704933 1.656189e-02 5.648244e-05
## 464 592 3.926762e-07 0.098566193 -0.0560376368 4.311263e-05 2.007126e-02
## 61 722 1.256623e-07 -0.077478736 0.0713792791 4.031587e-04 1.116480e-03
## 60 658 1.530396e-05 -0.071992201 0.0576620495 1.668333e-03 1.180848e-02
## s.dist SD p.adjustMANOVA
## 614 0.64701666 0.4577035 5.074121e-03
## 571 0.61009950 0.5493626 7.992614e-04
## 661 0.55114229 0.5105687 1.617872e-08
## 1023 0.50063150 0.4955954 3.337221e-08
## 153 0.37929310 0.3791439 2.630188e-07
## 1263 0.34534866 0.3452542 3.035420e-05
## 84 0.32804199 0.3239798 4.782839e-05
## 146 0.31944594 0.2832345 1.144395e-03
## 867 0.31314762 0.2378311 4.453758e-03
## 65 0.29998082 0.2994908 7.194480e-03
## 423 0.26951460 0.2379655 5.153620e-03
## 64 0.26844687 0.2673181 1.852249e-05
## 518 0.26777991 0.2668431 1.100633e-03
## 187 0.26495580 0.1880635 5.394694e-03
## 660 0.25962276 0.2423141 3.361176e-05
## 424 0.25860274 0.2293702 1.397720e-03
## 118 0.24933822 0.2060904 1.116537e-03
## 90 0.24466709 0.2433331 4.458782e-06
## 92 0.23753518 0.2374955 6.013305e-04
## 835 0.22632451 0.2224475 3.888921e-03
## 1160 0.21781721 0.1571811 7.543494e-03
## 138 0.21696597 0.1566056 1.998688e-03
## 25 0.19937345 0.1981574 1.577255e-11
## 480 0.18550023 0.1706084 2.262478e-03
## 692 0.18186000 0.1624062 4.222009e-03
## 24 0.17696041 0.1763991 5.179448e-06
## 23 0.17378926 0.1710358 1.999846e-04
## 91 0.17335295 0.1611399 3.112799e-06
## 465 0.17173708 0.1291837 4.372798e-05
## 1120 0.14537928 0.1246033 7.804913e-03
## 654 0.14181980 0.1374611 1.355329e-05
## 464 0.11338215 0.1093214 1.318565e-05
## 61 0.10534684 0.1052585 5.179448e-06
## 60 0.09223768 0.0916794 2.712202e-04
# three way
mmrna <- merge(mm,rna,by=0)
rownames(mmrna) <- mmrna$Row.names
mmrna$Row.names = NULL
colnames(mmrna) <- c("prom","body","rna")
mmrnares <- mitch_calc(mmrna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mmrnares_sig <- subset(mmrnares$enrichment_result, p.adjustMANOVA < 0.05)
head(mmrnares_sig,20)
## set
## 86 REACTOME_BETA_DEFENSINS
## 87 REACTOME_DEFENSINS
## 210 REACTOME_UNWINDING_OF_DNA
## 393 REACTOME_INTERACTION_WITH_CUMULUS_CELLS_AND_THE_ZONA_PELLUCIDA
## 841 REACTOME_ANTIMICROBIAL_PEPTIDES
## 993 REACTOME_NEGATIVE_REGULATION_OF_ACTIVITY_OF_TFAP2_AP_2_FAMILY_TRANSCRIPTION_FACTORS
## 654 REACTOME_DEFECTIVE_GALNT3_CAUSES_HFTC
## 1011 REACTOME_DIGESTION
## 881 REACTOME_DNA_STRAND_ELONGATION
## 875 REACTOME_ACTIVATION_OF_THE_PRE_REPLICATIVE_COMPLEX
## 391 REACTOME_CONDENSATION_OF_PROMETAPHASE_CHROMOSOMES
## 717 REACTOME_DECTIN_2_FAMILY
## 266 REACTOME_GLUCOCORTICOID_BIOSYNTHESIS
## 655 REACTOME_DEFECTIVE_C1GALT1C1_CAUSES_TNPS
## 374 REACTOME_MUCOPOLYSACCHARIDOSES
## 305 REACTOME_METABOLISM_OF_ANGIOTENSINOGEN_TO_ANGIOTENSINS
## 207 REACTOME_ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS
## 320 REACTOME_YAP1_AND_WWTR1_TAZ_STIMULATED_GENE_EXPRESSION
## 962 REACTOME_SLBP_DEPENDENT_PROCESSING_OF_REPLICATION_DEPENDENT_HISTONE_PRE_MRNAS
## 880 REACTOME_LAGGING_STRAND_SYNTHESIS
## setSize pMANOVA s.prom s.body s.rna p.prom
## 86 30 1.261730e-21 -0.853801170 -0.76407510 -0.107430394 5.212414e-16
## 87 38 7.032768e-24 -0.795301829 -0.74748058 -0.101880294 6.902665e-17
## 210 12 1.900618e-07 -0.282707168 -0.01163172 0.944732854 8.939856e-02
## 393 10 6.710904e-05 -0.763246234 -0.45696895 0.302933067 2.869714e-05
## 841 75 2.702251e-33 -0.732992593 -0.57258765 0.016044171 3.656118e-27
## 993 10 1.428277e-04 0.420473409 0.47279434 0.550434155 2.140421e-02
## 654 15 1.010427e-05 -0.669766298 -0.48678967 -0.004501854 6.921308e-06
## 1011 16 1.018234e-05 -0.513577096 -0.62293191 0.053186037 3.696968e-04
## 881 30 1.645226e-12 -0.068127629 0.11009336 0.787835199 5.158140e-01
## 875 31 6.542117e-13 0.042127714 0.11549277 0.784415840 6.877030e-01
## 391 11 1.305799e-04 0.057416268 -0.01092206 0.779145225 7.433530e-01
## 717 24 4.573818e-08 -0.619500338 -0.38511476 -0.201613857 1.442481e-07
## 266 10 3.111595e-03 -0.615210575 -0.36887796 0.205742656 7.455304e-04
## 655 15 1.078062e-04 -0.593620336 -0.45143091 -0.009700716 6.748521e-05
## 374 10 1.324807e-03 0.206861359 0.10920381 -0.702490913 2.581832e-01
## 305 16 9.975898e-06 -0.186043115 -0.47477551 -0.529738468 1.965235e-01
## 207 37 3.630026e-13 -0.034583543 0.17324727 0.712759265 7.125720e-01
## 320 15 1.828790e-04 0.597047970 0.30757688 -0.273415862 6.269781e-05
## 962 11 4.971296e-04 0.007691276 0.35298261 0.628684430 9.666606e-01
## 880 18 3.562181e-06 0.075078769 0.19111084 0.682032569 5.833528e-01
## p.body p.rna s.dist SD p.adjustMANOVA
## 86 4.088303e-13 3.075853e-01 1.1507947 0.40749282 8.037218e-20
## 87 5.397905e-15 2.826734e-01 1.0961806 0.38728113 5.973164e-22
## 210 9.442581e-01 1.342633e-08 0.9861941 0.64481575 2.161953e-06
## 393 1.227707e-02 9.648529e-02 0.9397520 0.54893763 3.379325e-04
## 841 9.138379e-17 8.123653e-01 0.9302646 0.39439262 5.737780e-31
## 993 9.580575e-03 2.524202e-03 0.8386359 0.06539013 6.616816e-04
## 654 1.088067e-03 9.758706e-01 0.8279924 0.34367105 6.601454e-05
## 1011 1.576100e-05 7.120938e-01 0.8090948 0.36293122 6.618518e-05
## 881 2.964951e-01 6.957391e-14 0.7984023 0.45162098 4.657817e-11
## 875 2.655967e-01 3.473007e-14 0.7939909 0.40902978 1.894240e-11
## 391 9.498728e-01 7.300198e-06 0.7813342 0.43775357 6.184343e-04
## 717 1.082493e-03 8.671068e-02 0.7567973 0.20945894 6.069837e-07
## 266 4.324737e-02 2.589630e-01 0.7462473 0.42127442 9.305569e-03
## 655 2.450703e-03 9.480343e-01 0.7458345 0.30449563 5.144013e-04
## 374 5.496857e-01 1.156567e-04 0.7404124 0.49921720 4.561634e-03
## 305 1.000160e-03 2.364660e-04 0.7352868 0.18462296 6.551183e-05
## 207 6.811369e-02 5.373896e-14 0.7343272 0.38574346 1.127964e-11
## 320 3.905042e-02 6.619895e-02 0.7251386 0.44329324 8.232785e-04
## 962 4.252442e-02 2.965415e-04 0.7210409 0.31114576 1.985401e-03
## 880 1.602022e-01 5.135884e-07 0.7122700 0.32219572 2.685336e-05
table(sign(mmrnares_sig$s.prom) == sign(mmrnares_sig$s.body))
##
## FALSE TRUE
## 188 428
table(sign(mmrnares_sig$s.prom) == sign(mmrnares_sig$s.rna))
##
## FALSE TRUE
## 271 345
table(sign(mmrnares_sig$s.body) == sign(mmrnares_sig$s.rna))
##
## FALSE TRUE
## 241 375
mtop <- head(mmrnares_sig,30)
mtop <- mtop[,c("set","s.prom","s.body","s.rna")]
rownames(mtop) <- gsub("REACTOME_","",mtop$set)
rownames(mtop) <- gsub("_"," ",rownames(mtop))
mtop$set=NULL
colnames(mtop) <- c("promoter","body","RNA")
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(mtop) , trace="none",margins = c(6,25), col=colfunc(25), cexRow=.6, cexCol=1)
pdf("fig5c.pdf")
heatmap.2( as.matrix(mtop) , trace="none",margins = c(6,25), col=colfunc(25), cexRow=.6, cexCol=1)
dev.off()
## png
## 2
# rna vs promoter
mprna <- merge(mp,rna,by=0)
rownames(mprna) <- mprna$Row.names
mprna$Row.names = NULL
colnames(mprna) <- c("prom","rna")
mprnares <- mitch_calc(mprna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mprnares_sig <- subset(mprnares$enrichment_result, p.adjustMANOVA < 0.05)
head(mprnares_sig,20)
## set
## 211 REACTOME_UNWINDING_OF_DNA
## 87 REACTOME_BETA_DEFENSINS
## 504 REACTOME_OLFACTORY_SIGNALING_PATHWAY
## 241 REACTOME_DIGESTION_OF_DIETARY_CARBOHYDRATE
## 686 REACTOME_DNA_METHYLATION
## 88 REACTOME_DEFENSINS
## 881 REACTOME_ASSEMBLY_OF_THE_ORC_COMPLEX_AT_THE_ORIGIN_OF_REPLICATION
## 887 REACTOME_ACTIVATION_OF_THE_PRE_REPLICATIVE_COMPLEX
## 398 REACTOME_INTERACTION_WITH_CUMULUS_CELLS_AND_THE_ZONA_PELLUCIDA
## 356 REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA
## 384 REACTOME_CONDENSATION_OF_PROPHASE_CHROMOSOMES
## 396 REACTOME_CONDENSATION_OF_PROMETAPHASE_CHROMOSOMES
## 379 REACTOME_MUCOPOLYSACCHARIDOSES
## 827 REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE
## 592 REACTOME_ERCC6_CSB_AND_EHMT2_G9A_POSITIVELY_REGULATE_RRNA_EXPRESSION
## 893 REACTOME_DNA_STRAND_ELONGATION
## 733 REACTOME_ACTIVATED_PKN1_STIMULATES_TRANSCRIPTION_OF_AR_ANDROGEN_RECEPTOR_REGULATED_GENES_KLK2_AND_KLK3
## 591 REACTOME_SIRT1_NEGATIVELY_REGULATES_RRNA_EXPRESSION
## 853 REACTOME_ANTIMICROBIAL_PEPTIDES
## 208 REACTOME_ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS
## setSize pMANOVA s.prom s.rna p.prom p.rna
## 211 12 5.706444e-08 -0.244352696 0.94334857 1.410229e-01 1.366966e-08
## 87 36 1.735255e-17 -0.838465633 -0.12531621 1.009671e-17 1.982180e-01
## 504 350 1.563698e-158 -0.827311578 0.12952370 6.114659e-159 3.069918e-05
## 241 10 2.256100e-05 -0.777986562 -0.26662157 1.964482e-05 1.431156e-01
## 686 58 1.277936e-26 0.233440834 0.78179774 2.788603e-03 1.156858e-24
## 88 45 9.356263e-19 -0.786883729 -0.12142891 6.647980e-19 1.671007e-01
## 881 62 1.169994e-26 0.233042870 0.75374119 1.996705e-03 1.548917e-24
## 887 31 1.932778e-13 0.068354142 0.78126320 5.153726e-01 4.158213e-14
## 398 11 6.117606e-05 -0.731067293 0.27917287 2.583370e-05 1.078314e-01
## 356 65 2.308384e-27 0.228057348 0.74674011 1.950410e-03 3.234265e-25
## 384 65 2.548051e-27 0.223858588 0.74761293 2.376493e-03 2.847331e-25
## 396 11 3.459428e-05 0.083226778 0.77431024 6.360827e-01 8.154838e-06
## 379 10 2.723328e-04 0.229563831 -0.71625223 2.101894e-01 8.340179e-05
## 827 63 2.179976e-24 0.154704111 0.73579388 4.216251e-02 8.197237e-24
## 592 68 1.436281e-26 0.251658444 0.70641104 4.470111e-04 1.017903e-23
## 893 31 8.538522e-12 -0.018693531 0.73747615 8.502474e-01 9.904347e-13
## 733 59 2.656047e-22 0.234567638 0.69912456 2.428527e-03 2.346490e-20
## 591 60 2.052119e-22 0.242797320 0.69108302 1.523677e-03 3.054083e-20
## 853 88 1.178350e-28 -0.711441370 -0.01937468 1.581129e-29 7.569037e-01
## 208 37 6.575715e-13 -0.005303678 0.70754696 9.480028e-01 7.747145e-14
## s.dist SD p.adjustMANOVA
## 211 0.9744818 0.8398316 5.416926e-07
## 87 0.8477787 0.5042728 3.294433e-16
## 504 0.8373893 0.6765847 2.018734e-155
## 241 0.8224051 0.3615897 1.329116e-04
## 686 0.8159058 0.3877469 6.345445e-25
## 88 0.7961978 0.4705476 1.887334e-17
## 881 0.7889454 0.3681893 6.041850e-25
## 887 0.7842477 0.5041028 2.935548e-12
## 398 0.7825579 0.7143477 3.263566e-04
## 356 0.7807887 0.3667641 1.568486e-25
## 384 0.7804087 0.3703502 1.644767e-25
## 396 0.7787702 0.4886698 1.927205e-04
## 379 0.7521415 0.6687930 1.233620e-03
## 827 0.7518816 0.4108925 7.817635e-23
## 592 0.7498990 0.3215586 6.867552e-25
## 893 0.7377130 0.5346927 1.136416e-10
## 733 0.7374260 0.3284913 6.997869e-21
## 591 0.7324932 0.3169859 5.887300e-21
## 853 0.7117051 0.4893651 1.086607e-26
## 208 0.7075668 0.5040615 9.538480e-12
table(sign(mprnares_sig$s.prom) == sign(mmrnares_sig$s.rna))
## Warning in sign(mprnares_sig$s.prom) == sign(mmrnares_sig$s.rna): longer object
## length is not a multiple of shorter object length
##
## FALSE TRUE
## 269 347
mprnares_sig <- subset(mprnares$enrichment_result, p.adjustMANOVA < 0.05)
mylm <- lm(mprnares$enrichment_result$s.rna ~ mprnares$enrichment_result$s.prom)
summary(mylm)
##
## Call:
## lm(formula = mprnares$enrichment_result$s.rna ~ mprnares$enrichment_result$s.prom)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76343 -0.19285 -0.03902 0.17195 0.94594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.023069 0.007658 3.012 0.00264 **
## mprnares$enrichment_result$s.prom 0.105035 0.043727 2.402 0.01644 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2627 on 1289 degrees of freedom
## Multiple R-squared: 0.004456, Adjusted R-squared: 0.003684
## F-statistic: 5.77 on 1 and 1289 DF, p-value: 0.01644
mylm2 <- lm(mprnares_sig$s.rna ~ mprnares_sig$s.pr)
summary(mylm2)
##
## Call:
## lm(formula = mprnares_sig$s.rna ~ mprnares_sig$s.pr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86613 -0.33350 0.07698 0.26306 0.84808
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12342 0.01512 8.163 2.36e-15 ***
## mprnares_sig$s.pr 0.11523 0.07164 1.609 0.108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3359 on 535 degrees of freedom
## Multiple R-squared: 0.004813, Adjusted R-squared: 0.002953
## F-statistic: 2.587 on 1 and 535 DF, p-value: 0.1083
plot(mprnares$enrichment_result$s.prom,mprnares$enrichment_result$s.rna,xlab="promoter",
ylab="RNA", main="S dist",col="gray",pch=19)
abline(v=0,h=0,lty=2,lwd=2)
points(mprnares_sig$s.pr,mprnares_sig$s.rna,col="black",pch=19,cex=1.01)
abline(mylm,col="blue",lty=3,lwd=3)
abline(mylm2,col="red",lty=3,lwd=3)
# rna vs body
mbrna <- merge(mb,rna,by=0)
rownames(mbrna) <- mbrna$Row.names
mbrna$Row.names = NULL
colnames(mbrna) <- c("body","rna")
mbrnares <- mitch_calc(mbrna, genesets=gs_symbols, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
mbrnares_sig <- subset(mbrnares$enrichment_result, p.adjustMANOVA < 0.05)
head(mbrnares_sig,20)
## set
## 210 REACTOME_UNWINDING_OF_DNA
## 882 REACTOME_DNA_STRAND_ELONGATION
## 876 REACTOME_ACTIVATION_OF_THE_PRE_REPLICATIVE_COMPLEX
## 391 REACTOME_CONDENSATION_OF_PROMETAPHASE_CHROMOSOMES
## 86 REACTOME_BETA_DEFENSINS
## 87 REACTOME_DEFENSINS
## 207 REACTOME_ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS
## 994 REACTOME_NEGATIVE_REGULATION_OF_ACTIVITY_OF_TFAP2_AP_2_FAMILY_TRANSCRIPTION_FACTORS
## 963 REACTOME_SLBP_DEPENDENT_PROCESSING_OF_REPLICATION_DEPENDENT_HISTONE_PRE_MRNAS
## 374 REACTOME_MUCOPOLYSACCHARIDOSES
## 305 REACTOME_METABOLISM_OF_ANGIOTENSINOGEN_TO_ANGIOTENSINS
## 881 REACTOME_LAGGING_STRAND_SYNTHESIS
## 880 REACTOME_PROCESSIVE_SYNTHESIS_ON_THE_LAGGING_STRAND
## 879 REACTOME_POLYMERASE_SWITCHING
## 117 REACTOME_POLO_LIKE_KINASE_MEDIATED_EVENTS
## 571 REACTOME_CLASS_C_3_METABOTROPIC_GLUTAMATE_PHEROMONE_RECEPTORS
## 50 REACTOME_ENDOSOMAL_VACUOLAR_PATHWAY
## 433 REACTOME_PROCESSING_AND_ACTIVATION_OF_SUMO
## 927 REACTOME_PURINE_RIBONUCLEOSIDE_MONOPHOSPHATE_BIOSYNTHESIS
## 995 REACTOME_ACTIVATION_OF_THE_TFAP2_AP_2_FAMILY_OF_TRANSCRIPTION_FACTORS
## setSize pMANOVA s.body s.rna p.body p.rna
## 210 12 9.646190e-08 -0.008644537 0.9450121 9.587183e-01 1.327525e-08
## 882 30 2.671366e-13 0.112787620 0.7883558 2.846665e-01 6.681814e-14
## 876 31 1.210688e-13 0.118120602 0.7850591 2.547181e-01 3.301249e-14
## 391 11 4.221186e-05 -0.007926603 0.7795484 9.637610e-01 7.214130e-06
## 86 30 1.971865e-12 -0.762044390 -0.1073390 4.728136e-13 3.079713e-01
## 87 38 2.641333e-14 -0.745376000 -0.1018486 6.481474e-15 2.827971e-01
## 207 37 5.353431e-14 0.175863145 0.7133267 6.399577e-02 5.118581e-14
## 994 10 2.672565e-04 0.474338011 0.5514101 9.342324e-03 2.478744e-03
## 963 11 1.352155e-04 0.355099596 0.6298136 4.127791e-02 2.889911e-04
## 374 10 5.178103e-04 0.111982916 -0.7032468 5.394372e-01 1.136271e-04
## 305 16 3.613020e-06 -0.471855164 -0.5301067 1.075639e-03 2.339466e-04
## 881 18 9.202603e-07 0.193606804 0.6827225 1.547324e-01 4.995512e-07
## 880 13 6.094510e-05 0.148593397 0.6830191 3.532446e-01 1.922182e-05
## 879 12 1.494631e-04 0.185959238 0.6665152 2.643428e-01 6.153368e-05
## 117 16 9.488149e-06 0.032722473 0.6906295 8.205198e-01 1.634662e-06
## 571 14 7.178253e-05 -0.611671985 0.3021836 7.318061e-05 4.979884e-02
## 50 11 6.506994e-04 0.494505738 -0.4623285 4.483032e-03 7.796796e-03
## 433 10 1.119654e-03 0.386345333 0.5368158 3.425707e-02 3.221051e-03
## 927 11 8.189373e-04 0.024966857 0.6533993 8.858329e-01 1.693832e-04
## 995 10 1.648551e-03 0.593178768 0.2534964 1.151418e-03 1.642448e-01
## s.dist SD p.adjustMANOVA
## 210 0.9450516 0.67433704 1.118081e-06
## 882 0.7963830 0.47769882 8.307297e-12
## 876 0.7938956 0.47159675 4.062177e-12
## 391 0.7795887 0.55682890 2.329875e-04
## 86 0.7695670 0.46294659 5.586951e-11
## 87 0.7523021 0.45504259 9.905000e-13
## 207 0.7346855 0.38004411 1.896007e-12
## 994 0.7273580 0.05449821 1.139639e-03
## 963 0.7230220 0.19425212 6.432829e-04
## 374 0.7121069 0.57645446 2.000631e-03
## 305 0.7096904 0.04119006 2.725799e-05
## 881 0.7096433 0.34585701 7.874711e-06
## 880 0.6989958 0.37789606 3.210951e-04
## 879 0.6919706 0.33980435 6.929652e-04
## 117 0.6914043 0.46521052 5.959305e-05
## 571 0.6822445 0.64619348 3.660909e-04
## 50 0.6769664 0.67658398 2.432967e-03
## 433 0.6613879 0.10639871 3.817003e-03
## 927 0.6538761 0.44436883 2.932992e-03
## 995 0.6450748 0.24019173 5.403349e-03
table(sign(mbrnares_sig$s.body) == sign(mbrnares_sig$s.rna))
##
## FALSE TRUE
## 238 368
mbrnares_sig <- subset(mbrnares$enrichment_result, p.adjustMANOVA < 0.05)
mylm <- lm(mbrnares$enrichment_result$s.rna ~ mbrnares$enrichment_result$s.body)
summary(mylm)
##
## Call:
## lm(formula = mbrnares$enrichment_result$s.rna ~ mbrnares$enrichment_result$s.body)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73416 -0.18230 -0.02342 0.17002 0.92455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.021207 0.007333 2.892 0.00389 **
## mbrnares$enrichment_result$s.body 0.086658 0.037000 2.342 0.01933 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2449 on 1273 degrees of freedom
## Multiple R-squared: 0.004291, Adjusted R-squared: 0.003508
## F-statistic: 5.485 on 1 and 1273 DF, p-value: 0.01933
mylm2 <- lm(mbrnares_sig$s.rna ~ mbrnares_sig$s.body)
summary(mylm2)
##
## Call:
## lm(formula = mbrnares_sig$s.rna ~ mbrnares_sig$s.body)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80412 -0.27194 0.02432 0.24445 0.85518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.09063 0.01348 6.721 4.18e-11 ***
## mbrnares_sig$s.body 0.09152 0.05743 1.594 0.112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3006 on 604 degrees of freedom
## Multiple R-squared: 0.004187, Adjusted R-squared: 0.002539
## F-statistic: 2.54 on 1 and 604 DF, p-value: 0.1115
plot(mbrnares$enrichment_result$s.body,mbrnares$enrichment_result$s.rna,xlab="ex-promoter",
ylab="RNA",main="S dist",col="gray",pch=19)
abline(v=0,h=0,lty=2,lwd=2)
points(mbrnares_sig$s.body,mbrnares_sig$s.rna,col="black",pch=19,cex=1.01)
abline(mylm,col="blue",lty=3,lwd=3)
abline(mylm2,col="red",lty=3,lwd=3)
Now look at GSAmeth.
# body
sigup <- rownames(subset(dm,logFC > 0 & adj.P.Val < 0.05))
sigdn <- rownames(subset(dm,logFC < 0 & adj.P.Val < 0.05))
gsaup <- gsameth(sig.cpg=sigup, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = "Body")
rownames(gsaup) <- paste(rownames(gsaup),"UP")
gsadn <- gsameth(sig.cpg=sigdn, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = "Body")
rownames(gsadn) <- paste(rownames(gsadn),"DN")
gsig_up <- subset(gsaup,FDR<0.05)
gsig_dn <- subset(gsadn,FDR<0.05)
gsig <- rbind(gsig_up,gsig_dn)
nrow(gsig)
## [1] 93
gsig_body <- gsig
gsig_body_up <- gsig_up
gsig_body_dn <- gsig_dn
# promoter
TSS = c("TSS200", "TSS1500")
gsaup <- gsameth(sig.cpg=sigup, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = TSS)
rownames(gsaup) <- paste(rownames(gsaup),"UP")
gsadn <- gsameth(sig.cpg=sigdn, all.cpg=rownames(dm), collection=gs_entrez,
array.type="EPIC", genomic.features = TSS)
rownames(gsadn) <- paste(rownames(gsadn),"DN")
gsig_up <- subset(gsaup,FDR<0.05)
gsig_dn <- subset(gsadn,FDR<0.05)
gsig <- rbind(gsig_up,gsig_dn)
nrow(gsig)
## [1] 22
gsig_promoter <- gsig
gsig_promoter_up <- gsig_up
gsig_promoter_dn <- gsig_dn
Now make a Euler plot for GSA meth-RNA comparison.
rna_up <- subset(msig,s.dist>0)$set
rna_dn <- subset(msig,s.dist<0)$set
gsig_body_up <- rownames(gsig_body_up)
gsig_body_dn <- rownames(gsig_body_dn)
gsig_promoter_up <- rownames(gsig_promoter_up)
gsig_promoter_dn <- rownames(gsig_promoter_dn)
vg <- list("RNA up"=rna_up,"RNA dn"=rna_dn,
"GSA body up"=gsig_body_up, "GSA body dn"=gsig_body_dn,
"GSA promoter up"=gsig_promoter_up, "GSA prom dn"=gsig_promoter_dn)
plot(euler(vg),quantities=TRUE)
pdf("fig5a.pdf")
plot(euler(vg),quantities=TRUE)
dev.off()
## png
## 2
Now make a Euler plot for LAM-RNA comparison.
dmp_res_up <- dmp_res_up$set
dmp_res_dn <- dmp_res_dn$set
dmb_res_up <- dmb_res_up$set
dmb_res_dn <- dmb_res_dn$set
vm <- list("RNA up"=rna_up,"RNA dn"=rna_dn,
"LAM body up"=dmb_res_up, "LAM body dn"=dmb_res_dn,
"LAM promoter up"=dmp_res_up, "LAM prom dn"=dmp_res_dn)
plot(euler(vm),quantities=TRUE)
pdf("fig5b.pdf")
plot(euler(vm),quantities=TRUE)
dev.off()
## png
## 2
Although there were some differentially methylated pathways analysed with different methods, they were not concordant with differential expression.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] png_0.1-8
## [2] gridExtra_2.3
## [3] gplots_3.1.3
## [4] missMethyl_1.36.0
## [5] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [6] beeswarm_0.4.0
## [7] kableExtra_1.3.4
## [8] mitch_1.15.0
## [9] limma_3.58.1
## [10] tictoc_1.2
## [11] HGNChelper_0.8.1
## [12] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [13] IlluminaHumanMethylation450kmanifest_0.4.0
## [14] minfi_1.48.0
## [15] bumphunter_1.44.0
## [16] locfit_1.5-9.8
## [17] iterators_1.0.14
## [18] foreach_1.5.2
## [19] Biostrings_2.70.1
## [20] XVector_0.42.0
## [21] eulerr_7.0.0
## [22] DESeq2_1.42.0
## [23] SummarizedExperiment_1.32.0
## [24] Biobase_2.62.0
## [25] MatrixGenerics_1.14.0
## [26] matrixStats_1.2.0
## [27] GenomicRanges_1.54.1
## [28] GenomeInfoDb_1.38.2
## [29] IRanges_2.36.0
## [30] S4Vectors_0.40.2
## [31] BiocGenerics_0.48.1
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.2 later_1.3.2
## [3] BiocIO_1.12.0 bitops_1.0-7
## [5] filelock_1.0.3 BiasedUrn_2.0.11
## [7] tibble_3.2.1 polyclip_1.10-6
## [9] preprocessCore_1.64.0 XML_3.99-0.16
## [11] lifecycle_1.0.4 lattice_0.22-5
## [13] MASS_7.3-60 base64_2.0.1
## [15] scrime_1.3.5 magrittr_2.0.3
## [17] sass_0.4.8 rmarkdown_2.25
## [19] jquerylib_0.1.4 yaml_2.3.8
## [21] httpuv_1.6.13 doRNG_1.8.6
## [23] askpass_1.2.0 DBI_1.1.3
## [25] RColorBrewer_1.1-3 abind_1.4-5
## [27] zlibbioc_1.48.0 rvest_1.0.3
## [29] quadprog_1.5-8 purrr_1.0.2
## [31] RCurl_1.98-1.13 rappdirs_0.3.3
## [33] GenomeInfoDbData_1.2.11 genefilter_1.84.0
## [35] annotate_1.80.0 svglite_2.1.3
## [37] DelayedMatrixStats_1.24.0 codetools_0.2-19
## [39] DelayedArray_0.28.0 xml2_1.3.6
## [41] tidyselect_1.2.0 beanplot_1.3.1
## [43] BiocFileCache_2.10.1 webshot_0.5.5
## [45] illuminaio_0.44.0 GenomicAlignments_1.38.0
## [47] jsonlite_1.8.8 multtest_2.58.0
## [49] ellipsis_0.3.2 survival_3.5-7
## [51] systemfonts_1.0.5 polylabelr_0.2.0
## [53] tools_4.3.2 progress_1.2.3
## [55] Rcpp_1.0.11 glue_1.6.2
## [57] SparseArray_1.2.2 xfun_0.41
## [59] dplyr_1.1.4 HDF5Array_1.30.0
## [61] fastmap_1.1.1 GGally_2.2.0
## [63] rhdf5filters_1.14.1 fansi_1.0.6
## [65] openssl_2.1.1 caTools_1.18.2
## [67] digest_0.6.33 R6_2.5.1
## [69] mime_0.12 colorspace_2.1-0
## [71] gtools_3.9.5 biomaRt_2.58.0
## [73] RSQLite_2.3.4 utf8_1.2.4
## [75] tidyr_1.3.0 generics_0.1.3
## [77] data.table_1.14.10 rtracklayer_1.62.0
## [79] prettyunits_1.2.0 httr_1.4.7
## [81] htmlwidgets_1.6.4 S4Arrays_1.2.0
## [83] ggstats_0.5.1 pkgconfig_2.0.3
## [85] gtable_0.3.4 blob_1.2.4
## [87] siggenes_1.76.0 htmltools_0.5.7
## [89] echarts4r_0.4.5 scales_1.3.0
## [91] rstudioapi_0.15.0 knitr_1.45
## [93] reshape2_1.4.4 tzdb_0.4.0
## [95] rjson_0.2.21 nlme_3.1-163
## [97] curl_5.2.0 org.Hs.eg.db_3.18.0
## [99] cachem_1.0.8 rhdf5_2.46.1
## [101] stringr_1.5.1 KernSmooth_2.23-22
## [103] AnnotationDbi_1.64.1 restfulr_0.0.15
## [105] GEOquery_2.70.0 pillar_1.9.0
## [107] grid_4.3.2 reshape_0.8.9
## [109] vctrs_0.6.5 promises_1.2.1
## [111] dbplyr_2.4.0 xtable_1.8-4
## [113] evaluate_0.23 readr_2.1.4
## [115] GenomicFeatures_1.54.1 cli_3.6.2
## [117] compiler_4.3.2 Rsamtools_2.18.0
## [119] rlang_1.1.2 crayon_1.5.2
## [121] rngtools_1.5.2 nor1mix_1.3-2
## [123] mclust_6.0.1 plyr_1.8.9
## [125] stringi_1.8.3 viridisLite_0.4.2
## [127] BiocParallel_1.36.0 munsell_0.5.0
## [129] Matrix_1.6-1.1 hms_1.1.3
## [131] sparseMatrixStats_1.14.0 bit64_4.0.5
## [133] ggplot2_3.4.4 Rhdf5lib_1.24.1
## [135] KEGGREST_1.42.0 statmod_1.5.0
## [137] shiny_1.8.0 highr_0.10
## [139] memoise_2.0.1 bslib_0.6.1
## [141] bit_4.0.5