Source: https://github.com/markziemann/echs1_rnaseq
The samples are labelled as CON (control) and KO for knockout, ut are the untreated samples and dNs are the treated samples (mito biogenesis stimulator).
suppressPackageStartupMessages({
library("zoo")
library("tidyverse")
library("reshape2")
library("DESeq2")
library("gplots")
library("fgsea")
library("MASS")
library("mitch")
library("eulerr")
library("limma")
library("topconfects")
library("kableExtra")
library("vioplot")
library("beeswarm")
})
Importing RNA-seq data
tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
xx$geneid = NULL
xx <- round(xx)
xx <- xx[,which(colnames(xx)!="test")]
xx[1:6,1:6]
## 1_CON_ut_A_RNA_HB_HLF57DRXY_CTGTTGGTCC-AACGGTCTAT_L001
## ENSG00000000003.15 TSPAN6 704
## ENSG00000000005.6 TNMD 0
## ENSG00000000419.13 DPM1 624
## ENSG00000000457.14 SCYL3 97
## ENSG00000000460.17 C1orf112 110
## ENSG00000000938.13 FGR 0
## 1_CON_ut_A_RNA_HB_HLF57DRXY_CTGTTGGTCC-AACGGTCTAT_L002
## ENSG00000000003.15 TSPAN6 676
## ENSG00000000005.6 TNMD 0
## ENSG00000000419.13 DPM1 643
## ENSG00000000457.14 SCYL3 99
## ENSG00000000460.17 C1orf112 122
## ENSG00000000938.13 FGR 0
## 10_KO_ut_B_RNA_HB_HLF57DRXY_CACGGAACAA-GTGCTAGGTT_L001
## ENSG00000000003.15 TSPAN6 752
## ENSG00000000005.6 TNMD 0
## ENSG00000000419.13 DPM1 360
## ENSG00000000457.14 SCYL3 102
## ENSG00000000460.17 C1orf112 110
## ENSG00000000938.13 FGR 0
## 10_KO_ut_B_RNA_HB_HLF57DRXY_CACGGAACAA-GTGCTAGGTT_L002
## ENSG00000000003.15 TSPAN6 803
## ENSG00000000005.6 TNMD 0
## ENSG00000000419.13 DPM1 334
## ENSG00000000457.14 SCYL3 75
## ENSG00000000460.17 C1orf112 100
## ENSG00000000938.13 FGR 0
## 11_KO_ut_C_RNA_HB_HLF57DRXY_TGGAGTACTT-TCCACACAGA_L001
## ENSG00000000003.15 TSPAN6 1502
## ENSG00000000005.6 TNMD 0
## ENSG00000000419.13 DPM1 703
## ENSG00000000457.14 SCYL3 203
## ENSG00000000460.17 C1orf112 175
## ENSG00000000938.13 FGR 0
## 11_KO_ut_C_RNA_HB_HLF57DRXY_TGGAGTACTT-TCCACACAGA_L002
## ENSG00000000003.15 TSPAN6 1509
## ENSG00000000005.6 TNMD 0
## ENSG00000000419.13 DPM1 659
## ENSG00000000457.14 SCYL3 184
## ENSG00000000460.17 C1orf112 191
## ENSG00000000938.13 FGR 0
dim(xx)
## [1] 60651 32
Fix the sample names.
They are duplicated for lane 1 and 2, which I will aggregate.
colnames(xx) <- sapply(strsplit(colnames(xx),"_RNA_"),"[[",1)
colnames(xx) <- sapply(strsplit(sub("_","@",colnames(xx)),"@"),"[[",2)
labels <- unique(sapply(strsplit(colnames(xx),"\\."),"[[",1))
l <- lapply(labels,function(x) { rowSums(xx[,grep(x,colnames(xx))]) } )
ll <- do.call(cbind,l)
colnames(ll) <- labels
ll <- as.data.frame(ll[,order(colnames(ll))])
write.table(ll,file="counts.tsv",sep="\t",quote=FALSE)
rpm <- ll/colSums(ll) *1e6
write.table(rpm,file="rpm.tsv",sep="\t",quote=FALSE)
Make a sample sheet. This will be easy because the groups are specified in the name.
ss <- as.data.frame(colnames(ll))
ss$ko <- factor(grepl("KO", colnames(ll)))
ss$trt <- factor(grepl("dNs",colnames(ll)))
rownames(ss) <- ss[,1]
ss[,1]=NULL
# can also set some colours for later
ss$cols <- c(rep("gray",4),rep("lightblue",4),rep("pink",4),
rep("orange",4))
ss %>% kbl(caption = "Sample sheet") %>%
kable_paper("hover", full_width = F)
ko | trt | cols | |
---|---|---|---|
CON_dNs_A | FALSE | TRUE | gray |
CON_dNs_B | FALSE | TRUE | gray |
CON_dNs_C | FALSE | TRUE | gray |
CON_dNs_D | FALSE | TRUE | gray |
CON_ut_A | FALSE | FALSE | lightblue |
CON_ut_B | FALSE | FALSE | lightblue |
CON_ut_C | FALSE | FALSE | lightblue |
CON_ut_D | FALSE | FALSE | lightblue |
KO_dNs_A | TRUE | TRUE | pink |
KO_dNs_B | TRUE | TRUE | pink |
KO_dNs_C | TRUE | TRUE | pink |
KO_dNs_D | TRUE | TRUE | pink |
KO_ut_A | TRUE | FALSE | orange |
KO_ut_B | TRUE | FALSE | orange |
KO_ut_C | TRUE | FALSE | orange |
KO_ut_D | TRUE | FALSE | orange |
Here I’ll look at a few different quality control measures.
par(mar=c(5,8,3,1))
barplot(colSums(ll),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)
sums <- colSums(ll)
sums <- sums[order(sums)]
barplot(sums,horiz=TRUE,las=1,xlab="num reads")
abline(v=15000000,col="red")
Multidimensional scaling plot to show the variation between all samples, very similar to PCA.
Firstly with the data before aggregating technical replicates.
mds <- cmdscale(dist(t(xx)))
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col="gray")
text(mds, labels=rownames(mds) )
Now for the data after aggregation.
It looks like the drug has a bigger effect than the gene KO.
mds <- cmdscale(dist(t(ll)))
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
pdf("mds1.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()
## png
## 2
heatmap.2(cor(ll),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
pdf("cor1.pdf")
heatmap.2(cor(ll),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()
## png
## 2
ll2 <- ll[,grep("ut",colnames(ll))]
mds <- cmdscale(dist(t(ll2)))
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
pdf("mds2.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()
## png
## 2
ll3 <- ll[,grep("dNs",colnames(ll))]
mds <- cmdscale(dist(t(ll3)))
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
pdf("mds3.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()
## png
## 2
heatmap.2(cor(ll2),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
heatmap.2(cor(ll3),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
pdf("cor2.pdf")
heatmap.2(cor(ll2),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()
## png
## 2
pdf("cor3.pdf")
heatmap.2(cor(ll3),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()
## png
## 2
Don’t forget to remove poorly detected genes from the matrix with a threshold of 10 reads per sample on average.
There are 4 contrasts to set up.
Effect of the KO in untreated cells
Effect of the KO in treated cells
Effect of the drug in WT cells
Effect of the drug in KO cells
CON ut vs KO dNs. (what was altered in ECHS1 KO, that has been returned to normal levels (and no longer significantly different) to CON ut).
ss1 <- ss[which(ss$trt=="FALSE"),]
xx1 <- ll[which(colnames(ll) %in% rownames(ss1))]
xx1 <- xx1[which(rowSums(xx1)>=10),]
rpm1 <- xx1/colSums(xx1) *1e6
ss2 <- ss[which(ss$trt=="TRUE"),]
xx2 <- ll[which(colnames(ll) %in% rownames(ss2))]
xx2 <- xx2[which(rowSums(xx2)>=10),]
rpm2 <- xx2/colSums(xx2) *1e6
ss3 <- ss[which(ss$ko=="FALSE"),]
xx3 <- ll[which(colnames(ll) %in% rownames(ss3))]
xx3 <- xx3[which(rowSums(xx3)>=10),]
rpm3 <- xx3/colSums(xx3) *1e6
ss4 <- ss[which(ss$ko=="TRUE"),]
xx4 <- ll[which(colnames(ll) %in% rownames(ss4))]
xx4 <- xx4[which(rowSums(xx4)>=10),]
rpm4 <- xx4/colSums(xx4) *1e6
ss5 <- subset(ss,ko == trt)
xx5 <- ll[which(colnames(ll) %in% rownames(ss5))]
xx5 <- xx5[which(rowSums(xx5)>=10),]
rpm5 <- xx5/colSums(xx5) *1e6
dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1,
design = ~ ko )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
kbl(caption = "Top gene expression differences for contrast 1: Effect of the KO in untreated cells") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000049192.15 ADAMTS6 | 1232.5643 | -3.320315 | 0.0735209 | -45.16150 | 0 | 0 |
ENSG00000090339.9 ICAM1 | 8426.0162 | 2.137068 | 0.0421343 | 50.72040 | 0 | 0 |
ENSG00000099875.16 MKNK2 | 5196.7000 | 1.646797 | 0.0435241 | 37.83644 | 0 | 0 |
ENSG00000105825.14 TFPI2 | 10749.0439 | -2.758378 | 0.0645077 | -42.76045 | 0 | 0 |
ENSG00000107551.21 RASSF4 | 1224.3947 | 3.078164 | 0.0812402 | 37.88965 | 0 | 0 |
ENSG00000116016.14 EPAS1 | 16685.0850 | 1.887592 | 0.0496555 | 38.01373 | 0 | 0 |
ENSG00000125538.12 IL1B | 9573.0006 | -1.925913 | 0.0457459 | -42.10027 | 0 | 0 |
ENSG00000138080.14 EMILIN1 | 6299.0508 | 2.088516 | 0.0483164 | 43.22583 | 0 | 0 |
ENSG00000146197.9 SCUBE3 | 7326.2924 | -2.787733 | 0.0674659 | -41.32061 | 0 | 0 |
ENSG00000154146.13 NRGN | 1528.8050 | 2.283661 | 0.0600431 | 38.03371 | 0 | 0 |
ENSG00000164949.8 GEM | 1954.1763 | -2.372324 | 0.0565928 | -41.91920 | 0 | 0 |
ENSG00000165507.9 DEPP1 | 3368.7544 | 2.850455 | 0.0522246 | 54.58071 | 0 | 0 |
ENSG00000166250.12 CLMP | 20860.4032 | -1.674644 | 0.0397766 | -42.10125 | 0 | 0 |
ENSG00000166670.10 MMP10 | 3155.5769 | -6.120693 | 0.1021577 | -59.91416 | 0 | 0 |
ENSG00000166920.13 C15orf48 | 2293.5579 | 2.687532 | 0.0645990 | 41.60330 | 0 | 0 |
ENSG00000167772.12 ANGPTL4 | 14717.2697 | 3.591520 | 0.0856872 | 41.91430 | 0 | 0 |
ENSG00000168528.12 SERINC2 | 4459.4824 | 1.937358 | 0.0495824 | 39.07346 | 0 | 0 |
ENSG00000170961.7 HAS2 | 3615.4379 | -4.322465 | 0.0852007 | -50.73275 | 0 | 0 |
ENSG00000172572.7 PDE3A | 1574.2471 | -3.854290 | 0.0926037 | -41.62136 | 0 | 0 |
ENSG00000172927.8 MYEOV | 928.0298 | 4.057927 | 0.0905625 | 44.80804 | 0 | 0 |
dge1 <- dge
d1up <- rownames(subset(dge1,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge1,padj <= 0.05 & log2FoldChange < 0))
write.table(dge1,file="dge1.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx2 , colData = ss2,
design = ~ ko )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
kbl(caption = "Top gene expression differences for contrast 2: Effect of the KO in treated cells") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000081248.12 CACNA1S | 1209.4345 | 4.728860 | 0.1048201 | 45.11406 | 0 | 0 |
ENSG00000142910.16 TINAGL1 | 4889.9014 | 3.128308 | 0.0629648 | 49.68348 | 0 | 0 |
ENSG00000162804.14 SNED1 | 6126.7547 | -2.358713 | 0.0490839 | -48.05475 | 0 | 0 |
ENSG00000166920.13 C15orf48 | 2299.7911 | 3.960911 | 0.0886313 | 44.68975 | 0 | 0 |
ENSG00000167772.12 ANGPTL4 | 9936.5018 | 3.543783 | 0.0816955 | 43.37797 | 0 | 0 |
ENSG00000113361.13 CDH6 | 816.5796 | -3.817982 | 0.1059909 | -36.02179 | 0 | 0 |
ENSG00000101187.16 SLCO4A1 | 19101.5516 | 1.934484 | 0.0565000 | 34.23868 | 0 | 0 |
ENSG00000143127.13 ITGA10 | 1156.7355 | -3.687434 | 0.1104110 | -33.39734 | 0 | 0 |
ENSG00000151062.15 CACNA2D4 | 655.2444 | 4.551764 | 0.1378037 | 33.03079 | 0 | 0 |
ENSG00000170961.7 HAS2 | 613.5916 | -3.363180 | 0.1045664 | -32.16312 | 0 | 0 |
ENSG00000168528.12 SERINC2 | 3732.8640 | 1.667401 | 0.0525189 | 31.74856 | 0 | 0 |
ENSG00000135245.10 HILPDA | 1510.8969 | 2.621093 | 0.0845048 | 31.01708 | 0 | 0 |
ENSG00000172927.8 MYEOV | 472.9211 | 4.325653 | 0.1395499 | 30.99717 | 0 | 0 |
ENSG00000115756.13 HPCAL1 | 4084.1250 | 1.608911 | 0.0524306 | 30.68650 | 0 | 0 |
ENSG00000100985.7 MMP9 | 14795.4099 | -2.331039 | 0.0761929 | -30.59392 | 0 | 0 |
ENSG00000163235.16 TGFA | 1660.7730 | 2.758967 | 0.0910679 | 30.29573 | 0 | 0 |
ENSG00000125571.9 IL37 | 1230.5444 | 2.713549 | 0.0896194 | 30.27858 | 0 | 0 |
ENSG00000118257.17 NRP2 | 2033.2565 | -2.000568 | 0.0681154 | -29.37026 | 0 | 0 |
ENSG00000213658.12 LAT | 1708.1308 | -1.949041 | 0.0664827 | -29.31650 | 0 | 0 |
ENSG00000101443.18 WFDC2 | 5495.9129 | 2.267593 | 0.0773716 | 29.30783 | 0 | 0 |
dge2 <- dge
d2up <- rownames(subset(dge2,padj <= 0.05 & log2FoldChange > 0))
d2dn <- rownames(subset(dge2,padj <= 0.05 & log2FoldChange < 0))
write.table(dge2,file="dge2.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx3 , colData = ss3,
design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
kbl(caption = "Top gene expression differences for contrast 3: Effect of the drug in WT cells") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000068912.14 ERLEC1 | 1851.848 | -2.762775 | 0.0670700 | -41.19240 | 0 | 0 |
ENSG00000078747.16 ITCH | 2736.282 | -2.038664 | 0.0523301 | -38.95780 | 0 | 0 |
ENSG00000100836.10 PABPN1 | 6539.865 | 2.072716 | 0.0487025 | 42.55874 | 0 | 0 |
ENSG00000102755.12 FLT1 | 5426.297 | -3.705474 | 0.0880185 | -42.09882 | 0 | 0 |
ENSG00000106366.9 SERPINE1 | 24625.790 | -2.091105 | 0.0507831 | -41.17717 | 0 | 0 |
ENSG00000108821.14 COL1A1 | 9382.064 | 2.181043 | 0.0456926 | 47.73301 | 0 | 0 |
ENSG00000110048.12 OSBP | 3405.995 | -2.355956 | 0.0614408 | -38.34517 | 0 | 0 |
ENSG00000112473.18 SLC39A7 | 4521.830 | -2.281225 | 0.0578603 | -39.42640 | 0 | 0 |
ENSG00000116285.13 ERRFI1 | 36944.117 | -2.684910 | 0.0567602 | -47.30267 | 0 | 0 |
ENSG00000122884.12 P4HA1 | 4295.439 | -2.579782 | 0.0582809 | -44.26463 | 0 | 0 |
ENSG00000131016.17 AKAP12 | 21452.521 | -3.317855 | 0.0663163 | -50.03076 | 0 | 0 |
ENSG00000138166.6 DUSP5 | 4318.320 | -2.155073 | 0.0498547 | -43.22711 | 0 | 0 |
ENSG00000142949.17 PTPRF | 7682.322 | -1.848612 | 0.0487213 | -37.94262 | 0 | 0 |
ENSG00000150961.15 SEC24D | 2402.484 | -3.024974 | 0.0617986 | -48.94889 | 0 | 0 |
ENSG00000150995.20 ITPR1 | 4106.314 | -3.021908 | 0.0665647 | -45.39805 | 0 | 0 |
ENSG00000159167.12 STC1 | 13523.272 | -3.145427 | 0.0722957 | -43.50780 | 0 | 0 |
ENSG00000162804.14 SNED1 | 6283.910 | 2.862292 | 0.0481808 | 59.40725 | 0 | 0 |
ENSG00000164484.12 TMEM200A | 1197.746 | -3.470114 | 0.0881540 | -39.36421 | 0 | 0 |
ENSG00000166250.12 CLMP | 18283.547 | -2.335014 | 0.0538393 | -43.37009 | 0 | 0 |
ENSG00000166670.10 MMP10 | 3245.447 | -3.537259 | 0.0771167 | -45.86894 | 0 | 0 |
dge3 <- dge
d3up <- rownames(subset(dge3,padj <= 0.05 & log2FoldChange > 0))
d3dn <- rownames(subset(dge3,padj <= 0.05 & log2FoldChange < 0))
write.table(dge3,file="dge3.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx4 , colData = ss4,
design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
kbl(caption = "Top gene expression differences for contrast 4: Effect of the drug in KO cells") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000059804.16 SLC2A3 | 5623.218 | -1.769194 | 0.0468167 | -37.78983 | 0 | 0 |
ENSG00000103888.17 CEMIP | 25230.452 | -2.601566 | 0.0673460 | -38.62986 | 0 | 0 |
ENSG00000104419.17 NDRG1 | 38083.260 | -2.549397 | 0.0511765 | -49.81576 | 0 | 0 |
ENSG00000134250.20 NOTCH2 | 6533.444 | -2.176937 | 0.0578054 | -37.65975 | 0 | 0 |
ENSG00000197324.9 LRP10 | 9816.752 | -1.770453 | 0.0378713 | -46.74921 | 0 | 0 |
ENSG00000198712.1 MT-CO2 | 290785.069 | 2.997493 | 0.0744604 | 40.25622 | 0 | 0 |
ENSG00000198727.2 MT-CYB | 116486.352 | 3.700459 | 0.0765560 | 48.33662 | 0 | 0 |
ENSG00000198763.3 MT-ND2 | 121199.785 | 3.262193 | 0.0678248 | 48.09735 | 0 | 0 |
ENSG00000198840.2 MT-ND3 | 17981.096 | 2.878705 | 0.0705335 | 40.81328 | 0 | 0 |
ENSG00000198959.12 TGM2 | 1712.100 | -2.516008 | 0.0594500 | -42.32139 | 0 | 0 |
ENSG00000225630.1 MTND2P28 | 5023.069 | 3.268132 | 0.0682696 | 47.87097 | 0 | 0 |
ENSG00000258017.2 AC011603.2 | 7033.925 | 2.918598 | 0.0767330 | 38.03574 | 0 | 0 |
ENSG00000259207.9 ITGB3 | 6216.236 | -2.380112 | 0.0532419 | -44.70377 | 0 | 0 |
ENSG00000198804.2 MT-CO1 | 506735.846 | 2.899638 | 0.0779490 | 37.19915 | 0 | 0 |
ENSG00000134531.10 EMP1 | 8062.151 | -1.864264 | 0.0501442 | -37.17805 | 0 | 0 |
ENSG00000198888.2 MT-ND1 | 70406.196 | 2.780834 | 0.0758103 | 36.68147 | 0 | 0 |
ENSG00000142949.17 PTPRF | 7521.711 | -1.802689 | 0.0498071 | -36.19343 | 0 | 0 |
ENSG00000198938.2 MT-CO3 | 211300.735 | 2.731621 | 0.0754793 | 36.19031 | 0 | 0 |
ENSG00000178726.7 THBD | 1419.592 | -2.752535 | 0.0765193 | -35.97178 | 0 | 0 |
ENSG00000131016.17 AKAP12 | 17614.587 | -2.494844 | 0.0707156 | -35.27996 | 0 | 0 |
dge4 <- dge
d4up <- rownames(subset(dge4,padj <= 0.05 & log2FoldChange > 0))
d4dn <- rownames(subset(dge4,padj <= 0.05 & log2FoldChange < 0))
write.table(dge4,file="dge4.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx5 , colData = ss5,
design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
kbl(caption = "Top gene expression differences for contrast 5: Does the drug restore changes caused by KO?") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000006459.11 KDM7A | 2102.825 | -2.968547 | 0.0756232 | -39.25444 | 0 | 0 |
ENSG00000011638.11 TMEM159 | 1310.273 | -3.080679 | 0.0735046 | -41.91135 | 0 | 0 |
ENSG00000026559.14 KCNG1 | 3444.570 | -2.168208 | 0.0544081 | -39.85081 | 0 | 0 |
ENSG00000049192.15 ADAMTS6 | 1080.879 | -4.046315 | 0.0968751 | -41.76838 | 0 | 0 |
ENSG00000054356.14 PTPRN | 2934.182 | -2.849605 | 0.0710961 | -40.08101 | 0 | 0 |
ENSG00000059804.16 SLC2A3 | 5889.284 | -1.919649 | 0.0509892 | -37.64813 | 0 | 0 |
ENSG00000081181.8 ARG2 | 7528.745 | -2.886314 | 0.0654474 | -44.10126 | 0 | 0 |
ENSG00000081248.12 CACNA1S | 1201.973 | 6.324357 | 0.1441395 | 43.87665 | 0 | 0 |
ENSG00000085449.15 WDFY1 | 2455.389 | -2.926425 | 0.0779364 | -37.54886 | 0 | 0 |
ENSG00000100985.7 MMP9 | 34579.692 | -3.683329 | 0.0725006 | -50.80410 | 0 | 0 |
ENSG00000101443.18 WFDC2 | 5112.498 | 3.272677 | 0.0868267 | 37.69207 | 0 | 0 |
ENSG00000102755.12 FLT1 | 5139.855 | -3.718650 | 0.0709667 | -52.39995 | 0 | 0 |
ENSG00000106333.13 PCOLCE | 9578.772 | 2.391137 | 0.0557298 | 42.90588 | 0 | 0 |
ENSG00000106366.9 SERPINE1 | 21920.897 | -2.648544 | 0.0513740 | -51.55418 | 0 | 0 |
ENSG00000112902.12 SEMA5A | 1857.884 | -3.979788 | 0.0893823 | -44.52548 | 0 | 0 |
ENSG00000115170.15 ACVR1 | 1323.707 | -2.656676 | 0.0684522 | -38.81067 | 0 | 0 |
ENSG00000115677.17 HDLBP | 13291.684 | -2.302710 | 0.0521315 | -44.17120 | 0 | 0 |
ENSG00000115758.13 ODC1 | 5532.578 | -3.052796 | 0.0739435 | -41.28553 | 0 | 0 |
ENSG00000118257.17 NRP2 | 5136.700 | -3.513717 | 0.0603518 | -58.22055 | 0 | 0 |
ENSG00000125430.9 HS3ST3B1 | 1104.229 | -3.706736 | 0.0853897 | -43.40962 | 0 | 0 |
dge5 <- dge
d5up <- rownames(subset(dge5,padj <= 0.05 & log2FoldChange > 0))
d5dn <- rownames(subset(dge5,padj <= 0.05 & log2FoldChange < 0))
write.table(dge5,file="dge5.tsv",quote=FALSE,sep="\t")
Here let’s look at some plots.
MA plot shows the average level and fold change of all detected genes. Volcano plot shows the fold change and the significance, as measured by -log(p-value). Significant genes are shown as red points.
There are heatmaps of the top ranked genes by p-value. Above the gene expression values there is a bar in yellow/orange colours. Yellow shows relatively low QuickDASH score and orange shows high score.
maplot <- function(de,contrast_name) {
de <- de[which(!is.na(de$padj)),]
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=0.5, col="dark gray",
main=contrast_name, cex.main=1)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=0.5, col="red")
mtext(SUBHEADER,cex = 1)
}
make_volcano <- function(de,name) {
de <- de[which(!is.na(de$padj)),]
de$pvalue[which(de$pvalue==0)] <- 1e-320
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}
make_volcano2 <- function(de,name) {
de <- de[which(!is.na(de$padj)),]
de$pvalue[which(de$pvalue==0)] <- 1e-320
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
top <- head(sig,30)
mylabels <- sapply(strsplit(rownames(top)," "),"[[",2)
plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
text(top$log2FoldChange+0.2,-log10(top$pval),labels=mylabels, srt=35 ,cex=0.7)
}
make_heatmap <- function(de,name,myss,mx,n=30){
colfunc <- colorRampPalette(c("blue", "white", "red"))
mxn <- mx/rowSums(mx)*1000000
x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,15), cexRow=0.7,
main=paste("Top ranked",n,"genes in",name) , ColSideColors = myss$cols )
}
make_heatmap2 <- function(de,name,myss,mx,n=30){
colfunc <- colorRampPalette(c("blue", "white", "red"))
mxn <- mx/rowSums(mx)*1000000
x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
rownames(x) <- sapply(strsplit(rownames(x)," "),"[[",2)
heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,15), cexRow=0.6,
main=paste("Top ranked",n,"genes in",name) , ColSideColors = myss$cols )
}
mitch_barplot <- function(res){
sig <- head(subset(res$enrichment_result,p.adjustANOVA<0.05),30)
sig <- sig[order(sig$s.dist),]
par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
names.arg=sig$set,main="Enrichment score") ;grid()
}
mitch_barplot2 <- function(res){
sig_up <- head(subset(res$enrichment_result,p.adjustANOVA<0.05 & s.dist>0),15)
sig_dn <- head(subset(res$enrichment_result,p.adjustANOVA<0.05 & s.dist<0),15)
sig <- rbind(sig_up,sig_dn)
sig <- sig[order(sig$s.dist),]
par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
names.arg=sig$set,main="Enrichment score") ;grid()
}
maplot(dge1,"Cont1: Effect of KO in untr cells")
make_volcano(dge1,"Cont1: Effect of KO in untr cells")
make_volcano2(dge1,"Cont1: Effect of KO in untr cells")
make_heatmap(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
make_heatmap2(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
maplot(dge2,"Cont2: Effect of KO in trt cells")
make_volcano(dge2,"Cont2: Effect of KO in trt cells")
make_volcano2(dge2,"Cont2: Effect of KO in trt cells")
make_heatmap(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
make_heatmap2(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
maplot(dge3,"Cont3: Effect of drug in WT cells")
make_volcano(dge3,"Cont3: Effect of drug in WT cells")
make_volcano2(dge3,"Cont3: Effect of drug in WT cells")
make_heatmap(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
make_heatmap2(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
maplot(dge4,"Cont4: Effect of drug in KO cells")
make_volcano(dge4,"Cont4: Effect of drug in KO cells")
make_volcano2(dge4,"Cont4: Effect of drug in KO cells")
make_heatmap(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
make_heatmap2(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
maplot(dge5,"CON ut vs KO dNs")
make_volcano(dge5,"CON ut vs KO dNs")
make_volcano2(dge5,"CON ut vs KO dNs")
make_heatmap(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
make_heatmap2(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
pdf("dge1.pdf")
maplot(dge1,"Cont1: Effect of KO in untr cells")
make_volcano(dge1,"Cont1: Effect of KO in untr cells")
make_volcano2(dge1,"Cont1: Effect of KO in untr cells")
make_heatmap(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
make_heatmap2(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
dev.off()
## png
## 2
pdf("dge2.pdf")
maplot(dge2,"Cont2: Effect of KO in trt cells")
make_volcano(dge2,"Cont2: Effect of KO in trt cells")
make_volcano2(dge2,"Cont2: Effect of KO in trt cells")
make_heatmap(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
make_heatmap2(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
dev.off()
## png
## 2
pdf("dge3.pdf")
maplot(dge3,"Cont3: Effect of drug in WT cells")
make_volcano(dge3,"Cont3: Effect of drug in WT cells")
make_volcano2(dge3,"Cont3: Effect of drug in WT cells")
make_heatmap(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
make_heatmap2(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
dev.off()
## png
## 2
pdf("dge4.pdf")
maplot(dge4,"Cont4: Effect of drug in KO cells")
make_volcano(dge4,"Cont4: Effect of drug in KO cells")
make_volcano2(dge4,"Cont4: Effect of drug in KO cells")
make_heatmap(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
make_heatmap2(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
dev.off()
## png
## 2
pdf("dge5.pdf")
maplot(dge5,"CON ut vs KO dNs")
make_volcano(dge5,"CON ut vs KO dNs")
make_volcano2(dge5,"CON ut vs KO dNs")
make_heatmap(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
make_heatmap2(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
dev.off()
## png
## 2
Contrast 1 and 4 - get top 25 genes and make heatmap of all sample groups.
g1 <- head(rownames(dge1),25)
g4 <- head(rownames(dge4),25)
g14 <- union(g1,g4)
fc1 <- dge1[which(rownames(dge1) %in% g14),"log2FoldChange",drop=F]
rpm14 <- rpm[which(rownames(rpm) %in% g14),]
rpm14 <- merge(rpm14,fc1,by=0)
rpm14 <- rpm14[order(rpm14$log2FoldChange),]
rpm14$log2FoldChange=NULL
rownames(rpm14) <- rpm14$Row.names
rpm14$Row.names=NULL
rownames(rpm14) <- sapply(strsplit(rownames(rpm14)," "),"[[",2)
rpm14 <- rpm14[,rev(order(colnames(rpm14)))]
# reorder columns
rpm14 <- rpm14[,c( grep("CON_ut",colnames(rpm14)) , grep("KO_ut",colnames(rpm14)) , grep("KO_dN",colnames(rpm14)) , grep("CON_dN",colnames(rpm14)) )]
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
Colv="none" , dendrogram="none",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
pdf("heat1.pdf")
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
Colv="none" , dendrogram="none",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
dev.off()
## png
## 2
rpm14 <- rpm[which(rownames(rpm) %in% g14),]
rownames(rpm14) <- sapply(strsplit(rownames(rpm14)," "),"[[",2)
# reorder columns
rpm14 <- rpm14[,c( grep("CON_ut",colnames(rpm14)) , grep("KO_ut",colnames(rpm14)) , grep("KO_dN",colnames(rpm14)) , grep("CON_dN",colnames(rpm14)) )]
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
Colv="none" , dendrogram="none",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
pdf("heat2.pdf")
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
Colv="none" , dendrogram="none",
margins = c(8,15), cexRow=0.5 , cexCol=0.8)
dev.off()
## png
## 2
rpm1x <- rpm1[which(rownames(rpm1) %in% g14),]
fc1 <- dge1[which(rownames(dge1) %in% g14),"log2FoldChange",drop=F]
rpm1x <- merge(rpm1x,fc1,by=0)
rpm1x <- rpm1x[order(rpm1x$log2FoldChange),]
rpm1x$log2FoldChange=NULL
rownames(rpm1x) <- rpm1x$Row.names
rpm1x$Row.names=NULL
rownames(rpm1x) <- sapply(strsplit(rownames(rpm1x)," "),"[[",2)
rpm1x <- rpm1x[,rev(order(colnames(rpm1x)))]
rpm4x <- rpm4[which(rownames(rpm4) %in% g14),]
rpm4x <- merge(rpm4x,fc1,by=0)
rpm4x <- rpm4x[order(rpm4x$log2FoldChange),]
rpm4x$log2FoldChange=NULL
rownames(rpm4x) <- rpm4x$Row.names
rpm4x$Row.names=NULL
rownames(rpm4x) <- sapply(strsplit(rownames(rpm4x)," "),"[[",2)
rpm4x <- rpm4x[,rev(order(colnames(rpm4x)))]
heatmap.2(as.matrix(rpm1x),trace="none",col=colfunc(25),scale="row", margins = c(8,15),
cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)
heatmap.2(as.matrix(rpm4x),trace="none",col=colfunc(25),scale="row", margins = c(8,15),
cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)
pdf("heat3.pdf")
heatmap.2(as.matrix(rpm1x),trace="none",col=colfunc(25),scale="row", margins = c(8,15),
cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)
heatmap.2(as.matrix(rpm4x),trace="none",col=colfunc(25),scale="row", margins = c(8,15),
cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)
dev.off()
## png
## 2
All DEGs
par(cex.main=0.5)
par(mar=c(2,2,2,2))
v0 <- list("C1 up"=d1up,"C1 dn"=d1dn, "C2 up" = d2up, "C2 dn"=d2dn, "C3 up"=d3up, "C3 dn"=d3dn, "C4 up"=d4up, "C4 dn"=d4dn)
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
pdf("euler1.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()
## png
## 2
Now with just the DEGs from contrasts 1 and 4.
par(cex.main=0.5)
par(mar=c(2,2,2,2))
v0 <- list("KO up"=d1up,"KO dn"=d1dn, "drug up"=d4up, "drug dn"=d4dn)
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
pdf("euler2.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()
## png
## 2
Now with just the DEGs from contrasts 1 and 5.
par(cex.main=0.5)
par(mar=c(2,2,2,2))
v0 <- list("KO up"=d1up,"KO dn"=d1dn, "drugtrt up"=d5up, "drugtrt dn"=d5dn)
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
pdf("euler3.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()
## png
## 2
Firstly need to conduct mitch enrichment analysis for each contrast separately.
#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")
# gene table
gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)
m1 <- mitch_import(dge1, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 25258
## Note: no. genes in output = 25203
## Note: estimated proportion of input genes in output = 0.998
mres1 <- mitch_calc(m1, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres1$enrichment_result,20) %>%
kbl(caption = "Top gene pathway differences in contrast 1") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
1008 | Receptor Mediated Mitophagy | 11 | 0.0000080 | -0.7772236 | 0.0000639 |
720 | Mucopolysaccharidoses | 11 | 0.0000117 | 0.7629839 | 0.0000908 |
670 | Membrane binding and targetting of GAG proteins | 14 | 0.0000402 | -0.6337233 | 0.0002665 |
1264 | Synthesis And Processing Of GAG, GAGPOL Polyproteins | 14 | 0.0000402 | -0.6337233 | 0.0002665 |
1411 | Unwinding of DNA | 12 | 0.0002829 | 0.6051698 | 0.0014768 |
93 | Assembly Of The HIV Virion | 16 | 0.0000324 | -0.6000020 | 0.0002240 |
121 | Biotin transport and metabolism | 11 | 0.0011127 | 0.5676477 | 0.0047067 |
1276 | Synthesis of active ubiquitin: roles of E1 and E2 enzymes | 30 | 0.0000001 | -0.5630345 | 0.0000016 |
1115 | SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs | 11 | 0.0018488 | -0.5420841 | 0.0073752 |
1166 | Signaling by Activin | 15 | 0.0003295 | -0.5354190 | 0.0016612 |
710 | Mitophagy | 28 | 0.0000011 | -0.5309604 | 0.0000116 |
1086 | Response of EIF2AK1 (HRI) to heme deficiency | 15 | 0.0005010 | -0.5189191 | 0.0023789 |
55 | Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S | 58 | 0.0000000 | -0.5187746 | 0.0000000 |
538 | Hyaluronan uptake and degradation | 10 | 0.0047720 | 0.5153177 | 0.0167247 |
572 | Insertion of tail-anchored proteins into the endoplasmic reticulum membrane | 22 | 0.0000312 | -0.5127387 | 0.0002190 |
1376 | Translation initiation complex formation | 57 | 0.0000000 | -0.5104645 | 0.0000000 |
644 | Lysosphingolipid and LPA receptors | 12 | 0.0024056 | -0.5059280 | 0.0092214 |
1462 | rRNA modification in the nucleus and cytosol | 60 | 0.0000000 | -0.5021477 | 0.0000000 |
517 | HIV elongation arrest and recovery | 32 | 0.0000009 | -0.5012440 | 0.0000096 |
848 | Pausing and recovery of HIV elongation | 32 | 0.0000009 | -0.5012440 | 0.0000096 |
mitch_barplot(mres1)
mitch_barplot2(mres1)
mitch_plots(mres1,outfile="mitch1.pdf")
## png
## 2
m1top <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
m1up <- subset(m1top,s.dist>0)$set
m1dn <- subset(m1top,s.dist<0)$set
#mitch_report(mres1,outfile="mitch1.html",overwrite=TRUE)
write.table(mres1$enrichment_result,file="mitch1.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m1top_up <- head(subset(m1top,s.dist>0),10)[,"s.dist"]
names(m1top_up) <- head(subset(m1top,s.dist>0),10)[,"set"]
m1top_dn <- head(subset(m1top,s.dist<0),10)[,"s.dist"]
names(m1top_dn) <- head(subset(m1top,s.dist<0),10)[,"set"]
m1top_updn <- c(m1top_up,m1top_dn)
m1top_updn <- m1top_updn[order(m1top_updn)]
pdf("f2b.pdf")
par(mar=c(5,25,3,1))
barplot(m1top_updn,horiz=TRUE,las=1,col="darkgray",xlab="Enrichment score",cex.names=0.8,xlim=c(-1,1))
grid()
dev.off()
## png
## 2
m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 27340
## Note: no. genes in output = 27278
## Note: estimated proportion of input genes in output = 0.998
mres2 <- mitch_calc(m2, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres2$enrichment_result,20) %>%
kbl(caption = "Top gene pathway differences in contrast 2") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
1427 | Unwinding of DNA | 12 | 0.0000101 | 0.7359532 | 0.0000679 |
640 | Leading Strand Synthesis | 14 | 0.0000029 | 0.7219567 | 0.0000217 |
893 | Polymerase switching | 14 | 0.0000029 | 0.7219567 | 0.0000217 |
1102 | Response to metal ions | 10 | 0.0000778 | 0.7214390 | 0.0004521 |
458 | G1/S-Specific Transcription | 29 | 0.0000000 | 0.6885884 | 0.0000000 |
216 | Condensation of Prometaphase Chromosomes | 11 | 0.0000792 | 0.6871343 | 0.0004567 |
1345 | Tetrahydrobiopterin (BH4) synthesis, recycling, salvage and regulation | 10 | 0.0002313 | 0.6723265 | 0.0011212 |
423 | Formation of ATP by chemiosmotic coupling | 18 | 0.0000009 | 0.6681544 | 0.0000075 |
1130 | SLBP independent Processing of Histone Pre-mRNAs | 10 | 0.0003221 | 0.6567625 | 0.0015025 |
1129 | SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs | 11 | 0.0001693 | 0.6547542 | 0.0008750 |
260 | DNA strand elongation | 32 | 0.0000000 | 0.6539698 | 0.0000000 |
1397 | Translesion synthesis by REV1 | 16 | 0.0000094 | 0.6395441 | 0.0000640 |
1396 | Translesion synthesis by POLK | 17 | 0.0000057 | 0.6353722 | 0.0000407 |
422 | Folding of actin by CCT/TriC | 10 | 0.0005533 | 0.6306000 | 0.0024504 |
1118 | SCF(Skp2)-mediated degradation of p27/p21 | 60 | 0.0000000 | 0.6273814 | 0.0000000 |
1351 | The role of GTSE1 in G2/M progression after G2 checkpoint | 58 | 0.0000000 | 0.6165776 | 0.0000000 |
686 | Metabolism of cofactors | 19 | 0.0000036 | 0.6139295 | 0.0000265 |
1050 | Regulation of PTEN mRNA translation | 12 | 0.0002439 | -0.6115125 | 0.0011746 |
1424 | Ubiquitin-dependent degradation of Cyclin D | 50 | 0.0000000 | 0.6083723 | 0.0000000 |
636 | Lagging Strand Synthesis | 20 | 0.0000029 | 0.6041896 | 0.0000217 |
mitch_barplot(mres2)
mitch_barplot2(mres2)
mitch_plots(mres2,outfile="mitch2.pdf")
## png
## 2
m2top <- subset(mres2$enrichment_result,p.adjustANOVA<0.05)
m2up <- subset(m2top,s.dist>0)$set
m2dn <- subset(m2top,s.dist<0)$set
#mitch_report(mres2,outfile="mitch2.html",overwrite=TRUE)
write.table(mres2$enrichment_result,file="mitch2.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m3 <- mitch_import(dge3, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 26801
## Note: no. genes in output = 26741
## Note: estimated proportion of input genes in output = 0.998
mres3 <- mitch_calc(m3, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres3$enrichment_result,20) %>%
kbl(caption = "Top gene pathway differences in contrast 3") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
137 | CLEC7A (Dectin-1) induces NFAT activation | 11 | 1.70e-06 | -0.8325273 | 0.0000081 |
375 | Establishment of Sister Chromatid Cohesion | 11 | 2.30e-06 | -0.8231201 | 0.0000102 |
207 | Cohesin Loading onto Chromatin | 10 | 7.30e-06 | -0.8190042 | 0.0000294 |
360 | ERKs are inactivated | 13 | 4.00e-07 | -0.8096839 | 0.0000023 |
1108 | S33 mutants of beta-catenin aren’t phosphorylated | 15 | 1.00e-07 | -0.8083414 | 0.0000004 |
1109 | S37 mutants of beta-catenin aren’t phosphorylated | 15 | 1.00e-07 | -0.8083414 | 0.0000004 |
1110 | S45 mutants of beta-catenin aren’t phosphorylated | 15 | 1.00e-07 | -0.8083414 | 0.0000004 |
1180 | Signaling by CTNNB1 phospho-site mutants | 15 | 1.00e-07 | -0.8083414 | 0.0000004 |
1207 | Signaling by GSK3beta mutants | 15 | 1.00e-07 | -0.8083414 | 0.0000004 |
1298 | T41 mutants of beta-catenin aren’t phosphorylated | 15 | 1.00e-07 | -0.8083414 | 0.0000004 |
506 | Golgi Cisternae Pericentriolar Stack Reorganization | 14 | 2.00e-07 | -0.8059533 | 0.0000011 |
11 | APC truncation mutants have impaired AXIN binding | 14 | 2.00e-07 | -0.7983954 | 0.0000013 |
24 | AXIN missense mutants destabilize the destruction complex | 14 | 2.00e-07 | -0.7983954 | 0.0000013 |
1173 | Signaling by AMER1 mutants | 14 | 2.00e-07 | -0.7983954 | 0.0000013 |
1174 | Signaling by APC mutants | 14 | 2.00e-07 | -0.7983954 | 0.0000013 |
1175 | Signaling by AXIN mutants | 14 | 2.00e-07 | -0.7983954 | 0.0000013 |
1414 | Truncations of AMER1 destabilize the destruction complex | 14 | 2.00e-07 | -0.7983954 | 0.0000013 |
1209 | Signaling by Hippo | 20 | 0.00e+00 | -0.7935032 | 0.0000000 |
664 | MET activates RAP1 and RAC1 | 11 | 5.40e-06 | -0.7917695 | 0.0000227 |
659 | MAPK3 (ERK1) activation | 10 | 2.78e-05 | -0.7651341 | 0.0001004 |
mitch_barplot(mres3)
mitch_barplot2(mres3)
mitch_plots(mres3,outfile="mitch3.pdf")
## png
## 2
m3top <- subset(mres3$enrichment_result,p.adjustANOVA<0.05)
m3up <- subset(m3top,s.dist>0)$set
m3dn <- subset(m3top,s.dist<0)$set
#mitch_report(mres3,outfile="mitch3.html",overwrite=TRUE)
write.table(mres3$enrichment_result,file="mitch3.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m4 <- mitch_import(dge4, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 26222
## Note: no. genes in output = 26163
## Note: estimated proportion of input genes in output = 0.998
mres4 <- mitch_calc(m4, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres4$enrichment_result,20) %>%
kbl(caption = "Top gene pathway differences in contrast 4") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
134 | CLEC7A (Dectin-1) induces NFAT activation | 11 | 1.0e-06 | -0.8526516 | 6.10e-06 |
203 | Cohesin Loading onto Chromatin | 10 | 3.0e-06 | -0.8523305 | 1.63e-05 |
630 | Laminin interactions | 23 | 0.0e+00 | -0.7933735 | 0.00e+00 |
1103 | S33 mutants of beta-catenin aren’t phosphorylated | 15 | 2.0e-07 | -0.7825965 | 1.20e-06 |
1104 | S37 mutants of beta-catenin aren’t phosphorylated | 15 | 2.0e-07 | -0.7825965 | 1.20e-06 |
1105 | S45 mutants of beta-catenin aren’t phosphorylated | 15 | 2.0e-07 | -0.7825965 | 1.20e-06 |
1175 | Signaling by CTNNB1 phospho-site mutants | 15 | 2.0e-07 | -0.7825965 | 1.20e-06 |
1202 | Signaling by GSK3beta mutants | 15 | 2.0e-07 | -0.7825965 | 1.20e-06 |
1293 | T41 mutants of beta-catenin aren’t phosphorylated | 15 | 2.0e-07 | -0.7825965 | 1.20e-06 |
1204 | Signaling by Hippo | 20 | 0.0e+00 | -0.7785564 | 0.00e+00 |
1268 | Syndecan interactions | 20 | 0.0e+00 | -0.7770952 | 0.00e+00 |
658 | MET activates RAP1 and RAC1 | 11 | 8.9e-06 | -0.7734156 | 4.31e-05 |
11 | APC truncation mutants have impaired AXIN binding | 14 | 6.0e-07 | -0.7679398 | 4.20e-06 |
24 | AXIN missense mutants destabilize the destruction complex | 14 | 6.0e-07 | -0.7679398 | 4.20e-06 |
1168 | Signaling by AMER1 mutants | 14 | 6.0e-07 | -0.7679398 | 4.20e-06 |
1169 | Signaling by APC mutants | 14 | 6.0e-07 | -0.7679398 | 4.20e-06 |
1170 | Signaling by AXIN mutants | 14 | 6.0e-07 | -0.7679398 | 4.20e-06 |
1410 | Truncations of AMER1 destabilize the destruction complex | 14 | 6.0e-07 | -0.7679398 | 4.20e-06 |
357 | ERKs are inactivated | 13 | 1.7e-06 | -0.7668245 | 9.70e-06 |
657 | MET activates PTK2 signaling | 18 | 0.0e+00 | -0.7616753 | 2.00e-07 |
mitch_barplot(mres4)
mitch_barplot2(mres4)
mitch_plots(mres4,outfile="mitch4.pdf")
## png
## 2
m4top <- subset(mres4$enrichment_result,p.adjustANOVA<0.05)
m4up <- subset(m4top,s.dist>0)$set
m4dn <- subset(m4top,s.dist<0)$set
#mitch_report(mres4,outfile="mitch4.html",overwrite=TRUE)
write.table(mres4$enrichment_result,file="mitch4.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m5 <- mitch_import(dge5, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 26081
## Note: no. genes in output = 26025
## Note: estimated proportion of input genes in output = 0.998
mres5 <- mitch_calc(m5, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres5$enrichment_result,20) %>%
kbl(caption = "Top gene pathway differences in contrast 5") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
136 | CLEC7A (Dectin-1) induces NFAT activation | 11 | 1.80e-06 | -0.8306926 | 0.0000126 |
206 | Cohesin Loading onto Chromatin | 10 | 1.05e-05 | -0.8044820 | 0.0000609 |
660 | MET activates RAP1 and RAC1 | 11 | 6.10e-06 | -0.7873173 | 0.0000376 |
11 | APC truncation mutants have impaired AXIN binding | 14 | 4.00e-07 | -0.7808455 | 0.0000033 |
24 | AXIN missense mutants destabilize the destruction complex | 14 | 4.00e-07 | -0.7808455 | 0.0000033 |
1171 | Signaling by AMER1 mutants | 14 | 4.00e-07 | -0.7808455 | 0.0000033 |
1172 | Signaling by APC mutants | 14 | 4.00e-07 | -0.7808455 | 0.0000033 |
1173 | Signaling by AXIN mutants | 14 | 4.00e-07 | -0.7808455 | 0.0000033 |
1412 | Truncations of AMER1 destabilize the destruction complex | 14 | 4.00e-07 | -0.7808455 | 0.0000033 |
1106 | S33 mutants of beta-catenin aren’t phosphorylated | 15 | 2.00e-07 | -0.7803050 | 0.0000014 |
1107 | S37 mutants of beta-catenin aren’t phosphorylated | 15 | 2.00e-07 | -0.7803050 | 0.0000014 |
1108 | S45 mutants of beta-catenin aren’t phosphorylated | 15 | 2.00e-07 | -0.7803050 | 0.0000014 |
1178 | Signaling by CTNNB1 phospho-site mutants | 15 | 2.00e-07 | -0.7803050 | 0.0000014 |
1205 | Signaling by GSK3beta mutants | 15 | 2.00e-07 | -0.7803050 | 0.0000014 |
1296 | T41 mutants of beta-catenin aren’t phosphorylated | 15 | 2.00e-07 | -0.7803050 | 0.0000014 |
1207 | Signaling by Hippo | 20 | 0.00e+00 | -0.7490483 | 0.0000001 |
373 | Establishment of Sister Chromatid Cohesion | 11 | 1.81e-05 | -0.7464442 | 0.0000973 |
113 | Beta-catenin phosphorylation cascade | 17 | 1.00e-07 | -0.7403288 | 0.0000012 |
358 | ERKs are inactivated | 13 | 4.10e-06 | -0.7373224 | 0.0000266 |
605 | Interleukin-6 signaling | 11 | 3.09e-05 | -0.7253577 | 0.0001591 |
mitch_barplot(mres5)
mitch_barplot2(mres5)
mitch_plots(mres5,outfile="mitch5.pdf")
## png
## 2
m5top <- subset(mres5$enrichment_result,p.adjustANOVA<0.05)
m5up <- subset(m5top,s.dist>0)$set
m5dn <- subset(m5top,s.dist<0)$set
#mitch_report(mres5,outfile="mitch5.html",overwrite=TRUE)
write.table(mres5$enrichment_result,file="mitch5.tsv",quote=FALSE,sep="\t",row.names=FALSE)
pdf("mitch_barplots.pdf")
mitch_barplot(mres1)
mitch_barplot2(mres1)
mitch_barplot(mres2)
mitch_barplot2(mres2)
mitch_barplot(mres3)
mitch_barplot2(mres3)
mitch_barplot(mres4)
mitch_barplot2(mres4)
mitch_barplot(mres5)
mitch_barplot2(mres5)
dev.off()
## png
## 2
Make some ridge plots.
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
sets <- head(mres$enrichment_result,10)
sets <- sets[order(sets$s.dist),"set"]
ridgedata <- lapply( sets , function(myset) {
genes <- genesets[[which(names(genesets) == myset)]]
generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
return(generanks)
})
names(ridgedata) <- sets
myrange <- range(mres$ranked_profile[,1])
par(mar=c(5,20,3,2))
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,main="Top 10 genesets",
xlab="differential gene rank")
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,col="white",main="Top 10 genesets",
xlab="differential gene rank",
rectCol="gray")
beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})
pdf("ridge1.pdf")
z <- lapply(myres, function(mres) {
sets <- head(mres$enrichment_result,10)
sets <- sets[order(sets$s.dist),"set"]
ridgedata <- lapply( sets , function(myset) {
genes <- genesets[[which(names(genesets) == myset)]]
generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
return(generanks)
})
names(ridgedata) <- sets
myrange <- range(mres$ranked_profile[,1])
par(mar=c(5,20,3,2))
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,main="Top 10 genesets",
xlab="differential gene rank")
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,col="white",main="Top 10 genesets",
xlab="differential gene rank",
rectCol="gray")
beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})
dev.off()
## png
## 2
Make some heatmaps
sets <- head(mres1$enrichment_result$set,10)
z <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
hm <- as.matrix(rpm1[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
cexCol=0.9, main=paste(set) )
})
z <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
hm <- as.matrix(rpm[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
cexCol=0.9, main=paste(set) )
})
pdf("peheat1.pdf")
z <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
hm <- as.matrix(rpm1[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
cexCol=0.9, main=paste(set) )
})
z <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
hm <- as.matrix(rpm[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
cexCol=0.9, main=paste(set) )
})
dev.off()
## png
## 2
Now an Euler diagram of differentially regulated gene sets from contrasts 1 and 4.
par(cex.main=0.5)
par(mar=c(2,2,2,2))
v0 <- list("KO up"=m1up,"KO dn"=m1dn, "drug up"=m4up, "drug dn"=m4dn)
plot(euler(v0),quantities = TRUE, edges = "gray", main="Reactome pathways")
pdf("gseuler1.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="Reactome pathways")
dev.off()
## png
## 2
I’m going to focus on the two contrasts which I think are the most important WRT mito disease; #1 and #4. The effect of the KO, and the effect of the drug when the gene is knocked out.
mm <- list("KO"=dge1,"drug"=dge4)
m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 25740
## Note: no. genes in output = 24473
## Note: estimated proportion of input genes in output = 0.951
mres <- mitch_calc(m, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres$enrichment_result,20) %>%
kbl(caption = "Top gene pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.KO | s.drug | p.KO | p.drug | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
133 | CLEC7A (Dectin-1) induces NFAT activation | 11 | 3.40e-06 | -0.3079507 | -0.8424718 | 0.0769588 | 0.0000013 | 0.8969908 | 0.3779635 | 1.31e-05 |
202 | Cohesin Loading onto Chromatin | 10 | 1.94e-05 | -0.1962392 | -0.8421289 | 0.2825607 | 0.0000040 | 0.8646913 | 0.4567130 | 6.18e-05 |
650 | MET activates RAP1 and RAC1 | 11 | 1.26e-05 | -0.3986517 | -0.7577616 | 0.0220462 | 0.0000135 | 0.8562277 | 0.2539290 | 4.27e-05 |
1256 | Syndecan interactions | 19 | 0.00e+00 | 0.2156918 | -0.8161403 | 0.1035746 | 0.0000000 | 0.8441611 | 0.7296154 | 0.00e+00 |
844 | Peptide chain elongation | 87 | 0.00e+00 | -0.4224866 | 0.7146629 | 0.0000000 | 0.0000000 | 0.8302036 | 0.8040862 | 0.00e+00 |
1415 | Viral mRNA Translation | 87 | 0.00e+00 | -0.4204815 | 0.7154708 | 0.0000000 | 0.0000000 | 0.8298814 | 0.8032396 | 0.00e+00 |
11 | APC truncation mutants have impaired AXIN binding | 14 | 1.30e-06 | -0.3476780 | -0.7519055 | 0.0242864 | 0.0000011 | 0.8283972 | 0.2858320 | 5.60e-06 |
24 | AXIN missense mutants destabilize the destruction complex | 14 | 1.30e-06 | -0.3476780 | -0.7519055 | 0.0242864 | 0.0000011 | 0.8283972 | 0.2858320 | 5.60e-06 |
1156 | Signaling by AMER1 mutants | 14 | 1.30e-06 | -0.3476780 | -0.7519055 | 0.0242864 | 0.0000011 | 0.8283972 | 0.2858320 | 5.60e-06 |
1157 | Signaling by APC mutants | 14 | 1.30e-06 | -0.3476780 | -0.7519055 | 0.0242864 | 0.0000011 | 0.8283972 | 0.2858320 | 5.60e-06 |
1158 | Signaling by AXIN mutants | 14 | 1.30e-06 | -0.3476780 | -0.7519055 | 0.0242864 | 0.0000011 | 0.8283972 | 0.2858320 | 5.60e-06 |
1396 | Truncations of AMER1 destabilize the destruction complex | 14 | 1.30e-06 | -0.3476780 | -0.7519055 | 0.0242864 | 0.0000011 | 0.8283972 | 0.2858320 | 5.60e-06 |
622 | Laminin interactions | 23 | 0.00e+00 | 0.2685267 | -0.7821464 | 0.0257806 | 0.0000000 | 0.8269580 | 0.7429381 | 0.00e+00 |
351 | ERKs are inactivated | 13 | 4.30e-06 | -0.3333229 | -0.7507139 | 0.0374309 | 0.0000028 | 0.8213863 | 0.2951400 | 1.60e-05 |
1001 | Receptor Mediated Mitophagy | 11 | 2.63e-05 | -0.7759568 | -0.2626486 | 0.0000083 | 0.1314471 | 0.8192028 | 0.3629637 | 8.10e-05 |
1091 | S33 mutants of beta-catenin aren’t phosphorylated | 15 | 8.00e-07 | -0.2603974 | -0.7675743 | 0.0807712 | 0.0000003 | 0.8105413 | 0.3586282 | 3.50e-06 |
1092 | S37 mutants of beta-catenin aren’t phosphorylated | 15 | 8.00e-07 | -0.2603974 | -0.7675743 | 0.0807712 | 0.0000003 | 0.8105413 | 0.3586282 | 3.50e-06 |
1093 | S45 mutants of beta-catenin aren’t phosphorylated | 15 | 8.00e-07 | -0.2603974 | -0.7675743 | 0.0807712 | 0.0000003 | 0.8105413 | 0.3586282 | 3.50e-06 |
1163 | Signaling by CTNNB1 phospho-site mutants | 15 | 8.00e-07 | -0.2603974 | -0.7675743 | 0.0807712 | 0.0000003 | 0.8105413 | 0.3586282 | 3.50e-06 |
1190 | Signaling by GSK3beta mutants | 15 | 8.00e-07 | -0.2603974 | -0.7675743 | 0.0807712 | 0.0000003 | 0.8105413 | 0.3586282 | 3.50e-06 |
#mitch_report(mres,outfile="mitchmulti_effect.html",overwrite=TRUE)
mitch_plots(mres, outfile="mitchmulti_effect.pdf")
## png
## 2
write.table(mres$enrichment_result,file="mitch_multi_effect.tsv",quote=FALSE,sep="\t",row.names=FALSE)
n=40
mx <- as.matrix(mres$enrichment_result[1:n,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set[1:n]
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,15), cexRow=0.7,
cexCol=0.9, main=paste("Top ranked Reactomes") )
Mitch can also be used to search for discordant pathways. Gene sets are prioritised by standard deviation.
mm <- list("KO"=dge1,"drug"=dge4)
m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 25740
## Note: no. genes in output = 24473
## Note: estimated proportion of input genes in output = 0.951
mres <- mitch_calc(m, genesets, priority="SD",resrows=52)
## Note: Prioritisation by SD after selecting sets with
## p.adjustMANOVA<=0.05.
head(mres$enrichment_result,20) %>% kbl(caption = "Top gene pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.KO | s.drug | p.KO | p.drug | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
844 | Peptide chain elongation | 87 | 0.00e+00 | -0.4224866 | 0.7146629 | 0.0000000 | 0.0000000 | 0.8302036 | 0.8040862 | 0.00e+00 |
1415 | Viral mRNA Translation | 87 | 0.00e+00 | -0.4204815 | 0.7154708 | 0.0000000 | 0.0000000 | 0.8298814 | 0.8032396 | 0.00e+00 |
419 | Formation of a pool of free 40S subunits | 99 | 0.00e+00 | -0.4444643 | 0.6516333 | 0.0000000 | 0.0000000 | 0.7887804 | 0.7750580 | 0.00e+00 |
371 | Eukaryotic Translation Termination | 91 | 0.00e+00 | -0.4020927 | 0.6842987 | 0.0000000 | 0.0000000 | 0.7936897 | 0.7681947 | 0.00e+00 |
369 | Eukaryotic Translation Elongation | 92 | 0.00e+00 | -0.4120627 | 0.6735448 | 0.0000000 | 0.0000000 | 0.7895937 | 0.7676404 | 0.00e+00 |
268 | Defective EXT1 causes exostoses 1, TRPS2 and CHDS | 13 | 1.40e-06 | 0.3576640 | -0.7141141 | 0.0255478 | 0.0000082 | 0.7986755 | 0.7578616 | 5.90e-06 |
269 | Defective EXT2 causes exostoses 2 | 13 | 1.40e-06 | 0.3576640 | -0.7141141 | 0.0255478 | 0.0000082 | 0.7986755 | 0.7578616 | 5.90e-06 |
622 | Laminin interactions | 23 | 0.00e+00 | 0.2685267 | -0.7821464 | 0.0257806 | 0.0000000 | 0.8269580 | 0.7429381 | 0.00e+00 |
1080 | Response of EIF2AK4 (GCN2) to amino acid deficiency | 99 | 0.00e+00 | -0.4230307 | 0.6251677 | 0.0000000 | 0.0000000 | 0.7548441 | 0.7411882 | 0.00e+00 |
777 | Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) | 93 | 0.00e+00 | -0.4238720 | 0.6173622 | 0.0000000 | 0.0000000 | 0.7488682 | 0.7362638 | 0.00e+00 |
1134 | Selenocysteine synthesis | 91 | 0.00e+00 | -0.3552729 | 0.6805615 | 0.0000000 | 0.0000000 | 0.7677127 | 0.7324455 | 0.00e+00 |
425 | Formation of the ternary complex, and subsequently, the 43S complex | 50 | 0.00e+00 | -0.5007952 | 0.5323556 | 0.0000000 | 0.0000000 | 0.7308887 | 0.7305479 | 0.00e+00 |
1256 | Syndecan interactions | 19 | 0.00e+00 | 0.2156918 | -0.8161403 | 0.1035746 | 0.0000000 | 0.8441611 | 0.7296154 | 0.00e+00 |
713 | Mucopolysaccharidoses | 11 | 1.36e-05 | 0.7559480 | -0.2581369 | 0.0000141 | 0.1382052 | 0.7988066 | 0.7170663 | 4.58e-05 |
265 | Defective B4GALT7 causes EDS, progeroid type | 18 | 1.00e-07 | 0.3708989 | -0.6386151 | 0.0064379 | 0.0000027 | 0.7385088 | 0.7138342 | 6.00e-07 |
617 | L13a-mediated translational silencing of Ceruloplasmin expression | 109 | 0.00e+00 | -0.4660523 | 0.5254805 | 0.0000000 | 0.0000000 | 0.7023778 | 0.7011196 | 0.00e+00 |
460 | GTP hydrolysis and joining of the 60S ribosomal subunit | 110 | 0.00e+00 | -0.4418892 | 0.5302721 | 0.0000000 | 0.0000000 | 0.6902568 | 0.6874218 | 0.00e+00 |
264 | Defective B3GAT3 causes JDSSDHD | 18 | 9.00e-07 | 0.3764511 | -0.5746291 | 0.0056854 | 0.0000243 | 0.6869600 | 0.6725153 | 4.00e-06 |
806 | Other semaphorin interactions | 15 | 8.70e-06 | 0.3594897 | -0.5881975 | 0.0159171 | 0.0000798 | 0.6893541 | 0.6701160 | 3.06e-05 |
262 | Defective B3GALT6 causes EDSP2 and SEMDJL1 | 18 | 1.30e-06 | 0.3746746 | -0.5643972 | 0.0059170 | 0.0000338 | 0.6774402 | 0.6640240 | 5.60e-06 |
#mitch_report(mres,outfile="mitchmulti_discord.html",overwrite=TRUE)
mitch_plots(mres, outfile="mitchmulti_discord.pdf")
## png
## 2
write.table(mres$enrichment_result,file="mitch_multi_discord.tsv",quote=FALSE,sep="\t",row.names=FALSE)
n=50
mx <- as.matrix(mres$enrichment_result[1:n,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set[1:n]
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,25), cexRow=0.6,
cexCol=0.9, main=paste("Top ranked Reactomes") )
pdf("mitchmulti_discord_heat.pdf")
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,25), cexRow=0.6,
cexCol=0.9, main=paste("Top ranked Reactomes") )
dev.off()
## png
## 2
custom_set_names <- readLines("custom_pathway_list.txt")
custom_sets <- genesets[which(names(genesets) %in% custom_set_names)]
m1 <- mitch_import(dge1, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 25258
## Note: no. genes in output = 25203
## Note: estimated proportion of input genes in output = 0.998
mres1 <- mitch_calc(m1, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres1$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 1") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
19 | Receptor Mediated Mitophagy | 11 | 0.0000080 | -0.7772236 | 0.0000286 |
12 | Mucopolysaccharidoses | 11 | 0.0000117 | 0.7629839 | 0.0000352 |
11 | Mitophagy | 28 | 0.0000011 | -0.5309604 | 0.0000069 |
14 | Nucleotide biosynthesis | 12 | 0.0038650 | -0.4815940 | 0.0059227 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.0068477 | -0.4331145 | 0.0086497 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0099007 | 0.4299750 | 0.0113151 |
24 | Stabilization of p53 | 53 | 0.0000001 | -0.4224405 | 0.0000008 |
3 | Diseases of carbohydrate metabolism | 30 | 0.0021328 | 0.3238999 | 0.0036562 |
1 | Cellular response to starvation | 150 | 0.0000000 | -0.3190112 | 0.0000000 |
16 | PTEN Regulation | 140 | 0.0000000 | -0.2798439 | 0.0000001 |
21 | Resolution of D-Loop Structures | 33 | 0.0140933 | 0.2468836 | 0.0153745 |
4 | Gene Silencing by RNA | 97 | 0.0000404 | -0.2410756 | 0.0000979 |
18 | Pyruvate metabolism and Citric Acid (TCA) cycle | 52 | 0.0039485 | -0.2309956 | 0.0059227 |
23 | Signaling by the B Cell Receptor (BCR) | 106 | 0.0000408 | -0.2305372 | 0.0000979 |
7 | Mitochondrial biogenesis | 91 | 0.0003036 | -0.2189995 | 0.0006624 |
9 | Mitochondrial protein import | 64 | 0.0059608 | -0.1987064 | 0.0084153 |
15 | Oxidative Stress Induced Senescence | 82 | 0.0092662 | -0.1661699 | 0.0111194 |
22 | Respiratory electron transport | 103 | 0.0065102 | -0.1551008 | 0.0086497 |
17 | Phospholipid metabolism | 186 | 0.0007655 | -0.1429699 | 0.0014133 |
10 | Mitochondrial translation | 96 | 0.0157974 | -0.1424910 | 0.0164843 |
m1top <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
m1up <- subset(m1top,s.dist>0)$set
m1dn <- subset(m1top,s.dist<0)$set
#mitch_report(mres1,outfile="mitch_mito1.html",overwrite=TRUE)
mitch_plots(mres1, outfile="mitch_mito1.pdf")
## png
## 2
write.table(mres1$enrichment_result,file="mitch_mito1.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 27340
## Note: no. genes in output = 27278
## Note: estimated proportion of input genes in output = 0.998
mres2 <- mitch_calc(m2, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres2$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 2") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
10 | Mitochondrial translation | 96 | 0.0000000 | 0.5392072 | 0.0000000 |
24 | Stabilization of p53 | 53 | 0.0000000 | 0.5020005 | 0.0000000 |
9 | Mitochondrial protein import | 64 | 0.0000001 | 0.3891552 | 0.0000002 |
3 | Diseases of carbohydrate metabolism | 31 | 0.0002686 | 0.3780363 | 0.0005860 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0289978 | 0.3640003 | 0.0463964 |
22 | Respiratory electron transport | 103 | 0.0000000 | 0.3397608 | 0.0000000 |
23 | Signaling by the B Cell Receptor (BCR) | 106 | 0.0000000 | 0.3265140 | 0.0000000 |
2 | Complex I biogenesis | 57 | 0.0000465 | 0.3117478 | 0.0001239 |
11 | Mitophagy | 28 | 0.0064809 | 0.2971848 | 0.0111100 |
12 | Mucopolysaccharidoses | 11 | 0.1441237 | 0.2543167 | 0.1921650 |
18 | Pyruvate metabolism and Citric Acid (TCA) cycle | 53 | 0.0023525 | 0.2414478 | 0.0047049 |
21 | Resolution of D-Loop Structures | 33 | 0.0359199 | 0.2109689 | 0.0538799 |
6 | Metabolism of amino acids and derivatives | 344 | 0.0000000 | 0.1954042 | 0.0000000 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.2345154 | 0.1904132 | 0.2962300 |
1 | Cellular response to starvation | 150 | 0.0002005 | 0.1757549 | 0.0004812 |
5 | Metabolism | 1909 | 0.0000000 | 0.1711056 | 0.0000000 |
16 | PTEN Regulation | 140 | 0.0036710 | 0.1421165 | 0.0067773 |
13 | Nervous system development | 527 | 0.0000001 | 0.1334218 | 0.0000004 |
7 | Mitochondrial biogenesis | 91 | 0.1013510 | 0.0993247 | 0.1430838 |
14 | Nucleotide biosynthesis | 12 | 0.6928407 | 0.0658512 | 0.7918179 |
m2top <- subset(mres2$enrichment_result,p.adjustANOVA<0.05)
m2up <- subset(m2top,s.dist>0)$set
m2dn <- subset(m2top,s.dist<0)$set
#mitch_report(mres2,outfile="mitch_mito2.html",overwrite=TRUE)
mitch_plots(mres2, outfile="mitch_mito2.pdf")
## png
## 2
write.table(mres2$enrichment_result,file="mitch_mito2.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m3 <- mitch_import(dge3, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 26801
## Note: no. genes in output = 26741
## Note: estimated proportion of input genes in output = 0.998
mres3 <- mitch_calc(m3, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres3$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 3") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
19 | Receptor Mediated Mitophagy | 11 | 0.0030258 | -0.5162466 | 0.0051871 |
2 | Complex I biogenesis | 57 | 0.0000000 | 0.5011361 | 0.0000000 |
14 | Nucleotide biosynthesis | 12 | 0.0028637 | -0.4970943 | 0.0051871 |
11 | Mitophagy | 28 | 0.0000387 | -0.4491607 | 0.0001033 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0200273 | -0.3877249 | 0.0320436 |
18 | Pyruvate metabolism and Citric Acid (TCA) cycle | 53 | 0.0000029 | -0.3714948 | 0.0000086 |
23 | Signaling by the B Cell Receptor (BCR) | 105 | 0.0000000 | -0.3275703 | 0.0000000 |
16 | PTEN Regulation | 140 | 0.0000000 | -0.2980253 | 0.0000000 |
22 | Respiratory electron transport | 103 | 0.0000008 | 0.2808048 | 0.0000029 |
17 | Phospholipid metabolism | 191 | 0.0000000 | -0.2642258 | 0.0000000 |
7 | Mitochondrial biogenesis | 91 | 0.0002971 | -0.2193167 | 0.0007131 |
13 | Nervous system development | 529 | 0.0000000 | -0.2054227 | 0.0000000 |
4 | Gene Silencing by RNA | 98 | 0.0007249 | -0.1974754 | 0.0015816 |
21 | Resolution of D-Loop Structures | 33 | 0.0625124 | 0.1873210 | 0.0789631 |
24 | Stabilization of p53 | 53 | 0.0360459 | -0.1664221 | 0.0515692 |
1 | Cellular response to starvation | 150 | 0.0017126 | 0.1482431 | 0.0034251 |
15 | Oxidative Stress Induced Senescence | 83 | 0.0365282 | -0.1327186 | 0.0515692 |
5 | Metabolism | 1899 | 0.0000000 | -0.1198167 | 0.0000000 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.4675443 | -0.1163631 | 0.5100483 |
10 | Mitochondrial translation | 96 | 0.0966246 | 0.0980797 | 0.1159496 |
m3top <- subset(mres3$enrichment_result,p.adjustANOVA<0.05)
m3up <- subset(m3top,s.dist>0)$set
m3dn <- subset(m3top,s.dist<0)$set
#mitch_report(mres3,outfile="mitch_mito3.html",overwrite=TRUE)
mitch_plots(mres3, outfile="mitch_mito3.pdf")
## png
## 2
write.table(mres3$enrichment_result,file="mitch_mito3.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m4 <- mitch_import(dge4, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 26222
## Note: no. genes in output = 26163
## Note: estimated proportion of input genes in output = 0.998
mres4 <- mitch_calc(m4, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres4$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 4") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
2 | Complex I biogenesis | 57 | 0.0000000 | 0.7304162 | 0.0000000 |
22 | Respiratory electron transport | 103 | 0.0000000 | 0.5865948 | 0.0000000 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0020344 | -0.5142888 | 0.0048827 |
10 | Mitochondrial translation | 96 | 0.0000000 | 0.5132095 | 0.0000000 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.0042094 | 0.4583909 | 0.0091842 |
1 | Cellular response to starvation | 150 | 0.0000000 | 0.4000246 | 0.0000000 |
14 | Nucleotide biosynthesis | 12 | 0.0423533 | -0.3384192 | 0.0610224 |
9 | Mitochondrial protein import | 64 | 0.0000109 | 0.3177325 | 0.0000328 |
24 | Stabilization of p53 | 53 | 0.0003285 | 0.2851073 | 0.0008761 |
17 | Phospholipid metabolism | 192 | 0.0000000 | -0.2809981 | 0.0000000 |
19 | Receptor Mediated Mitophagy | 11 | 0.1130800 | -0.2758906 | 0.1456370 |
12 | Mucopolysaccharidoses | 11 | 0.1195967 | -0.2710100 | 0.1456370 |
21 | Resolution of D-Loop Structures | 33 | 0.0161355 | 0.2419464 | 0.0270932 |
6 | Metabolism of amino acids and derivatives | 341 | 0.0000000 | 0.2246703 | 0.0000000 |
18 | Pyruvate metabolism and Citric Acid (TCA) cycle | 52 | 0.0153522 | -0.1942638 | 0.0270932 |
13 | Nervous system development | 524 | 0.0000000 | -0.1596025 | 0.0000000 |
16 | PTEN Regulation | 140 | 0.0169332 | -0.1168422 | 0.0270932 |
23 | Signaling by the B Cell Receptor (BCR) | 106 | 0.0432242 | -0.1135935 | 0.0610224 |
7 | Mitochondrial biogenesis | 91 | 0.1213642 | -0.0939200 | 0.1456370 |
4 | Gene Silencing by RNA | 97 | 0.2056272 | -0.0743359 | 0.2350026 |
m4top <- subset(mres4$enrichment_result,p.adjustANOVA<0.05)
m4up <- subset(m4top,s.dist>0)$set
m4dn <- subset(m4top,s.dist<0)$set
#mitch_report(mres4,outfile="mitch_mito4.html",overwrite=TRUE)
mitch_plots(mres4, outfile="mitch_mito4.pdf")
## png
## 2
write.table(mres4$enrichment_result,file="mitch_mito4.tsv",quote=FALSE,sep="\t",row.names=FALSE)
m5 <- mitch_import(dge5, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 26081
## Note: no. genes in output = 26025
## Note: estimated proportion of input genes in output = 0.998
mres5 <- mitch_calc(m5, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres5$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 5") %>% kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
2 | Complex I biogenesis | 57 | 0.0000000 | 0.5977708 | 0.0000000 |
19 | Receptor Mediated Mitophagy | 11 | 0.0013394 | -0.5584266 | 0.0024786 |
14 | Nucleotide biosynthesis | 12 | 0.0032789 | -0.4901460 | 0.0052463 |
22 | Respiratory electron transport | 103 | 0.0000000 | 0.4299096 | 0.0000000 |
21 | Resolution of D-Loop Structures | 33 | 0.0004749 | 0.3514112 | 0.0012664 |
10 | Mitochondrial translation | 96 | 0.0000000 | 0.3425773 | 0.0000000 |
11 | Mitophagy | 28 | 0.0062915 | -0.2982597 | 0.0088821 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0797813 | -0.2920655 | 0.1007764 |
17 | Phospholipid metabolism | 190 | 0.0000000 | -0.2807057 | 0.0000000 |
18 | Pyruvate metabolism and Citric Acid (TCA) cycle | 52 | 0.0013426 | -0.2569873 | 0.0024786 |
1 | Cellular response to starvation | 150 | 0.0000001 | 0.2494691 | 0.0000006 |
16 | PTEN Regulation | 140 | 0.0000155 | -0.2114126 | 0.0000465 |
7 | Mitochondrial biogenesis | 91 | 0.0008071 | -0.2031149 | 0.0019371 |
4 | Gene Silencing by RNA | 97 | 0.0008931 | -0.1951037 | 0.0019486 |
12 | Mucopolysaccharidoses | 11 | 0.2998946 | 0.1805042 | 0.3271577 |
23 | Signaling by the B Cell Receptor (BCR) | 105 | 0.0025170 | -0.1705680 | 0.0043149 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.3264363 | 0.1571819 | 0.3406292 |
6 | Metabolism of amino acids and derivatives | 342 | 0.0000006 | 0.1566651 | 0.0000021 |
3 | Diseases of carbohydrate metabolism | 29 | 0.1626241 | 0.1497870 | 0.1948517 |
13 | Nervous system development | 527 | 0.0000002 | -0.1314342 | 0.0000009 |
m5top <- subset(mres5$enrichment_result,p.adjustANOVA<0.05)
m5up <- subset(m5top,s.dist>0)$set
m5dn <- subset(m5top,s.dist<0)$set
#mitch_report(mres5,outfile="mitch_mito5.html",overwrite=TRUE)
mitch_plots(mres5, outfile="mitch_mito5.pdf")
## png
## 2
write.table(mres5$enrichment_result,file="mitch_mito5.tsv",quote=FALSE,sep="\t",row.names=FALSE)
Make some ridge plots.
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
sets <- head(mres$enrichment_result,10)
sets <- sets[order(sets$s.dist),"set"]
ridgedata <- lapply( sets , function(myset) {
genes <- genesets[[which(names(genesets) == myset)]]
generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
return(generanks)
})
names(ridgedata) <- sets
myrange <- range(mres$ranked_profile[,1])
par(mar=c(5,20,3,2))
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,main="Top 10 genesets",
xlab="differential gene rank")
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,col="white",main="Top 10 genesets",
xlab="differential gene rank",
rectCol="gray")
beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})
#Fig2c
myres <- mres1
sets <- head(myres$enrichment_result,10)
sets <- sets[order(sets$s.dist),"set"]
ridgedata <- lapply( sets , function(myset) {
genes <- genesets[[which(names(genesets) == myset)]]
generanks <- myres$ranked_profile[which(rownames(myres$ranked_profile) %in% genes),]
return(generanks)
})
names(ridgedata) <- sets
myrange <- range(myres$ranked_profile[,1])
pdf("fig2C.pdf")
par(mar=c(5,20,3,2))
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,main="Top 10 genesets",
xlab="differential gene rank")
dev.off()
## png
## 2
# now custom gene sets part 1
custom_set_names <- readLines("custom_pathway_list.txt")
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
sets <- custom_set_names
mres$enrichment_result <- mres$enrichment_result[which(mres$enrichment_result$set %in% sets),]
mres$enrichment_result <- mres$enrichment_result[order(mres$enrichment_result$s.dist),]
sets <- mres$enrichment_result$set
ridgedata <- lapply( sets , function(myset) {
genes <- genesets[[which(names(genesets) == myset)]]
generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
return(generanks)
})
names(ridgedata) <- sets
myrange <- range(mres$ranked_profile[,1])
par(mar=c(5,20,3,2))
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,main="Gene sets of interest",
xlab="differential gene rank")
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,col="white",main="Gene sets of interest",
rectCol="gray", xlab="differential gene rank")
beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})
# now custom gene sets part 2
custom_set_names2 <- readLines("custom_pathway_list2.txt")
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
sets <- custom_set_names2[which( custom_set_names2 %in% names(custom_sets))]
mres$enrichment_result <- mres$enrichment_result[which(mres$enrichment_result$set %in% sets),]
mres$enrichment_result <- mres$enrichment_result[order(mres$enrichment_result$s.dist),]
sets <- mres$enrichment_result$set
ridgedata <- lapply( sets , function(myset) {
genes <- genesets[[which(names(genesets) == myset)]]
generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
return(generanks)
})
names(ridgedata) <- sets
myrange <- range(mres$ranked_profile[,1])
par(mar=c(5,20,3,2))
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,main="Gene sets of interest",
xlab="differential gene rank")
vioplot(ridgedata,horizontal=TRUE,las=1,
ylim=myrange,col="white",main="Gene sets of interest",
rectCol="gray", xlab="differential gene rank")
beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=1,corral = "gutter")
})
Now make some heatmaps
sets <- names(custom_sets)
z <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
hm <- as.matrix(rpm1[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
cexCol=0.9, main=paste(set) )
})
z <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
hm <- as.matrix(rpm[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
hm[is.na(hm)] <- 0
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
cexCol=0.9, main=paste(set) )
})
refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
gshm <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
hm <- as.matrix(rpm1[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
return(colMeans(hm))
})
gshm <- do.call(rbind,gshm)
rownames(gshm) <- sets
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(gshm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
cexCol=0.9, main="Gene set heatmap" )
refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
gshm <- lapply(sets, function(set){
genes <- genesets[[which(names(genesets) == set)]]
hm <- as.matrix(rpm[which(refgenes %in% genes),])
hm <- hm/rowMeans(hm)
hm[is.na(hm)] <- 0
return(colMeans(hm))
})
gshm <- do.call(rbind,gshm)
rownames(gshm) <- sets
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(gshm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
cexCol=0.9, main="Gene set heatmap" )
Mitch multi contrasts with custom gene sets
mm <- list("KO"=dge1,"drug"=dge4)
m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 25740
## Note: no. genes in output = 24473
## Note: estimated proportion of input genes in output = 0.951
mres <- mitch_calc(m, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(mres$enrichment_result,20) %>%
kbl(caption = "Top mito pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.KO | s.drug | p.KO | p.drug | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
19 | Receptor Mediated Mitophagy | 11 | 0.0000263 | -0.7759568 | -0.2626486 | 0.0000083 | 0.1314471 | 0.8192028 | 0.3629637 | 0.0000486 |
12 | Mucopolysaccharidoses | 11 | 0.0000136 | 0.7559480 | -0.2581369 | 0.0000141 | 0.1382052 | 0.7988066 | 0.7170663 | 0.0000271 |
2 | Complex I biogenesis | 57 | 0.0000000 | -0.1225498 | 0.7294196 | 0.1094569 | 0.0000000 | 0.7396427 | 0.6024333 | 0.0000000 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0001986 | 0.4227546 | -0.5022417 | 0.0112145 | 0.0025882 | 0.6564816 | 0.6540711 | 0.0002804 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.0001731 | -0.4368073 | 0.4615448 | 0.0063873 | 0.0039556 | 0.6354716 | 0.6352309 | 0.0002769 |
22 | Respiratory electron transport | 103 | 0.0000000 | -0.1603850 | 0.5889559 | 0.0049012 | 0.0000000 | 0.6104034 | 0.5298640 | 0.0000000 |
14 | Nucleotide biosynthesis | 12 | 0.0036745 | -0.4820258 | -0.3243122 | 0.0038335 | 0.0517303 | 0.5809710 | 0.1115204 | 0.0041994 |
10 | Mitochondrial translation | 96 | 0.0000000 | -0.1481990 | 0.5183498 | 0.0120692 | 0.0000000 | 0.5391191 | 0.4713211 | 0.0000000 |
11 | Mitophagy | 28 | 0.0000068 | -0.5324577 | -0.0475703 | 0.0000011 | 0.6630410 | 0.5345785 | 0.3428671 | 0.0000148 |
1 | Cellular response to starvation | 150 | 0.0000000 | -0.3219762 | 0.4055563 | 0.0000000 | 0.0000000 | 0.5178268 | 0.5144432 | 0.0000000 |
24 | Stabilization of p53 | 53 | 0.0000000 | -0.4250676 | 0.2913433 | 0.0000001 | 0.0002425 | 0.5153284 | 0.5065790 | 0.0000000 |
9 | Mitochondrial protein import | 64 | 0.0000002 | -0.2032652 | 0.3237894 | 0.0049093 | 0.0000074 | 0.3823039 | 0.3726839 | 0.0000006 |
21 | Resolution of D-Loop Structures | 33 | 0.0039391 | 0.2408322 | 0.2533800 | 0.0166361 | 0.0117544 | 0.3495734 | 0.0088726 | 0.0042972 |
3 | Diseases of carbohydrate metabolism | 30 | 0.0089882 | 0.3189516 | -0.0263170 | 0.0024936 | 0.8029745 | 0.3200355 | 0.2441418 | 0.0093790 |
17 | Phospholipid metabolism | 186 | 0.0000000 | -0.1473455 | -0.2833057 | 0.0005251 | 0.0000000 | 0.3193318 | 0.0961384 | 0.0000000 |
16 | PTEN Regulation | 140 | 0.0000000 | -0.2826367 | -0.1054324 | 0.0000000 | 0.0311991 | 0.3016613 | 0.1253024 | 0.0000000 |
18 | Pyruvate metabolism and Citric Acid (TCA) cycle | 52 | 0.0017466 | -0.2351995 | -0.1828266 | 0.0033395 | 0.0225437 | 0.2978999 | 0.0370333 | 0.0020959 |
6 | Metabolism of amino acids and derivatives | 337 | 0.0000000 | -0.1445284 | 0.2335738 | 0.0000050 | 0.0000000 | 0.2746729 | 0.2673586 | 0.0000000 |
23 | Signaling by the B Cell Receptor (BCR) | 106 | 0.0000599 | -0.2342334 | -0.1018537 | 0.0000307 | 0.0699335 | 0.2554202 | 0.0936066 | 0.0001027 |
4 | Gene Silencing by RNA | 95 | 0.0001968 | -0.2409968 | -0.0667932 | 0.0000488 | 0.2604344 | 0.2500816 | 0.1231806 | 0.0002804 |
#mitch_report(mres,outfile="mitchmulti_effect_mito.html",overwrite=TRUE)
write.table(mres$enrichment_result,file="mitch_multi_effect_mito.tsv",quote=FALSE,sep="\t",row.names=FALSE)
mx <- as.matrix(mres$enrichment_result[,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,22), cexRow=1,
cexCol=0.9, main=paste("Top ranked Reactomes") )
mres <- mitch_calc(m, custom_sets, priority="SD",resrows=52)
## Note: Prioritisation by SD after selecting sets with
## p.adjustMANOVA<=0.05.
head(mres$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
set | setSize | pMANOVA | s.KO | s.drug | p.KO | p.drug | s.dist | SD | p.adjustMANOVA | |
---|---|---|---|---|---|---|---|---|---|---|
12 | Mucopolysaccharidoses | 11 | 0.0000136 | 0.7559480 | -0.2581369 | 0.0000141 | 0.1382052 | 0.7988066 | 0.7170663 | 0.0000271 |
20 | Reduction of cytosolic Ca++ levels | 12 | 0.0001986 | 0.4227546 | -0.5022417 | 0.0112145 | 0.0025882 | 0.6564816 | 0.6540711 | 0.0002804 |
8 | Mitochondrial iron-sulfur cluster biogenesis | 13 | 0.0001731 | -0.4368073 | 0.4615448 | 0.0063873 | 0.0039556 | 0.6354716 | 0.6352309 | 0.0002769 |
2 | Complex I biogenesis | 57 | 0.0000000 | -0.1225498 | 0.7294196 | 0.1094569 | 0.0000000 | 0.7396427 | 0.6024333 | 0.0000000 |
22 | Respiratory electron transport | 103 | 0.0000000 | -0.1603850 | 0.5889559 | 0.0049012 | 0.0000000 | 0.6104034 | 0.5298640 | 0.0000000 |
1 | Cellular response to starvation | 150 | 0.0000000 | -0.3219762 | 0.4055563 | 0.0000000 | 0.0000000 | 0.5178268 | 0.5144432 | 0.0000000 |
24 | Stabilization of p53 | 53 | 0.0000000 | -0.4250676 | 0.2913433 | 0.0000001 | 0.0002425 | 0.5153284 | 0.5065790 | 0.0000000 |
10 | Mitochondrial translation | 96 | 0.0000000 | -0.1481990 | 0.5183498 | 0.0120692 | 0.0000000 | 0.5391191 | 0.4713211 | 0.0000000 |
9 | Mitochondrial protein import | 64 | 0.0000002 | -0.2032652 | 0.3237894 | 0.0049093 | 0.0000074 | 0.3823039 | 0.3726839 | 0.0000006 |
19 | Receptor Mediated Mitophagy | 11 | 0.0000263 | -0.7759568 | -0.2626486 | 0.0000083 | 0.1314471 | 0.8192028 | 0.3629637 | 0.0000486 |
11 | Mitophagy | 28 | 0.0000068 | -0.5324577 | -0.0475703 | 0.0000011 | 0.6630410 | 0.5345785 | 0.3428671 | 0.0000148 |
6 | Metabolism of amino acids and derivatives | 337 | 0.0000000 | -0.1445284 | 0.2335738 | 0.0000050 | 0.0000000 | 0.2746729 | 0.2673586 | 0.0000000 |
3 | Diseases of carbohydrate metabolism | 30 | 0.0089882 | 0.3189516 | -0.0263170 | 0.0024936 | 0.8029745 | 0.3200355 | 0.2441418 | 0.0093790 |
16 | PTEN Regulation | 140 | 0.0000000 | -0.2826367 | -0.1054324 | 0.0000000 | 0.0311991 | 0.3016613 | 0.1253024 | 0.0000000 |
4 | Gene Silencing by RNA | 95 | 0.0001968 | -0.2409968 | -0.0667932 | 0.0000488 | 0.2604344 | 0.2500816 | 0.1231806 | 0.0002804 |
14 | Nucleotide biosynthesis | 12 | 0.0036745 | -0.4820258 | -0.3243122 | 0.0038335 | 0.0517303 | 0.5809710 | 0.1115204 | 0.0041994 |
15 | Oxidative Stress Induced Senescence | 82 | 0.0284702 | -0.1702472 | -0.0223408 | 0.0076805 | 0.7264911 | 0.1717068 | 0.1045856 | 0.0284702 |
7 | Mitochondrial biogenesis | 91 | 0.0006455 | -0.2234003 | -0.0841316 | 0.0002290 | 0.1653040 | 0.2387170 | 0.0984778 | 0.0008154 |
17 | Phospholipid metabolism | 186 | 0.0000000 | -0.1473455 | -0.2833057 | 0.0005251 | 0.0000000 | 0.3193318 | 0.0961384 | 0.0000000 |
23 | Signaling by the B Cell Receptor (BCR) | 106 | 0.0000599 | -0.2342334 | -0.1018537 | 0.0000307 | 0.0699335 | 0.2554202 | 0.0936066 | 0.0001027 |
#mitch_report(mres,outfile="mitchmulti_discord_mito.html",overwrite=TRUE)
write.table(mres$enrichment_result,file="mitch_multi_discord_mito.tsv",quote=FALSE,sep="\t",row.names=FALSE)
mx <- as.matrix(mres$enrichment_result[,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,22), cexRow=1,
cexCol=0.9, main=paste("Top ranked Reactomes") )
TODO
For reproducibility.
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] pkgload_1.3.0 GGally_2.1.2
## [3] gtools_3.9.2.2 echarts4r_0.4.4
## [5] beeswarm_0.4.0 vioplot_0.3.7
## [7] sm_2.2-5.7 kableExtra_1.3.4
## [9] topconfects_1.12.0 limma_3.52.1
## [11] eulerr_6.1.1 mitch_1.8.0
## [13] MASS_7.3-58 fgsea_1.22.0
## [15] gplots_3.1.3 DESeq2_1.36.0
## [17] SummarizedExperiment_1.26.1 Biobase_2.56.0
## [19] MatrixGenerics_1.8.0 matrixStats_0.62.0
## [21] GenomicRanges_1.48.0 GenomeInfoDb_1.32.2
## [23] IRanges_2.30.0 S4Vectors_0.34.0
## [25] BiocGenerics_0.42.0 reshape2_1.4.4
## [27] forcats_0.5.1 stringr_1.4.0
## [29] dplyr_1.0.9 purrr_0.3.4
## [31] readr_2.1.2 tidyr_1.2.0
## [33] tibble_3.1.7 ggplot2_3.3.6
## [35] tidyverse_1.3.1 zoo_1.8-10
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.0 backports_1.4.1 fastmatch_1.1-3
## [4] systemfonts_1.0.4 plyr_1.8.7 polylabelr_0.2.0
## [7] splines_4.2.1 BiocParallel_1.30.3 digest_0.6.29
## [10] htmltools_0.5.2 fansi_1.0.3 magrittr_2.0.3
## [13] memoise_2.0.1 tzdb_0.3.0 Biostrings_2.64.0
## [16] annotate_1.74.0 modelr_0.1.8 svglite_2.1.0
## [19] colorspace_2.0-3 blob_1.2.3 rvest_1.0.2
## [22] haven_2.5.0 xfun_0.31 crayon_1.5.1
## [25] RCurl_1.98-1.7 jsonlite_1.8.0 genefilter_1.78.0
## [28] survival_3.4-0 glue_1.6.2 polyclip_1.10-0
## [31] gtable_0.3.0 zlibbioc_1.42.0 XVector_0.36.0
## [34] webshot_0.5.3 DelayedArray_0.22.0 scales_1.2.0
## [37] DBI_1.1.3 Rcpp_1.0.8.3 viridisLite_0.4.0
## [40] xtable_1.8-4 bit_4.0.4 htmlwidgets_1.5.4
## [43] httr_1.4.3 RColorBrewer_1.1-3 ellipsis_0.3.2
## [46] farver_2.1.0 pkgconfig_2.0.3 reshape_0.8.9
## [49] XML_3.99-0.10 sass_0.4.1 dbplyr_2.2.1
## [52] locfit_1.5-9.5 utf8_1.2.2 labeling_0.4.2
## [55] tidyselect_1.1.2 rlang_1.0.3 later_1.3.0
## [58] AnnotationDbi_1.58.0 munsell_0.5.0 cellranger_1.1.0
## [61] tools_4.2.1 cachem_1.0.6 cli_3.3.0
## [64] generics_0.1.2 RSQLite_2.2.14 broom_0.8.0
## [67] evaluate_0.15 fastmap_1.1.0 yaml_2.3.5
## [70] knitr_1.39 bit64_4.0.5 fs_1.5.2
## [73] caTools_1.18.2 KEGGREST_1.36.2 mime_0.12
## [76] xml2_1.3.3 compiler_4.2.1 rstudioapi_0.13
## [79] png_0.1-7 reprex_2.0.1 geneplotter_1.74.0
## [82] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [85] lattice_0.20-45 Matrix_1.4-1 vctrs_0.4.1
## [88] pillar_1.7.0 lifecycle_1.0.1 jquerylib_0.1.4
## [91] data.table_1.14.2 bitops_1.0-7 httpuv_1.6.5
## [94] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-20
## [97] gridExtra_2.3 codetools_0.2-18 assertthat_0.2.1
## [100] withr_2.5.0 GenomeInfoDbData_1.2.8 parallel_4.2.1
## [103] hms_1.1.1 grid_4.2.1 rmarkdown_2.14
## [106] shiny_1.7.1 lubridate_1.8.0