Source: https://github.com/markziemann/echs1_rnaseq

Introduction

The samples are labelled as CON (control) and KO for knockout, ut are the untreated samples and dNs are the treated samples (mito biogenesis stimulator).

suppressPackageStartupMessages({
    library("zoo")
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
    library("eulerr")
    library("limma")
    library("topconfects")
    library("kableExtra")
    library("vioplot")
    library("beeswarm")
})

Import read counts

Importing RNA-seq data

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
xx$geneid = NULL
xx <- round(xx)
xx <- xx[,which(colnames(xx)!="test")]
xx[1:6,1:6]
##                             1_CON_ut_A_RNA_HB_HLF57DRXY_CTGTTGGTCC-AACGGTCTAT_L001
## ENSG00000000003.15 TSPAN6                                                      704
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        624
## ENSG00000000457.14 SCYL3                                                        97
## ENSG00000000460.17 C1orf112                                                    110
## ENSG00000000938.13 FGR                                                           0
##                             1_CON_ut_A_RNA_HB_HLF57DRXY_CTGTTGGTCC-AACGGTCTAT_L002
## ENSG00000000003.15 TSPAN6                                                      676
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        643
## ENSG00000000457.14 SCYL3                                                        99
## ENSG00000000460.17 C1orf112                                                    122
## ENSG00000000938.13 FGR                                                           0
##                             10_KO_ut_B_RNA_HB_HLF57DRXY_CACGGAACAA-GTGCTAGGTT_L001
## ENSG00000000003.15 TSPAN6                                                      752
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        360
## ENSG00000000457.14 SCYL3                                                       102
## ENSG00000000460.17 C1orf112                                                    110
## ENSG00000000938.13 FGR                                                           0
##                             10_KO_ut_B_RNA_HB_HLF57DRXY_CACGGAACAA-GTGCTAGGTT_L002
## ENSG00000000003.15 TSPAN6                                                      803
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        334
## ENSG00000000457.14 SCYL3                                                        75
## ENSG00000000460.17 C1orf112                                                    100
## ENSG00000000938.13 FGR                                                           0
##                             11_KO_ut_C_RNA_HB_HLF57DRXY_TGGAGTACTT-TCCACACAGA_L001
## ENSG00000000003.15 TSPAN6                                                     1502
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        703
## ENSG00000000457.14 SCYL3                                                       203
## ENSG00000000460.17 C1orf112                                                    175
## ENSG00000000938.13 FGR                                                           0
##                             11_KO_ut_C_RNA_HB_HLF57DRXY_TGGAGTACTT-TCCACACAGA_L002
## ENSG00000000003.15 TSPAN6                                                     1509
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        659
## ENSG00000000457.14 SCYL3                                                       184
## ENSG00000000460.17 C1orf112                                                    191
## ENSG00000000938.13 FGR                                                           0
dim(xx)
## [1] 60651    32

Fix the sample names.

They are duplicated for lane 1 and 2, which I will aggregate.

colnames(xx) <- sapply(strsplit(colnames(xx),"_RNA_"),"[[",1)
colnames(xx) <- sapply(strsplit(sub("_","@",colnames(xx)),"@"),"[[",2)
labels <- unique(sapply(strsplit(colnames(xx),"\\."),"[[",1))
l <- lapply(labels,function(x) { rowSums(xx[,grep(x,colnames(xx))]) } )
ll <- do.call(cbind,l)
colnames(ll) <- labels
ll <- as.data.frame(ll[,order(colnames(ll))])
write.table(ll,file="counts.tsv",sep="\t",quote=FALSE)

rpm <- ll/colSums(ll) *1e6
write.table(rpm,file="rpm.tsv",sep="\t",quote=FALSE)

Make a sample sheet. This will be easy because the groups are specified in the name.

ss <- as.data.frame(colnames(ll))
ss$ko <- factor(grepl("KO", colnames(ll))) 
ss$trt <- factor(grepl("dNs",colnames(ll)))
rownames(ss) <- ss[,1]
ss[,1]=NULL
# can also set some colours for later
ss$cols <- c(rep("gray",4),rep("lightblue",4),rep("pink",4),
  rep("orange",4))

ss %>% kbl(caption = "Sample sheet") %>%
  kable_paper("hover", full_width = F)
Sample sheet
ko trt cols
CON_dNs_A FALSE TRUE gray
CON_dNs_B FALSE TRUE gray
CON_dNs_C FALSE TRUE gray
CON_dNs_D FALSE TRUE gray
CON_ut_A FALSE FALSE lightblue
CON_ut_B FALSE FALSE lightblue
CON_ut_C FALSE FALSE lightblue
CON_ut_D FALSE FALSE lightblue
KO_dNs_A TRUE TRUE pink
KO_dNs_B TRUE TRUE pink
KO_dNs_C TRUE TRUE pink
KO_dNs_D TRUE TRUE pink
KO_ut_A TRUE FALSE orange
KO_ut_B TRUE FALSE orange
KO_ut_C TRUE FALSE orange
KO_ut_D TRUE FALSE orange

QC analysis

Here I’ll look at a few different quality control measures.

par(mar=c(5,8,3,1))
barplot(colSums(ll),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)

sums <- colSums(ll)
sums <- sums[order(sums)]
barplot(sums,horiz=TRUE,las=1,xlab="num reads")
abline(v=15000000,col="red")

MDS plot for all samples

Multidimensional scaling plot to show the variation between all samples, very similar to PCA.

Firstly with the data before aggregating technical replicates.

mds <- cmdscale(dist(t(xx)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "p",bty="n",pch=19, cex=4 ,col="gray")
text(mds, labels=rownames(mds) )

Now for the data after aggregation.

It looks like the drug has a bigger effect than the gene KO.

mds <- cmdscale(dist(t(ll)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

pdf("mds1.pdf")

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

dev.off()
## png 
##   2

Correlation heatmap

heatmap.2(cor(ll),trace="n",main="Pearson correlation heatmap",margin=c(8,8))

pdf("cor1.pdf")
heatmap.2(cor(ll),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()
## png 
##   2

MDS plot for UT samples

ll2 <- ll[,grep("ut",colnames(ll))]

mds <- cmdscale(dist(t(ll2)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

pdf("mds2.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()
## png 
##   2

MDS plot for dNs samples

ll3 <- ll[,grep("dNs",colnames(ll))]

mds <- cmdscale(dist(t(ll3)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

pdf("mds3.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()
## png 
##   2

Correlation heatmaps for UT and dNs separately

heatmap.2(cor(ll2),trace="n",main="Pearson correlation heatmap",margin=c(8,8))

heatmap.2(cor(ll3),trace="n",main="Pearson correlation heatmap",margin=c(8,8))

pdf("cor2.pdf")
heatmap.2(cor(ll2),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()
## png 
##   2
pdf("cor3.pdf")
heatmap.2(cor(ll3),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()
## png 
##   2

Set up the different datasets for differential expression analysis

Don’t forget to remove poorly detected genes from the matrix with a threshold of 10 reads per sample on average.

There are 4 contrasts to set up.

  1. Effect of the KO in untreated cells

  2. Effect of the KO in treated cells

  3. Effect of the drug in WT cells

  4. Effect of the drug in KO cells

  5. CON ut vs KO dNs. (what was altered in ECHS1 KO, that has been returned to normal levels (and no longer significantly different) to CON ut).

ss1 <- ss[which(ss$trt=="FALSE"),]
xx1 <- ll[which(colnames(ll) %in% rownames(ss1))]
xx1 <- xx1[which(rowSums(xx1)>=10),]
rpm1 <- xx1/colSums(xx1) *1e6

ss2 <- ss[which(ss$trt=="TRUE"),]
xx2 <- ll[which(colnames(ll) %in% rownames(ss2))]
xx2 <- xx2[which(rowSums(xx2)>=10),]
rpm2 <- xx2/colSums(xx2) *1e6

ss3 <- ss[which(ss$ko=="FALSE"),]
xx3 <- ll[which(colnames(ll) %in% rownames(ss3))]
xx3 <- xx3[which(rowSums(xx3)>=10),]
rpm3 <- xx3/colSums(xx3) *1e6

ss4 <- ss[which(ss$ko=="TRUE"),]
xx4 <- ll[which(colnames(ll) %in% rownames(ss4))]
xx4 <- xx4[which(rowSums(xx4)>=10),]
rpm4 <- xx4/colSums(xx4) *1e6

ss5 <- subset(ss,ko == trt)
xx5 <- ll[which(colnames(ll) %in% rownames(ss5))]
xx5 <- xx5[which(rowSums(xx5)>=10),]
rpm5 <- xx5/colSums(xx5) *1e6

Differential expression with DESeq2

dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1, 
  design = ~ ko )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>% 
  kbl(caption = "Top gene expression differences for contrast 1: Effect of the KO in untreated cells") %>% 
  kable_paper("hover", full_width = F)
Top gene expression differences for contrast 1: Effect of the KO in untreated cells
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000049192.15 ADAMTS6 1232.5643 -3.320315 0.0735209 -45.16150 0 0
ENSG00000090339.9 ICAM1 8426.0162 2.137068 0.0421343 50.72040 0 0
ENSG00000099875.16 MKNK2 5196.7000 1.646797 0.0435241 37.83644 0 0
ENSG00000105825.14 TFPI2 10749.0439 -2.758378 0.0645077 -42.76045 0 0
ENSG00000107551.21 RASSF4 1224.3947 3.078164 0.0812402 37.88965 0 0
ENSG00000116016.14 EPAS1 16685.0850 1.887592 0.0496555 38.01373 0 0
ENSG00000125538.12 IL1B 9573.0006 -1.925913 0.0457459 -42.10027 0 0
ENSG00000138080.14 EMILIN1 6299.0508 2.088516 0.0483164 43.22583 0 0
ENSG00000146197.9 SCUBE3 7326.2924 -2.787733 0.0674659 -41.32061 0 0
ENSG00000154146.13 NRGN 1528.8050 2.283661 0.0600431 38.03371 0 0
ENSG00000164949.8 GEM 1954.1763 -2.372324 0.0565928 -41.91920 0 0
ENSG00000165507.9 DEPP1 3368.7544 2.850455 0.0522246 54.58071 0 0
ENSG00000166250.12 CLMP 20860.4032 -1.674644 0.0397766 -42.10125 0 0
ENSG00000166670.10 MMP10 3155.5769 -6.120693 0.1021577 -59.91416 0 0
ENSG00000166920.13 C15orf48 2293.5579 2.687532 0.0645990 41.60330 0 0
ENSG00000167772.12 ANGPTL4 14717.2697 3.591520 0.0856872 41.91430 0 0
ENSG00000168528.12 SERINC2 4459.4824 1.937358 0.0495824 39.07346 0 0
ENSG00000170961.7 HAS2 3615.4379 -4.322465 0.0852007 -50.73275 0 0
ENSG00000172572.7 PDE3A 1574.2471 -3.854290 0.0926037 -41.62136 0 0
ENSG00000172927.8 MYEOV 928.0298 4.057927 0.0905625 44.80804 0 0
dge1 <- dge
d1up <- rownames(subset(dge1,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge1,padj <= 0.05 & log2FoldChange < 0))
write.table(dge1,file="dge1.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx2 , colData = ss2, 
  design = ~ ko )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 2: Effect of the KO in treated cells") %>%
  kable_paper("hover", full_width = F)
Top gene expression differences for contrast 2: Effect of the KO in treated cells
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000081248.12 CACNA1S 1209.4345 4.728860 0.1048201 45.11406 0 0
ENSG00000142910.16 TINAGL1 4889.9014 3.128308 0.0629648 49.68348 0 0
ENSG00000162804.14 SNED1 6126.7547 -2.358713 0.0490839 -48.05475 0 0
ENSG00000166920.13 C15orf48 2299.7911 3.960911 0.0886313 44.68975 0 0
ENSG00000167772.12 ANGPTL4 9936.5018 3.543783 0.0816955 43.37797 0 0
ENSG00000113361.13 CDH6 816.5796 -3.817982 0.1059909 -36.02179 0 0
ENSG00000101187.16 SLCO4A1 19101.5516 1.934484 0.0565000 34.23868 0 0
ENSG00000143127.13 ITGA10 1156.7355 -3.687434 0.1104110 -33.39734 0 0
ENSG00000151062.15 CACNA2D4 655.2444 4.551764 0.1378037 33.03079 0 0
ENSG00000170961.7 HAS2 613.5916 -3.363180 0.1045664 -32.16312 0 0
ENSG00000168528.12 SERINC2 3732.8640 1.667401 0.0525189 31.74856 0 0
ENSG00000135245.10 HILPDA 1510.8969 2.621093 0.0845048 31.01708 0 0
ENSG00000172927.8 MYEOV 472.9211 4.325653 0.1395499 30.99717 0 0
ENSG00000115756.13 HPCAL1 4084.1250 1.608911 0.0524306 30.68650 0 0
ENSG00000100985.7 MMP9 14795.4099 -2.331039 0.0761929 -30.59392 0 0
ENSG00000163235.16 TGFA 1660.7730 2.758967 0.0910679 30.29573 0 0
ENSG00000125571.9 IL37 1230.5444 2.713549 0.0896194 30.27858 0 0
ENSG00000118257.17 NRP2 2033.2565 -2.000568 0.0681154 -29.37026 0 0
ENSG00000213658.12 LAT 1708.1308 -1.949041 0.0664827 -29.31650 0 0
ENSG00000101443.18 WFDC2 5495.9129 2.267593 0.0773716 29.30783 0 0
dge2 <- dge
d2up <- rownames(subset(dge2,padj <= 0.05 & log2FoldChange > 0))
d2dn <- rownames(subset(dge2,padj <= 0.05 & log2FoldChange < 0))
write.table(dge2,file="dge2.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx3 , colData = ss3,
  design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 3: Effect of the drug in WT cells") %>%
  kable_paper("hover", full_width = F)
Top gene expression differences for contrast 3: Effect of the drug in WT cells
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000068912.14 ERLEC1 1851.848 -2.762775 0.0670700 -41.19240 0 0
ENSG00000078747.16 ITCH 2736.282 -2.038664 0.0523301 -38.95780 0 0
ENSG00000100836.10 PABPN1 6539.865 2.072716 0.0487025 42.55874 0 0
ENSG00000102755.12 FLT1 5426.297 -3.705474 0.0880185 -42.09882 0 0
ENSG00000106366.9 SERPINE1 24625.790 -2.091105 0.0507831 -41.17717 0 0
ENSG00000108821.14 COL1A1 9382.064 2.181043 0.0456926 47.73301 0 0
ENSG00000110048.12 OSBP 3405.995 -2.355956 0.0614408 -38.34517 0 0
ENSG00000112473.18 SLC39A7 4521.830 -2.281225 0.0578603 -39.42640 0 0
ENSG00000116285.13 ERRFI1 36944.117 -2.684910 0.0567602 -47.30267 0 0
ENSG00000122884.12 P4HA1 4295.439 -2.579782 0.0582809 -44.26463 0 0
ENSG00000131016.17 AKAP12 21452.521 -3.317855 0.0663163 -50.03076 0 0
ENSG00000138166.6 DUSP5 4318.320 -2.155073 0.0498547 -43.22711 0 0
ENSG00000142949.17 PTPRF 7682.322 -1.848612 0.0487213 -37.94262 0 0
ENSG00000150961.15 SEC24D 2402.484 -3.024974 0.0617986 -48.94889 0 0
ENSG00000150995.20 ITPR1 4106.314 -3.021908 0.0665647 -45.39805 0 0
ENSG00000159167.12 STC1 13523.272 -3.145427 0.0722957 -43.50780 0 0
ENSG00000162804.14 SNED1 6283.910 2.862292 0.0481808 59.40725 0 0
ENSG00000164484.12 TMEM200A 1197.746 -3.470114 0.0881540 -39.36421 0 0
ENSG00000166250.12 CLMP 18283.547 -2.335014 0.0538393 -43.37009 0 0
ENSG00000166670.10 MMP10 3245.447 -3.537259 0.0771167 -45.86894 0 0
dge3 <- dge
d3up <- rownames(subset(dge3,padj <= 0.05 & log2FoldChange > 0))
d3dn <- rownames(subset(dge3,padj <= 0.05 & log2FoldChange < 0))
write.table(dge3,file="dge3.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx4 , colData = ss4,
  design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 4: Effect of the drug in KO cells") %>%
  kable_paper("hover", full_width = F)
Top gene expression differences for contrast 4: Effect of the drug in KO cells
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000059804.16 SLC2A3 5623.218 -1.769194 0.0468167 -37.78983 0 0
ENSG00000103888.17 CEMIP 25230.452 -2.601566 0.0673460 -38.62986 0 0
ENSG00000104419.17 NDRG1 38083.260 -2.549397 0.0511765 -49.81576 0 0
ENSG00000134250.20 NOTCH2 6533.444 -2.176937 0.0578054 -37.65975 0 0
ENSG00000197324.9 LRP10 9816.752 -1.770453 0.0378713 -46.74921 0 0
ENSG00000198712.1 MT-CO2 290785.069 2.997493 0.0744604 40.25622 0 0
ENSG00000198727.2 MT-CYB 116486.352 3.700459 0.0765560 48.33662 0 0
ENSG00000198763.3 MT-ND2 121199.785 3.262193 0.0678248 48.09735 0 0
ENSG00000198840.2 MT-ND3 17981.096 2.878705 0.0705335 40.81328 0 0
ENSG00000198959.12 TGM2 1712.100 -2.516008 0.0594500 -42.32139 0 0
ENSG00000225630.1 MTND2P28 5023.069 3.268132 0.0682696 47.87097 0 0
ENSG00000258017.2 AC011603.2 7033.925 2.918598 0.0767330 38.03574 0 0
ENSG00000259207.9 ITGB3 6216.236 -2.380112 0.0532419 -44.70377 0 0
ENSG00000198804.2 MT-CO1 506735.846 2.899638 0.0779490 37.19915 0 0
ENSG00000134531.10 EMP1 8062.151 -1.864264 0.0501442 -37.17805 0 0
ENSG00000198888.2 MT-ND1 70406.196 2.780834 0.0758103 36.68147 0 0
ENSG00000142949.17 PTPRF 7521.711 -1.802689 0.0498071 -36.19343 0 0
ENSG00000198938.2 MT-CO3 211300.735 2.731621 0.0754793 36.19031 0 0
ENSG00000178726.7 THBD 1419.592 -2.752535 0.0765193 -35.97178 0 0
ENSG00000131016.17 AKAP12 17614.587 -2.494844 0.0707156 -35.27996 0 0
dge4 <- dge
d4up <- rownames(subset(dge4,padj <= 0.05 & log2FoldChange > 0))
d4dn <- rownames(subset(dge4,padj <= 0.05 & log2FoldChange < 0))
write.table(dge4,file="dge4.tsv",quote=FALSE,sep="\t")
dds <- DESeqDataSetFromMatrix(countData = xx5 , colData = ss5,
  design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 5: Does the drug restore changes caused by KO?") %>%
  kable_paper("hover", full_width = F)
Top gene expression differences for contrast 5: Does the drug restore changes caused by KO?
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000006459.11 KDM7A 2102.825 -2.968547 0.0756232 -39.25444 0 0
ENSG00000011638.11 TMEM159 1310.273 -3.080679 0.0735046 -41.91135 0 0
ENSG00000026559.14 KCNG1 3444.570 -2.168208 0.0544081 -39.85081 0 0
ENSG00000049192.15 ADAMTS6 1080.879 -4.046315 0.0968751 -41.76838 0 0
ENSG00000054356.14 PTPRN 2934.182 -2.849605 0.0710961 -40.08101 0 0
ENSG00000059804.16 SLC2A3 5889.284 -1.919649 0.0509892 -37.64813 0 0
ENSG00000081181.8 ARG2 7528.745 -2.886314 0.0654474 -44.10126 0 0
ENSG00000081248.12 CACNA1S 1201.973 6.324357 0.1441395 43.87665 0 0
ENSG00000085449.15 WDFY1 2455.389 -2.926425 0.0779364 -37.54886 0 0
ENSG00000100985.7 MMP9 34579.692 -3.683329 0.0725006 -50.80410 0 0
ENSG00000101443.18 WFDC2 5112.498 3.272677 0.0868267 37.69207 0 0
ENSG00000102755.12 FLT1 5139.855 -3.718650 0.0709667 -52.39995 0 0
ENSG00000106333.13 PCOLCE 9578.772 2.391137 0.0557298 42.90588 0 0
ENSG00000106366.9 SERPINE1 21920.897 -2.648544 0.0513740 -51.55418 0 0
ENSG00000112902.12 SEMA5A 1857.884 -3.979788 0.0893823 -44.52548 0 0
ENSG00000115170.15 ACVR1 1323.707 -2.656676 0.0684522 -38.81067 0 0
ENSG00000115677.17 HDLBP 13291.684 -2.302710 0.0521315 -44.17120 0 0
ENSG00000115758.13 ODC1 5532.578 -3.052796 0.0739435 -41.28553 0 0
ENSG00000118257.17 NRP2 5136.700 -3.513717 0.0603518 -58.22055 0 0
ENSG00000125430.9 HS3ST3B1 1104.229 -3.706736 0.0853897 -43.40962 0 0
dge5 <- dge
d5up <- rownames(subset(dge5,padj <= 0.05 & log2FoldChange > 0))
d5dn <- rownames(subset(dge5,padj <= 0.05 & log2FoldChange < 0))
write.table(dge5,file="dge5.tsv",quote=FALSE,sep="\t")

Here let’s look at some plots.

MA plot shows the average level and fold change of all detected genes. Volcano plot shows the fold change and the significance, as measured by -log(p-value). Significant genes are shown as red points.

There are heatmaps of the top ranked genes by p-value. Above the gene expression values there is a bar in yellow/orange colours. Yellow shows relatively low QuickDASH score and orange shows high score.

maplot <- function(de,contrast_name) {
  de <- de[which(!is.na(de$padj)),]
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}

make_volcano <- function(de,name) {
    de <- de[which(!is.na(de$padj)),]
    de$pvalue[which(de$pvalue==0)] <- 1e-320
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}

make_volcano2 <- function(de,name) {
    de <- de[which(!is.na(de$padj)),]
    de$pvalue[which(de$pvalue==0)] <- 1e-320
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    top <- head(sig,30)
    mylabels <- sapply(strsplit(rownames(top)," "),"[[",2)
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
    text(top$log2FoldChange+0.2,-log10(top$pval),labels=mylabels, srt=35 ,cex=0.7)
}

make_heatmap <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,15), cexRow=0.7, 
    main=paste("Top ranked",n,"genes in",name) , ColSideColors = myss$cols  )
}

make_heatmap2 <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  rownames(x) <- sapply(strsplit(rownames(x)," "),"[[",2)
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,15), cexRow=0.6,
    main=paste("Top ranked",n,"genes in",name) , ColSideColors = myss$cols  )
}

mitch_barplot <- function(res){
  sig <- head(subset(res$enrichment_result,p.adjustANOVA<0.05),30)
  sig <- sig[order(sig$s.dist),]
  par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
    names.arg=sig$set,main="Enrichment score") ;grid()
}


mitch_barplot2 <- function(res){
  sig_up <- head(subset(res$enrichment_result,p.adjustANOVA<0.05 & s.dist>0),15)
  sig_dn <- head(subset(res$enrichment_result,p.adjustANOVA<0.05 & s.dist<0),15)
  sig <- rbind(sig_up,sig_dn)
  sig <- sig[order(sig$s.dist),]
  par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
    names.arg=sig$set,main="Enrichment score") ;grid()
}

maplot(dge1,"Cont1: Effect of KO in untr cells")

make_volcano(dge1,"Cont1: Effect of KO in untr cells")
make_volcano2(dge1,"Cont1: Effect of KO in untr cells")

make_heatmap(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)

make_heatmap2(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)

maplot(dge2,"Cont2: Effect of KO in trt cells")

make_volcano(dge2,"Cont2: Effect of KO in trt cells")
make_volcano2(dge2,"Cont2: Effect of KO in trt cells")

make_heatmap(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)

make_heatmap2(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)

maplot(dge3,"Cont3: Effect of drug in WT cells")

make_volcano(dge3,"Cont3: Effect of drug in WT cells")
make_volcano2(dge3,"Cont3: Effect of drug in WT cells")

make_heatmap(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)

make_heatmap2(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)

maplot(dge4,"Cont4: Effect of drug in KO cells")

make_volcano(dge4,"Cont4: Effect of drug in KO cells")
make_volcano2(dge4,"Cont4: Effect of drug in KO cells")

make_heatmap(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)

make_heatmap2(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)

maplot(dge5,"CON ut vs KO dNs")

make_volcano(dge5,"CON ut vs KO dNs")
make_volcano2(dge5,"CON ut vs KO dNs")

make_heatmap(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)

make_heatmap2(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)

pdf("dge1.pdf")
maplot(dge1,"Cont1: Effect of KO in untr cells")
make_volcano(dge1,"Cont1: Effect of KO in untr cells")
make_volcano2(dge1,"Cont1: Effect of KO in untr cells")
make_heatmap(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
make_heatmap2(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
dev.off()
## png 
##   2
pdf("dge2.pdf")
maplot(dge2,"Cont2: Effect of KO in trt cells")
make_volcano(dge2,"Cont2: Effect of KO in trt cells")
make_volcano2(dge2,"Cont2: Effect of KO in trt cells")
make_heatmap(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
make_heatmap2(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
dev.off()
## png 
##   2
pdf("dge3.pdf")
maplot(dge3,"Cont3: Effect of drug in WT cells")
make_volcano(dge3,"Cont3: Effect of drug in WT cells")
make_volcano2(dge3,"Cont3: Effect of drug in WT cells")
make_heatmap(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
make_heatmap2(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
dev.off()
## png 
##   2
pdf("dge4.pdf")
maplot(dge4,"Cont4: Effect of drug in KO cells")
make_volcano(dge4,"Cont4: Effect of drug in KO cells")
make_volcano2(dge4,"Cont4: Effect of drug in KO cells")
make_heatmap(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
make_heatmap2(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
dev.off()
## png 
##   2
pdf("dge5.pdf")
maplot(dge5,"CON ut vs KO dNs")
make_volcano(dge5,"CON ut vs KO dNs")
make_volcano2(dge5,"CON ut vs KO dNs")
make_heatmap(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
make_heatmap2(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
dev.off()
## png 
##   2

Heatmap of DEGs

Contrast 1 and 4 - get top 25 genes and make heatmap of all sample groups.

g1 <- head(rownames(dge1),25)
g4 <- head(rownames(dge4),25)
g14 <- union(g1,g4)

fc1 <- dge1[which(rownames(dge1) %in% g14),"log2FoldChange",drop=F]

rpm14 <- rpm[which(rownames(rpm) %in% g14),]
rpm14 <- merge(rpm14,fc1,by=0)
rpm14 <- rpm14[order(rpm14$log2FoldChange),]
rpm14$log2FoldChange=NULL
rownames(rpm14) <- rpm14$Row.names
rpm14$Row.names=NULL
rownames(rpm14) <- sapply(strsplit(rownames(rpm14)," "),"[[",2)
rpm14 <- rpm14[,rev(order(colnames(rpm14)))]

# reorder columns
rpm14 <- rpm14[,c( grep("CON_ut",colnames(rpm14)) , grep("KO_ut",colnames(rpm14)) , grep("KO_dN",colnames(rpm14)) , grep("CON_dN",colnames(rpm14)) )]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

pdf("heat1.pdf")

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

dev.off()
## png 
##   2
rpm14 <- rpm[which(rownames(rpm) %in% g14),]

rownames(rpm14) <- sapply(strsplit(rownames(rpm14)," "),"[[",2)

# reorder columns
rpm14 <- rpm14[,c( grep("CON_ut",colnames(rpm14)) , grep("KO_ut",colnames(rpm14)) , grep("KO_dN",colnames(rpm14)) , grep("CON_dN",colnames(rpm14)) )]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

pdf("heat2.pdf")

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

dev.off()
## png 
##   2
rpm1x <- rpm1[which(rownames(rpm1) %in% g14),]
fc1 <- dge1[which(rownames(dge1) %in% g14),"log2FoldChange",drop=F]
rpm1x <- merge(rpm1x,fc1,by=0)
rpm1x <- rpm1x[order(rpm1x$log2FoldChange),]
rpm1x$log2FoldChange=NULL
rownames(rpm1x) <- rpm1x$Row.names
rpm1x$Row.names=NULL
rownames(rpm1x) <- sapply(strsplit(rownames(rpm1x)," "),"[[",2)
rpm1x <- rpm1x[,rev(order(colnames(rpm1x)))]

rpm4x <- rpm4[which(rownames(rpm4) %in% g14),]
rpm4x <- merge(rpm4x,fc1,by=0)
rpm4x <- rpm4x[order(rpm4x$log2FoldChange),]
rpm4x$log2FoldChange=NULL
rownames(rpm4x) <- rpm4x$Row.names
rpm4x$Row.names=NULL
rownames(rpm4x) <- sapply(strsplit(rownames(rpm4x)," "),"[[",2)
rpm4x <- rpm4x[,rev(order(colnames(rpm4x)))]

heatmap.2(as.matrix(rpm1x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)

heatmap.2(as.matrix(rpm4x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)

pdf("heat3.pdf")

heatmap.2(as.matrix(rpm1x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)

heatmap.2(as.matrix(rpm4x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)
dev.off()
## png 
##   2

Euler diagrams of DEGs

All DEGs

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("C1 up"=d1up,"C1 dn"=d1dn, "C2 up" = d2up, "C2 dn"=d2dn, "C3 up"=d3up, "C3 dn"=d3dn, "C4 up"=d4up, "C4 dn"=d4dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")

pdf("euler1.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()
## png 
##   2

Now with just the DEGs from contrasts 1 and 4.

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("KO up"=d1up,"KO dn"=d1dn, "drug up"=d4up, "drug dn"=d4dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")

pdf("euler2.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()
## png 
##   2

Now with just the DEGs from contrasts 1 and 5.

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("KO up"=d1up,"KO dn"=d1dn, "drugtrt up"=d5up, "drugtrt dn"=d5dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")

pdf("euler3.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()
## png 
##   2

Single contrast pathway analysis with mitch

Firstly need to conduct mitch enrichment analysis for each contrast separately.

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

# gene table
gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)

m1 <- mitch_import(dge1, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 25258
## Note: no. genes in output = 25203
## Note: estimated proportion of input genes in output = 0.998
mres1 <- mitch_calc(m1, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres1$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 1") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 1
set setSize pANOVA s.dist p.adjustANOVA
1008 Receptor Mediated Mitophagy 11 0.0000080 -0.7772236 0.0000639
720 Mucopolysaccharidoses 11 0.0000117 0.7629839 0.0000908
670 Membrane binding and targetting of GAG proteins 14 0.0000402 -0.6337233 0.0002665
1264 Synthesis And Processing Of GAG, GAGPOL Polyproteins 14 0.0000402 -0.6337233 0.0002665
1411 Unwinding of DNA 12 0.0002829 0.6051698 0.0014768
93 Assembly Of The HIV Virion 16 0.0000324 -0.6000020 0.0002240
121 Biotin transport and metabolism 11 0.0011127 0.5676477 0.0047067
1276 Synthesis of active ubiquitin: roles of E1 and E2 enzymes 30 0.0000001 -0.5630345 0.0000016
1115 SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs 11 0.0018488 -0.5420841 0.0073752
1166 Signaling by Activin 15 0.0003295 -0.5354190 0.0016612
710 Mitophagy 28 0.0000011 -0.5309604 0.0000116
1086 Response of EIF2AK1 (HRI) to heme deficiency 15 0.0005010 -0.5189191 0.0023789
55 Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S 58 0.0000000 -0.5187746 0.0000000
538 Hyaluronan uptake and degradation 10 0.0047720 0.5153177 0.0167247
572 Insertion of tail-anchored proteins into the endoplasmic reticulum membrane 22 0.0000312 -0.5127387 0.0002190
1376 Translation initiation complex formation 57 0.0000000 -0.5104645 0.0000000
644 Lysosphingolipid and LPA receptors 12 0.0024056 -0.5059280 0.0092214
1462 rRNA modification in the nucleus and cytosol 60 0.0000000 -0.5021477 0.0000000
517 HIV elongation arrest and recovery 32 0.0000009 -0.5012440 0.0000096
848 Pausing and recovery of HIV elongation 32 0.0000009 -0.5012440 0.0000096
mitch_barplot(mres1)

mitch_barplot2(mres1)

mitch_plots(mres1,outfile="mitch1.pdf")
## png 
##   2
m1top <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
m1up <- subset(m1top,s.dist>0)$set
m1dn <- subset(m1top,s.dist<0)$set
#mitch_report(mres1,outfile="mitch1.html",overwrite=TRUE)
write.table(mres1$enrichment_result,file="mitch1.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m1top_up <- head(subset(m1top,s.dist>0),10)[,"s.dist"]
names(m1top_up) <- head(subset(m1top,s.dist>0),10)[,"set"]
m1top_dn <- head(subset(m1top,s.dist<0),10)[,"s.dist"]
names(m1top_dn) <- head(subset(m1top,s.dist<0),10)[,"set"]
m1top_updn <- c(m1top_up,m1top_dn)
m1top_updn <- m1top_updn[order(m1top_updn)]

pdf("f2b.pdf")
par(mar=c(5,25,3,1))
barplot(m1top_updn,horiz=TRUE,las=1,col="darkgray",xlab="Enrichment score",cex.names=0.8,xlim=c(-1,1))
grid()
dev.off()
## png 
##   2
par(mar=c(5.1, 4.1, 4.1, 2.1))

# Reduction of cytosolic Ca++ levels
gs <- genesets[[which(names(genesets) == "Reduction of cytosolic Ca++ levels")]]
dge1 <- dge1[which(!is.na(dge1$pvalue )),]
dge1$gene <- sapply(strsplit(rownames(dge1)," "),"[[",2)
dge1gs <- dge1[which(dge1$gene %in% gs),]
# volcano
plot(dge1$log2FoldChange,-log10(dge1$pvalue+1e-312),pch=19,col="darkgray")
points(dge1gs$log2FoldChange,-log10(dge1gs$pvalue+1e-312),pch=19,col="red")

# rug
m1rank <- mres1$ranked_profile
rug1 <- m1rank[which(rownames(m1rank) %in% gs),]
names(rug1) <- rownames(m1rank)[which(rownames(m1rank) %in% gs)]
MAX=max(m1rank)
MIN=min(m1rank)
par(mfrow=c(2,1))
bs <- beeswarm(rug1,horizontal=TRUE,cex=5,
  xlim=c(MIN,MAX),pch=19,col="darkgray",
  xlab="gene rank")
text(bs[,2:1],labels=names(rug1))
mtext("Reduction of cytosolic Ca++ levels")
par(mfrow=c(1,1))

#geneset volcano
sig1 <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
plot(mres1$enrichment_result$s.dist,-log10(mres1$enrichment_result$pANOVA),
  col="darkgray",pch=19,xlab="s distance",ylab="-log10 ANOVA p-value")
mtext("Gene set enrichment")
points(sig1$s.dist,-log10(sig1$pANOVA), col="red", pch=19 )
which(sig1$set=="Reduction of cytosolic Ca++ levels")
## [1] 70
sig1b <- sig1[which(sig1$set=="Reduction of cytosolic Ca++ levels"),]
points(sig1b$s.dist,-log10(sig1b$pANOVA),cex=2,lwd=2)
text(sig1b$s.dist,(-log10(sig1b$pANOVA)-2),labels="Reduction of cytosolic Ca++ levels")

m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 27340
## Note: no. genes in output = 27278
## Note: estimated proportion of input genes in output = 0.998
mres2 <- mitch_calc(m2, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres2$enrichment_result,20) %>% 
  kbl(caption = "Top gene pathway differences in contrast 2") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 2
set setSize pANOVA s.dist p.adjustANOVA
1427 Unwinding of DNA 12 0.0000101 0.7359532 0.0000679
640 Leading Strand Synthesis 14 0.0000029 0.7219567 0.0000217
893 Polymerase switching 14 0.0000029 0.7219567 0.0000217
1102 Response to metal ions 10 0.0000778 0.7214390 0.0004521
458 G1/S-Specific Transcription 29 0.0000000 0.6885884 0.0000000
216 Condensation of Prometaphase Chromosomes 11 0.0000792 0.6871343 0.0004567
1345 Tetrahydrobiopterin (BH4) synthesis, recycling, salvage and regulation 10 0.0002313 0.6723265 0.0011212
423 Formation of ATP by chemiosmotic coupling 18 0.0000009 0.6681544 0.0000075
1130 SLBP independent Processing of Histone Pre-mRNAs 10 0.0003221 0.6567625 0.0015025
1129 SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs 11 0.0001693 0.6547542 0.0008750
260 DNA strand elongation 32 0.0000000 0.6539698 0.0000000
1397 Translesion synthesis by REV1 16 0.0000094 0.6395441 0.0000640
1396 Translesion synthesis by POLK 17 0.0000057 0.6353722 0.0000407
422 Folding of actin by CCT/TriC 10 0.0005533 0.6306000 0.0024504
1118 SCF(Skp2)-mediated degradation of p27/p21 60 0.0000000 0.6273814 0.0000000
1351 The role of GTSE1 in G2/M progression after G2 checkpoint 58 0.0000000 0.6165776 0.0000000
686 Metabolism of cofactors 19 0.0000036 0.6139295 0.0000265
1050 Regulation of PTEN mRNA translation 12 0.0002439 -0.6115125 0.0011746
1424 Ubiquitin-dependent degradation of Cyclin D 50 0.0000000 0.6083723 0.0000000
636 Lagging Strand Synthesis 20 0.0000029 0.6041896 0.0000217
mitch_barplot(mres2)

mitch_barplot2(mres2)

mitch_plots(mres2,outfile="mitch2.pdf")
## png 
##   2
m2top <- subset(mres2$enrichment_result,p.adjustANOVA<0.05)
m2up <- subset(m2top,s.dist>0)$set
m2dn <- subset(m2top,s.dist<0)$set
#mitch_report(mres2,outfile="mitch2.html",overwrite=TRUE)
write.table(mres2$enrichment_result,file="mitch2.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m3 <- mitch_import(dge3, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 26801
## Note: no. genes in output = 26741
## Note: estimated proportion of input genes in output = 0.998
mres3 <- mitch_calc(m3, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres3$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 3") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 3
set setSize pANOVA s.dist p.adjustANOVA
137 CLEC7A (Dectin-1) induces NFAT activation 11 1.70e-06 -0.8325273 0.0000081
375 Establishment of Sister Chromatid Cohesion 11 2.30e-06 -0.8231201 0.0000102
207 Cohesin Loading onto Chromatin 10 7.30e-06 -0.8190042 0.0000294
360 ERKs are inactivated 13 4.00e-07 -0.8096839 0.0000023
1108 S33 mutants of beta-catenin aren’t phosphorylated 15 1.00e-07 -0.8083414 0.0000004
1109 S37 mutants of beta-catenin aren’t phosphorylated 15 1.00e-07 -0.8083414 0.0000004
1110 S45 mutants of beta-catenin aren’t phosphorylated 15 1.00e-07 -0.8083414 0.0000004
1180 Signaling by CTNNB1 phospho-site mutants 15 1.00e-07 -0.8083414 0.0000004
1207 Signaling by GSK3beta mutants 15 1.00e-07 -0.8083414 0.0000004
1298 T41 mutants of beta-catenin aren’t phosphorylated 15 1.00e-07 -0.8083414 0.0000004
506 Golgi Cisternae Pericentriolar Stack Reorganization 14 2.00e-07 -0.8059533 0.0000011
11 APC truncation mutants have impaired AXIN binding 14 2.00e-07 -0.7983954 0.0000013
24 AXIN missense mutants destabilize the destruction complex 14 2.00e-07 -0.7983954 0.0000013
1173 Signaling by AMER1 mutants 14 2.00e-07 -0.7983954 0.0000013
1174 Signaling by APC mutants 14 2.00e-07 -0.7983954 0.0000013
1175 Signaling by AXIN mutants 14 2.00e-07 -0.7983954 0.0000013
1414 Truncations of AMER1 destabilize the destruction complex 14 2.00e-07 -0.7983954 0.0000013
1209 Signaling by Hippo 20 0.00e+00 -0.7935032 0.0000000
664 MET activates RAP1 and RAC1 11 5.40e-06 -0.7917695 0.0000227
659 MAPK3 (ERK1) activation 10 2.78e-05 -0.7651341 0.0001004
mitch_barplot(mres3)

mitch_barplot2(mres3)

mitch_plots(mres3,outfile="mitch3.pdf")
## png 
##   2
m3top <- subset(mres3$enrichment_result,p.adjustANOVA<0.05)
m3up <- subset(m3top,s.dist>0)$set
m3dn <- subset(m3top,s.dist<0)$set
#mitch_report(mres3,outfile="mitch3.html",overwrite=TRUE)
write.table(mres3$enrichment_result,file="mitch3.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m4 <- mitch_import(dge4, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 26222
## Note: no. genes in output = 26163
## Note: estimated proportion of input genes in output = 0.998
mres4 <- mitch_calc(m4, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres4$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 4") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 4
set setSize pANOVA s.dist p.adjustANOVA
134 CLEC7A (Dectin-1) induces NFAT activation 11 1.0e-06 -0.8526516 6.10e-06
203 Cohesin Loading onto Chromatin 10 3.0e-06 -0.8523305 1.63e-05
630 Laminin interactions 23 0.0e+00 -0.7933735 0.00e+00
1103 S33 mutants of beta-catenin aren’t phosphorylated 15 2.0e-07 -0.7825965 1.20e-06
1104 S37 mutants of beta-catenin aren’t phosphorylated 15 2.0e-07 -0.7825965 1.20e-06
1105 S45 mutants of beta-catenin aren’t phosphorylated 15 2.0e-07 -0.7825965 1.20e-06
1175 Signaling by CTNNB1 phospho-site mutants 15 2.0e-07 -0.7825965 1.20e-06
1202 Signaling by GSK3beta mutants 15 2.0e-07 -0.7825965 1.20e-06
1293 T41 mutants of beta-catenin aren’t phosphorylated 15 2.0e-07 -0.7825965 1.20e-06
1204 Signaling by Hippo 20 0.0e+00 -0.7785564 0.00e+00
1268 Syndecan interactions 20 0.0e+00 -0.7770952 0.00e+00
658 MET activates RAP1 and RAC1 11 8.9e-06 -0.7734156 4.31e-05
11 APC truncation mutants have impaired AXIN binding 14 6.0e-07 -0.7679398 4.20e-06
24 AXIN missense mutants destabilize the destruction complex 14 6.0e-07 -0.7679398 4.20e-06
1168 Signaling by AMER1 mutants 14 6.0e-07 -0.7679398 4.20e-06
1169 Signaling by APC mutants 14 6.0e-07 -0.7679398 4.20e-06
1170 Signaling by AXIN mutants 14 6.0e-07 -0.7679398 4.20e-06
1410 Truncations of AMER1 destabilize the destruction complex 14 6.0e-07 -0.7679398 4.20e-06
357 ERKs are inactivated 13 1.7e-06 -0.7668245 9.70e-06
657 MET activates PTK2 signaling 18 0.0e+00 -0.7616753 2.00e-07
mitch_barplot(mres4)

mitch_barplot2(mres4)

mitch_plots(mres4,outfile="mitch4.pdf")
## png 
##   2
m4top <- subset(mres4$enrichment_result,p.adjustANOVA<0.05)
m4up <- subset(m4top,s.dist>0)$set
m4dn <- subset(m4top,s.dist<0)$set
#mitch_report(mres4,outfile="mitch4.html",overwrite=TRUE)
write.table(mres4$enrichment_result,file="mitch4.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m5 <- mitch_import(dge5, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 26081
## Note: no. genes in output = 26025
## Note: estimated proportion of input genes in output = 0.998
mres5 <- mitch_calc(m5, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres5$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 5") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 5
set setSize pANOVA s.dist p.adjustANOVA
136 CLEC7A (Dectin-1) induces NFAT activation 11 1.80e-06 -0.8306926 0.0000126
206 Cohesin Loading onto Chromatin 10 1.05e-05 -0.8044820 0.0000609
660 MET activates RAP1 and RAC1 11 6.10e-06 -0.7873173 0.0000376
11 APC truncation mutants have impaired AXIN binding 14 4.00e-07 -0.7808455 0.0000033
24 AXIN missense mutants destabilize the destruction complex 14 4.00e-07 -0.7808455 0.0000033
1171 Signaling by AMER1 mutants 14 4.00e-07 -0.7808455 0.0000033
1172 Signaling by APC mutants 14 4.00e-07 -0.7808455 0.0000033
1173 Signaling by AXIN mutants 14 4.00e-07 -0.7808455 0.0000033
1412 Truncations of AMER1 destabilize the destruction complex 14 4.00e-07 -0.7808455 0.0000033
1106 S33 mutants of beta-catenin aren’t phosphorylated 15 2.00e-07 -0.7803050 0.0000014
1107 S37 mutants of beta-catenin aren’t phosphorylated 15 2.00e-07 -0.7803050 0.0000014
1108 S45 mutants of beta-catenin aren’t phosphorylated 15 2.00e-07 -0.7803050 0.0000014
1178 Signaling by CTNNB1 phospho-site mutants 15 2.00e-07 -0.7803050 0.0000014
1205 Signaling by GSK3beta mutants 15 2.00e-07 -0.7803050 0.0000014
1296 T41 mutants of beta-catenin aren’t phosphorylated 15 2.00e-07 -0.7803050 0.0000014
1207 Signaling by Hippo 20 0.00e+00 -0.7490483 0.0000001
373 Establishment of Sister Chromatid Cohesion 11 1.81e-05 -0.7464442 0.0000973
113 Beta-catenin phosphorylation cascade 17 1.00e-07 -0.7403288 0.0000012
358 ERKs are inactivated 13 4.10e-06 -0.7373224 0.0000266
605 Interleukin-6 signaling 11 3.09e-05 -0.7253577 0.0001591
mitch_barplot(mres5)

mitch_barplot2(mres5)

mitch_plots(mres5,outfile="mitch5.pdf")
## png 
##   2
m5top <- subset(mres5$enrichment_result,p.adjustANOVA<0.05)
m5up <- subset(m5top,s.dist>0)$set
m5dn <- subset(m5top,s.dist<0)$set
#mitch_report(mres5,outfile="mitch5.html",overwrite=TRUE)
write.table(mres5$enrichment_result,file="mitch5.tsv",quote=FALSE,sep="\t",row.names=FALSE)

pdf("mitch_barplots.pdf")
mitch_barplot(mres1)
mitch_barplot2(mres1)
mitch_barplot(mres2)
mitch_barplot2(mres2)
mitch_barplot(mres3)
mitch_barplot2(mres3)
mitch_barplot(mres4)
mitch_barplot2(mres4)
mitch_barplot(mres5)
mitch_barplot2(mres5)
dev.off()
## png 
##   2

Make some ridge plots.

myres <- list(mres1,mres2,mres3,mres4) 
z <- lapply(myres, function(mres) {
  sets <- head(mres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))
 
 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")

 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Top 10 genesets",
    xlab="differential gene rank",
    rectCol="gray")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})

pdf("ridge1.pdf")
z <- lapply(myres, function(mres) {
  sets <- head(mres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))

 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")

 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Top 10 genesets",
    xlab="differential gene rank",
    rectCol="gray")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})
dev.off()
## png 
##   2

Make some heatmaps

sets <- head(mres1$enrichment_result$set,10)

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
    cexCol=0.9, main=paste(set) )
})

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
    cexCol=0.9, main=paste(set) )
})

pdf("peheat1.pdf")
z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
    cexCol=0.9, main=paste(set) )
})

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
    cexCol=0.9, main=paste(set) )
})
dev.off()
## png 
##   2

Now an Euler diagram of differentially regulated gene sets from contrasts 1 and 4.

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("KO up"=m1up,"KO dn"=m1dn, "drug up"=m4up, "drug dn"=m4dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="Reactome pathways")

pdf("gseuler1.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="Reactome pathways")
dev.off()
## png 
##   2

Multi contrast pathway analysis with mitch

I’m going to focus on the two contrasts which I think are the most important WRT mito disease; #1 and #4. The effect of the KO, and the effect of the drug when the gene is knocked out.

mm <- list("KO"=dge1,"drug"=dge4)

m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 25735
## Note: no. genes in output = 24468
## Note: estimated proportion of input genes in output = 0.951
mres <- mitch_calc(m, genesets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
head(mres$enrichment_result,20) %>% 
kbl(caption = "Top gene pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 1 and 4
set setSize pMANOVA s.KO s.drug p.KO p.drug s.dist SD p.adjustMANOVA
133 CLEC7A (Dectin-1) induces NFAT activation 11 3.40e-06 -0.3079877 -0.8424396 0.0769234 0.0000013 0.8969732 0.3779146 1.31e-05
202 Cohesin Loading onto Chromatin 10 1.94e-05 -0.1962875 -0.8420967 0.2824423 0.0000040 0.8646708 0.4566560 6.19e-05
650 MET activates RAP1 and RAC1 11 1.27e-05 -0.3986700 -0.7577121 0.0220401 0.0000135 0.8561923 0.2538811 4.28e-05
1256 Syndecan interactions 19 0.00e+00 0.2156734 -0.8161027 0.1036039 0.0000000 0.8441200 0.7295759 0.00e+00
844 Peptide chain elongation 87 0.00e+00 -0.4225294 0.7146949 0.0000000 0.0000000 0.8302529 0.8041390 0.00e+00
1415 Viral mRNA Translation 87 0.00e+00 -0.4205258 0.7155030 0.0000000 0.0000000 0.8299316 0.8032936 0.00e+00
11 APC truncation mutants have impaired AXIN binding 14 1.30e-06 -0.3477374 -0.7518548 0.0242621 0.0000011 0.8283761 0.2857541 5.70e-06
24 AXIN missense mutants destabilize the destruction complex 14 1.30e-06 -0.3477374 -0.7518548 0.0242621 0.0000011 0.8283761 0.2857541 5.70e-06
1156 Signaling by AMER1 mutants 14 1.30e-06 -0.3477374 -0.7518548 0.0242621 0.0000011 0.8283761 0.2857541 5.70e-06
1157 Signaling by APC mutants 14 1.30e-06 -0.3477374 -0.7518548 0.0242621 0.0000011 0.8283761 0.2857541 5.70e-06
1158 Signaling by AXIN mutants 14 1.30e-06 -0.3477374 -0.7518548 0.0242621 0.0000011 0.8283761 0.2857541 5.70e-06
1396 Truncations of AMER1 destabilize the destruction complex 14 1.30e-06 -0.3477374 -0.7518548 0.0242621 0.0000011 0.8283761 0.2857541 5.70e-06
622 Laminin interactions 23 0.00e+00 0.2684980 -0.7821160 0.0257964 0.0000000 0.8269200 0.7428963 0.00e+00
351 ERKs are inactivated 13 4.30e-06 -0.3333627 -0.7506629 0.0374081 0.0000028 0.8213559 0.2950758 1.61e-05
1001 Receptor Mediated Mitophagy 11 2.63e-05 -0.7759630 -0.2626019 0.0000083 0.1315157 0.8191937 0.3630011 8.10e-05
1091 S33 mutants of beta-catenin aren’t phosphorylated 15 8.00e-07 -0.2604534 -0.7675268 0.0807061 0.0000003 0.8105143 0.3585551 3.50e-06
1092 S37 mutants of beta-catenin aren’t phosphorylated 15 8.00e-07 -0.2604534 -0.7675268 0.0807061 0.0000003 0.8105143 0.3585551 3.50e-06
1093 S45 mutants of beta-catenin aren’t phosphorylated 15 8.00e-07 -0.2604534 -0.7675268 0.0807061 0.0000003 0.8105143 0.3585551 3.50e-06
1163 Signaling by CTNNB1 phospho-site mutants 15 8.00e-07 -0.2604534 -0.7675268 0.0807061 0.0000003 0.8105143 0.3585551 3.50e-06
1190 Signaling by GSK3beta mutants 15 8.00e-07 -0.2604534 -0.7675268 0.0807061 0.0000003 0.8105143 0.3585551 3.50e-06
#mitch_report(mres,outfile="mitchmulti_effect.html",overwrite=TRUE)
mitch_plots(mres, outfile="mitchmulti_effect.pdf")
## png 
##   2
write.table(mres$enrichment_result,file="mitch_multi_effect.tsv",quote=FALSE,sep="\t",row.names=FALSE)
n=40
mx <- as.matrix(mres$enrichment_result[1:n,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set[1:n]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,15), cexRow=0.7, 
    cexCol=0.9, main=paste("Top ranked Reactomes") )

Mitch can also be used to search for discordant pathways. Gene sets are prioritised by standard deviation.

mm <- list("KO"=dge1,"drug"=dge4)

m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 25735
## Note: no. genes in output = 24468
## Note: estimated proportion of input genes in output = 0.951
mres <- mitch_calc(m, genesets, priority="SD",resrows=52)
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
head(mres$enrichment_result,20) %>% kbl(caption = "Top gene pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
Top gene pathway differences in contrast 1 and 4
set setSize pMANOVA s.KO s.drug p.KO p.drug s.dist SD p.adjustMANOVA
844 Peptide chain elongation 87 0.00e+00 -0.4225294 0.7146949 0.0000000 0.0000000 0.8302529 0.8041390 0.00e+00
1415 Viral mRNA Translation 87 0.00e+00 -0.4205258 0.7155030 0.0000000 0.0000000 0.8299316 0.8032936 0.00e+00
419 Formation of a pool of free 40S subunits 99 0.00e+00 -0.4445054 0.6516679 0.0000000 0.0000000 0.7888321 0.7751116 0.00e+00
371 Eukaryotic Translation Termination 91 0.00e+00 -0.4021351 0.6843304 0.0000000 0.0000000 0.7937385 0.7682471 0.00e+00
369 Eukaryotic Translation Elongation 92 0.00e+00 -0.4121062 0.6735777 0.0000000 0.0000000 0.7896445 0.7676944 0.00e+00
268 Defective EXT1 causes exostoses 1, TRPS2 and CHDS 13 1.40e-06 0.3576333 -0.7140808 0.0255604 0.0000082 0.7986320 0.7578163 5.90e-06
269 Defective EXT2 causes exostoses 2 13 1.40e-06 0.3576333 -0.7140808 0.0255604 0.0000082 0.7986320 0.7578163 5.90e-06
622 Laminin interactions 23 0.00e+00 0.2684980 -0.7821160 0.0257964 0.0000000 0.8269200 0.7428963 0.00e+00
1080 Response of EIF2AK4 (GCN2) to amino acid deficiency 99 0.00e+00 -0.4230715 0.6252044 0.0000000 0.0000000 0.7548973 0.7412430 0.00e+00
777 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC) 93 0.00e+00 -0.4239127 0.6173949 0.0000000 0.0000000 0.7489181 0.7363156 0.00e+00
1134 Selenocysteine synthesis 91 0.00e+00 -0.3553174 0.6805934 0.0000000 0.0000000 0.7677615 0.7324995 0.00e+00
425 Formation of the ternary complex, and subsequently, the 43S complex 50 0.00e+00 -0.5008404 0.5323941 0.0000000 0.0000000 0.7309477 0.7306071 0.00e+00
1256 Syndecan interactions 19 0.00e+00 0.2156734 -0.8161027 0.1036039 0.0000000 0.8441200 0.7295759 0.00e+00
713 Mucopolysaccharidoses 11 1.36e-05 0.7558981 -0.2580819 0.0000141 0.1382892 0.7987416 0.7169921 4.58e-05
265 Defective B4GALT7 causes EDS, progeroid type 18 1.00e-07 0.3708521 -0.6385867 0.0064446 0.0000027 0.7384607 0.7137810 6.00e-07
617 L13a-mediated translational silencing of Ceruloplasmin expression 109 0.00e+00 -0.4660904 0.5255157 0.0000000 0.0000000 0.7024293 0.7011714 0.00e+00
460 GTP hydrolysis and joining of the 60S ribosomal subunit 110 0.00e+00 -0.4419276 0.5303070 0.0000000 0.0000000 0.6903083 0.6874737 0.00e+00
264 Defective B3GAT3 causes JDSSDHD 18 9.00e-07 0.3764054 -0.5746058 0.0056912 0.0000243 0.6869154 0.6724664 4.00e-06
806 Other semaphorin interactions 15 8.70e-06 0.3594351 -0.5881460 0.0159331 0.0000799 0.6892817 0.6700410 3.06e-05
262 Defective B3GALT6 causes EDSP2 and SEMDJL1 18 1.30e-06 0.3746285 -0.5643717 0.0059232 0.0000338 0.6773935 0.6639734 5.60e-06
#mitch_report(mres,outfile="mitchmulti_discord.html",overwrite=TRUE)
mitch_plots(mres, outfile="mitchmulti_discord.pdf")
## png 
##   2
write.table(mres$enrichment_result,file="mitch_multi_discord.tsv",quote=FALSE,sep="\t",row.names=FALSE)

n=50

mx <- as.matrix(mres$enrichment_result[1:n,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set[1:n]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,25), cexRow=0.6,
    cexCol=0.9, main=paste("Top ranked Reactomes") )

pdf("mitchmulti_discord_heat.pdf")
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,25), cexRow=0.6,
    cexCol=0.9, main=paste("Top ranked Reactomes") )
dev.off()
## png 
##   2

Custom gene sets

custom_set_names <- readLines("custom_pathway_list.txt")
custom_sets <- genesets[which(names(genesets) %in% custom_set_names)]

m1 <- mitch_import(dge1, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 25248
## Note: no. genes in output = 25194
## Note: estimated proportion of input genes in output = 0.998
mres1 <- mitch_calc(m1, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres1$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 1") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 1
set setSize pANOVA s.dist p.adjustANOVA
19 Receptor Mediated Mitophagy 11 0.0000080 -0.7772523 0.0000282
12 Mucopolysaccharidoses 11 0.0000117 0.7628992 0.0000352
11 Mitophagy 28 0.0000011 -0.5309970 0.0000069
14 Nucleotide biosynthesis 12 0.0038611 -0.4816469 0.0059087
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.0068384 -0.4331868 0.0086379
20 Reduction of cytosolic Ca++ levels 12 0.0099164 0.4298838 0.0113330
24 Stabilization of p53 53 0.0000001 -0.4224979 0.0000008
3 Diseases of carbohydrate metabolism 30 0.0021372 0.3238356 0.0036637
1 Cellular response to starvation 150 0.0000000 -0.3190752 0.0000000
16 PTEN Regulation 140 0.0000000 -0.2798909 0.0000001
21 Resolution of D-Loop Structures 33 0.0141289 0.2467925 0.0154133
4 Gene Silencing by RNA 97 0.0000403 -0.2411329 0.0000974
18 Pyruvate metabolism and Citric Acid (TCA) cycle 52 0.0039391 -0.2310553 0.0059087
23 Signaling by the B Cell Receptor (BCR) 106 0.0000406 -0.2306049 0.0000974
7 Mitochondrial biogenesis 91 0.0003021 -0.2190759 0.0006592
9 Mitochondrial protein import 64 0.0059436 -0.1987751 0.0083909
15 Oxidative Stress Induced Senescence 82 0.0092365 -0.1662401 0.0110838
22 Respiratory electron transport 103 0.0064821 -0.1551823 0.0086379
17 Phospholipid metabolism 186 0.0007611 -0.1430381 0.0014051
10 Mitochondrial translation 96 0.0157290 -0.1425844 0.0164129
m1top <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
m1up <- subset(m1top,s.dist>0)$set
m1dn <- subset(m1top,s.dist<0)$set
#mitch_report(mres1,outfile="mitch_mito1.html",overwrite=TRUE)
mitch_plots(mres1, outfile="mitch_mito1.pdf")
## png 
##   2
write.table(mres1$enrichment_result,file="mitch_mito1.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 27340
## Note: no. genes in output = 27278
## Note: estimated proportion of input genes in output = 0.998
mres2 <- mitch_calc(m2, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres2$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 2") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 2
set setSize pANOVA s.dist p.adjustANOVA
10 Mitochondrial translation 96 0.0000000 0.5392072 0.0000000
24 Stabilization of p53 53 0.0000000 0.5020005 0.0000000
9 Mitochondrial protein import 64 0.0000001 0.3891552 0.0000002
3 Diseases of carbohydrate metabolism 31 0.0002686 0.3780363 0.0005860
20 Reduction of cytosolic Ca++ levels 12 0.0289978 0.3640003 0.0463964
22 Respiratory electron transport 103 0.0000000 0.3397608 0.0000000
23 Signaling by the B Cell Receptor (BCR) 106 0.0000000 0.3265140 0.0000000
2 Complex I biogenesis 57 0.0000465 0.3117478 0.0001239
11 Mitophagy 28 0.0064809 0.2971848 0.0111100
12 Mucopolysaccharidoses 11 0.1441237 0.2543167 0.1921650
18 Pyruvate metabolism and Citric Acid (TCA) cycle 53 0.0023525 0.2414478 0.0047049
21 Resolution of D-Loop Structures 33 0.0359199 0.2109689 0.0538799
6 Metabolism of amino acids and derivatives 344 0.0000000 0.1954042 0.0000000
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.2345154 0.1904132 0.2962300
1 Cellular response to starvation 150 0.0002005 0.1757549 0.0004812
5 Metabolism 1909 0.0000000 0.1711056 0.0000000
16 PTEN Regulation 140 0.0036710 0.1421165 0.0067773
13 Nervous system development 527 0.0000001 0.1334218 0.0000004
7 Mitochondrial biogenesis 91 0.1013510 0.0993247 0.1430838
14 Nucleotide biosynthesis 12 0.6928407 0.0658512 0.7918179
m2top <- subset(mres2$enrichment_result,p.adjustANOVA<0.05)
m2up <- subset(m2top,s.dist>0)$set
m2dn <- subset(m2top,s.dist<0)$set
#mitch_report(mres2,outfile="mitch_mito2.html",overwrite=TRUE)
mitch_plots(mres2, outfile="mitch_mito2.pdf")
## png 
##   2
write.table(mres2$enrichment_result,file="mitch_mito2.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m3 <- mitch_import(dge3, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 26801
## Note: no. genes in output = 26741
## Note: estimated proportion of input genes in output = 0.998
mres3 <- mitch_calc(m3, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres3$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 3") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 3
set setSize pANOVA s.dist p.adjustANOVA
19 Receptor Mediated Mitophagy 11 0.0030258 -0.5162466 0.0051871
2 Complex I biogenesis 57 0.0000000 0.5011361 0.0000000
14 Nucleotide biosynthesis 12 0.0028637 -0.4970943 0.0051871
11 Mitophagy 28 0.0000387 -0.4491607 0.0001033
20 Reduction of cytosolic Ca++ levels 12 0.0200273 -0.3877249 0.0320436
18 Pyruvate metabolism and Citric Acid (TCA) cycle 53 0.0000029 -0.3714948 0.0000086
23 Signaling by the B Cell Receptor (BCR) 105 0.0000000 -0.3275703 0.0000000
16 PTEN Regulation 140 0.0000000 -0.2980253 0.0000000
22 Respiratory electron transport 103 0.0000008 0.2808048 0.0000029
17 Phospholipid metabolism 191 0.0000000 -0.2642258 0.0000000
7 Mitochondrial biogenesis 91 0.0002971 -0.2193167 0.0007131
13 Nervous system development 529 0.0000000 -0.2054227 0.0000000
4 Gene Silencing by RNA 98 0.0007249 -0.1974754 0.0015816
21 Resolution of D-Loop Structures 33 0.0625124 0.1873210 0.0789631
24 Stabilization of p53 53 0.0360459 -0.1664221 0.0515692
1 Cellular response to starvation 150 0.0017126 0.1482431 0.0034251
15 Oxidative Stress Induced Senescence 83 0.0365282 -0.1327186 0.0515692
5 Metabolism 1899 0.0000000 -0.1198167 0.0000000
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.4675443 -0.1163631 0.5100483
10 Mitochondrial translation 96 0.0966246 0.0980797 0.1159496
m3top <- subset(mres3$enrichment_result,p.adjustANOVA<0.05)
m3up <- subset(m3top,s.dist>0)$set
m3dn <- subset(m3top,s.dist<0)$set
#mitch_report(mres3,outfile="mitch_mito3.html",overwrite=TRUE)
mitch_plots(mres3, outfile="mitch_mito3.pdf")
## png 
##   2
write.table(mres3$enrichment_result,file="mitch_mito3.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m4 <- mitch_import(dge4, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 26222
## Note: no. genes in output = 26163
## Note: estimated proportion of input genes in output = 0.998
mres4 <- mitch_calc(m4, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres4$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 4") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 4
set setSize pANOVA s.dist p.adjustANOVA
2 Complex I biogenesis 57 0.0000000 0.7304162 0.0000000
22 Respiratory electron transport 103 0.0000000 0.5865948 0.0000000
20 Reduction of cytosolic Ca++ levels 12 0.0020344 -0.5142888 0.0048827
10 Mitochondrial translation 96 0.0000000 0.5132095 0.0000000
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.0042094 0.4583909 0.0091842
1 Cellular response to starvation 150 0.0000000 0.4000246 0.0000000
14 Nucleotide biosynthesis 12 0.0423533 -0.3384192 0.0610224
9 Mitochondrial protein import 64 0.0000109 0.3177325 0.0000328
24 Stabilization of p53 53 0.0003285 0.2851073 0.0008761
17 Phospholipid metabolism 192 0.0000000 -0.2809981 0.0000000
19 Receptor Mediated Mitophagy 11 0.1130800 -0.2758906 0.1456370
12 Mucopolysaccharidoses 11 0.1195967 -0.2710100 0.1456370
21 Resolution of D-Loop Structures 33 0.0161355 0.2419464 0.0270932
6 Metabolism of amino acids and derivatives 341 0.0000000 0.2246703 0.0000000
18 Pyruvate metabolism and Citric Acid (TCA) cycle 52 0.0153522 -0.1942638 0.0270932
13 Nervous system development 524 0.0000000 -0.1596025 0.0000000
16 PTEN Regulation 140 0.0169332 -0.1168422 0.0270932
23 Signaling by the B Cell Receptor (BCR) 106 0.0432242 -0.1135935 0.0610224
7 Mitochondrial biogenesis 91 0.1213642 -0.0939200 0.1456370
4 Gene Silencing by RNA 97 0.2056272 -0.0743359 0.2350026
m4top <- subset(mres4$enrichment_result,p.adjustANOVA<0.05)
m4up <- subset(m4top,s.dist>0)$set
m4dn <- subset(m4top,s.dist<0)$set
#mitch_report(mres4,outfile="mitch_mito4.html",overwrite=TRUE)
mitch_plots(mres4, outfile="mitch_mito4.pdf")
## png 
##   2
write.table(mres4$enrichment_result,file="mitch_mito4.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m5 <- mitch_import(dge5, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 26081
## Note: no. genes in output = 26025
## Note: estimated proportion of input genes in output = 0.998
mres5 <- mitch_calc(m5, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(mres5$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 5") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 5
set setSize pANOVA s.dist p.adjustANOVA
2 Complex I biogenesis 57 0.0000000 0.5977708 0.0000000
19 Receptor Mediated Mitophagy 11 0.0013394 -0.5584266 0.0024786
14 Nucleotide biosynthesis 12 0.0032789 -0.4901460 0.0052463
22 Respiratory electron transport 103 0.0000000 0.4299096 0.0000000
21 Resolution of D-Loop Structures 33 0.0004749 0.3514112 0.0012664
10 Mitochondrial translation 96 0.0000000 0.3425773 0.0000000
11 Mitophagy 28 0.0062915 -0.2982597 0.0088821
20 Reduction of cytosolic Ca++ levels 12 0.0797813 -0.2920655 0.1007764
17 Phospholipid metabolism 190 0.0000000 -0.2807057 0.0000000
18 Pyruvate metabolism and Citric Acid (TCA) cycle 52 0.0013426 -0.2569873 0.0024786
1 Cellular response to starvation 150 0.0000001 0.2494691 0.0000006
16 PTEN Regulation 140 0.0000155 -0.2114126 0.0000465
7 Mitochondrial biogenesis 91 0.0008071 -0.2031149 0.0019371
4 Gene Silencing by RNA 97 0.0008931 -0.1951037 0.0019486
12 Mucopolysaccharidoses 11 0.2998946 0.1805042 0.3271577
23 Signaling by the B Cell Receptor (BCR) 105 0.0025170 -0.1705680 0.0043149
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.3264363 0.1571819 0.3406292
6 Metabolism of amino acids and derivatives 342 0.0000006 0.1566651 0.0000021
3 Diseases of carbohydrate metabolism 29 0.1626241 0.1497870 0.1948517
13 Nervous system development 527 0.0000002 -0.1314342 0.0000009
m5top <- subset(mres5$enrichment_result,p.adjustANOVA<0.05)
m5up <- subset(m5top,s.dist>0)$set
m5dn <- subset(m5top,s.dist<0)$set
#mitch_report(mres5,outfile="mitch_mito5.html",overwrite=TRUE)
mitch_plots(mres5, outfile="mitch_mito5.pdf")
## png 
##   2
write.table(mres5$enrichment_result,file="mitch_mito5.tsv",quote=FALSE,sep="\t",row.names=FALSE)

Make some ridge plots.

myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
  sets <- head(mres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))
  
  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Top 10 genesets",
    xlab="differential gene rank",
    rectCol="gray")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})

#Fig2c
myres <- mres1
  sets <- head(myres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- myres$ranked_profile[which(rownames(myres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(myres$ranked_profile[,1])

pdf("fig2C.pdf")
  par(mar=c(5,20,3,2))
  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")
dev.off()
## png 
##   2
# now custom gene sets part 1
custom_set_names <- readLines("custom_pathway_list.txt")
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
  sets <- custom_set_names
  mres$enrichment_result <- mres$enrichment_result[which(mres$enrichment_result$set %in% sets),]
  mres$enrichment_result <- mres$enrichment_result[order(mres$enrichment_result$s.dist),]
  sets <- mres$enrichment_result$set
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Gene sets of interest",
    xlab="differential gene rank")


  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Gene sets of interest",
    rectCol="gray", xlab="differential gene rank")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})

# now custom gene sets part 2
custom_set_names2 <- readLines("custom_pathway_list2.txt")
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
  sets <- custom_set_names2[which( custom_set_names2 %in% names(custom_sets))]
  mres$enrichment_result <- mres$enrichment_result[which(mres$enrichment_result$set %in% sets),]
  mres$enrichment_result <- mres$enrichment_result[order(mres$enrichment_result$s.dist),]
  sets <- mres$enrichment_result$set
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Gene sets of interest",
    xlab="differential gene rank")

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Gene sets of interest",
    rectCol="gray", xlab="differential gene rank")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=1,corral = "gutter")
})

Now make some heatmaps

sets <- names(custom_sets)

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
    cexCol=0.9, main=paste(set) )
})

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  hm[is.na(hm)] <- 0
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
    cexCol=0.9, main=paste(set) )
})

refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
gshm <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  return(colMeans(hm))
})

gshm <- do.call(rbind,gshm)
rownames(gshm) <- sets

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(gshm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
  cexCol=0.9, main="Gene set heatmap" )

refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
gshm <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  hm[is.na(hm)] <- 0
  return(colMeans(hm))
})

gshm <- do.call(rbind,gshm)
rownames(gshm) <- sets

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(gshm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
  cexCol=0.9, main="Gene set heatmap" )

Mitch multi contrasts with custom gene sets

mm <- list("KO"=dge1,"drug"=dge4)
m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)
## Note: Mean no. genes in input = 25735
## Note: no. genes in output = 24468
## Note: estimated proportion of input genes in output = 0.951
mres <- mitch_calc(m, custom_sets, priority="effect",resrows=52)
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
head(mres$enrichment_result,20) %>%
  kbl(caption = "Top mito pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 1 and 4
set setSize pMANOVA s.KO s.drug p.KO p.drug s.dist SD p.adjustMANOVA
19 Receptor Mediated Mitophagy 11 0.0000263 -0.7759630 -0.2626019 0.0000083 0.1315157 0.8191937 0.3630011 0.0000486
12 Mucopolysaccharidoses 11 0.0000136 0.7558981 -0.2580819 0.0000141 0.1382892 0.7987416 0.7169921 0.0000271
2 Complex I biogenesis 57 0.0000000 -0.1225986 0.7294490 0.1093157 0.0000000 0.7396798 0.6024886 0.0000000
20 Reduction of cytosolic Ca++ levels 12 0.0001988 0.4226979 -0.5021944 0.0112254 0.0025906 0.6564090 0.6539976 0.0002807
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.0001725 -0.4368495 0.4616108 0.0063823 0.0039505 0.6355486 0.6353074 0.0002760
22 Respiratory electron transport 103 0.0000000 -0.1604358 0.5889871 0.0048876 0.0000000 0.6104469 0.5299220 0.0000000
14 Nucleotide biosynthesis 12 0.0036759 -0.4820562 -0.3242694 0.0038313 0.0517611 0.5809723 0.1115721 0.0042011
10 Mitochondrial translation 96 0.0000000 -0.1482465 0.5183894 0.0120417 0.0000000 0.5391703 0.4713828 0.0000000
11 Mitophagy 28 0.0000068 -0.5324790 -0.0475129 0.0000011 0.6634231 0.5345945 0.3429228 0.0000148
1 Cellular response to starvation 150 0.0000000 -0.3220144 0.4056013 0.0000000 0.0000000 0.5178858 0.5145020 0.0000000
24 Stabilization of p53 53 0.0000000 -0.4250982 0.2913945 0.0000001 0.0002419 0.5153826 0.5066369 0.0000000
9 Mitochondrial protein import 64 0.0000002 -0.2033094 0.3238455 0.0049000 0.0000074 0.3823750 0.3727548 0.0000006
21 Resolution of D-Loop Structures 33 0.0039393 0.2407810 0.2534479 0.0166593 0.0117319 0.3495874 0.0089568 0.0042974
3 Diseases of carbohydrate metabolism 30 0.0090010 0.3189077 -0.0262569 0.0024971 0.8034151 0.3199868 0.2440683 0.0093924
17 Phospholipid metabolism 186 0.0000000 -0.1473869 -0.2832498 0.0005232 0.0000000 0.3193013 0.0960695 0.0000000
16 PTEN Regulation 140 0.0000000 -0.2826678 -0.1053795 0.0000000 0.0312840 0.3016719 0.1253618 0.0000000
18 Pyruvate metabolism and Citric Acid (TCA) cycle 52 0.0017478 -0.2352398 -0.1827679 0.0033341 0.0225870 0.2978958 0.0371032 0.0020973
6 Metabolism of amino acids and derivatives 337 0.0000000 -0.1445765 0.2336243 0.0000050 0.0000000 0.2747411 0.2674283 0.0000000
23 Signaling by the B Cell Receptor (BCR) 106 0.0000599 -0.2342722 -0.1018064 0.0000306 0.0700635 0.2554369 0.0936675 0.0001026
4 Gene Silencing by RNA 95 0.0001965 -0.2410320 -0.0667322 0.0000487 0.2608703 0.2500992 0.1232486 0.0002807
#mitch_report(mres,outfile="mitchmulti_effect_mito.html",overwrite=TRUE)
write.table(mres$enrichment_result,file="mitch_multi_effect_mito.tsv",quote=FALSE,sep="\t",row.names=FALSE)
mx <- as.matrix(mres$enrichment_result[,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,22), cexRow=1,
    cexCol=0.9, main=paste("Top ranked Reactomes") )


mres <- mitch_calc(m, custom_sets, priority="SD",resrows=52)
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
head(mres$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)
Top mito pathway differences in contrast 1 and 4
set setSize pMANOVA s.KO s.drug p.KO p.drug s.dist SD p.adjustMANOVA
12 Mucopolysaccharidoses 11 0.0000136 0.7558981 -0.2580819 0.0000141 0.1382892 0.7987416 0.7169921 0.0000271
20 Reduction of cytosolic Ca++ levels 12 0.0001988 0.4226979 -0.5021944 0.0112254 0.0025906 0.6564090 0.6539976 0.0002807
8 Mitochondrial iron-sulfur cluster biogenesis 13 0.0001725 -0.4368495 0.4616108 0.0063823 0.0039505 0.6355486 0.6353074 0.0002760
2 Complex I biogenesis 57 0.0000000 -0.1225986 0.7294490 0.1093157 0.0000000 0.7396798 0.6024886 0.0000000
22 Respiratory electron transport 103 0.0000000 -0.1604358 0.5889871 0.0048876 0.0000000 0.6104469 0.5299220 0.0000000
1 Cellular response to starvation 150 0.0000000 -0.3220144 0.4056013 0.0000000 0.0000000 0.5178858 0.5145020 0.0000000
24 Stabilization of p53 53 0.0000000 -0.4250982 0.2913945 0.0000001 0.0002419 0.5153826 0.5066369 0.0000000
10 Mitochondrial translation 96 0.0000000 -0.1482465 0.5183894 0.0120417 0.0000000 0.5391703 0.4713828 0.0000000
9 Mitochondrial protein import 64 0.0000002 -0.2033094 0.3238455 0.0049000 0.0000074 0.3823750 0.3727548 0.0000006
19 Receptor Mediated Mitophagy 11 0.0000263 -0.7759630 -0.2626019 0.0000083 0.1315157 0.8191937 0.3630011 0.0000486
11 Mitophagy 28 0.0000068 -0.5324790 -0.0475129 0.0000011 0.6634231 0.5345945 0.3429228 0.0000148
6 Metabolism of amino acids and derivatives 337 0.0000000 -0.1445765 0.2336243 0.0000050 0.0000000 0.2747411 0.2674283 0.0000000
3 Diseases of carbohydrate metabolism 30 0.0090010 0.3189077 -0.0262569 0.0024971 0.8034151 0.3199868 0.2440683 0.0093924
16 PTEN Regulation 140 0.0000000 -0.2826678 -0.1053795 0.0000000 0.0312840 0.3016719 0.1253618 0.0000000
4 Gene Silencing by RNA 95 0.0001965 -0.2410320 -0.0667322 0.0000487 0.2608703 0.2500992 0.1232486 0.0002807
14 Nucleotide biosynthesis 12 0.0036759 -0.4820562 -0.3242694 0.0038313 0.0517611 0.5809723 0.1115721 0.0042011
15 Oxidative Stress Induced Senescence 82 0.0284316 -0.1702831 -0.0222774 0.0076676 0.7272367 0.1717342 0.1046559 0.0284316
7 Mitochondrial biogenesis 91 0.0006447 -0.2234438 -0.0840799 0.0002284 0.1655643 0.2387395 0.0985452 0.0008143
17 Phospholipid metabolism 186 0.0000000 -0.1473869 -0.2832498 0.0005232 0.0000000 0.3193013 0.0960695 0.0000000
23 Signaling by the B Cell Receptor (BCR) 106 0.0000599 -0.2342722 -0.1018064 0.0000306 0.0700635 0.2554369 0.0936675 0.0001026
#mitch_report(mres,outfile="mitchmulti_discord_mito.html",overwrite=TRUE)
write.table(mres$enrichment_result,file="mitch_multi_discord_mito.tsv",quote=FALSE,sep="\t",row.names=FALSE)

mx <- as.matrix(mres$enrichment_result[,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,22), cexRow=1,
    cexCol=0.9, main=paste("Top ranked Reactomes") )

Conclusion

TODO

Session information

For reproducibility.

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] beeswarm_0.4.0              vioplot_0.3.7              
##  [3] sm_2.2-5.7                  kableExtra_1.3.4           
##  [5] topconfects_1.12.0          limma_3.52.1               
##  [7] eulerr_6.1.1                mitch_1.8.0                
##  [9] MASS_7.3-58                 fgsea_1.22.0               
## [11] gplots_3.1.3                DESeq2_1.36.0              
## [13] SummarizedExperiment_1.26.1 Biobase_2.56.0             
## [15] MatrixGenerics_1.8.0        matrixStats_0.62.0         
## [17] GenomicRanges_1.48.0        GenomeInfoDb_1.32.2        
## [19] IRanges_2.30.0              S4Vectors_0.34.0           
## [21] BiocGenerics_0.42.0         reshape2_1.4.4             
## [23] forcats_0.5.1               stringr_1.4.0              
## [25] dplyr_1.0.9                 purrr_0.3.4                
## [27] readr_2.1.2                 tidyr_1.2.0                
## [29] tibble_3.1.7                ggplot2_3.3.6              
## [31] tidyverse_1.3.1             zoo_1.8-10                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0           backports_1.4.1        fastmatch_1.1-3       
##   [4] systemfonts_1.0.4      plyr_1.8.7             polylabelr_0.2.0      
##   [7] splines_4.2.1          BiocParallel_1.30.3    digest_0.6.29         
##  [10] htmltools_0.5.2        fansi_1.0.3            magrittr_2.0.3        
##  [13] memoise_2.0.1          tzdb_0.3.0             Biostrings_2.64.0     
##  [16] annotate_1.74.0        modelr_0.1.8           svglite_2.1.0         
##  [19] colorspace_2.0-3       blob_1.2.3             rvest_1.0.2           
##  [22] haven_2.5.0            xfun_0.31              crayon_1.5.1          
##  [25] RCurl_1.98-1.7         jsonlite_1.8.0         genefilter_1.78.0     
##  [28] survival_3.4-0         glue_1.6.2             polyclip_1.10-0       
##  [31] gtable_0.3.0           zlibbioc_1.42.0        XVector_0.36.0        
##  [34] webshot_0.5.3          DelayedArray_0.22.0    scales_1.2.0          
##  [37] DBI_1.1.3              GGally_2.1.2           Rcpp_1.0.8.3          
##  [40] viridisLite_0.4.0      xtable_1.8-4           bit_4.0.4             
##  [43] htmlwidgets_1.5.4      httr_1.4.3             RColorBrewer_1.1-3    
##  [46] ellipsis_0.3.2         farver_2.1.0           pkgconfig_2.0.3       
##  [49] reshape_0.8.9          XML_3.99-0.10          sass_0.4.1            
##  [52] dbplyr_2.2.1           locfit_1.5-9.5         utf8_1.2.2            
##  [55] labeling_0.4.2         tidyselect_1.1.2       rlang_1.0.3           
##  [58] later_1.3.0            AnnotationDbi_1.58.0   munsell_0.5.0         
##  [61] cellranger_1.1.0       tools_4.2.1            cachem_1.0.6          
##  [64] cli_3.3.0              generics_0.1.2         RSQLite_2.2.14        
##  [67] broom_0.8.0            evaluate_0.15          fastmap_1.1.0         
##  [70] yaml_2.3.5             knitr_1.39             bit64_4.0.5           
##  [73] fs_1.5.2               caTools_1.18.2         KEGGREST_1.36.2       
##  [76] mime_0.12              xml2_1.3.3             compiler_4.2.1        
##  [79] rstudioapi_0.13        png_0.1-7              reprex_2.0.1          
##  [82] geneplotter_1.74.0     bslib_0.3.1            stringi_1.7.6         
##  [85] highr_0.9              lattice_0.20-45        Matrix_1.4-1          
##  [88] vctrs_0.4.1            pillar_1.7.0           lifecycle_1.0.1       
##  [91] jquerylib_0.1.4        data.table_1.14.2      bitops_1.0-7          
##  [94] httpuv_1.6.5           R6_2.5.1               promises_1.2.0.1      
##  [97] KernSmooth_2.23-20     echarts4r_0.4.4        gridExtra_2.3         
## [100] codetools_0.2-18       gtools_3.9.2.2         assertthat_0.2.1      
## [103] withr_2.5.0            GenomeInfoDbData_1.2.8 parallel_4.2.1        
## [106] hms_1.1.1              grid_4.2.1             rmarkdown_2.14        
## [109] shiny_1.7.1            lubridate_1.8.0