ECHS1 KO RNA-seq

Import read counts

Importing RNA-seq data

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
xx$geneid = NULL
xx <- round(xx)
xx <- xx[,which(colnames(xx)!="test")]
xx[1:6,1:6]

##                             1_CON_ut_A_RNA_HB_HLF57DRXY_CTGTTGGTCC-AACGGTCTAT_L001
## ENSG00000000003.15 TSPAN6                                                      704
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        624
## ENSG00000000457.14 SCYL3                                                        97
## ENSG00000000460.17 C1orf112                                                    110
## ENSG00000000938.13 FGR                                                           0
##                             1_CON_ut_A_RNA_HB_HLF57DRXY_CTGTTGGTCC-AACGGTCTAT_L002
## ENSG00000000003.15 TSPAN6                                                      676
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        643
## ENSG00000000457.14 SCYL3                                                        99
## ENSG00000000460.17 C1orf112                                                    122
## ENSG00000000938.13 FGR                                                           0
##                             10_KO_ut_B_RNA_HB_HLF57DRXY_CACGGAACAA-GTGCTAGGTT_L001
## ENSG00000000003.15 TSPAN6                                                      752
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        360
## ENSG00000000457.14 SCYL3                                                       102
## ENSG00000000460.17 C1orf112                                                    110
## ENSG00000000938.13 FGR                                                           0
##                             10_KO_ut_B_RNA_HB_HLF57DRXY_CACGGAACAA-GTGCTAGGTT_L002
## ENSG00000000003.15 TSPAN6                                                      803
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        334
## ENSG00000000457.14 SCYL3                                                        75
## ENSG00000000460.17 C1orf112                                                    100
## ENSG00000000938.13 FGR                                                           0
##                             11_KO_ut_C_RNA_HB_HLF57DRXY_TGGAGTACTT-TCCACACAGA_L001
## ENSG00000000003.15 TSPAN6                                                     1502
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        703
## ENSG00000000457.14 SCYL3                                                       203
## ENSG00000000460.17 C1orf112                                                    175
## ENSG00000000938.13 FGR                                                           0
##                             11_KO_ut_C_RNA_HB_HLF57DRXY_TGGAGTACTT-TCCACACAGA_L002
## ENSG00000000003.15 TSPAN6                                                     1509
## ENSG00000000005.6 TNMD                                                           0
## ENSG00000000419.13 DPM1                                                        659
## ENSG00000000457.14 SCYL3                                                       184
## ENSG00000000460.17 C1orf112                                                    191
## ENSG00000000938.13 FGR                                                           0

dim(xx)

## [1] 60651    32

Fix the sample names.

They are duplicated for lane 1 and 2, which I will aggregate.

colnames(xx) <- sapply(strsplit(colnames(xx),"_RNA_"),"[[",1)
colnames(xx) <- sapply(strsplit(sub("_","@",colnames(xx)),"@"),"[[",2)
labels <- unique(sapply(strsplit(colnames(xx),"\\."),"[[",1))
l <- lapply(labels,function(x) { rowSums(xx[,grep(x,colnames(xx))]) } )
ll <- do.call(cbind,l)
colnames(ll) <- labels
ll <- as.data.frame(ll[,order(colnames(ll))])
write.table(ll,file="counts.tsv",sep="\t",quote=FALSE)

rpm <- ll/colSums(ll) *1e6
write.table(rpm,file="rpm.tsv",sep="\t",quote=FALSE)

Make a sample sheet. This will be easy because the groups are specified in the name.

ss <- as.data.frame(colnames(ll))
ss$ko <- factor(grepl("KO", colnames(ll))) 
ss$trt <- factor(grepl("dNs",colnames(ll)))
rownames(ss) <- ss[,1]
ss[,1]=NULL
# can also set some colours for later
ss$cols <- c(rep("gray",4),rep("lightblue",4),rep("pink",4),
  rep("orange",4))

ss %>% kbl(caption = "Sample sheet") %>%
  kable_paper("hover", full_width = F)

Sample sheet
	ko	trt	cols
CON_dNs_A	FALSE	TRUE	gray
CON_dNs_B	FALSE	TRUE	gray
CON_dNs_C	FALSE	TRUE	gray
CON_dNs_D	FALSE	TRUE	gray
CON_ut_A	FALSE	FALSE	lightblue
CON_ut_B	FALSE	FALSE	lightblue
CON_ut_C	FALSE	FALSE	lightblue
CON_ut_D	FALSE	FALSE	lightblue
KO_dNs_A	TRUE	TRUE	pink
KO_dNs_B	TRUE	TRUE	pink
KO_dNs_C	TRUE	TRUE	pink
KO_dNs_D	TRUE	TRUE	pink
KO_ut_A	TRUE	FALSE	orange
KO_ut_B	TRUE	FALSE	orange
KO_ut_C	TRUE	FALSE	orange
KO_ut_D	TRUE	FALSE	orange

QC analysis

Here I’ll look at a few different quality control measures.

par(mar=c(5,8,3,1))
barplot(colSums(ll),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)

sums <- colSums(ll)
sums <- sums[order(sums)]
barplot(sums,horiz=TRUE,las=1,xlab="num reads")
abline(v=15000000,col="red")

MDS plot for all samples

Multidimensional scaling plot to show the variation between all samples, very similar to PCA.

Firstly with the data before aggregating technical replicates.

mds <- cmdscale(dist(t(xx)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "p",bty="n",pch=19, cex=4 ,col="gray")
text(mds, labels=rownames(mds) )

Now for the data after aggregation.

It looks like the drug has a bigger effect than the gene KO.

mds <- cmdscale(dist(t(ll)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2", 
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

pdf("mds1.pdf")

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

dev.off()

## png 
##   2

Correlation heatmap

heatmap.2(cor(ll),trace="n",main="Pearson correlation heatmap",margin=c(8,8))

pdf("cor1.pdf")
heatmap.2(cor(ll),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()

## png 
##   2

MDS plot for UT samples

ll2 <- ll[,grep("ut",colnames(ll))]

mds <- cmdscale(dist(t(ll2)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

pdf("mds2.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()

## png 
##   2

MDS plot for dNs samples

ll3 <- ll[,grep("dNs",colnames(ll))]

mds <- cmdscale(dist(t(ll3)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )

pdf("mds3.pdf")
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4 ,col=ss$cols)
text(mds, labels=rownames(mds) )
dev.off()

## png 
##   2

Correlation heatmaps for UT and dNs separately

heatmap.2(cor(ll2),trace="n",main="Pearson correlation heatmap",margin=c(8,8))

heatmap.2(cor(ll3),trace="n",main="Pearson correlation heatmap",margin=c(8,8))

pdf("cor2.pdf")
heatmap.2(cor(ll2),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()

## png 
##   2

pdf("cor3.pdf")
heatmap.2(cor(ll3),trace="n",main="Pearson correlation heatmap",margin=c(8,8))
dev.off()

## png 
##   2

Set up the different datasets for differential expression analysis

Don’t forget to remove poorly detected genes from the matrix with a threshold of 10 reads per sample on average.

There are 4 contrasts to set up.

Effect of the KO in untreated cells
Effect of the KO in treated cells
Effect of the drug in WT cells
Effect of the drug in KO cells
CON ut vs KO dNs. (what was altered in ECHS1 KO, that has been returned to normal levels (and no longer significantly different) to CON ut).

ss1 <- ss[which(ss$trt=="FALSE"),]
xx1 <- ll[which(colnames(ll) %in% rownames(ss1))]
xx1 <- xx1[which(rowSums(xx1)>=10),]
rpm1 <- xx1/colSums(xx1) *1e6

ss2 <- ss[which(ss$trt=="TRUE"),]
xx2 <- ll[which(colnames(ll) %in% rownames(ss2))]
xx2 <- xx2[which(rowSums(xx2)>=10),]
rpm2 <- xx2/colSums(xx2) *1e6

ss3 <- ss[which(ss$ko=="FALSE"),]
xx3 <- ll[which(colnames(ll) %in% rownames(ss3))]
xx3 <- xx3[which(rowSums(xx3)>=10),]
rpm3 <- xx3/colSums(xx3) *1e6

ss4 <- ss[which(ss$ko=="TRUE"),]
xx4 <- ll[which(colnames(ll) %in% rownames(ss4))]
xx4 <- xx4[which(rowSums(xx4)>=10),]
rpm4 <- xx4/colSums(xx4) *1e6

ss5 <- subset(ss,ko == trt)
xx5 <- ll[which(colnames(ll) %in% rownames(ss5))]
xx5 <- xx5[which(rowSums(xx5)>=10),]
rpm5 <- xx5/colSums(xx5) *1e6

Differential expression with DESeq2

dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1, 
  design = ~ ko )

## converting counts to integer mode

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>% 
  kbl(caption = "Top gene expression differences for contrast 1: Effect of the KO in untreated cells") %>% 
  kable_paper("hover", full_width = F)

Top gene expression differences for contrast 1: Effect of the KO in untreated cells
	baseMean	log2FoldChange	lfcSE	stat
ENSG00000049192.15 ADAMTS6	1232.5643	-3.320315	0.0735209	-45.16150
ENSG00000090339.9 ICAM1	8426.0162	2.137068	0.0421343	50.72040
ENSG00000099875.16 MKNK2	5196.7000	1.646797	0.0435241	37.83644
ENSG00000105825.14 TFPI2	10749.0439	-2.758378	0.0645077	-42.76045
ENSG00000107551.21 RASSF4	1224.3947	3.078164	0.0812402	37.88965
ENSG00000116016.14 EPAS1	16685.0850	1.887592	0.0496555	38.01373
ENSG00000125538.12 IL1B	9573.0006	-1.925913	0.0457459	-42.10027
ENSG00000138080.14 EMILIN1	6299.0508	2.088516	0.0483164	43.22583
ENSG00000146197.9 SCUBE3	7326.2924	-2.787733	0.0674659	-41.32061
ENSG00000154146.13 NRGN	1528.8050	2.283661	0.0600431	38.03371
ENSG00000164949.8 GEM	1954.1763	-2.372324	0.0565928	-41.91920
ENSG00000165507.9 DEPP1	3368.7544	2.850455	0.0522246	54.58071
ENSG00000166250.12 CLMP	20860.4032	-1.674644	0.0397766	-42.10125
ENSG00000166670.10 MMP10	3155.5769	-6.120693	0.1021577	-59.91416
ENSG00000166920.13 C15orf48	2293.5579	2.687532	0.0645990	41.60330
ENSG00000167772.12 ANGPTL4	14717.2697	3.591520	0.0856872	41.91430
ENSG00000168528.12 SERINC2	4459.4824	1.937358	0.0495824	39.07346
ENSG00000170961.7 HAS2	3615.4379	-4.322465	0.0852007	-50.73275
ENSG00000172572.7 PDE3A	1574.2471	-3.854290	0.0926037	-41.62136
ENSG00000172927.8 MYEOV	928.0298	4.057927	0.0905625	44.80804

dge1 <- dge
d1up <- rownames(subset(dge1,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge1,padj <= 0.05 & log2FoldChange < 0))
write.table(dge1,file="dge1.tsv",quote=FALSE,sep="\t")

dds <- DESeqDataSetFromMatrix(countData = xx2 , colData = ss2, 
  design = ~ ko )

## converting counts to integer mode

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 2: Effect of the KO in treated cells") %>%
  kable_paper("hover", full_width = F)

Top gene expression differences for contrast 2: Effect of the KO in treated cells
	baseMean	log2FoldChange	lfcSE	stat
ENSG00000081248.12 CACNA1S	1209.4345	4.728860	0.1048201	45.11406
ENSG00000142910.16 TINAGL1	4889.9014	3.128308	0.0629648	49.68348
ENSG00000162804.14 SNED1	6126.7547	-2.358713	0.0490839	-48.05475
ENSG00000166920.13 C15orf48	2299.7911	3.960911	0.0886313	44.68975
ENSG00000167772.12 ANGPTL4	9936.5018	3.543783	0.0816955	43.37797
ENSG00000113361.13 CDH6	816.5796	-3.817982	0.1059909	-36.02179
ENSG00000101187.16 SLCO4A1	19101.5516	1.934484	0.0565000	34.23868
ENSG00000143127.13 ITGA10	1156.7355	-3.687434	0.1104110	-33.39734
ENSG00000151062.15 CACNA2D4	655.2444	4.551764	0.1378037	33.03079
ENSG00000170961.7 HAS2	613.5916	-3.363180	0.1045664	-32.16312
ENSG00000168528.12 SERINC2	3732.8640	1.667401	0.0525189	31.74856
ENSG00000135245.10 HILPDA	1510.8969	2.621093	0.0845048	31.01708
ENSG00000172927.8 MYEOV	472.9211	4.325653	0.1395499	30.99717
ENSG00000115756.13 HPCAL1	4084.1250	1.608911	0.0524306	30.68650
ENSG00000100985.7 MMP9	14795.4099	-2.331039	0.0761929	-30.59392
ENSG00000163235.16 TGFA	1660.7730	2.758967	0.0910679	30.29573
ENSG00000125571.9 IL37	1230.5444	2.713549	0.0896194	30.27858
ENSG00000118257.17 NRP2	2033.2565	-2.000568	0.0681154	-29.37026
ENSG00000213658.12 LAT	1708.1308	-1.949041	0.0664827	-29.31650
ENSG00000101443.18 WFDC2	5495.9129	2.267593	0.0773716	29.30783

dge2 <- dge
d2up <- rownames(subset(dge2,padj <= 0.05 & log2FoldChange > 0))
d2dn <- rownames(subset(dge2,padj <= 0.05 & log2FoldChange < 0))
write.table(dge2,file="dge2.tsv",quote=FALSE,sep="\t")

dds <- DESeqDataSetFromMatrix(countData = xx3 , colData = ss3,
  design = ~ trt )

## converting counts to integer mode

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 3: Effect of the drug in WT cells") %>%
  kable_paper("hover", full_width = F)

Top gene expression differences for contrast 3: Effect of the drug in WT cells
	baseMean	log2FoldChange	lfcSE	stat
ENSG00000068912.14 ERLEC1	1851.848	-2.762775	0.0670700	-41.19240
ENSG00000078747.16 ITCH	2736.282	-2.038664	0.0523301	-38.95780
ENSG00000100836.10 PABPN1	6539.865	2.072716	0.0487025	42.55874
ENSG00000102755.12 FLT1	5426.297	-3.705474	0.0880185	-42.09882
ENSG00000106366.9 SERPINE1	24625.790	-2.091105	0.0507831	-41.17717
ENSG00000108821.14 COL1A1	9382.064	2.181043	0.0456926	47.73301
ENSG00000110048.12 OSBP	3405.995	-2.355956	0.0614408	-38.34517
ENSG00000112473.18 SLC39A7	4521.830	-2.281225	0.0578603	-39.42640
ENSG00000116285.13 ERRFI1	36944.117	-2.684910	0.0567602	-47.30267
ENSG00000122884.12 P4HA1	4295.439	-2.579782	0.0582809	-44.26463
ENSG00000131016.17 AKAP12	21452.521	-3.317855	0.0663163	-50.03076
ENSG00000138166.6 DUSP5	4318.320	-2.155073	0.0498547	-43.22711
ENSG00000142949.17 PTPRF	7682.322	-1.848612	0.0487213	-37.94262
ENSG00000150961.15 SEC24D	2402.484	-3.024974	0.0617986	-48.94889
ENSG00000150995.20 ITPR1	4106.314	-3.021908	0.0665647	-45.39805
ENSG00000159167.12 STC1	13523.272	-3.145427	0.0722957	-43.50780
ENSG00000162804.14 SNED1	6283.910	2.862292	0.0481808	59.40725
ENSG00000164484.12 TMEM200A	1197.746	-3.470114	0.0881540	-39.36421
ENSG00000166250.12 CLMP	18283.547	-2.335014	0.0538393	-43.37009
ENSG00000166670.10 MMP10	3245.447	-3.537259	0.0771167	-45.86894

dge3 <- dge
d3up <- rownames(subset(dge3,padj <= 0.05 & log2FoldChange > 0))
d3dn <- rownames(subset(dge3,padj <= 0.05 & log2FoldChange < 0))
write.table(dge3,file="dge3.tsv",quote=FALSE,sep="\t")

dds <- DESeqDataSetFromMatrix(countData = xx4 , colData = ss4,
  design = ~ trt )

## converting counts to integer mode

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 4: Effect of the drug in KO cells") %>%
  kable_paper("hover", full_width = F)

Top gene expression differences for contrast 4: Effect of the drug in KO cells
	baseMean	log2FoldChange	lfcSE	stat
ENSG00000059804.16 SLC2A3	5623.218	-1.769194	0.0468167	-37.78983
ENSG00000103888.17 CEMIP	25230.452	-2.601566	0.0673460	-38.62986
ENSG00000104419.17 NDRG1	38083.260	-2.549397	0.0511765	-49.81576
ENSG00000134250.20 NOTCH2	6533.444	-2.176937	0.0578054	-37.65975
ENSG00000197324.9 LRP10	9816.752	-1.770453	0.0378713	-46.74921
ENSG00000198712.1 MT-CO2	290785.069	2.997493	0.0744604	40.25622
ENSG00000198727.2 MT-CYB	116486.352	3.700459	0.0765560	48.33662
ENSG00000198763.3 MT-ND2	121199.785	3.262193	0.0678248	48.09735
ENSG00000198840.2 MT-ND3	17981.096	2.878705	0.0705335	40.81328
ENSG00000198959.12 TGM2	1712.100	-2.516008	0.0594500	-42.32139
ENSG00000225630.1 MTND2P28	5023.069	3.268132	0.0682696	47.87097
ENSG00000258017.2 AC011603.2	7033.925	2.918598	0.0767330	38.03574
ENSG00000259207.9 ITGB3	6216.236	-2.380112	0.0532419	-44.70377
ENSG00000198804.2 MT-CO1	506735.846	2.899638	0.0779490	37.19915
ENSG00000134531.10 EMP1	8062.151	-1.864264	0.0501442	-37.17805
ENSG00000198888.2 MT-ND1	70406.196	2.780834	0.0758103	36.68147
ENSG00000142949.17 PTPRF	7521.711	-1.802689	0.0498071	-36.19343
ENSG00000198938.2 MT-CO3	211300.735	2.731621	0.0754793	36.19031
ENSG00000178726.7 THBD	1419.592	-2.752535	0.0765193	-35.97178
ENSG00000131016.17 AKAP12	17614.587	-2.494844	0.0707156	-35.27996

dge4 <- dge
d4up <- rownames(subset(dge4,padj <= 0.05 & log2FoldChange > 0))
d4dn <- rownames(subset(dge4,padj <= 0.05 & log2FoldChange < 0))
write.table(dge4,file="dge4.tsv",quote=FALSE,sep="\t")

dds <- DESeqDataSetFromMatrix(countData = xx5 , colData = ss5,
  design = ~ trt )

## converting counts to integer mode

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>%
  kbl(caption = "Top gene expression differences for contrast 5: Does the drug restore changes caused by KO?") %>%
  kable_paper("hover", full_width = F)

Top gene expression differences for contrast 5: Does the drug restore changes caused by KO?
	baseMean	log2FoldChange	lfcSE	stat
ENSG00000006459.11 KDM7A	2102.825	-2.968547	0.0756232	-39.25444
ENSG00000011638.11 TMEM159	1310.273	-3.080679	0.0735046	-41.91135
ENSG00000026559.14 KCNG1	3444.570	-2.168208	0.0544081	-39.85081
ENSG00000049192.15 ADAMTS6	1080.879	-4.046315	0.0968751	-41.76838
ENSG00000054356.14 PTPRN	2934.182	-2.849605	0.0710961	-40.08101
ENSG00000059804.16 SLC2A3	5889.284	-1.919649	0.0509892	-37.64813
ENSG00000081181.8 ARG2	7528.745	-2.886314	0.0654474	-44.10126
ENSG00000081248.12 CACNA1S	1201.973	6.324357	0.1441395	43.87665
ENSG00000085449.15 WDFY1	2455.389	-2.926425	0.0779364	-37.54886
ENSG00000100985.7 MMP9	34579.692	-3.683329	0.0725006	-50.80410
ENSG00000101443.18 WFDC2	5112.498	3.272677	0.0868267	37.69207
ENSG00000102755.12 FLT1	5139.855	-3.718650	0.0709667	-52.39995
ENSG00000106333.13 PCOLCE	9578.772	2.391137	0.0557298	42.90588
ENSG00000106366.9 SERPINE1	21920.897	-2.648544	0.0513740	-51.55418
ENSG00000112902.12 SEMA5A	1857.884	-3.979788	0.0893823	-44.52548
ENSG00000115170.15 ACVR1	1323.707	-2.656676	0.0684522	-38.81067
ENSG00000115677.17 HDLBP	13291.684	-2.302710	0.0521315	-44.17120
ENSG00000115758.13 ODC1	5532.578	-3.052796	0.0739435	-41.28553
ENSG00000118257.17 NRP2	5136.700	-3.513717	0.0603518	-58.22055
ENSG00000125430.9 HS3ST3B1	1104.229	-3.706736	0.0853897	-43.40962

dge5 <- dge
d5up <- rownames(subset(dge5,padj <= 0.05 & log2FoldChange > 0))
d5dn <- rownames(subset(dge5,padj <= 0.05 & log2FoldChange < 0))
write.table(dge5,file="dge5.tsv",quote=FALSE,sep="\t")

Here let’s look at some plots.

MA plot shows the average level and fold change of all detected genes. Volcano plot shows the fold change and the significance, as measured by -log(p-value). Significant genes are shown as red points.

There are heatmaps of the top ranked genes by p-value. Above the gene expression values there is a bar in yellow/orange colours. Yellow shows relatively low QuickDASH score and orange shows high score.

maplot <- function(de,contrast_name) {
  de <- de[which(!is.na(de$padj)),]
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}

make_volcano <- function(de,name) {
    de <- de[which(!is.na(de$padj)),]
    de$pvalue[which(de$pvalue==0)] <- 1e-320
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}

make_volcano2 <- function(de,name) {
    de <- de[which(!is.na(de$padj)),]
    de$pvalue[which(de$pvalue==0)] <- 1e-320
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    top <- head(sig,30)
    mylabels <- sapply(strsplit(rownames(top)," "),"[[",2)
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
    text(top$log2FoldChange+0.2,-log10(top$pval),labels=mylabels, srt=35 ,cex=0.7)
}

make_heatmap <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,15), cexRow=0.7, 
    main=paste("Top ranked",n,"genes in",name) , ColSideColors = myss$cols  )
}

make_heatmap2 <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  rownames(x) <- sapply(strsplit(rownames(x)," "),"[[",2)
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(10,15), cexRow=0.6,
    main=paste("Top ranked",n,"genes in",name) , ColSideColors = myss$cols  )
}

mitch_barplot <- function(res){
  sig <- head(subset(res$enrichment_result,p.adjustANOVA<0.05),30)
  sig <- sig[order(sig$s.dist),]
  par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
    names.arg=sig$set,main="Enrichment score") ;grid()
}


mitch_barplot2 <- function(res){
  sig_up <- head(subset(res$enrichment_result,p.adjustANOVA<0.05 & s.dist>0),15)
  sig_dn <- head(subset(res$enrichment_result,p.adjustANOVA<0.05 & s.dist<0),15)
  sig <- rbind(sig_up,sig_dn)
  sig <- sig[order(sig$s.dist),]
  par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
    names.arg=sig$set,main="Enrichment score") ;grid()
}

maplot(dge1,"Cont1: Effect of KO in untr cells")

make_volcano(dge1,"Cont1: Effect of KO in untr cells")
make_volcano2(dge1,"Cont1: Effect of KO in untr cells")

make_heatmap(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)

make_heatmap2(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)

maplot(dge2,"Cont2: Effect of KO in trt cells")

make_volcano(dge2,"Cont2: Effect of KO in trt cells")
make_volcano2(dge2,"Cont2: Effect of KO in trt cells")

make_heatmap(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)

make_heatmap2(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)

maplot(dge3,"Cont3: Effect of drug in WT cells")

make_volcano(dge3,"Cont3: Effect of drug in WT cells")
make_volcano2(dge3,"Cont3: Effect of drug in WT cells")

make_heatmap(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)

make_heatmap2(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)

maplot(dge4,"Cont4: Effect of drug in KO cells")

make_volcano(dge4,"Cont4: Effect of drug in KO cells")
make_volcano2(dge4,"Cont4: Effect of drug in KO cells")

make_heatmap(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)

make_heatmap2(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)

maplot(dge5,"CON ut vs KO dNs")

make_volcano(dge5,"CON ut vs KO dNs")
make_volcano2(dge5,"CON ut vs KO dNs")

make_heatmap(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)

make_heatmap2(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)

pdf("dge1.pdf")
maplot(dge1,"Cont1: Effect of KO in untr cells")
make_volcano(dge1,"Cont1: Effect of KO in untr cells")
make_volcano2(dge1,"Cont1: Effect of KO in untr cells")
make_heatmap(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
make_heatmap2(dge1,"Cont1: Effect of KO in untr cells",ss1,xx1,n=50)
dev.off()

## png 
##   2

pdf("dge2.pdf")
maplot(dge2,"Cont2: Effect of KO in trt cells")
make_volcano(dge2,"Cont2: Effect of KO in trt cells")
make_volcano2(dge2,"Cont2: Effect of KO in trt cells")
make_heatmap(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
make_heatmap2(dge2,"Cont2: Effect of KO in trt cells",ss2,xx2,n=50)
dev.off()

## png 
##   2

pdf("dge3.pdf")
maplot(dge3,"Cont3: Effect of drug in WT cells")
make_volcano(dge3,"Cont3: Effect of drug in WT cells")
make_volcano2(dge3,"Cont3: Effect of drug in WT cells")
make_heatmap(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
make_heatmap2(dge3,"Cont3: Effect of drug in WT cells",ss3,xx3,n=50)
dev.off()

## png 
##   2

pdf("dge4.pdf")
maplot(dge4,"Cont4: Effect of drug in KO cells")
make_volcano(dge4,"Cont4: Effect of drug in KO cells")
make_volcano2(dge4,"Cont4: Effect of drug in KO cells")
make_heatmap(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
make_heatmap2(dge4,"Cont4: Effect of drug in KO cells",ss4,xx4,n=50)
dev.off()

## png 
##   2

pdf("dge5.pdf")
maplot(dge5,"CON ut vs KO dNs")
make_volcano(dge5,"CON ut vs KO dNs")
make_volcano2(dge5,"CON ut vs KO dNs")
make_heatmap(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
make_heatmap2(dge5,"CON ut vs KO dNs",ss5,xx5,n=50)
dev.off()

## png 
##   2

Heatmap of DEGs

Contrast 1 and 4 - get top 25 genes and make heatmap of all sample groups.

g1 <- head(rownames(dge1),25)
g4 <- head(rownames(dge4),25)
g14 <- union(g1,g4)

fc1 <- dge1[which(rownames(dge1) %in% g14),"log2FoldChange",drop=F]

rpm14 <- rpm[which(rownames(rpm) %in% g14),]
rpm14 <- merge(rpm14,fc1,by=0)
rpm14 <- rpm14[order(rpm14$log2FoldChange),]
rpm14$log2FoldChange=NULL
rownames(rpm14) <- rpm14$Row.names
rpm14$Row.names=NULL
rownames(rpm14) <- sapply(strsplit(rownames(rpm14)," "),"[[",2)
rpm14 <- rpm14[,rev(order(colnames(rpm14)))]

# reorder columns
rpm14 <- rpm14[,c( grep("CON_ut",colnames(rpm14)) , grep("KO_ut",colnames(rpm14)) , grep("KO_dN",colnames(rpm14)) , grep("CON_dN",colnames(rpm14)) )]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

pdf("heat1.pdf")

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

dev.off()

## png 
##   2

rpm14 <- rpm[which(rownames(rpm) %in% g14),]

rownames(rpm14) <- sapply(strsplit(rownames(rpm14)," "),"[[",2)

# reorder columns
rpm14 <- rpm14[,c( grep("CON_ut",colnames(rpm14)) , grep("KO_ut",colnames(rpm14)) , grep("KO_dN",colnames(rpm14)) , grep("CON_dN",colnames(rpm14)) )]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

pdf("heat2.pdf")

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

heatmap.2(as.matrix(rpm14),trace="none",col=colfunc(25),scale="row", 
  Colv="none" , dendrogram="none",
  margins = c(8,15), cexRow=0.5 , cexCol=0.8)

dev.off()

## png 
##   2

rpm1x <- rpm1[which(rownames(rpm1) %in% g14),]
fc1 <- dge1[which(rownames(dge1) %in% g14),"log2FoldChange",drop=F]
rpm1x <- merge(rpm1x,fc1,by=0)
rpm1x <- rpm1x[order(rpm1x$log2FoldChange),]
rpm1x$log2FoldChange=NULL
rownames(rpm1x) <- rpm1x$Row.names
rpm1x$Row.names=NULL
rownames(rpm1x) <- sapply(strsplit(rownames(rpm1x)," "),"[[",2)
rpm1x <- rpm1x[,rev(order(colnames(rpm1x)))]

rpm4x <- rpm4[which(rownames(rpm4) %in% g14),]
rpm4x <- merge(rpm4x,fc1,by=0)
rpm4x <- rpm4x[order(rpm4x$log2FoldChange),]
rpm4x$log2FoldChange=NULL
rownames(rpm4x) <- rpm4x$Row.names
rpm4x$Row.names=NULL
rownames(rpm4x) <- sapply(strsplit(rownames(rpm4x)," "),"[[",2)
rpm4x <- rpm4x[,rev(order(colnames(rpm4x)))]

heatmap.2(as.matrix(rpm1x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)

heatmap.2(as.matrix(rpm4x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)

pdf("heat3.pdf")

heatmap.2(as.matrix(rpm1x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)

heatmap.2(as.matrix(rpm4x),trace="none",col=colfunc(25),scale="row", margins = c(8,15), 
  cexRow=0.5 , cexCol=0.8, Rowv=FALSE, dendrogram='none',Colv=FALSE)
dev.off()

## png 
##   2

Euler diagrams of DEGs

All DEGs

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("C1 up"=d1up,"C1 dn"=d1dn, "C2 up" = d2up, "C2 dn"=d2dn, "C3 up"=d3up, "C3 dn"=d3dn, "C4 up"=d4up, "C4 dn"=d4dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")

pdf("euler1.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()

## png 
##   2

Now with just the DEGs from contrasts 1 and 4.

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("KO up"=d1up,"KO dn"=d1dn, "drug up"=d4up, "drug dn"=d4dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")

pdf("euler2.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()

## png 
##   2

Now with just the DEGs from contrasts 1 and 5.

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("KO up"=d1up,"KO dn"=d1dn, "drugtrt up"=d5up, "drugtrt dn"=d5dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")

pdf("euler3.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="DEGs")
dev.off()

## png 
##   2

Single contrast pathway analysis with mitch

Firstly need to conduct mitch enrichment analysis for each contrast separately.

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

# gene table
gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)

m1 <- mitch_import(dge1, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 25258

## Note: no. genes in output = 25203

## Note: estimated proportion of input genes in output = 0.998

mres1 <- mitch_calc(m1, genesets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres1$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 1") %>%
  kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 1
	set	setSize	pANOVA	s.dist	p.adjustANOVA
1008	Receptor Mediated Mitophagy	11	0.0000080	-0.7772236	0.0000639
720	Mucopolysaccharidoses	11	0.0000117	0.7629839	0.0000908
670	Membrane binding and targetting of GAG proteins	14	0.0000402	-0.6337233	0.0002665
1264	Synthesis And Processing Of GAG, GAGPOL Polyproteins	14	0.0000402	-0.6337233	0.0002665
1411	Unwinding of DNA	12	0.0002829	0.6051698	0.0014768
93	Assembly Of The HIV Virion	16	0.0000324	-0.6000020	0.0002240
121	Biotin transport and metabolism	11	0.0011127	0.5676477	0.0047067
1276	Synthesis of active ubiquitin: roles of E1 and E2 enzymes	30	0.0000001	-0.5630345	0.0000016
1115	SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs	11	0.0018488	-0.5420841	0.0073752
1166	Signaling by Activin	15	0.0003295	-0.5354190	0.0016612
710	Mitophagy	28	0.0000011	-0.5309604	0.0000116
1086	Response of EIF2AK1 (HRI) to heme deficiency	15	0.0005010	-0.5189191	0.0023789
55	Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S	58	0.0000000	-0.5187746	0.0000000
538	Hyaluronan uptake and degradation	10	0.0047720	0.5153177	0.0167247
572	Insertion of tail-anchored proteins into the endoplasmic reticulum membrane	22	0.0000312	-0.5127387	0.0002190
1376	Translation initiation complex formation	57	0.0000000	-0.5104645	0.0000000
644	Lysosphingolipid and LPA receptors	12	0.0024056	-0.5059280	0.0092214
1462	rRNA modification in the nucleus and cytosol	60	0.0000000	-0.5021477	0.0000000
517	HIV elongation arrest and recovery	32	0.0000009	-0.5012440	0.0000096
848	Pausing and recovery of HIV elongation	32	0.0000009	-0.5012440	0.0000096

mitch_barplot(mres1)

mitch_barplot2(mres1)

mitch_plots(mres1,outfile="mitch1.pdf")

## png 
##   2

m1top <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
m1up <- subset(m1top,s.dist>0)$set
m1dn <- subset(m1top,s.dist<0)$set
#mitch_report(mres1,outfile="mitch1.html",overwrite=TRUE)
write.table(mres1$enrichment_result,file="mitch1.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m1top_up <- head(subset(m1top,s.dist>0),10)[,"s.dist"]
names(m1top_up) <- head(subset(m1top,s.dist>0),10)[,"set"]
m1top_dn <- head(subset(m1top,s.dist<0),10)[,"s.dist"]
names(m1top_dn) <- head(subset(m1top,s.dist<0),10)[,"set"]
m1top_updn <- c(m1top_up,m1top_dn)
m1top_updn <- m1top_updn[order(m1top_updn)]

pdf("f2b.pdf")
par(mar=c(5,25,3,1))
barplot(m1top_updn,horiz=TRUE,las=1,col="darkgray",xlab="Enrichment score",cex.names=0.8,xlim=c(-1,1))
grid()
dev.off()

## png 
##   2

par(mar=c(5.1, 4.1, 4.1, 2.1))

# Reduction of cytosolic Ca++ levels
gs <- genesets[[which(names(genesets) == "Reduction of cytosolic Ca++ levels")]]
dge1 <- dge1[which(!is.na(dge1$pvalue )),]
dge1$gene <- sapply(strsplit(rownames(dge1)," "),"[[",2)
dge1gs <- dge1[which(dge1$gene %in% gs),]
# volcano
plot(dge1$log2FoldChange,-log10(dge1$pvalue+1e-312),pch=19,col="darkgray")
points(dge1gs$log2FoldChange,-log10(dge1gs$pvalue+1e-312),pch=19,col="red")

# rug
m1rank <- mres1$ranked_profile
rug1 <- m1rank[which(rownames(m1rank) %in% gs),]
names(rug1) <- rownames(m1rank)[which(rownames(m1rank) %in% gs)]
MAX=max(m1rank)
MIN=min(m1rank)
par(mfrow=c(2,1))
bs <- beeswarm(rug1,horizontal=TRUE,cex=5,
  xlim=c(MIN,MAX),pch=19,col="darkgray",
  xlab="gene rank")
text(bs[,2:1],labels=names(rug1))
mtext("Reduction of cytosolic Ca++ levels")
par(mfrow=c(1,1))

#geneset volcano
sig1 <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
plot(mres1$enrichment_result$s.dist,-log10(mres1$enrichment_result$pANOVA),
  col="darkgray",pch=19,xlab="s distance",ylab="-log10 ANOVA p-value")
mtext("Gene set enrichment")
points(sig1$s.dist,-log10(sig1$pANOVA), col="red", pch=19 )
which(sig1$set=="Reduction of cytosolic Ca++ levels")

## [1] 70

sig1b <- sig1[which(sig1$set=="Reduction of cytosolic Ca++ levels"),]
points(sig1b$s.dist,-log10(sig1b$pANOVA),cex=2,lwd=2)
text(sig1b$s.dist,(-log10(sig1b$pANOVA)-2),labels="Reduction of cytosolic Ca++ levels")

m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 27340

## Note: no. genes in output = 27278

## Note: estimated proportion of input genes in output = 0.998

mres2 <- mitch_calc(m2, genesets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres2$enrichment_result,20) %>% 
  kbl(caption = "Top gene pathway differences in contrast 2") %>%
  kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 2
	set	setSize	pANOVA	s.dist	p.adjustANOVA
1427	Unwinding of DNA	12	0.0000101	0.7359532	0.0000679
640	Leading Strand Synthesis	14	0.0000029	0.7219567	0.0000217
893	Polymerase switching	14	0.0000029	0.7219567	0.0000217
1102	Response to metal ions	10	0.0000778	0.7214390	0.0004521
458	G1/S-Specific Transcription	29	0.0000000	0.6885884	0.0000000
216	Condensation of Prometaphase Chromosomes	11	0.0000792	0.6871343	0.0004567
1345	Tetrahydrobiopterin (BH4) synthesis, recycling, salvage and regulation	10	0.0002313	0.6723265	0.0011212
423	Formation of ATP by chemiosmotic coupling	18	0.0000009	0.6681544	0.0000075
1130	SLBP independent Processing of Histone Pre-mRNAs	10	0.0003221	0.6567625	0.0015025
1129	SLBP Dependent Processing of Replication-Dependent Histone Pre-mRNAs	11	0.0001693	0.6547542	0.0008750
260	DNA strand elongation	32	0.0000000	0.6539698	0.0000000
1397	Translesion synthesis by REV1	16	0.0000094	0.6395441	0.0000640
1396	Translesion synthesis by POLK	17	0.0000057	0.6353722	0.0000407
422	Folding of actin by CCT/TriC	10	0.0005533	0.6306000	0.0024504
1118	SCF(Skp2)-mediated degradation of p27/p21	60	0.0000000	0.6273814	0.0000000
1351	The role of GTSE1 in G2/M progression after G2 checkpoint	58	0.0000000	0.6165776	0.0000000
686	Metabolism of cofactors	19	0.0000036	0.6139295	0.0000265
1050	Regulation of PTEN mRNA translation	12	0.0002439	-0.6115125	0.0011746
1424	Ubiquitin-dependent degradation of Cyclin D	50	0.0000000	0.6083723	0.0000000
636	Lagging Strand Synthesis	20	0.0000029	0.6041896	0.0000217

mitch_barplot(mres2)

mitch_barplot2(mres2)

mitch_plots(mres2,outfile="mitch2.pdf")

## png 
##   2

m2top <- subset(mres2$enrichment_result,p.adjustANOVA<0.05)
m2up <- subset(m2top,s.dist>0)$set
m2dn <- subset(m2top,s.dist<0)$set
#mitch_report(mres2,outfile="mitch2.html",overwrite=TRUE)
write.table(mres2$enrichment_result,file="mitch2.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m3 <- mitch_import(dge3, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 26801

## Note: no. genes in output = 26741

## Note: estimated proportion of input genes in output = 0.998

mres3 <- mitch_calc(m3, genesets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres3$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 3") %>%
  kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 3
	set	setSize	pANOVA	s.dist	p.adjustANOVA
137	CLEC7A (Dectin-1) induces NFAT activation	11	1.70e-06	-0.8325273	0.0000081
375	Establishment of Sister Chromatid Cohesion	11	2.30e-06	-0.8231201	0.0000102
207	Cohesin Loading onto Chromatin	10	7.30e-06	-0.8190042	0.0000294
360	ERKs are inactivated	13	4.00e-07	-0.8096839	0.0000023
1108	S33 mutants of beta-catenin aren’t phosphorylated	15	1.00e-07	-0.8083414	0.0000004
1109	S37 mutants of beta-catenin aren’t phosphorylated	15	1.00e-07	-0.8083414	0.0000004
1110	S45 mutants of beta-catenin aren’t phosphorylated	15	1.00e-07	-0.8083414	0.0000004
1180	Signaling by CTNNB1 phospho-site mutants	15	1.00e-07	-0.8083414	0.0000004
1207	Signaling by GSK3beta mutants	15	1.00e-07	-0.8083414	0.0000004
1298	T41 mutants of beta-catenin aren’t phosphorylated	15	1.00e-07	-0.8083414	0.0000004
506	Golgi Cisternae Pericentriolar Stack Reorganization	14	2.00e-07	-0.8059533	0.0000011
11	APC truncation mutants have impaired AXIN binding	14	2.00e-07	-0.7983954	0.0000013
24	AXIN missense mutants destabilize the destruction complex	14	2.00e-07	-0.7983954	0.0000013
1173	Signaling by AMER1 mutants	14	2.00e-07	-0.7983954	0.0000013
1174	Signaling by APC mutants	14	2.00e-07	-0.7983954	0.0000013
1175	Signaling by AXIN mutants	14	2.00e-07	-0.7983954	0.0000013
1414	Truncations of AMER1 destabilize the destruction complex	14	2.00e-07	-0.7983954	0.0000013
1209	Signaling by Hippo	20	0.00e+00	-0.7935032	0.0000000
664	MET activates RAP1 and RAC1	11	5.40e-06	-0.7917695	0.0000227
659	MAPK3 (ERK1) activation	10	2.78e-05	-0.7651341	0.0001004

mitch_barplot(mres3)

mitch_barplot2(mres3)

mitch_plots(mres3,outfile="mitch3.pdf")

## png 
##   2

m3top <- subset(mres3$enrichment_result,p.adjustANOVA<0.05)
m3up <- subset(m3top,s.dist>0)$set
m3dn <- subset(m3top,s.dist<0)$set
#mitch_report(mres3,outfile="mitch3.html",overwrite=TRUE)
write.table(mres3$enrichment_result,file="mitch3.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m4 <- mitch_import(dge4, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 26222

## Note: no. genes in output = 26163

## Note: estimated proportion of input genes in output = 0.998

mres4 <- mitch_calc(m4, genesets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres4$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 4") %>%
  kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 4
	set	setSize	pANOVA	s.dist	p.adjustANOVA
134	CLEC7A (Dectin-1) induces NFAT activation	11	1.0e-06	-0.8526516	6.10e-06
203	Cohesin Loading onto Chromatin	10	3.0e-06	-0.8523305	1.63e-05
630	Laminin interactions	23	0.0e+00	-0.7933735	0.00e+00
1103	S33 mutants of beta-catenin aren’t phosphorylated	15	2.0e-07	-0.7825965	1.20e-06
1104	S37 mutants of beta-catenin aren’t phosphorylated	15	2.0e-07	-0.7825965	1.20e-06
1105	S45 mutants of beta-catenin aren’t phosphorylated	15	2.0e-07	-0.7825965	1.20e-06
1175	Signaling by CTNNB1 phospho-site mutants	15	2.0e-07	-0.7825965	1.20e-06
1202	Signaling by GSK3beta mutants	15	2.0e-07	-0.7825965	1.20e-06
1293	T41 mutants of beta-catenin aren’t phosphorylated	15	2.0e-07	-0.7825965	1.20e-06
1204	Signaling by Hippo	20	0.0e+00	-0.7785564	0.00e+00
1268	Syndecan interactions	20	0.0e+00	-0.7770952	0.00e+00
658	MET activates RAP1 and RAC1	11	8.9e-06	-0.7734156	4.31e-05
11	APC truncation mutants have impaired AXIN binding	14	6.0e-07	-0.7679398	4.20e-06
24	AXIN missense mutants destabilize the destruction complex	14	6.0e-07	-0.7679398	4.20e-06
1168	Signaling by AMER1 mutants	14	6.0e-07	-0.7679398	4.20e-06
1169	Signaling by APC mutants	14	6.0e-07	-0.7679398	4.20e-06
1170	Signaling by AXIN mutants	14	6.0e-07	-0.7679398	4.20e-06
1410	Truncations of AMER1 destabilize the destruction complex	14	6.0e-07	-0.7679398	4.20e-06
357	ERKs are inactivated	13	1.7e-06	-0.7668245	9.70e-06
657	MET activates PTK2 signaling	18	0.0e+00	-0.7616753	2.00e-07

mitch_barplot(mres4)

mitch_barplot2(mres4)

mitch_plots(mres4,outfile="mitch4.pdf")

## png 
##   2

m4top <- subset(mres4$enrichment_result,p.adjustANOVA<0.05)
m4up <- subset(m4top,s.dist>0)$set
m4dn <- subset(m4top,s.dist<0)$set
#mitch_report(mres4,outfile="mitch4.html",overwrite=TRUE)
write.table(mres4$enrichment_result,file="mitch4.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m5 <- mitch_import(dge5, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 26081

## Note: no. genes in output = 26025

## Note: estimated proportion of input genes in output = 0.998

mres5 <- mitch_calc(m5, genesets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres5$enrichment_result,20) %>%
  kbl(caption = "Top gene pathway differences in contrast 5") %>%
  kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 5
	set	setSize	pANOVA	s.dist	p.adjustANOVA
136	CLEC7A (Dectin-1) induces NFAT activation	11	1.80e-06	-0.8306926	0.0000126
206	Cohesin Loading onto Chromatin	10	1.05e-05	-0.8044820	0.0000609
660	MET activates RAP1 and RAC1	11	6.10e-06	-0.7873173	0.0000376
11	APC truncation mutants have impaired AXIN binding	14	4.00e-07	-0.7808455	0.0000033
24	AXIN missense mutants destabilize the destruction complex	14	4.00e-07	-0.7808455	0.0000033
1171	Signaling by AMER1 mutants	14	4.00e-07	-0.7808455	0.0000033
1172	Signaling by APC mutants	14	4.00e-07	-0.7808455	0.0000033
1173	Signaling by AXIN mutants	14	4.00e-07	-0.7808455	0.0000033
1412	Truncations of AMER1 destabilize the destruction complex	14	4.00e-07	-0.7808455	0.0000033
1106	S33 mutants of beta-catenin aren’t phosphorylated	15	2.00e-07	-0.7803050	0.0000014
1107	S37 mutants of beta-catenin aren’t phosphorylated	15	2.00e-07	-0.7803050	0.0000014
1108	S45 mutants of beta-catenin aren’t phosphorylated	15	2.00e-07	-0.7803050	0.0000014
1178	Signaling by CTNNB1 phospho-site mutants	15	2.00e-07	-0.7803050	0.0000014
1205	Signaling by GSK3beta mutants	15	2.00e-07	-0.7803050	0.0000014
1296	T41 mutants of beta-catenin aren’t phosphorylated	15	2.00e-07	-0.7803050	0.0000014
1207	Signaling by Hippo	20	0.00e+00	-0.7490483	0.0000001
373	Establishment of Sister Chromatid Cohesion	11	1.81e-05	-0.7464442	0.0000973
113	Beta-catenin phosphorylation cascade	17	1.00e-07	-0.7403288	0.0000012
358	ERKs are inactivated	13	4.10e-06	-0.7373224	0.0000266
605	Interleukin-6 signaling	11	3.09e-05	-0.7253577	0.0001591

mitch_barplot(mres5)

mitch_barplot2(mres5)

mitch_plots(mres5,outfile="mitch5.pdf")

## png 
##   2

m5top <- subset(mres5$enrichment_result,p.adjustANOVA<0.05)
m5up <- subset(m5top,s.dist>0)$set
m5dn <- subset(m5top,s.dist<0)$set
#mitch_report(mres5,outfile="mitch5.html",overwrite=TRUE)
write.table(mres5$enrichment_result,file="mitch5.tsv",quote=FALSE,sep="\t",row.names=FALSE)

pdf("mitch_barplots.pdf")
mitch_barplot(mres1)
mitch_barplot2(mres1)
mitch_barplot(mres2)
mitch_barplot2(mres2)
mitch_barplot(mres3)
mitch_barplot2(mres3)
mitch_barplot(mres4)
mitch_barplot2(mres4)
mitch_barplot(mres5)
mitch_barplot2(mres5)
dev.off()

## png 
##   2

Make some ridge plots.

myres <- list(mres1,mres2,mres3,mres4) 
z <- lapply(myres, function(mres) {
  sets <- head(mres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))
 
 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")

 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Top 10 genesets",
    xlab="differential gene rank",
    rectCol="gray")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})

pdf("ridge1.pdf")
z <- lapply(myres, function(mres) {
  sets <- head(mres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))

 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")

 vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Top 10 genesets",
    xlab="differential gene rank",
    rectCol="gray")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})
dev.off()

## png 
##   2

Make some heatmaps

sets <- head(mres1$enrichment_result$set,10)

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
    cexCol=0.9, main=paste(set) )
})

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
    cexCol=0.9, main=paste(set) )
})

pdf("peheat1.pdf")
z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
    cexCol=0.9, main=paste(set) )
})

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
    cexCol=0.9, main=paste(set) )
})
dev.off()

## png 
##   2

Now an Euler diagram of differentially regulated gene sets from contrasts 1 and 4.

par(cex.main=0.5)

par(mar=c(2,2,2,2))

v0 <- list("KO up"=m1up,"KO dn"=m1dn, "drug up"=m4up, "drug dn"=m4dn)

plot(euler(v0),quantities = TRUE, edges = "gray", main="Reactome pathways")

pdf("gseuler1.pdf")
plot(euler(v0),quantities = TRUE, edges = "gray", main="Reactome pathways")
dev.off()

## png 
##   2

Multi contrast pathway analysis with mitch

I’m going to focus on the two contrasts which I think are the most important WRT mito disease; #1 and #4. The effect of the KO, and the effect of the drug when the gene is knocked out.

mm <- list("KO"=dge1,"drug"=dge4)

m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)

## Note: Mean no. genes in input = 25735

## Note: no. genes in output = 24468

## Note: estimated proportion of input genes in output = 0.951

mres <- mitch_calc(m, genesets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be 
##             statistically significant.

head(mres$enrichment_result,20) %>% 
kbl(caption = "Top gene pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 1 and 4
	set	setSize	pMANOVA	s.KO	s.drug	p.KO	p.drug	s.dist	SD	p.adjustMANOVA
133	CLEC7A (Dectin-1) induces NFAT activation	11	3.40e-06	-0.3079877	-0.8424396	0.0769234	0.0000013	0.8969732	0.3779146	1.31e-05
202	Cohesin Loading onto Chromatin	10	1.94e-05	-0.1962875	-0.8420967	0.2824423	0.0000040	0.8646708	0.4566560	6.19e-05
650	MET activates RAP1 and RAC1	11	1.27e-05	-0.3986700	-0.7577121	0.0220401	0.0000135	0.8561923	0.2538811	4.28e-05
1256	Syndecan interactions	19	0.00e+00	0.2156734	-0.8161027	0.1036039	0.0000000	0.8441200	0.7295759	0.00e+00
844	Peptide chain elongation	87	0.00e+00	-0.4225294	0.7146949	0.0000000	0.0000000	0.8302529	0.8041390	0.00e+00
1415	Viral mRNA Translation	87	0.00e+00	-0.4205258	0.7155030	0.0000000	0.0000000	0.8299316	0.8032936	0.00e+00
11	APC truncation mutants have impaired AXIN binding	14	1.30e-06	-0.3477374	-0.7518548	0.0242621	0.0000011	0.8283761	0.2857541	5.70e-06
24	AXIN missense mutants destabilize the destruction complex	14	1.30e-06	-0.3477374	-0.7518548	0.0242621	0.0000011	0.8283761	0.2857541	5.70e-06
1156	Signaling by AMER1 mutants	14	1.30e-06	-0.3477374	-0.7518548	0.0242621	0.0000011	0.8283761	0.2857541	5.70e-06
1157	Signaling by APC mutants	14	1.30e-06	-0.3477374	-0.7518548	0.0242621	0.0000011	0.8283761	0.2857541	5.70e-06
1158	Signaling by AXIN mutants	14	1.30e-06	-0.3477374	-0.7518548	0.0242621	0.0000011	0.8283761	0.2857541	5.70e-06
1396	Truncations of AMER1 destabilize the destruction complex	14	1.30e-06	-0.3477374	-0.7518548	0.0242621	0.0000011	0.8283761	0.2857541	5.70e-06
622	Laminin interactions	23	0.00e+00	0.2684980	-0.7821160	0.0257964	0.0000000	0.8269200	0.7428963	0.00e+00
351	ERKs are inactivated	13	4.30e-06	-0.3333627	-0.7506629	0.0374081	0.0000028	0.8213559	0.2950758	1.61e-05
1001	Receptor Mediated Mitophagy	11	2.63e-05	-0.7759630	-0.2626019	0.0000083	0.1315157	0.8191937	0.3630011	8.10e-05
1091	S33 mutants of beta-catenin aren’t phosphorylated	15	8.00e-07	-0.2604534	-0.7675268	0.0807061	0.0000003	0.8105143	0.3585551	3.50e-06
1092	S37 mutants of beta-catenin aren’t phosphorylated	15	8.00e-07	-0.2604534	-0.7675268	0.0807061	0.0000003	0.8105143	0.3585551	3.50e-06
1093	S45 mutants of beta-catenin aren’t phosphorylated	15	8.00e-07	-0.2604534	-0.7675268	0.0807061	0.0000003	0.8105143	0.3585551	3.50e-06
1163	Signaling by CTNNB1 phospho-site mutants	15	8.00e-07	-0.2604534	-0.7675268	0.0807061	0.0000003	0.8105143	0.3585551	3.50e-06
1190	Signaling by GSK3beta mutants	15	8.00e-07	-0.2604534	-0.7675268	0.0807061	0.0000003	0.8105143	0.3585551	3.50e-06

#mitch_report(mres,outfile="mitchmulti_effect.html",overwrite=TRUE)
mitch_plots(mres, outfile="mitchmulti_effect.pdf")

## png 
##   2

write.table(mres$enrichment_result,file="mitch_multi_effect.tsv",quote=FALSE,sep="\t",row.names=FALSE)
n=40
mx <- as.matrix(mres$enrichment_result[1:n,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set[1:n]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,15), cexRow=0.7, 
    cexCol=0.9, main=paste("Top ranked Reactomes") )

Mitch can also be used to search for discordant pathways. Gene sets are prioritised by standard deviation.

mm <- list("KO"=dge1,"drug"=dge4)

m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)

## Note: Mean no. genes in input = 25735

## Note: no. genes in output = 24468

## Note: estimated proportion of input genes in output = 0.951

mres <- mitch_calc(m, genesets, priority="SD",resrows=52)

## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.

head(mres$enrichment_result,20) %>% kbl(caption = "Top gene pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)

Top gene pathway differences in contrast 1 and 4
	set	setSize	pMANOVA	s.KO	s.drug	p.KO	p.drug	s.dist	SD	p.adjustMANOVA
844	Peptide chain elongation	87	0.00e+00	-0.4225294	0.7146949	0.0000000	0.0000000	0.8302529	0.8041390	0.00e+00
1415	Viral mRNA Translation	87	0.00e+00	-0.4205258	0.7155030	0.0000000	0.0000000	0.8299316	0.8032936	0.00e+00
419	Formation of a pool of free 40S subunits	99	0.00e+00	-0.4445054	0.6516679	0.0000000	0.0000000	0.7888321	0.7751116	0.00e+00
371	Eukaryotic Translation Termination	91	0.00e+00	-0.4021351	0.6843304	0.0000000	0.0000000	0.7937385	0.7682471	0.00e+00
369	Eukaryotic Translation Elongation	92	0.00e+00	-0.4121062	0.6735777	0.0000000	0.0000000	0.7896445	0.7676944	0.00e+00
268	Defective EXT1 causes exostoses 1, TRPS2 and CHDS	13	1.40e-06	0.3576333	-0.7140808	0.0255604	0.0000082	0.7986320	0.7578163	5.90e-06
269	Defective EXT2 causes exostoses 2	13	1.40e-06	0.3576333	-0.7140808	0.0255604	0.0000082	0.7986320	0.7578163	5.90e-06
622	Laminin interactions	23	0.00e+00	0.2684980	-0.7821160	0.0257964	0.0000000	0.8269200	0.7428963	0.00e+00
1080	Response of EIF2AK4 (GCN2) to amino acid deficiency	99	0.00e+00	-0.4230715	0.6252044	0.0000000	0.0000000	0.7548973	0.7412430	0.00e+00
777	Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)	93	0.00e+00	-0.4239127	0.6173949	0.0000000	0.0000000	0.7489181	0.7363156	0.00e+00
1134	Selenocysteine synthesis	91	0.00e+00	-0.3553174	0.6805934	0.0000000	0.0000000	0.7677615	0.7324995	0.00e+00
425	Formation of the ternary complex, and subsequently, the 43S complex	50	0.00e+00	-0.5008404	0.5323941	0.0000000	0.0000000	0.7309477	0.7306071	0.00e+00
1256	Syndecan interactions	19	0.00e+00	0.2156734	-0.8161027	0.1036039	0.0000000	0.8441200	0.7295759	0.00e+00
713	Mucopolysaccharidoses	11	1.36e-05	0.7558981	-0.2580819	0.0000141	0.1382892	0.7987416	0.7169921	4.58e-05
265	Defective B4GALT7 causes EDS, progeroid type	18	1.00e-07	0.3708521	-0.6385867	0.0064446	0.0000027	0.7384607	0.7137810	6.00e-07
617	L13a-mediated translational silencing of Ceruloplasmin expression	109	0.00e+00	-0.4660904	0.5255157	0.0000000	0.0000000	0.7024293	0.7011714	0.00e+00
460	GTP hydrolysis and joining of the 60S ribosomal subunit	110	0.00e+00	-0.4419276	0.5303070	0.0000000	0.0000000	0.6903083	0.6874737	0.00e+00
264	Defective B3GAT3 causes JDSSDHD	18	9.00e-07	0.3764054	-0.5746058	0.0056912	0.0000243	0.6869154	0.6724664	4.00e-06
806	Other semaphorin interactions	15	8.70e-06	0.3594351	-0.5881460	0.0159331	0.0000799	0.6892817	0.6700410	3.06e-05
262	Defective B3GALT6 causes EDSP2 and SEMDJL1	18	1.30e-06	0.3746285	-0.5643717	0.0059232	0.0000338	0.6773935	0.6639734	5.60e-06

#mitch_report(mres,outfile="mitchmulti_discord.html",overwrite=TRUE)
mitch_plots(mres, outfile="mitchmulti_discord.pdf")

## png 
##   2

write.table(mres$enrichment_result,file="mitch_multi_discord.tsv",quote=FALSE,sep="\t",row.names=FALSE)

n=50

mx <- as.matrix(mres$enrichment_result[1:n,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set[1:n]

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,25), cexRow=0.6,
    cexCol=0.9, main=paste("Top ranked Reactomes") )

pdf("mitchmulti_discord_heat.pdf")
heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,25), cexRow=0.6,
    cexCol=0.9, main=paste("Top ranked Reactomes") )
dev.off()

## png 
##   2

Custom gene sets

custom_set_names <- readLines("custom_pathway_list.txt")
custom_sets <- genesets[which(names(genesets) %in% custom_set_names)]

m1 <- mitch_import(dge1, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 25248

## Note: no. genes in output = 25194

## Note: estimated proportion of input genes in output = 0.998

mres1 <- mitch_calc(m1, custom_sets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres1$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 1") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 1
	set	setSize	pANOVA	s.dist	p.adjustANOVA
19	Receptor Mediated Mitophagy	11	0.0000080	-0.7772523	0.0000282
12	Mucopolysaccharidoses	11	0.0000117	0.7628992	0.0000352
11	Mitophagy	28	0.0000011	-0.5309970	0.0000069
14	Nucleotide biosynthesis	12	0.0038611	-0.4816469	0.0059087
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.0068384	-0.4331868	0.0086379
20	Reduction of cytosolic Ca++ levels	12	0.0099164	0.4298838	0.0113330
24	Stabilization of p53	53	0.0000001	-0.4224979	0.0000008
3	Diseases of carbohydrate metabolism	30	0.0021372	0.3238356	0.0036637
1	Cellular response to starvation	150	0.0000000	-0.3190752	0.0000000
16	PTEN Regulation	140	0.0000000	-0.2798909	0.0000001
21	Resolution of D-Loop Structures	33	0.0141289	0.2467925	0.0154133
4	Gene Silencing by RNA	97	0.0000403	-0.2411329	0.0000974
18	Pyruvate metabolism and Citric Acid (TCA) cycle	52	0.0039391	-0.2310553	0.0059087
23	Signaling by the B Cell Receptor (BCR)	106	0.0000406	-0.2306049	0.0000974
7	Mitochondrial biogenesis	91	0.0003021	-0.2190759	0.0006592
9	Mitochondrial protein import	64	0.0059436	-0.1987751	0.0083909
15	Oxidative Stress Induced Senescence	82	0.0092365	-0.1662401	0.0110838
22	Respiratory electron transport	103	0.0064821	-0.1551823	0.0086379
17	Phospholipid metabolism	186	0.0007611	-0.1430381	0.0014051
10	Mitochondrial translation	96	0.0157290	-0.1425844	0.0164129

m1top <- subset(mres1$enrichment_result,p.adjustANOVA<0.05)
m1up <- subset(m1top,s.dist>0)$set
m1dn <- subset(m1top,s.dist<0)$set
#mitch_report(mres1,outfile="mitch_mito1.html",overwrite=TRUE)
mitch_plots(mres1, outfile="mitch_mito1.pdf")

## png 
##   2

write.table(mres1$enrichment_result,file="mitch_mito1.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 27340

## Note: no. genes in output = 27278

## Note: estimated proportion of input genes in output = 0.998

mres2 <- mitch_calc(m2, custom_sets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres2$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 2") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 2
	set	setSize	pANOVA	s.dist	p.adjustANOVA
10	Mitochondrial translation	96	0.0000000	0.5392072	0.0000000
24	Stabilization of p53	53	0.0000000	0.5020005	0.0000000
9	Mitochondrial protein import	64	0.0000001	0.3891552	0.0000002
3	Diseases of carbohydrate metabolism	31	0.0002686	0.3780363	0.0005860
20	Reduction of cytosolic Ca++ levels	12	0.0289978	0.3640003	0.0463964
22	Respiratory electron transport	103	0.0000000	0.3397608	0.0000000
23	Signaling by the B Cell Receptor (BCR)	106	0.0000000	0.3265140	0.0000000
2	Complex I biogenesis	57	0.0000465	0.3117478	0.0001239
11	Mitophagy	28	0.0064809	0.2971848	0.0111100
12	Mucopolysaccharidoses	11	0.1441237	0.2543167	0.1921650
18	Pyruvate metabolism and Citric Acid (TCA) cycle	53	0.0023525	0.2414478	0.0047049
21	Resolution of D-Loop Structures	33	0.0359199	0.2109689	0.0538799
6	Metabolism of amino acids and derivatives	344	0.0000000	0.1954042	0.0000000
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.2345154	0.1904132	0.2962300
1	Cellular response to starvation	150	0.0002005	0.1757549	0.0004812
5	Metabolism	1909	0.0000000	0.1711056	0.0000000
16	PTEN Regulation	140	0.0036710	0.1421165	0.0067773
13	Nervous system development	527	0.0000001	0.1334218	0.0000004
7	Mitochondrial biogenesis	91	0.1013510	0.0993247	0.1430838
14	Nucleotide biosynthesis	12	0.6928407	0.0658512	0.7918179

m2top <- subset(mres2$enrichment_result,p.adjustANOVA<0.05)
m2up <- subset(m2top,s.dist>0)$set
m2dn <- subset(m2top,s.dist<0)$set
#mitch_report(mres2,outfile="mitch_mito2.html",overwrite=TRUE)
mitch_plots(mres2, outfile="mitch_mito2.pdf")

## png 
##   2

write.table(mres2$enrichment_result,file="mitch_mito2.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m3 <- mitch_import(dge3, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 26801

## Note: no. genes in output = 26741

## Note: estimated proportion of input genes in output = 0.998

mres3 <- mitch_calc(m3, custom_sets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres3$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 3") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 3
	set	setSize	pANOVA	s.dist	p.adjustANOVA
19	Receptor Mediated Mitophagy	11	0.0030258	-0.5162466	0.0051871
2	Complex I biogenesis	57	0.0000000	0.5011361	0.0000000
14	Nucleotide biosynthesis	12	0.0028637	-0.4970943	0.0051871
11	Mitophagy	28	0.0000387	-0.4491607	0.0001033
20	Reduction of cytosolic Ca++ levels	12	0.0200273	-0.3877249	0.0320436
18	Pyruvate metabolism and Citric Acid (TCA) cycle	53	0.0000029	-0.3714948	0.0000086
23	Signaling by the B Cell Receptor (BCR)	105	0.0000000	-0.3275703	0.0000000
16	PTEN Regulation	140	0.0000000	-0.2980253	0.0000000
22	Respiratory electron transport	103	0.0000008	0.2808048	0.0000029
17	Phospholipid metabolism	191	0.0000000	-0.2642258	0.0000000
7	Mitochondrial biogenesis	91	0.0002971	-0.2193167	0.0007131
13	Nervous system development	529	0.0000000	-0.2054227	0.0000000
4	Gene Silencing by RNA	98	0.0007249	-0.1974754	0.0015816
21	Resolution of D-Loop Structures	33	0.0625124	0.1873210	0.0789631
24	Stabilization of p53	53	0.0360459	-0.1664221	0.0515692
1	Cellular response to starvation	150	0.0017126	0.1482431	0.0034251
15	Oxidative Stress Induced Senescence	83	0.0365282	-0.1327186	0.0515692
5	Metabolism	1899	0.0000000	-0.1198167	0.0000000
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.4675443	-0.1163631	0.5100483
10	Mitochondrial translation	96	0.0966246	0.0980797	0.1159496

m3top <- subset(mres3$enrichment_result,p.adjustANOVA<0.05)
m3up <- subset(m3top,s.dist>0)$set
m3dn <- subset(m3top,s.dist<0)$set
#mitch_report(mres3,outfile="mitch_mito3.html",overwrite=TRUE)
mitch_plots(mres3, outfile="mitch_mito3.pdf")

## png 
##   2

write.table(mres3$enrichment_result,file="mitch_mito3.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m4 <- mitch_import(dge4, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 26222

## Note: no. genes in output = 26163

## Note: estimated proportion of input genes in output = 0.998

mres4 <- mitch_calc(m4, custom_sets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres4$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 4") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 4
	set	setSize	pANOVA	s.dist	p.adjustANOVA
2	Complex I biogenesis	57	0.0000000	0.7304162	0.0000000
22	Respiratory electron transport	103	0.0000000	0.5865948	0.0000000
20	Reduction of cytosolic Ca++ levels	12	0.0020344	-0.5142888	0.0048827
10	Mitochondrial translation	96	0.0000000	0.5132095	0.0000000
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.0042094	0.4583909	0.0091842
1	Cellular response to starvation	150	0.0000000	0.4000246	0.0000000
14	Nucleotide biosynthesis	12	0.0423533	-0.3384192	0.0610224
9	Mitochondrial protein import	64	0.0000109	0.3177325	0.0000328
24	Stabilization of p53	53	0.0003285	0.2851073	0.0008761
17	Phospholipid metabolism	192	0.0000000	-0.2809981	0.0000000
19	Receptor Mediated Mitophagy	11	0.1130800	-0.2758906	0.1456370
12	Mucopolysaccharidoses	11	0.1195967	-0.2710100	0.1456370
21	Resolution of D-Loop Structures	33	0.0161355	0.2419464	0.0270932
6	Metabolism of amino acids and derivatives	341	0.0000000	0.2246703	0.0000000
18	Pyruvate metabolism and Citric Acid (TCA) cycle	52	0.0153522	-0.1942638	0.0270932
13	Nervous system development	524	0.0000000	-0.1596025	0.0000000
16	PTEN Regulation	140	0.0169332	-0.1168422	0.0270932
23	Signaling by the B Cell Receptor (BCR)	106	0.0432242	-0.1135935	0.0610224
7	Mitochondrial biogenesis	91	0.1213642	-0.0939200	0.1456370
4	Gene Silencing by RNA	97	0.2056272	-0.0743359	0.2350026

m4top <- subset(mres4$enrichment_result,p.adjustANOVA<0.05)
m4up <- subset(m4top,s.dist>0)$set
m4dn <- subset(m4top,s.dist<0)$set
#mitch_report(mres4,outfile="mitch_mito4.html",overwrite=TRUE)
mitch_plots(mres4, outfile="mitch_mito4.pdf")

## png 
##   2

write.table(mres4$enrichment_result,file="mitch_mito4.tsv",quote=FALSE,sep="\t",row.names=FALSE)

m5 <- mitch_import(dge5, DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 26081

## Note: no. genes in output = 26025

## Note: estimated proportion of input genes in output = 0.998

mres5 <- mitch_calc(m5, custom_sets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

head(mres5$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 5") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 5
	set	setSize	pANOVA	s.dist	p.adjustANOVA
2	Complex I biogenesis	57	0.0000000	0.5977708	0.0000000
19	Receptor Mediated Mitophagy	11	0.0013394	-0.5584266	0.0024786
14	Nucleotide biosynthesis	12	0.0032789	-0.4901460	0.0052463
22	Respiratory electron transport	103	0.0000000	0.4299096	0.0000000
21	Resolution of D-Loop Structures	33	0.0004749	0.3514112	0.0012664
10	Mitochondrial translation	96	0.0000000	0.3425773	0.0000000
11	Mitophagy	28	0.0062915	-0.2982597	0.0088821
20	Reduction of cytosolic Ca++ levels	12	0.0797813	-0.2920655	0.1007764
17	Phospholipid metabolism	190	0.0000000	-0.2807057	0.0000000
18	Pyruvate metabolism and Citric Acid (TCA) cycle	52	0.0013426	-0.2569873	0.0024786
1	Cellular response to starvation	150	0.0000001	0.2494691	0.0000006
16	PTEN Regulation	140	0.0000155	-0.2114126	0.0000465
7	Mitochondrial biogenesis	91	0.0008071	-0.2031149	0.0019371
4	Gene Silencing by RNA	97	0.0008931	-0.1951037	0.0019486
12	Mucopolysaccharidoses	11	0.2998946	0.1805042	0.3271577
23	Signaling by the B Cell Receptor (BCR)	105	0.0025170	-0.1705680	0.0043149
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.3264363	0.1571819	0.3406292
6	Metabolism of amino acids and derivatives	342	0.0000006	0.1566651	0.0000021
3	Diseases of carbohydrate metabolism	29	0.1626241	0.1497870	0.1948517
13	Nervous system development	527	0.0000002	-0.1314342	0.0000009

m5top <- subset(mres5$enrichment_result,p.adjustANOVA<0.05)
m5up <- subset(m5top,s.dist>0)$set
m5dn <- subset(m5top,s.dist<0)$set
#mitch_report(mres5,outfile="mitch_mito5.html",overwrite=TRUE)
mitch_plots(mres5, outfile="mitch_mito5.pdf")

## png 
##   2

write.table(mres5$enrichment_result,file="mitch_mito5.tsv",quote=FALSE,sep="\t",row.names=FALSE)

Make some ridge plots.

myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
  sets <- head(mres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))
  
  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Top 10 genesets",
    xlab="differential gene rank",
    rectCol="gray")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})

#Fig2c
myres <- mres1
  sets <- head(myres$enrichment_result,10)
  sets <- sets[order(sets$s.dist),"set"]
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- myres$ranked_profile[which(rownames(myres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(myres$ranked_profile[,1])

pdf("fig2C.pdf")
  par(mar=c(5,20,3,2))
  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Top 10 genesets",
    xlab="differential gene rank")
dev.off()

## png 
##   2

# now custom gene sets part 1
custom_set_names <- readLines("custom_pathway_list.txt")
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
  sets <- custom_set_names
  mres$enrichment_result <- mres$enrichment_result[which(mres$enrichment_result$set %in% sets),]
  mres$enrichment_result <- mres$enrichment_result[order(mres$enrichment_result$s.dist),]
  sets <- mres$enrichment_result$set
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Gene sets of interest",
    xlab="differential gene rank")


  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Gene sets of interest",
    rectCol="gray", xlab="differential gene rank")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=0.6,corral = "gutter")
})

# now custom gene sets part 2
custom_set_names2 <- readLines("custom_pathway_list2.txt")
myres <- list(mres1,mres2,mres3,mres4)
z <- lapply(myres, function(mres) {
  sets <- custom_set_names2[which( custom_set_names2 %in% names(custom_sets))]
  mres$enrichment_result <- mres$enrichment_result[which(mres$enrichment_result$set %in% sets),]
  mres$enrichment_result <- mres$enrichment_result[order(mres$enrichment_result$s.dist),]
  sets <- mres$enrichment_result$set
  ridgedata <- lapply( sets , function(myset) {
    genes <- genesets[[which(names(genesets) == myset)]]
    generanks <- mres$ranked_profile[which(rownames(mres$ranked_profile) %in% genes),]
    return(generanks)
  })
  names(ridgedata) <- sets
  myrange <- range(mres$ranked_profile[,1])
  par(mar=c(5,20,3,2))

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,main="Gene sets of interest",
    xlab="differential gene rank")

  vioplot(ridgedata,horizontal=TRUE,las=1,
    ylim=myrange,col="white",main="Gene sets of interest",
    rectCol="gray", xlab="differential gene rank")
  beeswarm(ridgedata,add=TRUE,horizontal = TRUE, pch=19,cex=1,corral = "gutter")
})

Now make some heatmaps

sets <- names(custom_sets)

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.9,
    cexCol=0.9, main=paste(set) )
})

z <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  hm[is.na(hm)] <- 0
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(hm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
    cexCol=0.9, main=paste(set) )
})

refgenes <- sapply(strsplit(rownames(rpm1)," "),"[[",2)
gshm <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  hm <- as.matrix(rpm1[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  return(colMeans(hm))
})

gshm <- do.call(rbind,gshm)
rownames(gshm) <- sets

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(gshm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
  cexCol=0.9, main="Gene set heatmap" )

refgenes <- sapply(strsplit(rownames(rpm)," "),"[[",2)
gshm <- lapply(sets, function(set){
  genes <- genesets[[which(names(genesets) == set)]]
  hm <- as.matrix(rpm[which(refgenes %in% genes),])
  hm <- hm/rowMeans(hm)
  hm[is.na(hm)] <- 0
  return(colMeans(hm))
})

gshm <- do.call(rbind,gshm)
rownames(gshm) <- sets

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(gshm,trace="none",col=colfunc(25),scale="row", margins = c(6,15), cexRow=0.7,
  cexCol=0.9, main="Gene set heatmap" )

Mitch multi contrasts with custom gene sets

mm <- list("KO"=dge1,"drug"=dge4)
m <- mitch_import(mm, DEtype="deseq2",geneTable=gt)

## Note: Mean no. genes in input = 25735

## Note: no. genes in output = 24468

## Note: estimated proportion of input genes in output = 0.951

mres <- mitch_calc(m, custom_sets, priority="effect",resrows=52)

## Note: Enrichments with large effect sizes may not be 
##             statistically significant.

head(mres$enrichment_result,20) %>%
  kbl(caption = "Top mito pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 1 and 4
	set	setSize	pMANOVA	s.KO	s.drug	p.KO	p.drug	s.dist	SD	p.adjustMANOVA
19	Receptor Mediated Mitophagy	11	0.0000263	-0.7759630	-0.2626019	0.0000083	0.1315157	0.8191937	0.3630011	0.0000486
12	Mucopolysaccharidoses	11	0.0000136	0.7558981	-0.2580819	0.0000141	0.1382892	0.7987416	0.7169921	0.0000271
2	Complex I biogenesis	57	0.0000000	-0.1225986	0.7294490	0.1093157	0.0000000	0.7396798	0.6024886	0.0000000
20	Reduction of cytosolic Ca++ levels	12	0.0001988	0.4226979	-0.5021944	0.0112254	0.0025906	0.6564090	0.6539976	0.0002807
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.0001725	-0.4368495	0.4616108	0.0063823	0.0039505	0.6355486	0.6353074	0.0002760
22	Respiratory electron transport	103	0.0000000	-0.1604358	0.5889871	0.0048876	0.0000000	0.6104469	0.5299220	0.0000000
14	Nucleotide biosynthesis	12	0.0036759	-0.4820562	-0.3242694	0.0038313	0.0517611	0.5809723	0.1115721	0.0042011
10	Mitochondrial translation	96	0.0000000	-0.1482465	0.5183894	0.0120417	0.0000000	0.5391703	0.4713828	0.0000000
11	Mitophagy	28	0.0000068	-0.5324790	-0.0475129	0.0000011	0.6634231	0.5345945	0.3429228	0.0000148
1	Cellular response to starvation	150	0.0000000	-0.3220144	0.4056013	0.0000000	0.0000000	0.5178858	0.5145020	0.0000000
24	Stabilization of p53	53	0.0000000	-0.4250982	0.2913945	0.0000001	0.0002419	0.5153826	0.5066369	0.0000000
9	Mitochondrial protein import	64	0.0000002	-0.2033094	0.3238455	0.0049000	0.0000074	0.3823750	0.3727548	0.0000006
21	Resolution of D-Loop Structures	33	0.0039393	0.2407810	0.2534479	0.0166593	0.0117319	0.3495874	0.0089568	0.0042974
3	Diseases of carbohydrate metabolism	30	0.0090010	0.3189077	-0.0262569	0.0024971	0.8034151	0.3199868	0.2440683	0.0093924
17	Phospholipid metabolism	186	0.0000000	-0.1473869	-0.2832498	0.0005232	0.0000000	0.3193013	0.0960695	0.0000000
16	PTEN Regulation	140	0.0000000	-0.2826678	-0.1053795	0.0000000	0.0312840	0.3016719	0.1253618	0.0000000
18	Pyruvate metabolism and Citric Acid (TCA) cycle	52	0.0017478	-0.2352398	-0.1827679	0.0033341	0.0225870	0.2978958	0.0371032	0.0020973
6	Metabolism of amino acids and derivatives	337	0.0000000	-0.1445765	0.2336243	0.0000050	0.0000000	0.2747411	0.2674283	0.0000000
23	Signaling by the B Cell Receptor (BCR)	106	0.0000599	-0.2342722	-0.1018064	0.0000306	0.0700635	0.2554369	0.0936675	0.0001026
4	Gene Silencing by RNA	95	0.0001965	-0.2410320	-0.0667322	0.0000487	0.2608703	0.2500992	0.1232486	0.0002807

#mitch_report(mres,outfile="mitchmulti_effect_mito.html",overwrite=TRUE)
write.table(mres$enrichment_result,file="mitch_multi_effect_mito.tsv",quote=FALSE,sep="\t",row.names=FALSE)
mx <- as.matrix(mres$enrichment_result[,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,22), cexRow=1,
    cexCol=0.9, main=paste("Top ranked Reactomes") )


mres <- mitch_calc(m, custom_sets, priority="SD",resrows=52)

## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.

head(mres$enrichment_result,20) %>% kbl(caption = "Top mito pathway differences in contrast 1 and 4") %>% kable_paper("hover", full_width = F)

Top mito pathway differences in contrast 1 and 4
	set	setSize	pMANOVA	s.KO	s.drug	p.KO	p.drug	s.dist	SD	p.adjustMANOVA
12	Mucopolysaccharidoses	11	0.0000136	0.7558981	-0.2580819	0.0000141	0.1382892	0.7987416	0.7169921	0.0000271
20	Reduction of cytosolic Ca++ levels	12	0.0001988	0.4226979	-0.5021944	0.0112254	0.0025906	0.6564090	0.6539976	0.0002807
8	Mitochondrial iron-sulfur cluster biogenesis	13	0.0001725	-0.4368495	0.4616108	0.0063823	0.0039505	0.6355486	0.6353074	0.0002760
2	Complex I biogenesis	57	0.0000000	-0.1225986	0.7294490	0.1093157	0.0000000	0.7396798	0.6024886	0.0000000
22	Respiratory electron transport	103	0.0000000	-0.1604358	0.5889871	0.0048876	0.0000000	0.6104469	0.5299220	0.0000000
1	Cellular response to starvation	150	0.0000000	-0.3220144	0.4056013	0.0000000	0.0000000	0.5178858	0.5145020	0.0000000
24	Stabilization of p53	53	0.0000000	-0.4250982	0.2913945	0.0000001	0.0002419	0.5153826	0.5066369	0.0000000
10	Mitochondrial translation	96	0.0000000	-0.1482465	0.5183894	0.0120417	0.0000000	0.5391703	0.4713828	0.0000000
9	Mitochondrial protein import	64	0.0000002	-0.2033094	0.3238455	0.0049000	0.0000074	0.3823750	0.3727548	0.0000006
19	Receptor Mediated Mitophagy	11	0.0000263	-0.7759630	-0.2626019	0.0000083	0.1315157	0.8191937	0.3630011	0.0000486
11	Mitophagy	28	0.0000068	-0.5324790	-0.0475129	0.0000011	0.6634231	0.5345945	0.3429228	0.0000148
6	Metabolism of amino acids and derivatives	337	0.0000000	-0.1445765	0.2336243	0.0000050	0.0000000	0.2747411	0.2674283	0.0000000
3	Diseases of carbohydrate metabolism	30	0.0090010	0.3189077	-0.0262569	0.0024971	0.8034151	0.3199868	0.2440683	0.0093924
16	PTEN Regulation	140	0.0000000	-0.2826678	-0.1053795	0.0000000	0.0312840	0.3016719	0.1253618	0.0000000
4	Gene Silencing by RNA	95	0.0001965	-0.2410320	-0.0667322	0.0000487	0.2608703	0.2500992	0.1232486	0.0002807
14	Nucleotide biosynthesis	12	0.0036759	-0.4820562	-0.3242694	0.0038313	0.0517611	0.5809723	0.1115721	0.0042011
15	Oxidative Stress Induced Senescence	82	0.0284316	-0.1702831	-0.0222774	0.0076676	0.7272367	0.1717342	0.1046559	0.0284316
7	Mitochondrial biogenesis	91	0.0006447	-0.2234438	-0.0840799	0.0002284	0.1655643	0.2387395	0.0985452	0.0008143
17	Phospholipid metabolism	186	0.0000000	-0.1473869	-0.2832498	0.0005232	0.0000000	0.3193013	0.0960695	0.0000000
23	Signaling by the B Cell Receptor (BCR)	106	0.0000599	-0.2342722	-0.1018064	0.0000306	0.0700635	0.2554369	0.0936675	0.0001026

#mitch_report(mres,outfile="mitchmulti_discord_mito.html",overwrite=TRUE)
write.table(mres$enrichment_result,file="mitch_multi_discord_mito.tsv",quote=FALSE,sep="\t",row.names=FALSE)

mx <- as.matrix(mres$enrichment_result[,4:5])
rownames(mx) <- mres$set
rownames(mx) <- mres$enrichment_result$set

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(mx,trace="none",col=colfunc(25),scale="none", margins = c(5,22), cexRow=1,
    cexCol=0.9, main=paste("Top ranked Reactomes") )

Conclusion

TODO

Session information

For reproducibility.

sessionInfo()

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] beeswarm_0.4.0              vioplot_0.3.7              
##  [3] sm_2.2-5.7                  kableExtra_1.3.4           
##  [5] topconfects_1.12.0          limma_3.52.1               
##  [7] eulerr_6.1.1                mitch_1.8.0                
##  [9] MASS_7.3-58                 fgsea_1.22.0               
## [11] gplots_3.1.3                DESeq2_1.36.0              
## [13] SummarizedExperiment_1.26.1 Biobase_2.56.0             
## [15] MatrixGenerics_1.8.0        matrixStats_0.62.0         
## [17] GenomicRanges_1.48.0        GenomeInfoDb_1.32.2        
## [19] IRanges_2.30.0              S4Vectors_0.34.0           
## [21] BiocGenerics_0.42.0         reshape2_1.4.4             
## [23] forcats_0.5.1               stringr_1.4.0              
## [25] dplyr_1.0.9                 purrr_0.3.4                
## [27] readr_2.1.2                 tidyr_1.2.0                
## [29] tibble_3.1.7                ggplot2_3.3.6              
## [31] tidyverse_1.3.1             zoo_1.8-10                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0           backports_1.4.1        fastmatch_1.1-3       
##   [4] systemfonts_1.0.4      plyr_1.8.7             polylabelr_0.2.0      
##   [7] splines_4.2.1          BiocParallel_1.30.3    digest_0.6.29         
##  [10] htmltools_0.5.2        fansi_1.0.3            magrittr_2.0.3        
##  [13] memoise_2.0.1          tzdb_0.3.0             Biostrings_2.64.0     
##  [16] annotate_1.74.0        modelr_0.1.8           svglite_2.1.0         
##  [19] colorspace_2.0-3       blob_1.2.3             rvest_1.0.2           
##  [22] haven_2.5.0            xfun_0.31              crayon_1.5.1          
##  [25] RCurl_1.98-1.7         jsonlite_1.8.0         genefilter_1.78.0     
##  [28] survival_3.4-0         glue_1.6.2             polyclip_1.10-0       
##  [31] gtable_0.3.0           zlibbioc_1.42.0        XVector_0.36.0        
##  [34] webshot_0.5.3          DelayedArray_0.22.0    scales_1.2.0          
##  [37] DBI_1.1.3              GGally_2.1.2           Rcpp_1.0.8.3          
##  [40] viridisLite_0.4.0      xtable_1.8-4           bit_4.0.4             
##  [43] htmlwidgets_1.5.4      httr_1.4.3             RColorBrewer_1.1-3    
##  [46] ellipsis_0.3.2         farver_2.1.0           pkgconfig_2.0.3       
##  [49] reshape_0.8.9          XML_3.99-0.10          sass_0.4.1            
##  [52] dbplyr_2.2.1           locfit_1.5-9.5         utf8_1.2.2            
##  [55] labeling_0.4.2         tidyselect_1.1.2       rlang_1.0.3           
##  [58] later_1.3.0            AnnotationDbi_1.58.0   munsell_0.5.0         
##  [61] cellranger_1.1.0       tools_4.2.1            cachem_1.0.6          
##  [64] cli_3.3.0              generics_0.1.2         RSQLite_2.2.14        
##  [67] broom_0.8.0            evaluate_0.15          fastmap_1.1.0         
##  [70] yaml_2.3.5             knitr_1.39             bit64_4.0.5           
##  [73] fs_1.5.2               caTools_1.18.2         KEGGREST_1.36.2       
##  [76] mime_0.12              xml2_1.3.3             compiler_4.2.1        
##  [79] rstudioapi_0.13        png_0.1-7              reprex_2.0.1          
##  [82] geneplotter_1.74.0     bslib_0.3.1            stringi_1.7.6         
##  [85] highr_0.9              lattice_0.20-45        Matrix_1.4-1          
##  [88] vctrs_0.4.1            pillar_1.7.0           lifecycle_1.0.1       
##  [91] jquerylib_0.1.4        data.table_1.14.2      bitops_1.0-7          
##  [94] httpuv_1.6.5           R6_2.5.1               promises_1.2.0.1      
##  [97] KernSmooth_2.23-20     echarts4r_0.4.4        gridExtra_2.3         
## [100] codetools_0.2-18       gtools_3.9.2.2         assertthat_0.2.1      
## [103] withr_2.5.0            GenomeInfoDbData_1.2.8 parallel_4.2.1        
## [106] hms_1.1.1              grid_4.2.1             rmarkdown_2.14        
## [109] shiny_1.7.1            lubridate_1.8.0