Source codes: https://github.com/markziemann/dftd_rnaseq
suppressPackageStartupMessages({
library("reshape2")
library("gplots")
library("DESeq2")
library("mitch")
library("limma")
library("kableExtra")
library("dplyr")
})
The goal of our study is to compare the expression of genes related to metabolism of DFT1 cell lines to DFT2 cell lines. Our main goal is comparing genes expressed in the following pathways: Glycolysis, Pentose Phosphate Pathway, Fatty Acid Metabolism, Glutaminolysis and Oxidative Phosphorylation. A secondary goal would be to compare the expression of genes related to the production of Reactive Oxygen Species (ROS) and protection against ROS (genes involved in DNA repair, production of antioxydants etc). Finally, we might also be interested in looking into genes linked to cholesterol metabolism as this has been shown to be important for DFT1 but I need to do some additional reading to know what exactly we’re after. See sample sheet below (edited from Yin Peng), there are 3 DFT1 and 3 DFT2 cell lines, and 3 replicates of each cell line:
ss <- read.table("../ss.tsv",sep="\t",fill=TRUE,header=TRUE)
ss$DFT <- as.factor(ss$DFT)
ss$clone <- sapply(strsplit(ss$ClientID,"_"),"[[",1)
ss %>%
kbl(caption="Sample sheet for all samples") %>%
kable_paper("hover", full_width = F)
C | ClientID | DFT | Replicate | Index1sequence | Index2sequence | X.ReadsIdentified.PF. | TotalReads | TotalBases.Gbases. | clone |
---|---|---|---|---|---|---|---|---|---|
6180766 | 4906_1-1 | DFT1 | 1 | TTGTTGCA | GACGTCGT | 2.4465 | 103481165 | 15.5 | 4906 |
6180767 | C5065_1-1 | DFT1 | 1 | CCACACTT | CGTCATAC | 2.8116 | 118924031 | 17.8 | C5065 |
6180768 | 1426_1-1 | DFT1 | 1 | CCCGTTTG | TTGCTGTA | 2.3264 | 98401219 | 14.8 | 1426 |
6180769 | RV_1-1 | DFT2 | 1 | ATGCTCCC | CCCTGCTG | 2.2811 | 96485136 | 14.5 | RV |
6180770 | SN_1-1 | DFT2 | 1 | GCTCAATA | CATTTATC | 2.3947 | 101290147 | 15.2 | SN |
6180771 | TD549_1-1 | DFT2 | 1 | GTAGTTCG | CTTGATGC | 2.1732 | 91921221 | 13.8 | TD549 |
6180772 | 4906_2-1 | DFT1 | 2 | CGAGAACC | ATGTATCG | 2.2322 | 94416781 | 14.2 | 4906 |
6180773 | C5065_2-1 | DFT1 | 2 | GCCATGTA | ATAGGGTT | 2.4505 | 103650355 | 15.5 | C5065 |
6180774 | 1426_2-1 | DFT1 | 2 | TTTCTCTA | CTCGACGT | 2.4778 | 104805081 | 15.7 | 1426 |
6180775 | RV_2-1 | DFT2 | 2 | CCAGCGAT | TCTAGTCA | 2.9882 | 126393794 | 19.0 | RV |
6180776 | SN_2-1 | DFT2 | 2 | TGGGAGTG | CCCGTCTA | 2.4459 | 103455786 | 15.5 | SN |
6180777 | TD549_2-1 | DFT2 | 2 | CCCTCGTA | TAGAAGAG | 2.5437 | 107592495 | 16.1 | TD549 |
6180778 | 4906_3-1 | DFT1 | 3 | CGATATGG | GGGACGTA | 2.2711 | 96062159 | 14.4 | 4906 |
6180779 | C5065_3-1 | DFT1 | 3 | TTGTGCCC | TTTCCATC | 2.2763 | 96282107 | 14.4 | C5065 |
6180780 | 1426_3-1 | DFT1 | 3 | TGTCCTCT | CGACGAAC | 2.4952 | 105541059 | 15.8 | 1426 |
6180781 | RV_3-1 | DFT2 | 3 | GTATAGTC | TTGGTCTC | 2.3325 | 98659234 | 14.8 | RV |
6180782 | SN_3-1 | DFT2 | 3 | TTTGGGAT | GTTCACGT | 2.6750 | 113146174 | 17.0 | SN |
6180783 | TD549_3-1 | DFT2 | 3 | CACCAAGC | AAAGGGAA | 2.1610 | 91405190 | 13.7 | TD549 |
DEA5_4NEG | DEA4_6NEG | NA | CGGAGAGG | CGAGCGTC | 0.0002 | 8460 | 0.0 | DEA4 |
We are mainly interested in comparing all DFT1samples against all DFT2 samples. However, we did notice that some cell lines within a same DFT produced different results in the metabolism experiments so we might need to compare all 6 cell lines against one another and not be able to simply do a DFT1 vs DFT2 contrast, if that makes sense.
Regarding the reference transcriptome, we will use Ensembl v109.
maplot <- function(de,contrast_name) {
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=1, col="dark gray",
main=contrast_name, cex.main=0.7)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=1, col="red")
mtext(SUBHEADER,cex = 0.7)
grid()
}
make_volcano <- function(de,name) {
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}
Here we load the data in from the aligner.
tmp <- read.table("../fastq/3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3",fun.aggregate=sum))
dim(x)
## [1] 43512 19
Load gene names.
gn <- read.table("../ref/Sarcophilus_harrisii.mSarHar1.11.cdna+ncrna.genenames.tsv",fill=TRUE)
gn <- gn[order(gn$V1),]
dim(gn)
## [1] 43512 3
Load homology map
hm <- read.table("../ref/mart_export_ensembl109_2023-07-14.txt",sep="\t",header=TRUE)
Now need to collapse transcript data to genes.
x$gene <- paste(gn$V2,gn$V3)
y <- aggregate(. ~ gene,x,sum)
rownames(y) <- y$gene
y$gene = NULL
dim(y)
## [1] 23829 19
Samples with <1M reads should be omitted. Will also round values to integers.
cs <- colSums(y)
cs <- cs[order(cs)]
par(mar=c(5,10,5,2))
barplot(cs,,main="All samples",horiz=TRUE,las=1)
abline(v=1e7,col="red",lty=2)
y <- round(y)
This will help us to visualise the sources of variability in the overall dataset.
Plot MDS and then remove the negative control and run Plot MDS again.
Also fix the sample names.
par(mar = c(5.1, 4.1, 4.1, 2.1) )
plotMDS(y)
y <- y[,colnames(y) != "DEA5-4NEG"]
plotMDS(y)
ss <- ss[ss$ClientID != "DEA4_6NEG",]
colnames(y) <- sapply(strsplit(ss$ClientID,"-"),"[[",1)
cs <- colSums(y)
cs <- cs[order(cs)]
par(mar=c(5,10,5,2))
barplot(cs,,main="All samples",horiz=TRUE,las=1)
par(mar = c(5.1, 4.1, 4.1, 2.1) )
plotMDS(y)
cols <- ss$DFT
cols <- gsub("DFT1","pink",cols)
cols <- gsub("DFT2","lightblue",cols)
mymds <- plotMDS(y,plot=FALSE)
# fix the xlims
XMIN=min(mymds$x)
XMAX=max(mymds$x)
XMID=(XMAX+XMIN)/2
XMIN <- XMID + (XMIN-XMID)*1.1
XMAX <- XMID+(XMAX-XMID)*1.1
plotMDS(mymds,pch=19,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mymds,labels=colnames(y))
mtext("pink=DFT1,blue=DFT2")
Run a simple differential analysis comparing DFT1 vs DFT2 ignoring the clone type as a source of variation.
# split data will be necessary for the smaller comparisons (but not this one)
y1 <- y
ss1 <- ss
y1 <- y1[which(rowMeans(y1)>10),]
dim(y1)
## [1] 16009 18
dds <- DESeqDataSetFromMatrix(countData = y1 , colData = ss1, design = ~ DFT )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z<- results(res)
vsd <- vst(dds)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge1 <- dge
sig <- subset(dge,padj<0.05)
sig1_up <- rownames(subset(sig,log2FoldChange>0))
sig1_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig1_up)
## [1] 5871
length(sig1_dn)
## [1] 6006
maplot(dge1,"DFT1 vs DFT2")
make_volcano(dge1,"DFT1 vs DFT2")
sig[1:50,1:6] %>%
kbl(caption="Comparison of DFT1 vs DFT2") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSSHAG00000000893.2 NA | 761.6994 | -4.745935 | 0.1042514 | -45.52393 | 0 | 0 |
ENSSHAG00000001230.2 NA | 9964.7623 | -7.268224 | 0.1727670 | -42.06952 | 0 | 0 |
ENSSHAG00000001308.2 NA | 1591.3721 | -4.481411 | 0.1093947 | -40.96554 | 0 | 0 |
ENSSHAG00000001552.2 TDRD5 | 1386.5142 | -5.550630 | 0.0945061 | -58.73301 | 0 | 0 |
ENSSHAG00000001728.2 ABRAXAS2 | 2447.7801 | -2.611974 | 0.0580287 | -45.01174 | 0 | 0 |
ENSSHAG00000001754.2 NA | 573.3750 | -4.129922 | 0.1018094 | -40.56522 | 0 | 0 |
ENSSHAG00000002134.2 NA | 31866.6922 | 2.222360 | 0.0386947 | 57.43325 | 0 | 0 |
ENSSHAG00000002808.2 MICAL1 | 9230.0290 | -2.032272 | 0.0528958 | -38.42032 | 0 | 0 |
ENSSHAG00000003434.2 NA | 5640.3455 | -9.630769 | 0.2085777 | -46.17354 | 0 | 0 |
ENSSHAG00000003458.2 JMJD1C | 2343.7511 | -2.896812 | 0.0747449 | -38.75599 | 0 | 0 |
ENSSHAG00000003737.2 NA | 9677.3586 | 11.783418 | 0.2132465 | 55.25727 | 0 | 0 |
ENSSHAG00000004947.2 THBS2 | 24172.5467 | 13.496286 | 0.3231374 | 41.76640 | 0 | 0 |
ENSSHAG00000005182.2 SERPINE1 | 7064.0510 | 11.869506 | 0.2994322 | 39.64005 | 0 | 0 |
ENSSHAG00000005550.2 MEIOC | 1485.7532 | -7.289256 | 0.1694244 | -43.02364 | 0 | 0 |
ENSSHAG00000007070.2 SMC1B | 3980.3698 | -10.063501 | 0.1851324 | -54.35841 | 0 | 0 |
ENSSHAG00000007582.2 NA | 2902.0019 | -7.095020 | 0.0846261 | -83.83964 | 0 | 0 |
ENSSHAG00000007784.2 NA | 5517.6882 | -7.314052 | 0.1015625 | -72.01526 | 0 | 0 |
ENSSHAG00000007965.2 NA | 5215.8477 | -1.747971 | 0.0370471 | -47.18243 | 0 | 0 |
ENSSHAG00000008051.2 TEX14 | 1537.4858 | -3.531985 | 0.0617160 | -57.22965 | 0 | 0 |
ENSSHAG00000009584.2 PLEKHG1 | 1861.7962 | 9.379637 | 0.2481401 | 37.79977 | 0 | 0 |
ENSSHAG00000010367.2 ECM1 | 12497.0681 | 9.411970 | 0.1164250 | 80.84150 | 0 | 0 |
ENSSHAG00000010645.2 COL16A1 | 6094.7776 | 6.056254 | 0.0950462 | 63.71906 | 0 | 0 |
ENSSHAG00000010824.2 MPDZ | 4232.6012 | -5.438458 | 0.0941097 | -57.78850 | 0 | 0 |
ENSSHAG00000010902.2 SMARCD3 | 2489.7010 | -2.649718 | 0.0440929 | -60.09400 | 0 | 0 |
ENSSHAG00000011135.2 NA | 5620.2000 | 4.064532 | 0.0895867 | 45.36980 | 0 | 0 |
ENSSHAG00000011480.2 ADGRB2 | 1933.7632 | -7.544502 | 0.1814507 | -41.57880 | 0 | 0 |
ENSSHAG00000011750.2 NA | 3080.7121 | 3.088431 | 0.0735615 | 41.98432 | 0 | 0 |
ENSSHAG00000011809.2 NA | 5508.1945 | 3.140454 | 0.0593806 | 52.88691 | 0 | 0 |
ENSSHAG00000012182.2 NA | 1038.5611 | 7.742252 | 0.2036373 | 38.01982 | 0 | 0 |
ENSSHAG00000013722.2 NA | 524.0949 | -4.688777 | 0.0900165 | -52.08798 | 0 | 0 |
ENSSHAG00000014390.2 IFRD1 | 2454.6507 | -1.386811 | 0.0345922 | -40.09028 | 0 | 0 |
ENSSHAG00000014430.2 NA | 1157.2248 | -1.742533 | 0.0399895 | -43.57472 | 0 | 0 |
ENSSHAG00000015236.2 FKBP6 | 598.2945 | -5.562106 | 0.1042482 | -53.35446 | 0 | 0 |
ENSSHAG00000016212.2 SLC25A14 | 887.2862 | -6.791421 | 0.1667459 | -40.72915 | 0 | 0 |
ENSSHAG00000016252.2 LOXL2 | 8715.4741 | 8.867010 | 0.1933921 | 45.84991 | 0 | 0 |
ENSSHAG00000016335.2 NA | 21006.6606 | 2.668634 | 0.0567139 | 47.05436 | 0 | 0 |
ENSSHAG00000017128.2 F10 | 515.2000 | 6.578090 | 0.1519688 | 43.28578 | 0 | 0 |
ENSSHAG00000017996.2 TNFRSF21 | 1876.2975 | -7.424893 | 0.1564041 | -47.47248 | 0 | 0 |
ENSSHAG00000018060.2 HR | 3216.8474 | -7.036189 | 0.1791811 | -39.26859 | 0 | 0 |
ENSSHAG00000018469.2 PDE10A | 1279.8709 | -5.655401 | 0.1229592 | -45.99414 | 0 | 0 |
ENSSHAG00000020521.1 CDK3 | 2068.9726 | 4.241310 | 0.0605836 | 70.00762 | 0 | 0 |
ENSSHAG00000021318.1 NA | 2418.0580 | -2.504689 | 0.0455484 | -54.98958 | 0 | 0 |
ENSSHAG00000021464.1 NA | 3962.2831 | -8.392331 | 0.1785238 | -47.00960 | 0 | 0 |
ENSSHAG00000022172.1 HAPLN2 | 3697.2723 | -7.391660 | 0.1427380 | -51.78480 | 0 | 0 |
ENSSHAG00000022666.1 NA | 550.9103 | 6.404774 | 0.1677749 | 38.17480 | 0 | 0 |
ENSSHAG00000022966.1 SOX8 | 1304.3613 | -4.733014 | 0.1196103 | -39.57027 | 0 | 0 |
ENSSHAG00000023142.1 NA | 20998.8159 | -2.483109 | 0.0319815 | -77.64204 | 0 | 0 |
ENSSHAG00000023249.1 NA | 974.9689 | 5.489140 | 0.1204114 | 45.58656 | 0 | 0 |
ENSSHAG00000023680.1 EGR3 | 7293.3439 | -4.014795 | 0.1044284 | -38.44541 | 0 | 0 |
ENSSHAG00000024021.1 NA | 1284.6234 | 5.713744 | 0.1294418 | 44.14140 | 0 | 0 |
write.table(dge,file="dge1.tsv",sep="\t")
# heatmap
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
col=colfunc(25),ColSideColors=cols,mar=c(5,14),
cexRow=0.8,cexCol=0.7)
Sum replicates.
x4906 <- rowSums(y[,ss$clone=="4906"])
xC5065 <- rowSums(y[,ss$clone=="C5065"])
x1426 <- rowSums(y[,ss$clone=="1426"])
xRV <- rowSums(y[,ss$clone=="RV"])
xSN <- rowSums(y[,ss$clone=="SN"])
xTD549 <- rowSums(y[,ss$clone=="TD549"])
y2 <- data.frame(x4906,xC5065,x1426,xRV,xSN,xTD549)
ss2 <- as.data.frame(colnames(y2))
colnames(ss2) <- "clone"
ss2$DFT <- factor(c("DFT1","DFT1","DFT1","DFT2","DFT2","DFT2"))
y2 <- y2[which(rowMeans(y2)>10),]
dim(y2)
## [1] 17468 6
dds <- DESeqDataSetFromMatrix(countData = y2 , colData = ss2, design = ~ DFT )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge2 <- dge
sig <- subset(dge,padj<0.05)
sig2_up <- rownames(subset(sig,log2FoldChange>0))
sig2_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig2_up)
## [1] 4200
length(sig2_dn)
## [1] 4412
maplot(dge2,"DFT1 vs DFT2 aggregated")
make_volcano(dge2,"DFT1 vs DFT2 aggregated")
sig[1:50,1:6] %>%
kbl(caption="Comparison of DFT1 vs DFT2") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSSHAG00000007582.2 NA | 8720.2072 | -7.090091 | 0.1734257 | -40.88258 | 0 | 0 |
ENSSHAG00000024692.1 NA | 11176.1973 | -8.580770 | 0.2275391 | -37.71118 | 0 | 0 |
ENSSHAG00000007784.2 NA | 16519.5514 | -7.300242 | 0.1954949 | -37.34236 | 0 | 0 |
ENSSHAG00000010367.2 ECM1 | 37884.8561 | 9.429385 | 0.2567216 | 36.73000 | 0 | 0 |
ENSSHAG00000026705.1 LOXL3 | 35605.2404 | 3.028419 | 0.0834311 | 36.29846 | 0 | 0 |
ENSSHAG00000003737.2 NA | 29315.0122 | 11.793401 | 0.3388989 | 34.79917 | 0 | 0 |
ENSSHAG00000032086.1 NA | 4270.9538 | -9.087633 | 0.2656490 | -34.20918 | 0 | 0 |
ENSSHAG00000020521.1 CDK3 | 6254.9578 | 4.247335 | 0.1329351 | 31.95045 | 0 | 0 |
ENSSHAG00000010645.2 COL16A1 | 18470.4887 | 6.069424 | 0.1909073 | 31.79252 | 0 | 0 |
ENSSHAG00000015236.2 FKBP6 | 1799.3194 | -5.552135 | 0.1749135 | -31.74217 | 0 | 0 |
ENSSHAG00000031815.1 RAB33A | 4153.4227 | -8.057672 | 0.2543513 | -31.67930 | 0 | 0 |
ENSSHAG00000031607.1 NA | 2388.4986 | 6.299003 | 0.2049613 | 30.73264 | 0 | 0 |
ENSSHAG00000003434.2 NA | 16944.8869 | -9.624643 | 0.3183782 | -30.23022 | 0 | 0 |
ENSSHAG00000013199.2 PDGFRA | 101648.6230 | 15.673386 | 0.5200441 | 30.13857 | 0 | 0 |
ENSSHAG00000024117.1 NA | 6007.3116 | 7.935832 | 0.2646438 | 29.98684 | 0 | 0 |
ENSSHAG00000031627.1 PRX | 228602.7417 | -10.824218 | 0.3631920 | -29.80302 | 0 | 0 |
ENSSHAG00000007782.2 CA8 | 2062.4938 | 8.726159 | 0.2990458 | 29.18000 | 0 | 0 |
ENSSHAG00000018310.2 SLIT2 | 44695.3018 | -14.414172 | 0.4983908 | -28.92142 | 0 | 0 |
ENSSHAG00000021464.1 NA | 11908.1052 | -8.384005 | 0.2916414 | -28.74765 | 0 | 0 |
ENSSHAG00000010824.2 MPDZ | 12727.1471 | -5.429957 | 0.1920191 | -28.27821 | 0 | 0 |
ENSSHAG00000000643.2 STK31 | 4182.6920 | -9.537041 | 0.3373093 | -28.27387 | 0 | 0 |
ENSSHAG00000023142.1 NA | 63163.2147 | -2.474963 | 0.0903962 | -27.37905 | 0 | 0 |
ENSSHAG00000025566.1 ABLIM1 | 7772.7034 | 5.814237 | 0.2129981 | 27.29713 | 0 | 0 |
ENSSHAG00000021789.1 NA | 1860.1298 | 8.577176 | 0.3143469 | 27.28571 | 0 | 0 |
ENSSHAG00000024021.1 NA | 3895.6967 | 5.733097 | 0.2107928 | 27.19778 | 0 | 0 |
ENSSHAG00000030292.1 NA | 1933.0281 | -5.410220 | 0.1994706 | -27.12289 | 0 | 0 |
ENSSHAG00000031074.1 NEXMIF | 4117.0924 | -8.404272 | 0.3123135 | -26.90973 | 0 | 0 |
ENSSHAG00000008051.2 TEX14 | 4625.9527 | -3.525105 | 0.1326009 | -26.58431 | 0 | 0 |
ENSSHAG00000004947.2 THBS2 | 73193.5959 | 13.509919 | 0.5132476 | 26.32242 | 0 | 0 |
ENSSHAG00000022172.1 HAPLN2 | 11123.1806 | -7.382997 | 0.2822149 | -26.16090 | 0 | 0 |
ENSSHAG00000026637.1 NA | 2213.7312 | -5.071669 | 0.1942619 | -26.10738 | 0 | 0 |
ENSSHAG00000005380.2 NA | 3108.7669 | 10.228790 | 0.3940820 | 25.95599 | 0 | 0 |
ENSSHAG00000018469.2 PDE10A | 3854.3231 | -5.647349 | 0.2179279 | -25.91384 | 0 | 0 |
ENSSHAG00000011809.2 NA | 16636.2524 | 3.149445 | 0.1232734 | 25.54845 | 0 | 0 |
ENSSHAG00000029638.1 NA | 18764.0415 | -6.179495 | 0.2425972 | -25.47224 | 0 | 0 |
ENSSHAG00000016252.2 LOXL2 | 26417.8025 | 8.878268 | 0.3521803 | 25.20944 | 0 | 0 |
ENSSHAG00000001552.2 TDRD5 | 4168.0657 | -5.549233 | 0.2251434 | -24.64755 | 0 | 0 |
ENSSHAG00000006728.2 NEFM | 7555.0072 | -10.855938 | 0.4479166 | -24.23652 | 0 | 0 |
ENSSHAG00000013722.2 NA | 1575.4562 | -4.678030 | 0.1936782 | -24.15363 | 0 | 0 |
ENSSHAG00000014886.2 CYP2R1 | 2488.5667 | -7.465866 | 0.3091695 | -24.14813 | 0 | 0 |
ENSSHAG00000031108.1 NA | 964.2578 | -7.703956 | 0.3207488 | -24.01866 | 0 | 0 |
ENSSHAG00000005492.2 NA | 1659.5069 | -8.999424 | 0.3748897 | -24.00553 | 0 | 0 |
ENSSHAG00000027513.1 NA | 1802.2901 | -5.271118 | 0.2200576 | -23.95335 | 0 | 0 |
ENSSHAG00000000893.2 NA | 2287.3973 | -4.736996 | 0.1987138 | -23.83828 | 0 | 0 |
ENSSHAG00000005182.2 SERPINE1 | 21367.7733 | 11.866757 | 0.5041184 | 23.53962 | 0 | 0 |
ENSSHAG00000010102.2 UNC5A | 1472.0133 | -8.985720 | 0.3860205 | -23.27783 | 0 | 0 |
ENSSHAG00000031142.1 TPPP | 2664.1676 | -5.954920 | 0.2565656 | -23.21012 | 0 | 0 |
ENSSHAG00000022666.1 NA | 1669.6189 | 6.414065 | 0.2781309 | 23.06132 | 0 | 0 |
ENSSHAG00000010515.2 NA | 2972.5658 | 9.167129 | 0.3999684 | 22.91964 | 0 | 0 |
ENSSHAG00000010902.2 SMARCD3 | 7489.5510 | -2.642803 | 0.1163671 | -22.71092 | 0 | 0 |
write.table(dge,file="dge2.tsv",sep="\t")
# heatmap
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
col=colfunc(25),ColSideColors=cols[1:6],mar=c(8,14),
cexRow=0.7,cexCol=1)
Need to get human homologs of these genes. These were obtained from Ensembl v109 biomart (website).
rownames(dge2) <- sapply(strsplit(rownames(dge2),"\\."),"[[",1)
hm2 <- hm[hm$Tasmanian.devil.gene.stable.ID != "",]
gt <- hm2[,2:3]
length(unique(gt$Tasmanian.devil.gene.stable.ID))
## [1] 16979
length(unique(gt$Gene.name))
## [1] 16646
Now run mitch for DGE2. There will be a report generated that has more details on the enrichment analysis. The pathways are sourced from Reactome 7th July 2023.
genesets <- gmt_import("../ref/ReactomePathways_2023-07-14.gmt")
m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 17468
## Note: no. genes in output = 13505
## Note: estimated proportion of input genes in output = 0.773
head(m2)
## x
## -35.37246663
## A2M -13.13626596
## A4GALT -0.07278225
## AAAS -4.33508400
## AACS 2.13961007
## AADAC -2.90578338
res2 <- mitch_calc(m2, genesets, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
if ( !file.exists("mitch2.html") ) {
mitch_report(res2, "mitch2.html")
}
top <- subset(res2$enrichment_result,p.adjustANOVA<0.05)
topup <- head(subset(top,s.dist>0),20)
topup %>%
kbl(caption="Top 20 upregulated pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
687 | Metallothioneins bind metals | 10 | 0.0000122 | 0.7985180 | 0.0013450 |
1075 | Response to metal ions | 13 | 0.0000083 | 0.7140414 | 0.0009936 |
229 | Crosslinking of collagen fibrils | 16 | 0.0001190 | 0.5556657 | 0.0071547 |
42 | Activation of Matrix Metalloproteinases | 22 | 0.0000130 | 0.5368882 | 0.0013450 |
207 | Collagen degradation | 47 | 0.0000000 | 0.5178538 | 0.0000004 |
157 | Caspase activation via Death Receptors in the presence of ligand | 12 | 0.0019600 | 0.5162245 | 0.0468695 |
456 | GRB2:SOS provides linkage to MAPK signaling for Integrins | 12 | 0.0019968 | 0.5153042 | 0.0468695 |
206 | Collagen chain trimerization | 30 | 0.0000013 | 0.5097934 | 0.0001768 |
185 | Cholesterol biosynthesis | 24 | 0.0000363 | 0.4870311 | 0.0027570 |
205 | Collagen biosynthesis and modifying enzymes | 50 | 0.0000000 | 0.4808651 | 0.0000010 |
171 | Cell-extracellular matrix interactions | 18 | 0.0004420 | 0.4784031 | 0.0211221 |
572 | Integrin cell surface interactions | 63 | 0.0000000 | 0.4333917 | 0.0000008 |
337 | ECM proteoglycans | 61 | 0.0000000 | 0.4176109 | 0.0000031 |
1147 | Signal transduction by L1 | 21 | 0.0016204 | 0.3974128 | 0.0436118 |
208 | Collagen formation | 68 | 0.0000001 | 0.3788135 | 0.0000097 |
573 | Integrin signaling | 23 | 0.0016795 | 0.3785079 | 0.0436118 |
1247 | Syndecan interactions | 24 | 0.0014070 | 0.3766227 | 0.0436118 |
93 | Assembly of collagen fibrils and other multimeric structures | 47 | 0.0000246 | 0.3557735 | 0.0022208 |
183 | Chemokine receptors bind chemokines | 27 | 0.0015670 | 0.3516513 | 0.0436118 |
287 | Degradation of the extracellular matrix | 107 | 0.0000000 | 0.3430412 | 0.0000004 |
topdn <- head(subset(top,s.dist<0),20)
topdn %>%
kbl(caption="Top 20 downregulated pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
286 | Degradation of cysteine and homocysteine | 11 | 0.0003126 | -0.6276460 | 0.0161108 |
1243 | Sulfur amino acid metabolism | 23 | 0.0016925 | -0.3782370 | 0.0436118 |
1359 | Transmission across Chemical Synapses | 195 | 0.0009446 | -0.1376906 | 0.0334999 |
top2 <- rbind(head(topup,10),head(topdn,10))
top2 <- top2[order(-top2$s.dist),]
par(mar=c(5,15,5,2))
barplot(rev(abs(top2$s.dist)),horiz=TRUE,las=1,names.arg=rev(top2$set),
main="Top Pathways",cex.names=0.6,xlab="Enrichment Score",
col=c(rep("blue",nrow(head(topdn,10))),rep("red",nrow(head(topup,10)))),
xlim=c(0,0.8))
For reproducibility.
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] pkgload_1.3.2 GGally_2.1.2
## [3] ggplot2_3.4.0 beeswarm_0.4.0
## [5] gtools_3.9.4 tibble_3.1.8
## [7] echarts4r_0.4.4 dplyr_1.0.10
## [9] kableExtra_1.3.4 limma_3.54.0
## [11] mitch_1.10.0 DESeq2_1.38.2
## [13] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [15] MatrixGenerics_1.10.0 matrixStats_0.63.0
## [17] GenomicRanges_1.50.2 GenomeInfoDb_1.34.6
## [19] IRanges_2.32.0 S4Vectors_0.36.1
## [21] BiocGenerics_0.44.0 gplots_3.1.3
## [23] reshape2_1.4.4
##
## loaded via a namespace (and not attached):
## [1] DBI_1.1.3 bitops_1.0-7 gridExtra_2.3
## [4] rlang_1.0.6 magrittr_2.0.3 compiler_4.3.1
## [7] RSQLite_2.2.20 systemfonts_1.0.4 png_0.1-8
## [10] vctrs_0.5.1 rvest_1.0.3 stringr_1.5.0
## [13] pkgconfig_2.0.3 crayon_1.5.2 fastmap_1.1.0
## [16] XVector_0.38.0 ellipsis_0.3.2 caTools_1.18.2
## [19] utf8_1.2.2 promises_1.2.0.1 rmarkdown_2.19
## [22] bit_4.0.5 xfun_0.36 zlibbioc_1.44.0
## [25] cachem_1.0.6 jsonlite_1.8.4 blob_1.2.3
## [28] highr_0.10 later_1.3.0 DelayedArray_0.24.0
## [31] reshape_0.8.9 BiocParallel_1.32.5 parallel_4.3.1
## [34] R6_2.5.1 bslib_0.4.2 stringi_1.7.8
## [37] RColorBrewer_1.1-3 jquerylib_0.1.4 Rcpp_1.0.9
## [40] assertthat_0.2.1 knitr_1.41 httpuv_1.6.7
## [43] Matrix_1.5-4.1 tidyselect_1.2.0 yaml_2.3.6
## [46] rstudioapi_0.14 codetools_0.2-19 lattice_0.21-8
## [49] plyr_1.8.8 withr_2.5.0 shiny_1.7.4
## [52] KEGGREST_1.38.0 evaluate_0.19 xml2_1.3.3
## [55] Biostrings_2.66.0 pillar_1.8.1 KernSmooth_2.23-20
## [58] generics_0.1.3 RCurl_1.98-1.9 munsell_0.5.0
## [61] scales_1.2.1 xtable_1.8-4 glue_1.6.2
## [64] tools_4.3.1 webshot_0.5.4 annotate_1.76.0
## [67] locfit_1.5-9.7 XML_3.99-0.13 grid_4.3.1
## [70] AnnotationDbi_1.60.0 colorspace_2.0-3 GenomeInfoDbData_1.2.9
## [73] cli_3.6.0 fansi_1.0.3 viridisLite_0.4.1
## [76] svglite_2.1.0 gtable_0.3.1 sass_0.4.4
## [79] digest_0.6.31 geneplotter_1.76.0 htmlwidgets_1.6.1
## [82] memoise_2.0.1 htmltools_0.5.4 lifecycle_1.0.3
## [85] httr_1.4.4 mime_0.12 bit64_4.0.5
## [88] MASS_7.3-59