Source: https://github.com/markziemann/fran_mbd/blob/master/main_report_meth.Rmd

Introduction

In this report, I will take you through an analysis of the RNA-seq that was previosly generated. I will perform an analysis that begins with DESeq2 for differential expression followed by pathway enrichment analysis with mitch.

suppressPackageStartupMessages({
    # to be run in R4
    library("plyr")
    library("statmod")
    library("locfit")
    library("parallel")
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("mitch")
})

Import profiling data

Kallisto was used to map the reads to the transcriptome. Here I'm importing the transcript level counts and aggregating to gene level.

tmp<-read.table("3col.tsv",header=F)
x<-as.matrix(acast(tmp, V2~V1, value.var="V3"))
g<-read.table("tx2gn.tsv",header=F,row.names=1)
x<-merge(g,x,by=0)
rownames(x)=x$Row.names
x$Row.names=NULL
# aggregate to gene names
x<-aggregate(. ~ V2,x,sum)
rownames(x)=x$V2
x$V2=NULL
x$t=NULL
x <- round(x)

# separate heart and spleen data
hr <- x[,grep("H_",colnames(x) )]
sr <- x[,grep("S_",colnames(x) )]
b <- data.frame(sr,hr)

MDS plots

plot(cmdscale(dist(t(b))), bty='l', xlab="Coordinate 1", ylab="Coordinate 2", 
 type = "p", pch=19, cex.axis=1.3, cex.lab=1.3 , col="gray") 
text(cmdscale(dist(t(b))), labels=colnames(b),cex=1.3) 
mtext("spleen and heart samples")

plot(cmdscale(dist(t(hr))), bty='l', xlab="Coordinate 1", ylab="Coordinate 2",
 type = "p", pch=19, cex.axis=1.3, cex.lab=1.3 , col="gray")
text(cmdscale(dist(t(hr))), labels=colnames(hr),cex=1.3)
mtext("heart samples")

plot(cmdscale(dist(t(sr))), bty='l', xlab="Coordinate 1", ylab="Coordinate 2",
 type = "p", pch=19, cex.axis=1.3, cex.lab=1.3 , col="gray")
text(cmdscale(dist(t(sr))), labels=colnames(sr),cex=1.3)
mtext("spleen samples")

Curate the samplesheet

ctrl<-as.factor(as.numeric(grepl("ctrl",colnames(x))))
hifib<-as.factor(as.numeric(grepl("hifibre",colnames(x))))
acetate<-as.factor(as.numeric(grepl("acetate",colnames(x))))
heart<-as.factor(as.numeric(grepl("H",colnames(x))))
spleen<-as.factor(as.numeric(grepl("S",colnames(x))))
ss<-cbind(heart,spleen,ctrl,hifib,acetate)-1
row.names(ss)=colnames(x)

Differential analysis

We will be running some comparisons between groups:

  1. HEART ctrl vs acetate

  2. HEART ctrl vs hifib

  3. SPLEEN ctrl vs acetate

  4. SPLEEN ctrl vs hifibre

HEART ctrl vs acetate

ss_hr <- ss[grep("H_",rownames(ss)),]
x <- hr[,grep("hifi" , colnames(hr) ,invert=TRUE)]
x<-x[which(rowSums(x)/ncol(x)>=(10)),]
xs <- data.frame(ss_hr[which( rownames(ss_hr) %in% colnames(x) ),1,drop=FALSE])
xs$trt <- factor(as.numeric(grepl("acet",rownames(xs))))
dds <- DESeqDataSetFromMatrix(countData = x , colData = xs, design = ~ trt )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
head(dge)
##                           baseMean log2FoldChange     lfcSE      stat
## ENSMUSG00000059824_Dbp   1991.2105       3.054743 0.1331054  22.94981
## ENSMUSG00000055116_Arntl  565.2687      -2.629012 0.1548155 -16.98158
## ENSMUSG00000026077_Npas2  213.3276      -1.937798 0.1346362 -14.39285
## ENSMUSG00000028957_Per3   935.6254       2.156847 0.1626108  13.26386
## ENSMUSG00000022389_Tef   4566.2299       1.375636 0.1088460  12.63836
## ENSMUSG00000056749_Nfil3  437.1786      -1.164011 0.1000657 -11.63247
##                                 pvalue          padj H_acetate1_f H_acetate2_f
## ENSMUSG00000059824_Dbp   1.479878e-116 2.098171e-112    11.783665    11.733681
## ENSMUSG00000055116_Arntl  1.124185e-64  7.969350e-61     8.794148     8.428261
## ENSMUSG00000026077_Npas2  5.738187e-47  2.711867e-43     8.147724     8.236002
## ENSMUSG00000028957_Per3   3.750633e-40  1.329412e-36    10.455784    10.971571
## ENSMUSG00000022389_Tef    1.297243e-36  3.678461e-33    12.610977    12.844831
## ENSMUSG00000056749_Nfil3  2.818335e-31  6.659726e-28     9.025515     8.987172
##                          H_acetate3_f H_acetate4_f H_ctrl1_f H_ctrl2_f
## ENSMUSG00000059824_Dbp      12.224099    11.793457  9.395012  9.330056
## ENSMUSG00000055116_Arntl     8.398259     8.551378 10.172594 10.300776
## ENSMUSG00000026077_Npas2     8.211326     8.229558  9.167179  9.111096
## ENSMUSG00000028957_Per3     11.048890    10.654782  9.116020  8.981180
## ENSMUSG00000022389_Tef      12.861079    12.636268 11.360739 11.519617
## ENSMUSG00000056749_Nfil3     8.935310     8.931409  9.720595  9.710099
##                          H_ctrl3_f H_ctrl4_f
## ENSMUSG00000059824_Dbp    9.265394  9.534909
## ENSMUSG00000055116_Arntl 10.305100 10.259650
## ENSMUSG00000026077_Npas2  9.234786  9.141036
## ENSMUSG00000028957_Per3   9.248265  9.330330
## ENSMUSG00000022389_Tef   11.249615 11.648337
## ENSMUSG00000056749_Nfil3  9.815852  9.676046
write.table(dge,file="heart_ctrl_vs_acetate_rna.tsv",quote=F,sep="\t")
#some plots
sig<-subset(dge,padj<0.05)
SIG=nrow(sig)
DN=nrow(subset(sig,log2FoldChange<0))
UP=nrow(subset(sig,log2FoldChange>0))
HEADER=paste("ctrl vs acetate:", SIG , "DGEs,", UP ,"up,", DN, "down")
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
 xlab="log2 base mean",
 ,ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top<-head(sig,20)
plot(dge$log2FoldChange, -log2(dge$pvalue)+1E-307 ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue)+1E-307, cex=0.6,pch=19,col="red")  
mtext((HEADER),cex=1.2)