Source codes: https://github.com/markziemann/dftd_immune_expression
We have designed a target enrichment panel to examine gene expression of immune genes. This is so we can inexpensively profile immune gene expression in devils. We will be looking at a lot of different variables such as dftd-free vs affected, DFTD1 vs DFTD2, etc.
The main comparisons are:
-Wild vs Captive devils
-Healthy vs DFTD infected devils
-DFT1 and DFT2 devils
I have their sex/age/seasons categories there as well if we decide to go with more detail analysis in the future, but I think just stick with these three for now. Some devils are a part of the Captive control and Wild control groups . There are a couple samples that won’t go into the analysis that I made note of. Meliza and Simba.
suppressPackageStartupMessages({
library("reshape2")
library("gplots")
library("DESeq2")
library("mitch")
library("limma")
library("kableExtra")
library("dplyr")
})
Functions.
maplot <- function(de,contrast_name) {
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=1, col="dark gray",
main=contrast_name, cex.main=0.7)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=1, col="red")
mtext(SUBHEADER,cex = 0.7)
grid()
}
make_volcano <- function(de,name) {
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}
Here we load the data in from the aligner.
tmp <- read.table("3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3"))
colnames(x) <- gsub("fastq/","",colnames(x))
dim(x)
## [1] 310 143
Samples with <1M reads should be omitted. Will also round values to integers.
cs <- colSums(x)
cs <- cs[order(cs)]
barplot(cs,ylim=c(1e2,2e7),log="y",main="All samples",cex.names=0.5)
barplot(head(cs),log="y",main="Samples with low reads",cex.names=0.7)
abline(h=1e6,col="red",lty=2)
x <- x[,colSums(x)>1e6]
x <- round(x)
rc <- rowMeans(x)
rc <- rc[order(rc)]
barplot(rc+1,log="y")
hist(log(rc+1))
round(rc[which(rc<10)],digits=1)
## CNTF CTF1 PTX3 RAG2 OSM AIF1 IL17B IL36RN STAT2 CLEC4E CD3g
## 0.0 0.0 0.0 0.0 0.0 0.2 0.4 0.4 0.5 0.5 0.6
## CSF2 CXCL13 IL25 Casp9 CX3CL1 CCLD16 IL17A IFNB1 CSF3 CCL19 CCL26
## 0.7 0.8 0.8 0.9 0.9 1.3 1.5 1.8 2.0 2.3 2.4
## IFNA1 FCAMR KITLG IL17F CCLD15 LIF CCL22 IL11 IFNL1 IL33 IL26
## 2.5 2.5 2.7 3.2 3.4 3.9 3.9 4.0 4.4 4.5 4.6
## IL12A IL1F10 RPS18 CD70 CCL27 ACKR4 IL36L1 FGB IL31Ra CXCL11 CXCL14
## 4.9 5.0 5.0 5.3 5.6 5.8 6.0 6.2 6.8 7.5 7.7
## IL36L2 CXCL17 IL4 IL9 CCL17 CCLD4
## 8.1 8.1 8.2 8.3 8.6 9.8
A lot of the shorter genes (cytokines) had lower expression.
ss <- read.table("samplesheet.tsv",sep="\t",header=TRUE)
plotMDS(x)
Run a differential analysis. Wild vs captive.
ss1 <- ss[which(grepl("Wild",ss$Category) + grepl("Captive",ss$Category)>0),]
ss1 <- ss1[ss1$Sample %in% colnames(x),]
dim(ss1)
## [1] 36 15
x1 <- x[,which(colnames(x) %in% ss1$Sample)]
dim(x1)
## [1] 310 36
x1 <- x1[which(rowMeans(x1)>10),]
dim(x1)
## [1] 260 36
col <- as.character(as.numeric(grepl("Cap",ss1$Category)))
col <- gsub("0","lightgreen",col)
col <- gsub("1","lightblue",col)
mds<-plotMDS(x1,pch=19,cex=3,col=col,main="MDS plot: wild and captive devils")
mtext("wild=green, captive=blue")
text(mds,labels=colnames(x1),cex=0.7)