Source codes: https://github.com/markziemann/dftd_immune_expression

Background

We have designed a target enrichment panel to examine gene expression of immune genes. This is so we can inexpensively profile immune gene expression in devils. We will be looking at a lot of different variables such as dftd-free vs affected, DFTD1 vs DFTD2, etc.

The main comparisons are:

-Wild vs Captive devils

-Healthy vs DFTD infected devils

-DFT1 and DFT2 devils

I have their sex/age/seasons categories there as well if we decide to go with more detail analysis in the future, but I think just stick with these three for now. Some devils are a part of the Captive control and Wild control groups . There are a couple samples that won’t go into the analysis that I made note of. Meliza and Simba.

suppressPackageStartupMessages({
    library("reshape2")
    library("gplots")
    library("DESeq2")
    library("mitch")
    library("limma")
    library("kableExtra")
    library("dplyr")
})

Functions.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=1, col="dark gray",
       main=contrast_name, cex.main=0.7)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=1, col="red")
  mtext(SUBHEADER,cex = 0.7)
  grid()
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}

Load data

Here we load the data in from the aligner.

tmp <- read.table("3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3"))
colnames(x) <- gsub("fastq/","",colnames(x))
dim(x)
## [1] 310 143

Remove poor samples

Samples with <1M reads should be omitted. Will also round values to integers.

cs <- colSums(x)
cs <- cs[order(cs)]

barplot(cs,ylim=c(1e2,2e7),log="y",main="All samples",cex.names=0.5)

barplot(head(cs),log="y",main="Samples with low reads",cex.names=0.7)

abline(h=1e6,col="red",lty=2)

x <- x[,colSums(x)>1e6]
x <- round(x)

Identify poorly represented genes

rc <- rowMeans(x)
rc <- rc[order(rc)]
barplot(rc+1,log="y")

hist(log(rc+1))

round(rc[which(rc<10)],digits=1)
##   CNTF   CTF1   PTX3   RAG2    OSM   AIF1  IL17B IL36RN  STAT2 CLEC4E   CD3g 
##    0.0    0.0    0.0    0.0    0.0    0.2    0.4    0.4    0.5    0.5    0.6 
##   CSF2 CXCL13   IL25  Casp9 CX3CL1 CCLD16  IL17A  IFNB1   CSF3  CCL19  CCL26 
##    0.7    0.8    0.8    0.9    0.9    1.3    1.5    1.8    2.0    2.3    2.4 
##  IFNA1  FCAMR  KITLG  IL17F CCLD15    LIF  CCL22   IL11  IFNL1   IL33   IL26 
##    2.5    2.5    2.7    3.2    3.4    3.9    3.9    4.0    4.4    4.5    4.6 
##  IL12A IL1F10  RPS18   CD70  CCL27  ACKR4 IL36L1    FGB IL31Ra CXCL11 CXCL14 
##    4.9    5.0    5.0    5.3    5.6    5.8    6.0    6.2    6.8    7.5    7.7 
## IL36L2 CXCL17    IL4    IL9  CCL17  CCLD4 
##    8.1    8.1    8.2    8.3    8.6    9.8

A lot of the shorter genes (cytokines) had lower expression.

Sample sheet

ss <- read.table("samplesheet.tsv",sep="\t",header=TRUE)

MDS

plotMDS(x)

DESeq2

Run a differential analysis. Wild vs captive.

ss1 <- ss[which(grepl("Wild",ss$Category) + grepl("Captive",ss$Category)>0),]
ss1 <- ss1[ss1$Sample %in% colnames(x),]
dim(ss1)
## [1] 36 15
x1 <- x[,which(colnames(x) %in% ss1$Sample)]
dim(x1)
## [1] 310  36
x1 <- x1[which(rowMeans(x1)>10),]
dim(x1)
## [1] 260  36
col <- as.character(as.numeric(grepl("Cap",ss1$Category)))
col <- gsub("0","lightgreen",col)
col <- gsub("1","lightblue",col)

mds<-plotMDS(x1,pch=19,cex=3,col=col,main="MDS plot: wild and captive devils")
mtext("wild=green, captive=blue")
text(mds,labels=colnames(x1),cex=0.7)