Examining expression of immune genes in devils with DFTD

Source codes: https://github.com/markziemann/dftd_immune_expression

Background

We have designed a target enrichment panel to examine gene expression of immune genes. This is so we can inexpensively profile immune gene expression in devils. We will be looking at a lot of different variables such as dftd-free vs affected, DFTD1 vs DFTD2, etc.

The main comparisons are:

-Wild vs Captive devils

-Healthy vs DFTD infected devils

-DFT1 and DFT2 devils

I have their sex/age/seasons categories there as well if we decide to go with more detail analysis in the future, but I think just stick with these three for now. Some devils are a part of the Captive control and Wild control groups . There are a couple samples that won’t go into the analysis that I made note of. Meliza and Simba.

suppressPackageStartupMessages({
    library("reshape2")
    library("gplots")
    library("DESeq2")
    library("mitch")
    library("limma")
    library("kableExtra")
    library("dplyr")
})

Functions.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=1, col="dark gray",
       main=contrast_name, cex.main=0.7)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=1, col="red")
  mtext(SUBHEADER,cex = 0.7)
  grid()
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}

Load data

Here we load the data in from the aligner.

tmp <- read.table("3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3"))
colnames(x) <- gsub("fastq/","",colnames(x))
dim(x)

## [1] 310 143

Remove poor samples

Samples with <1M reads should be omitted. Will also round values to integers.

cs <- colSums(x)
cs <- cs[order(cs)]

barplot(cs,ylim=c(1e2,2e7),log="y",main="All samples",cex.names=0.5)

barplot(head(cs),log="y",main="Samples with low reads",cex.names=0.7)

abline(h=1e6,col="red",lty=2)

x <- x[,colSums(x)>1e6]
x <- round(x)

Identify poorly represented genes

rc <- rowMeans(x)
rc <- rc[order(rc)]
barplot(rc+1,log="y")

hist(log(rc+1))

round(rc[which(rc<10)],digits=1)

##   CNTF   CTF1   PTX3   RAG2    OSM   AIF1  IL17B IL36RN  STAT2 CLEC4E   CD3g 
##    0.0    0.0    0.0    0.0    0.0    0.2    0.4    0.4    0.5    0.5    0.6 
##   CSF2 CXCL13   IL25  Casp9 CX3CL1 CCLD16  IL17A  IFNB1   CSF3  CCL19  CCL26 
##    0.7    0.8    0.8    0.9    0.9    1.3    1.5    1.8    2.0    2.3    2.4 
##  IFNA1  FCAMR  KITLG  IL17F CCLD15    LIF  CCL22   IL11  IFNL1   IL33   IL26 
##    2.5    2.5    2.7    3.2    3.4    3.9    3.9    4.0    4.4    4.5    4.6 
##  IL12A IL1F10  RPS18   CD70  CCL27  ACKR4 IL36L1    FGB IL31Ra CXCL11 CXCL14 
##    4.9    5.0    5.0    5.3    5.6    5.8    6.0    6.2    6.8    7.5    7.7 
## IL36L2 CXCL17    IL4    IL9  CCL17  CCLD4 
##    8.1    8.1    8.2    8.3    8.6    9.8

A lot of the shorter genes (cytokines) had lower expression.

Sample sheet

ss <- read.table("samplesheet.tsv",sep="\t",header=TRUE)

MDS

plotMDS(x)

DESeq2

Run a differential analysis. Wild vs captive.

ss1 <- ss[which(grepl("Wild",ss$Category) + grepl("Captive",ss$Category)>0),]
ss1 <- ss1[ss1$Sample %in% colnames(x),]
dim(ss1)

## [1] 36 15

x1 <- x[,which(colnames(x) %in% ss1$Sample)]
dim(x1)

## [1] 310  36

x1 <- x1[which(rowMeans(x1)>10),]
dim(x1)

## [1] 260  36

col <- as.character(as.numeric(grepl("Cap",ss1$Category)))
col <- gsub("0","lightgreen",col)
col <- gsub("1","lightblue",col)

mds<-plotMDS(x1,pch=19,cex=3,col=col,main="MDS plot: wild and captive devils")
mtext("wild=green, captive=blue")
text(mds,labels=colnames(x1),cex=0.7)

# coding captive
ss1$captive <- grepl("Cap",ss1$Category)

dds <- DESeqDataSetFromMatrix(countData = x1 , colData = ss1, design = ~ captive )

## converting counts to integer mode

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 12 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z<- results(res)
vsd <- vst(dds, blind=FALSE,nsub=nrow(x1)-50)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge1 <- dge
sig <- subset(dge,padj<0.05)
sig1_up <- rownames(subset(sig,log2FoldChange>0))
sig1_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig1_up)

## [1] 36

length(sig1_dn)

## [1] 44

maplot(dge1,"wild vs captive devils")

make_volcano(dge1,"wild vs captive devils")

sig[1:50,1:6] %>%
  kbl(caption="Comparison of wild vs captive devils") %>%
  kable_paper("hover", full_width = F)

Comparison of wild vs captive devils
	baseMean	log2FoldChange	lfcSE	stat	pvalue	padj
EPX	4183.07123	-3.4079536	0.4607668	-7.396266	0.0000000	0.0000000
CCR2	3733.31997	-1.0692199	0.1766932	-6.051279	0.0000000	0.0000002
ITGAX	5814.14465	-0.9504545	0.1624694	-5.850051	0.0000000	0.0000004
CCR7	14909.18108	1.1232280	0.1931247	5.816076	0.0000000	0.0000004
NFKB1	6160.50562	0.5305511	0.0970251	5.468182	0.0000000	0.0000024
CD1d	1276.84335	1.1328849	0.2088254	5.425033	0.0000001	0.0000025
C3	41378.49782	-1.1728466	0.2201943	-5.326417	0.0000001	0.0000037
CCL21	16.48466	-6.0299163	1.1947100	-5.047180	0.0000004	0.0000146
IL7RA	5802.93992	0.9843279	0.2049357	4.803106	0.0000016	0.0000431
CATH3	11222.02464	-2.0001644	0.4174722	-4.791132	0.0000017	0.0000431
CD177	828.62505	-1.8702926	0.4021087	-4.651211	0.0000033	0.0000780
Spi1	29735.23452	-1.0732955	0.2328274	-4.609834	0.0000040	0.0000873
CD28	931.91839	1.1402306	0.2552503	4.467107	0.0000079	0.0001555
CXCR4	16719.56435	0.6964438	0.1563121	4.455469	0.0000084	0.0001555
MRC1	1272.54271	-1.0945398	0.2486038	-4.402748	0.0000107	0.0001853
MPO	118.84914	-1.9761122	0.4569679	-4.324400	0.0000153	0.0002485
CD68	4028.85982	-0.8024057	0.1900471	-4.222142	0.0000242	0.0003615
TGFb1	54708.72507	-0.9858531	0.2343185	-4.207321	0.0000258	0.0003615
TNFSF13	443.78996	-0.7609083	0.1810674	-4.202348	0.0000264	0.0003615
IL34	1419.53594	-1.6697384	0.4043484	-4.129454	0.0000364	0.0004727
GATA2	1406.89288	-0.8609619	0.2097084	-4.105520	0.0000403	0.0004995
IL10Rb	418.87031	0.7853816	0.1958234	4.010662	0.0000605	0.0007156
TXLNA	3770.10495	-0.4767453	0.1192597	-3.997538	0.0000640	0.0007235
CCLD3	92.87741	-2.5274807	0.6524846	-3.873625	0.0001072	0.0011616
CCR1	433.10707	-0.9258979	0.2462342	-3.760232	0.0001698	0.0017655
CCLD13	246.24333	-1.1544510	0.3130101	-3.688223	0.0002258	0.0022583
IFIH1	1799.91319	0.6921871	0.1925865	3.594162	0.0003254	0.0031338
TLR3	459.09865	0.8319981	0.2336188	3.561349	0.0003690	0.0034200
TNFSF8	456.78476	0.7558417	0.2127582	3.552586	0.0003815	0.0034200
GATA1	262633.59066	-1.8249054	0.5193252	-3.513994	0.0004414	0.0037099
CCR6	895.03032	1.0130940	0.2889628	3.505967	0.0004550	0.0037099
TLR9	674.33669	-0.9121711	0.2602483	-3.505002	0.0004566	0.0037099
TCRm	4539.86415	0.8327992	0.2400706	3.468976	0.0005224	0.0040538
TCF3	30815.76980	-1.0644012	0.3071809	-3.465063	0.0005301	0.0040538
SIRPA	131.39314	-1.0876220	0.3177563	-3.422818	0.0006198	0.0046039
TCRd	231.15787	1.1654867	0.3433128	3.394825	0.0006867	0.0049597
STAT1	7854.81269	0.5645835	0.1672044	3.376606	0.0007339	0.0051569
TNFRSF1A	6749.97652	-0.4554037	0.1360066	-3.348394	0.0008128	0.0055614
TCRa	8172.55767	0.4759892	0.1438695	3.308479	0.0009380	0.0062536
STAT4	1471.30776	0.6440130	0.1952423	3.298532	0.0009719	0.0063175
CCLD1	32.55610	-2.1740793	0.6670158	-3.259412	0.0011164	0.0070798
CD14	32080.70523	-0.7784355	0.2403961	-3.238138	0.0012031	0.0074479
IL10RA	10099.34196	0.8032078	0.2492640	3.222318	0.0012716	0.0076886
CCLD14	168.91060	-1.2959646	0.4103225	-3.158405	0.0015863	0.0093739
CCR5	286.65976	-0.7147980	0.2325221	-3.074108	0.0021113	0.0121988
IL4I0	111.10456	-1.3857984	0.4537710	-3.053959	0.0022584	0.0127650
IRF3	13379.83096	-0.6653475	0.2191661	-3.035814	0.0023989	0.0132704
PRG3	8577.73587	1.0096619	0.3372969	2.993393	0.0027589	0.0149443
CD3e	15796.35503	0.4521622	0.1514440	2.985673	0.0028295	0.0150139
MMP9	58076.06374	-0.6397997	0.2172009	-2.945659	0.0032227	0.0165056

write.table(dge,file="dge1.tsv",sep="\t")

mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
  col=colfunc(25),ColSideColors=col,mar=c(5,12))

Session information

For reproducibility.

sessionInfo()

## R version 4.2.3 (2023-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] dplyr_1.1.0                 kableExtra_1.3.4           
##  [3] limma_3.52.2                mitch_1.8.0                
##  [5] DESeq2_1.36.0               SummarizedExperiment_1.26.1
##  [7] Biobase_2.56.0              MatrixGenerics_1.8.1       
##  [9] matrixStats_0.63.0          GenomicRanges_1.48.0       
## [11] GenomeInfoDb_1.32.2         IRanges_2.30.0             
## [13] S4Vectors_0.34.0            BiocGenerics_0.42.0        
## [15] gplots_3.1.3                reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7           bit64_4.0.5            webshot_0.5.4         
##  [4] RColorBrewer_1.1-3     httr_1.4.5             tools_4.2.3           
##  [7] bslib_0.4.2            utf8_1.2.3             R6_2.5.1              
## [10] KernSmooth_2.23-20     DBI_1.1.3              colorspace_2.1-0      
## [13] gridExtra_2.3          tidyselect_1.2.0       GGally_2.1.2          
## [16] bit_4.0.5              compiler_4.2.3         rvest_1.0.3           
## [19] cli_3.6.0              xml2_1.3.3             DelayedArray_0.22.0   
## [22] sass_0.4.5             caTools_1.18.2         scales_1.2.1          
## [25] genefilter_1.78.0      systemfonts_1.0.4      stringr_1.5.0         
## [28] digest_0.6.31          svglite_2.1.1          rmarkdown_2.20        
## [31] XVector_0.36.0         pkgconfig_2.0.3        htmltools_0.5.4       
## [34] highr_0.10             fastmap_1.1.1          htmlwidgets_1.6.2     
## [37] rlang_1.1.0            rstudioapi_0.14        RSQLite_2.3.0         
## [40] shiny_1.7.4            jquerylib_0.1.4        generics_0.1.3        
## [43] jsonlite_1.8.4         BiocParallel_1.30.3    gtools_3.9.4          
## [46] RCurl_1.98-1.10        magrittr_2.0.3         GenomeInfoDbData_1.2.8
## [49] Matrix_1.5-3           Rcpp_1.0.10            munsell_0.5.0         
## [52] fansi_1.0.4            lifecycle_1.0.3        stringi_1.7.12        
## [55] yaml_2.3.7             MASS_7.3-58.3          zlibbioc_1.42.0       
## [58] plyr_1.8.8             grid_4.2.3             blob_1.2.4            
## [61] promises_1.2.0.1       parallel_4.2.3         crayon_1.5.2          
## [64] lattice_0.20-45        Biostrings_2.64.0      echarts4r_0.4.4       
## [67] splines_4.2.3          annotate_1.74.0        KEGGREST_1.36.3       
## [70] locfit_1.5-9.7         knitr_1.42             pillar_1.8.1          
## [73] geneplotter_1.74.0     codetools_0.2-19       XML_3.99-0.13         
## [76] glue_1.6.2             evaluate_0.20          httpuv_1.6.9          
## [79] png_0.1-8              vctrs_0.6.0            gtable_0.3.2          
## [82] reshape_0.8.9          cachem_1.0.7           ggplot2_3.4.1         
## [85] xfun_0.37              mime_0.12              xtable_1.8-4          
## [88] later_1.3.0            viridisLite_0.4.1      survival_3.5-5        
## [91] tibble_3.2.0           beeswarm_0.4.0         AnnotationDbi_1.58.0  
## [94] memoise_2.0.1          ellipsis_0.3.2