• Background
  • Load data
  • Remove poor samples
  • Identify poorly represented genes
  • Sample sheet
  • MDS
  • DESeq2
  • Session information

Source code: https://github.com/markziemann/dftd_immune_expression

Background

We have designed a target enrichment panel to examine the expression of immune genes, so that we can inexpensively profile immune gene expression in devils. We will be looking at many different variables, such as DFTD-free vs affected, DFT1 vs DFT2, etc.

The main comparisons are:

-Wild vs Captive devils

-Healthy vs DFTD infected devils

-DFT1 and DFT2 devils

I have their sex/age/season categories there as well if we decide to go with a more detailed analysis in the future, but I think we should just stick with these three for now. Some devils are part of the Captive control and Wild control groups. There are a couple of samples that won’t go into the analysis that I made note of: Meliza and Simba.

suppressPackageStartupMessages({
    library("reshape2")
    library("gplots")
    library("DESeq2")
    library("mitch")
    library("limma")
    library("kableExtra")
    library("dplyr")
})

Functions.

# Draw an MA plot (log2 mean expression vs log2 fold change) for one contrast.
#
# de:            data frame of differential expression results with numeric
#                columns baseMean, log2FoldChange and padj (one row per gene).
# contrast_name: character string used as the plot title.
#
# Genes with padj < 0.05 are over-plotted in red. The counts of up- and
# down-regulated genes and the total number of rows are written above the plot.
# Returns the subheader text invisibly.
maplot <- function(de, contrast_name) {
  # subset() drops rows whose condition is NA, so genes without an adjusted
  # p-value are never counted as significant.
  sig <- subset(de, padj < 0.05)
  n_up <- sum(sig$log2FoldChange > 0, na.rm = TRUE)
  n_dn <- sum(sig$log2FoldChange < 0, na.rm = TRUE)
  n_detected <- nrow(de)
  # paste0() with explicit separators avoids the doubled space and the missing
  # comma that paste() produced when the pieces already ended in ", ".
  subheader <- paste0(n_up, " up, ", n_dn, " down, ", n_detected, " detected")
  plot(log2(de$baseMean), de$log2FoldChange,
       xlab = "log2 basemean", ylab = "log2 foldchange",
       pch = 19, cex = 1, col = "dark gray",
       main = contrast_name, cex.main = 0.7)
  points(log2(sig$baseMean), sig$log2FoldChange,
         pch = 19, cex = 1, col = "red")
  mtext(subheader, cex = 0.7)
  grid()
  invisible(subheader)
}

# Draw a volcano plot (log2 fold change vs -log10 adjusted p-value).
#
# de:   data frame of differential expression results with numeric columns
#       log2FoldChange and padj (one row per gene).
# name: character string used as the plot title.
# xlim: optional length-2 numeric vector giving the x-axis limits. When NULL
#       (the default) the axis is symmetric and spans at least -6 to 6, and is
#       widened as needed so that no plotted gene falls outside the axis.
#
# Genes with padj < 0.05 are over-plotted in red. Returns the header text
# invisibly.
make_volcano <- function(de, name, xlim = NULL) {
  # subset() drops rows with NA padj, so they are never counted as significant.
  sig <- subset(de, padj < 0.05)
  n_sig <- nrow(sig)
  n_up <- sum(sig$log2FoldChange > 0, na.rm = TRUE)
  n_dn <- sum(sig$log2FoldChange < 0, na.rm = TRUE)
  n_detected <- nrow(de)
  header <- paste(n_sig, "@5%FDR,", n_up, "up", n_dn, "dn",
                  n_detected, "detected")
  if (is.null(xlim)) {
    # Only points with a finite fold change and a non-missing padj are drawn,
    # so only those are allowed to widen the axis.
    drawn <- is.finite(de$log2FoldChange) & !is.na(de$padj)
    half_width <- max(6, abs(de$log2FoldChange[drawn]))
    xlim <- c(-half_width, half_width)
  }
  # The y axis shows the adjusted p-value (padj), and is labelled as such.
  plot(de$log2FoldChange, -log10(de$padj), cex = 1, pch = 19, col = "darkgray",
       main = name, xlab = "log2 FC", ylab = "-log10 padj", xlim = xlim)
  mtext(header)
  grid()
  points(sig$log2FoldChange, -log10(sig$padj), cex = 1, pch = 19, col = "red")
  invisible(header)
}

Load data

Here we load the data in from the aligner.

# Read the long-format count table. No header is read, so the columns get the
# default names V1, V2 and V3 that acast() refers to below.
tmp <- read.table("3col.tsv.gz")
# Reshape long to wide: one row per V2 value, one column per V1 value, cells
# filled from V3.
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3"))
# Strip the "fastq/" text from the column names.
colnames(x) <- gsub("fastq/","",colnames(x))
# Report the table dimensions (rows, columns).
dim(x)
## [1] 310 143

Remove poor samples

Samples with <1M reads should be omitted. Will also round values to integers.

# Total count per column of x, sorted from smallest to largest.
cs <- colSums(x)
cs <- cs[order(cs)]

# Per-sample totals on a log-scaled y axis.
barplot(cs,ylim=c(1e2,2e7),log="y",main="All samples",cex.names=0.5)

# head() keeps the first six entries, i.e. the six smallest totals.
barplot(head(cs),log="y",main="Samples with low reads",cex.names=0.7)

# Dashed red line marking the 1e6 threshold on the plot above.
abline(h=1e6,col="red",lty=2)

# Keep only columns whose total is strictly greater than 1e6, then round every
# value to the nearest integer.
x <- x[,colSums(x)>1e6]
x <- round(x)

Identify poorly represented genes

# Mean count per row of x (across all retained columns), sorted ascending.
rc <- rowMeans(x)
rc <- rc[order(rc)]
# Add 1 before plotting so that rows with a mean of zero can be drawn on the
# log-scaled axis.
barplot(rc+1,log="y")

# Distribution of natural-log(mean + 1).
hist(log(rc+1))

# List the rows whose mean is below 10, rounded to one decimal place.
round(rc[which(rc<10)],digits=1)
##   CNTF   CTF1   PTX3   RAG2    OSM   AIF1  IL17B IL36RN  STAT2 CLEC4E   CD3g 
##    0.0    0.0    0.0    0.0    0.0    0.2    0.4    0.4    0.5    0.5    0.6 
##   CSF2 CXCL13   IL25  Casp9 CX3CL1 CCLD16  IL17A  IFNB1   CSF3  CCL19  CCL26 
##    0.7    0.8    0.8    0.9    0.9    1.3    1.5    1.8    2.0    2.3    2.4 
##  IFNA1  FCAMR  KITLG  IL17F CCLD15    LIF  CCL22   IL11  IFNL1   IL33   IL26 
##    2.5    2.5    2.7    3.2    3.4    3.9    3.9    4.0    4.4    4.5    4.6 
##  IL12A IL1F10  RPS18   CD70  CCL27  ACKR4 IL36L1    FGB IL31Ra CXCL11 CXCL14 
##    4.9    5.0    5.0    5.3    5.6    5.8    6.0    6.2    6.8    7.5    7.7 
## IL36L2 CXCL17    IL4    IL9  CCL17  CCLD4 
##    8.1    8.1    8.2    8.3    8.6    9.8

A lot of the shorter genes (cytokines) had lower expression.

Sample sheet

# Read the tab-separated sample sheet; the first line supplies column names.
ss <- read.table("samplesheet.tsv",sep="\t",header=TRUE)

MDS

# Multidimensional scaling plot of the columns of x.
# NOTE(review): x holds rounded raw counts at this point; plotMDS is usually
# given log-scale expression values -- confirm this is intended.
plotMDS(x)

DESeq2

Run a differential analysis. Wild vs captive.

# Keep sample-sheet rows whose Category mentions "Wild" or "Captive" and whose
# Sample is present among the columns of x.
# NOTE(review): the background text says Meliza and Simba are to be left out;
# nothing here removes them by name -- confirm the Category column or the
# read-depth filter already excludes them.
ss1 <- ss[grepl("Wild|Captive", ss$Category), ]
ss1 <- ss1[ss1$Sample %in% colnames(x), ]
dim(ss1)
## [1] 36 15
# Select the count columns in sample-sheet order so that column i of x1 is
# always described by row i of ss1. The colours, the MDS labels and the DESeq2
# design below all rely on this pairing, and DESeq2 cannot check it unless the
# row names of the sample table are the sample names.
x1 <- x[, as.character(ss1$Sample), drop = FALSE]
stopifnot(identical(colnames(x1), as.character(ss1$Sample)))
rownames(ss1) <- ss1$Sample
dim(x1)
## [1] 310  36
# Drop rows whose mean count across these samples is 10 or less.
x1 <- x1[which(rowMeans(x1) > 10), ]
dim(x1)
## [1] 260  36
# One colour per sample: light blue when Category contains "Cap", light green
# otherwise.
col <- ifelse(grepl("Cap", ss1$Category), "lightblue", "lightgreen")

mds <- plotMDS(x1, pch = 19, cex = 3, col = col,
               main = "MDS plot: wild and captive devils")
mtext("wild=green, captive=blue")
text(mds, labels = colnames(x1), cex = 0.7)

# coding captive
# TRUE for captive devils, FALSE otherwise; used as the design variable.
# NOTE(review): with a logical design variable the reported fold changes are
# presumably TRUE (captive) relative to FALSE (wild) -- confirm with
# resultsNames() before interpreting the sign.
ss1$captive <- grepl("Cap", ss1$Category)

dds <- DESeqDataSetFromMatrix(countData = x1, colData = ss1,
                              design = ~ captive)
## converting counts to integer mode
# Fit the DESeq2 model for the ~ captive design.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 12 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Extract the results table for the fitted model.
z<- results(res)
# Variance-stabilised values for the same samples. nsub is set below the
# number of rows in x1, presumably because the default is larger than this
# panel -- confirm against the vst() documentation.
vsd <- vst(dds, blind=FALSE,nsub=nrow(x1)-50)
# Results statistics side by side with the per-sample transformed values.
zz<-cbind(as.data.frame(z),assay(vsd))
# Order rows by raw p-value, smallest first.
dge<-as.data.frame(zz[order(zz$pvalue),])
# Keep a copy of the full table for this contrast.
dge1 <- dge
# Rows with adjusted p-value below 0.05, split by the sign of the fold change.
sig <- subset(dge,padj<0.05)
sig1_up <- rownames(subset(sig,log2FoldChange>0))
sig1_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig1_up)
## [1] 36
length(sig1_dn)
## [1] 44
# MA plot and volcano plot of the full results table, using the helper
# functions defined earlier in this document.
maplot(dge1,"wild vs captive devils")

make_volcano(dge1,"wild vs captive devils")

# Show the statistics columns (first six) for up to 50 of the top significant
# genes. head() is used instead of indexing rows 1:50 so that no all-NA rows
# appear when fewer than 50 genes are significant.
head(sig[, 1:6], 50) %>%
  kbl(caption = "Comparison of wild vs captive devils") %>%
  kable_paper("hover", full_width = FALSE)
Comparison of wild vs captive devils
baseMean log2FoldChange lfcSE stat pvalue padj
EPX 4183.07123 -3.4079536 0.4607668 -7.396266 0.0000000 0.0000000
CCR2 3733.31997 -1.0692199 0.1766932 -6.051279 0.0000000 0.0000002
ITGAX 5814.14465 -0.9504545 0.1624694 -5.850051 0.0000000 0.0000004
CCR7 14909.18108 1.1232280 0.1931247 5.816076 0.0000000 0.0000004
NFKB1 6160.50562 0.5305511 0.0970251 5.468182 0.0000000 0.0000024
CD1d 1276.84335 1.1328849 0.2088254 5.425033 0.0000001 0.0000025
C3 41378.49782 -1.1728466 0.2201943 -5.326417 0.0000001 0.0000037
CCL21 16.48466 -6.0299163 1.1947100 -5.047180 0.0000004 0.0000146
IL7RA 5802.93992 0.9843279 0.2049357 4.803106 0.0000016 0.0000431
CATH3 11222.02464 -2.0001644 0.4174722 -4.791132 0.0000017 0.0000431
CD177 828.62505 -1.8702926 0.4021087 -4.651211 0.0000033 0.0000780
Spi1 29735.23452 -1.0732955 0.2328274 -4.609834 0.0000040 0.0000873
CD28 931.91839 1.1402306 0.2552503 4.467107 0.0000079 0.0001555
CXCR4 16719.56435 0.6964438 0.1563121 4.455469 0.0000084 0.0001555
MRC1 1272.54271 -1.0945398 0.2486038 -4.402748 0.0000107 0.0001853
MPO 118.84914 -1.9761122 0.4569679 -4.324400 0.0000153 0.0002485
CD68 4028.85982 -0.8024057 0.1900471 -4.222142 0.0000242 0.0003615
TGFb1 54708.72507 -0.9858531 0.2343185 -4.207321 0.0000258 0.0003615
TNFSF13 443.78996 -0.7609083 0.1810674 -4.202348 0.0000264 0.0003615
IL34 1419.53594 -1.6697384 0.4043484 -4.129454 0.0000364 0.0004727
GATA2 1406.89288 -0.8609619 0.2097084 -4.105520 0.0000403 0.0004995
IL10Rb 418.87031 0.7853816 0.1958234 4.010662 0.0000605 0.0007156
TXLNA 3770.10495 -0.4767453 0.1192597 -3.997538 0.0000640 0.0007235
CCLD3 92.87741 -2.5274807 0.6524846 -3.873625 0.0001072 0.0011616
CCR1 433.10707 -0.9258979 0.2462342 -3.760232 0.0001698 0.0017655
CCLD13 246.24333 -1.1544510 0.3130101 -3.688223 0.0002258 0.0022583
IFIH1 1799.91319 0.6921871 0.1925865 3.594162 0.0003254 0.0031338
TLR3 459.09865 0.8319981 0.2336188 3.561349 0.0003690 0.0034200
TNFSF8 456.78476 0.7558417 0.2127582 3.552586 0.0003815 0.0034200
GATA1 262633.59066 -1.8249054 0.5193252 -3.513994 0.0004414 0.0037099
CCR6 895.03032 1.0130940 0.2889628 3.505967 0.0004550 0.0037099
TLR9 674.33669 -0.9121711 0.2602483 -3.505002 0.0004566 0.0037099
TCRm 4539.86415 0.8327992 0.2400706 3.468976 0.0005224 0.0040538
TCF3 30815.76980 -1.0644012 0.3071809 -3.465063 0.0005301 0.0040538
SIRPA 131.39314 -1.0876220 0.3177563 -3.422818 0.0006198 0.0046039
TCRd 231.15787 1.1654867 0.3433128 3.394825 0.0006867 0.0049597
STAT1 7854.81269 0.5645835 0.1672044 3.376606 0.0007339 0.0051569
TNFRSF1A 6749.97652 -0.4554037 0.1360066 -3.348394 0.0008128 0.0055614
TCRa 8172.55767 0.4759892 0.1438695 3.308479 0.0009380 0.0062536
STAT4 1471.30776 0.6440130 0.1952423 3.298532 0.0009719 0.0063175
CCLD1 32.55610 -2.1740793 0.6670158 -3.259412 0.0011164 0.0070798
CD14 32080.70523 -0.7784355 0.2403961 -3.238138 0.0012031 0.0074479
IL10RA 10099.34196 0.8032078 0.2492640 3.222318 0.0012716 0.0076886
CCLD14 168.91060 -1.2959646 0.4103225 -3.158405 0.0015863 0.0093739
CCR5 286.65976 -0.7147980 0.2325221 -3.074108 0.0021113 0.0121988
IL4I0 111.10456 -1.3857984 0.4537710 -3.053959 0.0022584 0.0127650
IRF3 13379.83096 -0.6653475 0.2191661 -3.035814 0.0023989 0.0132704
PRG3 8577.73587 1.0096619 0.3372969 2.993393 0.0027589 0.0149443
CD3e 15796.35503 0.4521622 0.1514440 2.985673 0.0028295 0.0150139
MMP9 58076.06374 -0.6397997 0.2172009 -2.945659 0.0032227 0.0165056
# Save the full results table (all rows of dge) as a tab-separated file.
write.table(dge,file="dge1.tsv",sep="\t")

# Columns 7 onward of sig are used as the heatmap matrix (columns 1-6 are the
# statistics shown in the table above); keep the first 30 rows.
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
# Blue-white-red colour ramp, sampled at 25 steps below.
colfunc <- colorRampPalette(c("blue", "white", "red"))
# Row-scaled heatmap with the per-sample colours in `col` shown along the top.
# NOTE(review): `mar` presumably reaches heatmap.2's `margins` argument through
# partial argument matching -- confirm and consider spelling it out.
# NOTE(review): ColSideColors assumes `col` is in the same order as the columns
# of mx -- verify the sample sheet and count columns are aligned.
heatmap.2(as.matrix(mx),trace="none",scale="row",
  col=colfunc(25),ColSideColors=col,mar=c(5,12))

Session information

For reproducibility.

# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] dplyr_1.1.0                 kableExtra_1.3.4           
##  [3] limma_3.52.2                mitch_1.8.0                
##  [5] DESeq2_1.36.0               SummarizedExperiment_1.26.1
##  [7] Biobase_2.56.0              MatrixGenerics_1.8.1       
##  [9] matrixStats_0.63.0          GenomicRanges_1.48.0       
## [11] GenomeInfoDb_1.32.2         IRanges_2.30.0             
## [13] S4Vectors_0.34.0            BiocGenerics_0.42.0        
## [15] gplots_3.1.3                reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7           bit64_4.0.5            webshot_0.5.4         
##  [4] RColorBrewer_1.1-3     httr_1.4.5             tools_4.2.3           
##  [7] bslib_0.4.2            utf8_1.2.3             R6_2.5.1              
## [10] KernSmooth_2.23-20     DBI_1.1.3              colorspace_2.1-0      
## [13] gridExtra_2.3          tidyselect_1.2.0       GGally_2.1.2          
## [16] bit_4.0.5              compiler_4.2.3         rvest_1.0.3           
## [19] cli_3.6.0              xml2_1.3.3             DelayedArray_0.22.0   
## [22] sass_0.4.5             caTools_1.18.2         scales_1.2.1          
## [25] genefilter_1.78.0      systemfonts_1.0.4      stringr_1.5.0         
## [28] digest_0.6.31          svglite_2.1.1          rmarkdown_2.20        
## [31] XVector_0.36.0         pkgconfig_2.0.3        htmltools_0.5.4       
## [34] highr_0.10             fastmap_1.1.1          htmlwidgets_1.6.2     
## [37] rlang_1.1.0            rstudioapi_0.14        RSQLite_2.3.0         
## [40] shiny_1.7.4            jquerylib_0.1.4        generics_0.1.3        
## [43] jsonlite_1.8.4         BiocParallel_1.30.3    gtools_3.9.4          
## [46] RCurl_1.98-1.10        magrittr_2.0.3         GenomeInfoDbData_1.2.8
## [49] Matrix_1.5-3           Rcpp_1.0.10            munsell_0.5.0         
## [52] fansi_1.0.4            lifecycle_1.0.3        stringi_1.7.12        
## [55] yaml_2.3.7             MASS_7.3-58.3          zlibbioc_1.42.0       
## [58] plyr_1.8.8             grid_4.2.3             blob_1.2.4            
## [61] promises_1.2.0.1       parallel_4.2.3         crayon_1.5.2          
## [64] lattice_0.20-45        Biostrings_2.64.0      echarts4r_0.4.4       
## [67] splines_4.2.3          annotate_1.74.0        KEGGREST_1.36.3       
## [70] locfit_1.5-9.7         knitr_1.42             pillar_1.8.1          
## [73] geneplotter_1.74.0     codetools_0.2-19       XML_3.99-0.13         
## [76] glue_1.6.2             evaluate_0.20          httpuv_1.6.9          
## [79] png_0.1-8              vctrs_0.6.0            gtable_0.3.2          
## [82] reshape_0.8.9          cachem_1.0.7           ggplot2_3.4.1         
## [85] xfun_0.37              mime_0.12              xtable_1.8-4          
## [88] later_1.3.0            viridisLite_0.4.1      survival_3.5-5        
## [91] tibble_3.2.0           beeswarm_0.4.0         AnnotationDbi_1.58.0  
## [94] memoise_2.0.1          ellipsis_0.3.2