Source codes: https://github.com/markziemann/dftd_immune_expression
We have designed a target enrichment panel to examine gene expression of immune genes. This is so we can inexpensively profile immune gene expression in devils. We will be looking at a lot of different variables such as dftd-free vs affected, DFTD1 vs DFTD2, etc.
The main comparisons are:
-Wild vs Captive devils
-Healthy vs DFTD infected devils
-DFT1 and DFT2 devils
I have their sex/age/seasons categories there as well if we decide to go with more detail analysis in the future, but I think just stick with these three for now. Some devils are a part of the Captive control and Wild control groups . There are a couple samples that won’t go into the analysis that I made note of. Meliza and Simba.
suppressPackageStartupMessages({
library("reshape2")
library("gplots")
library("DESeq2")
library("mitch")
library("limma")
library("kableExtra")
library("dplyr")
})
Functions.
maplot <- function(de,contrast_name) {
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=1, col="dark gray",
main=contrast_name, cex.main=0.7)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=1, col="red")
mtext(SUBHEADER,cex = 0.7)
grid()
}
make_volcano <- function(de,name) {
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}
Here we load the data in from the aligner.
tmp <- read.table("3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3"))
colnames(x) <- gsub("fastq/","",colnames(x))
dim(x)
## [1] 310 143
Samples with <1M reads should be omitted. Will also round values to integers.
cs <- colSums(x)
cs <- cs[order(cs)]
barplot(cs,ylim=c(1e2,2e7),log="y",main="All samples",cex.names=0.5)
barplot(head(cs),log="y",main="Samples with low reads",cex.names=0.7)
abline(h=1e6,col="red",lty=2)
x <- x[,colSums(x)>1e6]
x <- round(x)
rc <- rowMeans(x)
rc <- rc[order(rc)]
barplot(rc+1,log="y")
hist(log(rc+1))
round(rc[which(rc<10)],digits=1)
## CNTF CTF1 PTX3 RAG2 OSM AIF1 IL17B IL36RN STAT2 CLEC4E CD3g
## 0.0 0.0 0.0 0.0 0.0 0.2 0.4 0.4 0.5 0.5 0.6
## CSF2 CXCL13 IL25 Casp9 CX3CL1 CCLD16 IL17A IFNB1 CSF3 CCL19 CCL26
## 0.7 0.8 0.8 0.9 0.9 1.3 1.5 1.8 2.0 2.3 2.4
## IFNA1 FCAMR KITLG IL17F CCLD15 LIF CCL22 IL11 IFNL1 IL33 IL26
## 2.5 2.5 2.7 3.2 3.4 3.9 3.9 4.0 4.4 4.5 4.6
## IL12A IL1F10 RPS18 CD70 CCL27 ACKR4 IL36L1 FGB IL31Ra CXCL11 CXCL14
## 4.9 5.0 5.0 5.3 5.6 5.8 6.0 6.2 6.8 7.5 7.7
## IL36L2 CXCL17 IL4 IL9 CCL17 CCLD4
## 8.1 8.1 8.2 8.3 8.6 9.8
A lot of the shorter genes (cytokines) had lower expression.
ss <- read.table("samplesheet.tsv",sep="\t",header=TRUE)
plotMDS(x)
Run a differential analysis. Wild vs captive.
ss1 <- ss[which(grepl("Wild",ss$Category) + grepl("Captive",ss$Category)>0),]
ss1 <- ss1[ss1$Sample %in% colnames(x),]
dim(ss1)
## [1] 36 15
x1 <- x[,which(colnames(x) %in% ss1$Sample)]
dim(x1)
## [1] 310 36
x1 <- x1[which(rowMeans(x1)>10),]
dim(x1)
## [1] 260 36
col <- as.character(as.numeric(grepl("Cap",ss1$Category)))
col <- gsub("0","lightgreen",col)
col <- gsub("1","lightblue",col)
mds<-plotMDS(x1,pch=19,cex=3,col=col,main="MDS plot: wild and captive devils")
mtext("wild=green, captive=blue")
text(mds,labels=colnames(x1),cex=0.7)
# coding captive
ss1$captive <- grepl("Cap",ss1$Category)
dds <- DESeqDataSetFromMatrix(countData = x1 , colData = ss1, design = ~ captive )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 12 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE,nsub=nrow(x1)-50)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge1 <- dge
sig <- subset(dge,padj<0.05)
sig1_up <- rownames(subset(sig,log2FoldChange>0))
sig1_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig1_up)
## [1] 36
length(sig1_dn)
## [1] 44
maplot(dge1,"wild vs captive devils")
make_volcano(dge1,"wild vs captive devils")
sig[1:50,1:6] %>%
kbl(caption="Comparison of wild vs captive devils") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
EPX | 4183.07123 | -3.4079536 | 0.4607668 | -7.396266 | 0.0000000 | 0.0000000 |
CCR2 | 3733.31997 | -1.0692199 | 0.1766932 | -6.051279 | 0.0000000 | 0.0000002 |
ITGAX | 5814.14465 | -0.9504545 | 0.1624694 | -5.850051 | 0.0000000 | 0.0000004 |
CCR7 | 14909.18108 | 1.1232280 | 0.1931247 | 5.816076 | 0.0000000 | 0.0000004 |
NFKB1 | 6160.50562 | 0.5305511 | 0.0970251 | 5.468182 | 0.0000000 | 0.0000024 |
CD1d | 1276.84335 | 1.1328849 | 0.2088254 | 5.425033 | 0.0000001 | 0.0000025 |
C3 | 41378.49782 | -1.1728466 | 0.2201943 | -5.326417 | 0.0000001 | 0.0000037 |
CCL21 | 16.48466 | -6.0299163 | 1.1947100 | -5.047180 | 0.0000004 | 0.0000146 |
IL7RA | 5802.93992 | 0.9843279 | 0.2049357 | 4.803106 | 0.0000016 | 0.0000431 |
CATH3 | 11222.02464 | -2.0001644 | 0.4174722 | -4.791132 | 0.0000017 | 0.0000431 |
CD177 | 828.62505 | -1.8702926 | 0.4021087 | -4.651211 | 0.0000033 | 0.0000780 |
Spi1 | 29735.23452 | -1.0732955 | 0.2328274 | -4.609834 | 0.0000040 | 0.0000873 |
CD28 | 931.91839 | 1.1402306 | 0.2552503 | 4.467107 | 0.0000079 | 0.0001555 |
CXCR4 | 16719.56435 | 0.6964438 | 0.1563121 | 4.455469 | 0.0000084 | 0.0001555 |
MRC1 | 1272.54271 | -1.0945398 | 0.2486038 | -4.402748 | 0.0000107 | 0.0001853 |
MPO | 118.84914 | -1.9761122 | 0.4569679 | -4.324400 | 0.0000153 | 0.0002485 |
CD68 | 4028.85982 | -0.8024057 | 0.1900471 | -4.222142 | 0.0000242 | 0.0003615 |
TGFb1 | 54708.72507 | -0.9858531 | 0.2343185 | -4.207321 | 0.0000258 | 0.0003615 |
TNFSF13 | 443.78996 | -0.7609083 | 0.1810674 | -4.202348 | 0.0000264 | 0.0003615 |
IL34 | 1419.53594 | -1.6697384 | 0.4043484 | -4.129454 | 0.0000364 | 0.0004727 |
GATA2 | 1406.89288 | -0.8609619 | 0.2097084 | -4.105520 | 0.0000403 | 0.0004995 |
IL10Rb | 418.87031 | 0.7853816 | 0.1958234 | 4.010662 | 0.0000605 | 0.0007156 |
TXLNA | 3770.10495 | -0.4767453 | 0.1192597 | -3.997538 | 0.0000640 | 0.0007235 |
CCLD3 | 92.87741 | -2.5274807 | 0.6524846 | -3.873625 | 0.0001072 | 0.0011616 |
CCR1 | 433.10707 | -0.9258979 | 0.2462342 | -3.760232 | 0.0001698 | 0.0017655 |
CCLD13 | 246.24333 | -1.1544510 | 0.3130101 | -3.688223 | 0.0002258 | 0.0022583 |
IFIH1 | 1799.91319 | 0.6921871 | 0.1925865 | 3.594162 | 0.0003254 | 0.0031338 |
TLR3 | 459.09865 | 0.8319981 | 0.2336188 | 3.561349 | 0.0003690 | 0.0034200 |
TNFSF8 | 456.78476 | 0.7558417 | 0.2127582 | 3.552586 | 0.0003815 | 0.0034200 |
GATA1 | 262633.59066 | -1.8249054 | 0.5193252 | -3.513994 | 0.0004414 | 0.0037099 |
CCR6 | 895.03032 | 1.0130940 | 0.2889628 | 3.505967 | 0.0004550 | 0.0037099 |
TLR9 | 674.33669 | -0.9121711 | 0.2602483 | -3.505002 | 0.0004566 | 0.0037099 |
TCRm | 4539.86415 | 0.8327992 | 0.2400706 | 3.468976 | 0.0005224 | 0.0040538 |
TCF3 | 30815.76980 | -1.0644012 | 0.3071809 | -3.465063 | 0.0005301 | 0.0040538 |
SIRPA | 131.39314 | -1.0876220 | 0.3177563 | -3.422818 | 0.0006198 | 0.0046039 |
TCRd | 231.15787 | 1.1654867 | 0.3433128 | 3.394825 | 0.0006867 | 0.0049597 |
STAT1 | 7854.81269 | 0.5645835 | 0.1672044 | 3.376606 | 0.0007339 | 0.0051569 |
TNFRSF1A | 6749.97652 | -0.4554037 | 0.1360066 | -3.348394 | 0.0008128 | 0.0055614 |
TCRa | 8172.55767 | 0.4759892 | 0.1438695 | 3.308479 | 0.0009380 | 0.0062536 |
STAT4 | 1471.30776 | 0.6440130 | 0.1952423 | 3.298532 | 0.0009719 | 0.0063175 |
CCLD1 | 32.55610 | -2.1740793 | 0.6670158 | -3.259412 | 0.0011164 | 0.0070798 |
CD14 | 32080.70523 | -0.7784355 | 0.2403961 | -3.238138 | 0.0012031 | 0.0074479 |
IL10RA | 10099.34196 | 0.8032078 | 0.2492640 | 3.222318 | 0.0012716 | 0.0076886 |
CCLD14 | 168.91060 | -1.2959646 | 0.4103225 | -3.158405 | 0.0015863 | 0.0093739 |
CCR5 | 286.65976 | -0.7147980 | 0.2325221 | -3.074108 | 0.0021113 | 0.0121988 |
IL4I0 | 111.10456 | -1.3857984 | 0.4537710 | -3.053959 | 0.0022584 | 0.0127650 |
IRF3 | 13379.83096 | -0.6653475 | 0.2191661 | -3.035814 | 0.0023989 | 0.0132704 |
PRG3 | 8577.73587 | 1.0096619 | 0.3372969 | 2.993393 | 0.0027589 | 0.0149443 |
CD3e | 15796.35503 | 0.4521622 | 0.1514440 | 2.985673 | 0.0028295 | 0.0150139 |
MMP9 | 58076.06374 | -0.6397997 | 0.2172009 | -2.945659 | 0.0032227 | 0.0165056 |
write.table(dge,file="dge1.tsv",sep="\t")
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
col=colfunc(25),ColSideColors=col,mar=c(5,12))
For reproducibility.
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] dplyr_1.1.0 kableExtra_1.3.4
## [3] limma_3.52.2 mitch_1.8.0
## [5] DESeq2_1.36.0 SummarizedExperiment_1.26.1
## [7] Biobase_2.56.0 MatrixGenerics_1.8.1
## [9] matrixStats_0.63.0 GenomicRanges_1.48.0
## [11] GenomeInfoDb_1.32.2 IRanges_2.30.0
## [13] S4Vectors_0.34.0 BiocGenerics_0.42.0
## [15] gplots_3.1.3 reshape2_1.4.4
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 bit64_4.0.5 webshot_0.5.4
## [4] RColorBrewer_1.1-3 httr_1.4.5 tools_4.2.3
## [7] bslib_0.4.2 utf8_1.2.3 R6_2.5.1
## [10] KernSmooth_2.23-20 DBI_1.1.3 colorspace_2.1-0
## [13] gridExtra_2.3 tidyselect_1.2.0 GGally_2.1.2
## [16] bit_4.0.5 compiler_4.2.3 rvest_1.0.3
## [19] cli_3.6.0 xml2_1.3.3 DelayedArray_0.22.0
## [22] sass_0.4.5 caTools_1.18.2 scales_1.2.1
## [25] genefilter_1.78.0 systemfonts_1.0.4 stringr_1.5.0
## [28] digest_0.6.31 svglite_2.1.1 rmarkdown_2.20
## [31] XVector_0.36.0 pkgconfig_2.0.3 htmltools_0.5.4
## [34] highr_0.10 fastmap_1.1.1 htmlwidgets_1.6.2
## [37] rlang_1.1.0 rstudioapi_0.14 RSQLite_2.3.0
## [40] shiny_1.7.4 jquerylib_0.1.4 generics_0.1.3
## [43] jsonlite_1.8.4 BiocParallel_1.30.3 gtools_3.9.4
## [46] RCurl_1.98-1.10 magrittr_2.0.3 GenomeInfoDbData_1.2.8
## [49] Matrix_1.5-3 Rcpp_1.0.10 munsell_0.5.0
## [52] fansi_1.0.4 lifecycle_1.0.3 stringi_1.7.12
## [55] yaml_2.3.7 MASS_7.3-58.3 zlibbioc_1.42.0
## [58] plyr_1.8.8 grid_4.2.3 blob_1.2.4
## [61] promises_1.2.0.1 parallel_4.2.3 crayon_1.5.2
## [64] lattice_0.20-45 Biostrings_2.64.0 echarts4r_0.4.4
## [67] splines_4.2.3 annotate_1.74.0 KEGGREST_1.36.3
## [70] locfit_1.5-9.7 knitr_1.42 pillar_1.8.1
## [73] geneplotter_1.74.0 codetools_0.2-19 XML_3.99-0.13
## [76] glue_1.6.2 evaluate_0.20 httpuv_1.6.9
## [79] png_0.1-8 vctrs_0.6.0 gtable_0.3.2
## [82] reshape_0.8.9 cachem_1.0.7 ggplot2_3.4.1
## [85] xfun_0.37 mime_0.12 xtable_1.8-4
## [88] later_1.3.0 viridisLite_0.4.1 survival_3.5-5
## [91] tibble_3.2.0 beeswarm_0.4.0 AnnotationDbi_1.58.0
## [94] memoise_2.0.1 ellipsis_0.3.2