Source: https://github.com/markziemann/mesangial_rageko
Looking at doing comparison of control and RAGE KO Mouse mesangial cells RNA-seq and miRNA-seq.
RAGE KO data sequenced at Baker. 61 nt length.
WT data sequenced at BGI with 91 nt length.
Quality trimming with skewer 0.2.2 to ensure 3’ end base quality > 20.
microRNA hairpins were downloaded from miRbase
Mouse hairpins were extracted from the reference and indexed with kallisto using a seed length of 15 bp.
kallisto was run in single end mode to map the reads.
Read counts were loaded into R and analysed with DESeq2.
OS, R and package versions are shown in the session info section at the end of this report.
suppressPackageStartupMessages({
library("reshape2")
library("DESeq2")
library("gplots")
library("mitch")
library("dplyr")
library("biomaRt")
library("kableExtra")
})
tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
xx <- round(x)
dim(xx)
## [1] 1234 12
head(xx)
## miR_rageko_ctrl_1 miR_rageko_ctrl_2 miR_rageko_ctrl_3
## mmu-let-7a-1 20363 20727 9947
## mmu-let-7a-2 1543 1441 691
## mmu-let-7b 42713 48743 23034
## mmu-let-7c-1 37266 34769 21005
## mmu-let-7c-2 74907 92800 46649
## mmu-let-7d 8707 9948 4206
## miR_rageko_TGF_1 miR_rageko_TGF_2 miR_rageko_TGF_3 miR_wt_ctrl_1
## mmu-let-7a-1 16769 12800 12352 1306100
## mmu-let-7a-2 1489 614 608 44116
## mmu-let-7b 22473 15060 13756 3184290
## mmu-let-7c-1 22296 8455 5260 3453180
## mmu-let-7c-2 57307 32425 30454 1240720
## mmu-let-7d 6126 6035 5399 189697
## miR_wt_ctrl_2 miR_wt_ctrl_3 miR_wt_TGF_1 miR_wt_TGF_2 miR_wt_TGF_3
## mmu-let-7a-1 1348620 1472040 1669340 1426850 1563840
## mmu-let-7a-2 43526 50652 78374 78690 84683
## mmu-let-7b 3163160 3465470 3348090 2859190 3046660
## mmu-let-7c-1 3466790 3664760 3491620 2982230 3103810
## mmu-let-7c-2 1190810 1395550 1427850 1286160 1329270
## mmu-let-7d 195022 196772 242764 212549 217202
write.table(x=xx,file="miRNA_count_matrix.tsv",sep="\t")
Now quantify the top 10 species. Starting with all samples.
rpm <- apply(xx, 2, function(x){x/sum(x,na.rm=T)}) * 1000000
rpm <- rpm[order(rowMeans(rpm) ),]
rpm <- tail(rpm,10)
# create color palette:
library(RColorBrewer)
n=10
pal <- brewer.pal(n, "Paired")
par(mfrow=c(1,2))
par(mar=c(10,5,3,1))
barplot(rpm,col=pal,las=2)
plot.new()
legend("topleft", legend=rev(rownames(rpm)),fill=rev(pal),bg="white",cex=1)
#restore default setting
par(mar=c(5.1, 4.1, 4.1, 2.1))
Now calculate diversity of each sample.
pc <- apply(xx, 2, function(x){x/sum(x,na.rm=T)}) * 100
res <- lapply(1:ncol(pc),function(i){
dat <- pc[,i]
dat <- dat[order(-dat)]
head(cumsum(dat))
})
names(res) <- colnames(pc)
res
## $miR_rageko_ctrl_1
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-let-7i
## 35.04691 50.68179 56.93842 62.08391 65.17234 67.31654
##
## $miR_rageko_ctrl_2
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-let-7i
## 33.26619 49.71553 55.91072 60.54882 63.63909 65.95557
##
## $miR_rageko_ctrl_3
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-mir-30d
## 33.03525 50.11035 58.16282 62.36956 65.16494 67.86357
##
## $miR_rageko_TGF_1
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-30d
## 37.08525 51.32986 58.80983 63.22891 66.24793
## mmu-mir-199a-1
## 68.14504
##
## $miR_rageko_TGF_2
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-199a-1
## 45.01689 58.06857 63.68706 65.88644 68.07317
## mmu-mir-22
## 70.25966
##
## $miR_rageko_TGF_3
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-22 mmu-mir-199a-1
## 47.72168 62.21030 66.64633 69.00270 71.20109
## mmu-mir-10a
## 73.29187
##
## $miR_wt_ctrl_1
## mmu-let-7c-1 mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7f-2 mmu-let-7e
## 22.41564 43.08582 51.56412 59.61800 66.69586 73.01094
##
## $miR_wt_ctrl_2
## mmu-let-7c-1 mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7f-2 mmu-let-7e
## 22.21449 42.48338 51.12507 58.75554 65.99981 72.87769
##
## $miR_wt_ctrl_3
## mmu-let-7c-1 mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7e mmu-let-7f-2
## 22.04385 42.88896 51.74341 60.13776 67.33343 74.10425
##
## $miR_wt_TGF_1
## mmu-let-7c-1 mmu-let-7b mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2
## 19.23706 37.68335 47.70534 56.90256 65.38002 73.24676
##
## $miR_wt_TGF_2
## mmu-let-7c-1 mmu-let-7b mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2
## 19.07475 37.36251 47.31784 56.44416 65.47192 73.69838
##
## $miR_wt_TGF_3
## mmu-let-7c-1 mmu-let-7b mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2
## 18.82160 37.29665 47.61448 57.09765 66.01360 74.07433
#colnames(xx) <- sapply(strsplit(colnames(xx),"_"),"[[",1)
mysamples <- colnames(xx)
ko <- as.numeric(grepl("ko",mysamples))
tgf <- as.numeric(grepl("TGF",mysamples))
ss <- data.frame(mysamples,ko,tgf)
rownames(ss) <- mysamples
ss
## mysamples ko tgf
## miR_rageko_ctrl_1 miR_rageko_ctrl_1 1 0
## miR_rageko_ctrl_2 miR_rageko_ctrl_2 1 0
## miR_rageko_ctrl_3 miR_rageko_ctrl_3 1 0
## miR_rageko_TGF_1 miR_rageko_TGF_1 1 1
## miR_rageko_TGF_2 miR_rageko_TGF_2 1 1
## miR_rageko_TGF_3 miR_rageko_TGF_3 1 1
## miR_wt_ctrl_1 miR_wt_ctrl_1 0 0
## miR_wt_ctrl_2 miR_wt_ctrl_2 0 0
## miR_wt_ctrl_3 miR_wt_ctrl_3 0 0
## miR_wt_TGF_1 miR_wt_TGF_1 0 1
## miR_wt_TGF_2 miR_wt_TGF_2 0 1
## miR_wt_TGF_3 miR_wt_TGF_3 0 1
TODO: rRNA carryover.
par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)
grid()
And correlation heat map.
par(mar=c(5,5,3,3))
mds <- cmdscale(dist(t(xx)))
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n", cex=4 )
text(mds, labels=rownames(mds) ,col="black")
heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap",margin=c(8,8),cexRow=0.7,cexCol=0.7)
Compare wt control to RAGE KO control.
maplot <- function(de,contrast_name) {
de <- de[which(!is.na(de$padj)),]
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=0.5, col="dark gray",
main=contrast_name, cex.main=1)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=0.5, col="red")
mtext(SUBHEADER,cex = 1)
}
make_volcano <- function(de,name) {
de <- de[which(!is.na(de$padj)),]
de$pvalue[which(de$pvalue==0)] <- 1e-320
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}
ss1 <- subset(ss,tgf==0)
xx1 <- xx[,which(colnames(xx) %in% rownames(ss1) )]
dim(xx1)
## [1] 1234 6
xx1 <- xx1[which(rowMeans(xx1)>10),]
dim(xx1)
## [1] 436 6
dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1 , design = ~ ko )
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE,nsub=nrow(xx1))
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>% kbl(caption = "Top gene expression differences") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
mmu-let-7a-1 | 688836.774 | -6.405851 | 0.1409814 | -45.43755 | 0 | 0 |
mmu-let-7b | 1637945.068 | -6.481798 | 0.1472216 | -44.02751 | 0 | 0 |
mmu-let-7c-1 | 1760492.704 | -6.861097 | 0.1451876 | -47.25676 | 0 | 0 |
mmu-let-7e | 542795.327 | -6.057295 | 0.1478637 | -40.96540 | 0 | 0 |
mmu-let-7k | 6972.734 | -8.746573 | 0.1986122 | -44.03844 | 0 | 0 |
mmu-mir-183 | 10684.981 | 5.936845 | 0.1475355 | 40.24012 | 0 | 0 |
mmu-mir-503 | 7238.823 | -7.025117 | 0.1669449 | -42.08044 | 0 | 0 |
mmu-mir-5099 | 50115.479 | 5.904955 | 0.1485551 | 39.74927 | 0 | 0 |
mmu-mir-21c | 3170.357 | 8.086983 | 0.2186157 | 36.99178 | 0 | 0 |
mmu-mir-181d | 11207.275 | -6.469621 | 0.1763896 | -36.67802 | 0 | 0 |
mmu-mir-10b | 51943.154 | 4.448196 | 0.1251096 | 35.55440 | 0 | 0 |
mmu-mir-181b-2 | 16158.569 | -5.081381 | 0.1431792 | -35.48966 | 0 | 0 |
mmu-let-7a-2 | 23438.059 | -5.314162 | 0.1523568 | -34.87973 | 0 | 0 |
mmu-mir-3074-2 | 12854.203 | 5.334356 | 0.1547168 | 34.47820 | 0 | 0 |
mmu-mir-298 | 2757.555 | -5.573552 | 0.1623121 | -34.33848 | 0 | 0 |
mmu-mir-214 | 5444.600 | 5.150779 | 0.1534990 | 33.55579 | 0 | 0 |
mmu-let-7d | 99318.677 | -4.746768 | 0.1415774 | -33.52772 | 0 | 0 |
mmu-mir-1839 | 17238.902 | -4.554979 | 0.1395659 | -32.63676 | 0 | 0 |
mmu-mir-222 | 125551.730 | -4.952649 | 0.1518771 | -32.60957 | 0 | 0 |
mmu-mir-320 | 103212.812 | -4.961718 | 0.1534058 | -32.34374 | 0 | 0 |
d1up <- rownames(subset(dge,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge,padj <= 0.05 & log2FoldChange < 0))
write.table(dge,file="miRNA_DE_ctrl_vs_rageko.tsv",quote=FALSE,sep="\t")
maplot(dge,"Cont1: Effect of RAGE KO")
make_volcano(dge,"Cont1: Effect of RAGE KO")
#agerdat <- assay(vsd)[grep("Ager",rownames(assay(vsd))),]
#par(mar=c(5,8,3,1))
#barplot(agerdat,horiz=TRUE,las=1,xlab="Normalised Ager expression",xlim=c(0,10))
Make a heatmap of the top 30 genes.
rpm1 <- apply(xx1, 2, function(x){x/sum(x,na.rm=T)}) * 1000000
colfunc <- colorRampPalette(c("blue", "white", "red"))
rpm2 <- rpm1[which(rownames(rpm1) %in% rownames(head(dge,30))),]
heatmap.2(as.matrix(rpm2),margin=c(8, 22),cexRow=0.85,trace="none",
cexCol=0.9,col=colfunc(20),scale="row")
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 kableExtra_1.3.4
## [3] biomaRt_2.52.0 dplyr_1.0.9
## [5] mitch_1.8.0 gplots_3.1.3
## [7] DESeq2_1.36.0 SummarizedExperiment_1.26.1
## [9] Biobase_2.56.0 MatrixGenerics_1.8.0
## [11] matrixStats_0.62.0 GenomicRanges_1.48.0
## [13] GenomeInfoDb_1.32.2 IRanges_2.30.0
## [15] S4Vectors_0.34.0 BiocGenerics_0.42.0
## [17] reshape2_1.4.4
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 XVector_0.36.0
## [4] rstudioapi_0.13 bit64_4.0.5 AnnotationDbi_1.58.0
## [7] fansi_1.0.3 xml2_1.3.3 codetools_0.2-18
## [10] splines_4.2.1 cachem_1.0.6 geneplotter_1.74.0
## [13] knitr_1.39 jsonlite_1.8.0 annotate_1.74.0
## [16] dbplyr_2.2.1 png_0.1-7 shiny_1.7.1
## [19] compiler_4.2.1 httr_1.4.3 assertthat_0.2.1
## [22] Matrix_1.4-1 fastmap_1.1.0 cli_3.3.0
## [25] later_1.3.0 htmltools_0.5.2 prettyunits_1.1.1
## [28] tools_4.2.1 gtable_0.3.0 glue_1.6.2
## [31] GenomeInfoDbData_1.2.8 rappdirs_0.3.3 Rcpp_1.0.8.3
## [34] jquerylib_0.1.4 vctrs_0.4.1 Biostrings_2.64.0
## [37] svglite_2.1.0 xfun_0.31 stringr_1.4.0
## [40] rvest_1.0.2 mime_0.12 lifecycle_1.0.1
## [43] gtools_3.9.2.2 XML_3.99-0.10 zlibbioc_1.42.0
## [46] MASS_7.3-58 scales_1.2.0 hms_1.1.1
## [49] promises_1.2.0.1 parallel_4.2.1 yaml_2.3.5
## [52] curl_4.3.2 memoise_2.0.1 gridExtra_2.3
## [55] ggplot2_3.3.6 sass_0.4.1 reshape_0.8.9
## [58] stringi_1.7.6 RSQLite_2.2.14 highr_0.9
## [61] genefilter_1.78.0 caTools_1.18.2 filelock_1.0.2
## [64] BiocParallel_1.30.3 echarts4r_0.4.4 systemfonts_1.0.4
## [67] rlang_1.0.3 pkgconfig_2.0.3 bitops_1.0-7
## [70] evaluate_0.15 lattice_0.20-45 purrr_0.3.4
## [73] htmlwidgets_1.5.4 bit_4.0.4 tidyselect_1.1.2
## [76] GGally_2.1.2 plyr_1.8.7 magrittr_2.0.3
## [79] R6_2.5.1 generics_0.1.2 DelayedArray_0.22.0
## [82] DBI_1.1.3 pillar_1.7.0 survival_3.4-0
## [85] KEGGREST_1.36.2 RCurl_1.98-1.7 tibble_3.1.7
## [88] crayon_1.5.1 KernSmooth_2.23-20 utf8_1.2.2
## [91] BiocFileCache_2.4.0 rmarkdown_2.14 progress_1.2.2
## [94] locfit_1.5-9.5 grid_4.2.1 blob_1.2.3
## [97] webshot_0.5.3 digest_0.6.29 xtable_1.8-4
## [100] httpuv_1.6.5 munsell_0.5.0 viridisLite_0.4.0
## [103] beeswarm_0.4.0 bslib_0.3.1