Source: https://github.com/markziemann/mesangial_rageko
Looking at doing comparison of control and RAGE KO Mouse mesangial cells RNA-seq and miRNA-seq.
RAGE KO data sequenced at Baker. 61 nt length.
WT data sequenced at BGI with 91 nt length.
Quality trimming with skewer 0.2.2 to ensure 3’ end base quality > 20.
GENECODE mouse transcriptome v30 was downloaded and indexed.
Kallisto 0.46.0 was used for mapping paired end reads to the transcriptome.
Count data was loaded into R, followed by DE analysis with DESeq2.
Enrichment analysis was performed with mitch, using REACTOME pathways.
OS, R and package versions are shown in the session info section at the end of this report.
suppressPackageStartupMessages({
library("reshape2")
library("DESeq2")
library("gplots")
library("mitch")
library("dplyr")
library("biomaRt")
library("kableExtra")
})
tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
xx <- round(x)
dim(xx)
## [1] 1234 12
head(xx)
## miR_rageko_ctrl_1 miR_rageko_ctrl_2 miR_rageko_ctrl_3
## mmu-let-7a-1 20363 20727 9947
## mmu-let-7a-2 1543 1441 691
## mmu-let-7b 42713 48743 23034
## mmu-let-7c-1 37266 34769 21005
## mmu-let-7c-2 74907 92800 46649
## mmu-let-7d 8707 9948 4206
## miR_rageko_TGF_1 miR_rageko_TGF_2 miR_rageko_TGF_3 miR_wt_ctrl_1
## mmu-let-7a-1 16769 12800 12352 1306100
## mmu-let-7a-2 1489 614 608 44116
## mmu-let-7b 22473 15060 13756 3184290
## mmu-let-7c-1 22296 8455 5260 3453180
## mmu-let-7c-2 57307 32425 30454 1240720
## mmu-let-7d 6126 6035 5399 189697
## miR_wt_ctrl_2 miR_wt_ctrl_3 miR_wt_TGF_1 miR_wt_TGF_2 miR_wt_TGF_3
## mmu-let-7a-1 1348620 1472040 1669340 1426850 1563840
## mmu-let-7a-2 43526 50652 78374 78690 84683
## mmu-let-7b 3163160 3465470 3348090 2859190 3046660
## mmu-let-7c-1 3466790 3664760 3491620 2982230 3103810
## mmu-let-7c-2 1190810 1395550 1427850 1286160 1329270
## mmu-let-7d 195022 196772 242764 212549 217202
write.table(x=xx,file="miRNA_count_matrix.tsv",sep="\t")
Now quantify the top 10 species. Starting with all samples.
rpm <- apply(xx, 2, function(x){x/sum(x,na.rm=T)}) * 1000000
rpm <- rpm[order(rowMeans(rpm) ),]
rpm <- tail(rpm,10)
# create color palette:
library(RColorBrewer)
n=10
pal <- brewer.pal(n, "Paired")
par(mfrow=c(1,2))
par(mar=c(10,5,3,1))
barplot(rpm,col=pal,las=2)
plot.new()
legend("topleft", legend=rev(rownames(rpm)),fill=rev(pal),bg="white",cex=1)
#restore default setting
par(mar=c(5.1, 4.1, 4.1, 2.1))
Now calculate diversity of each sample.
pc <- apply(xx, 2, function(x){x/sum(x,na.rm=T)}) * 100
res <- lapply(1:ncol(pc),function(i){
dat <- pc[,i]
dat <- dat[order(-dat)]
head(cumsum(dat))
})
names(res) <- colnames(pc)
res
## $miR_rageko_ctrl_1
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-let-7i
## 35.04691 50.68179 56.93842 62.08391 65.17234 67.31654
##
## $miR_rageko_ctrl_2
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-let-7i
## 33.26619 49.71553 55.91072 60.54882 63.63909 65.95557
##
## $miR_rageko_ctrl_3
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-mir-30d
## 33.03525 50.11035 58.16282 62.36956 65.16494 67.86357
##
## $miR_rageko_TGF_1
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-30d
## 37.08525 51.32986 58.80983 63.22891 66.24793
## mmu-mir-199a-1
## 68.14504
##
## $miR_rageko_TGF_2
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-199a-1
## 45.01689 58.06857 63.68706 65.88644 68.07317
## mmu-mir-22
## 70.25966
##
## $miR_rageko_TGF_3
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-22 mmu-mir-199a-1
## 47.72168 62.21030 66.64633 69.00270 71.20109
## mmu-mir-10a
## 73.29187
##
## $miR_wt_ctrl_1
## mmu-let-7c-1 mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7f-2 mmu-let-7e
## 22.41564 43.08582 51.56412 59.61800 66.69586 73.01094
##
## $miR_wt_ctrl_2
## mmu-let-7c-1 mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7f-2 mmu-let-7e
## 22.21449 42.48338 51.12507 58.75554 65.99981 72.87769
##
## $miR_wt_ctrl_3
## mmu-let-7c-1 mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7e mmu-let-7f-2
## 22.04385 42.88896 51.74341 60.13776 67.33343 74.10425
##
## $miR_wt_TGF_1
## mmu-let-7c-1 mmu-let-7b mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2
## 19.23706 37.68335 47.70534 56.90256 65.38002 73.24676
##
## $miR_wt_TGF_2
## mmu-let-7c-1 mmu-let-7b mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2
## 19.07475 37.36251 47.31784 56.44416 65.47192 73.69838
##
## $miR_wt_TGF_3
## mmu-let-7c-1 mmu-let-7b mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2
## 18.82160 37.29665 47.61448 57.09765 66.01360 74.07433
#colnames(xx) <- sapply(strsplit(colnames(xx),"_"),"[[",1)
mysamples <- colnames(xx)
ko <- as.numeric(grepl("ko",mysamples))
tgf <- as.numeric(grepl("TGF",mysamples))
ss <- data.frame(mysamples,ko,tgf)
rownames(ss) <- mysamples
ss
## mysamples ko tgf
## miR_rageko_ctrl_1 miR_rageko_ctrl_1 1 0
## miR_rageko_ctrl_2 miR_rageko_ctrl_2 1 0
## miR_rageko_ctrl_3 miR_rageko_ctrl_3 1 0
## miR_rageko_TGF_1 miR_rageko_TGF_1 1 1
## miR_rageko_TGF_2 miR_rageko_TGF_2 1 1
## miR_rageko_TGF_3 miR_rageko_TGF_3 1 1
## miR_wt_ctrl_1 miR_wt_ctrl_1 0 0
## miR_wt_ctrl_2 miR_wt_ctrl_2 0 0
## miR_wt_ctrl_3 miR_wt_ctrl_3 0 0
## miR_wt_TGF_1 miR_wt_TGF_1 0 1
## miR_wt_TGF_2 miR_wt_TGF_2 0 1
## miR_wt_TGF_3 miR_wt_TGF_3 0 1
TODO: rRNA carryover.
par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)
grid()
And correlation heat map.
par(mar=c(5,5,3,3))
mds <- cmdscale(dist(t(xx)))
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n", cex=4 )
text(mds, labels=rownames(mds) ,col="black")
heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap",margin=c(8,8),cexRow=0.7,cexCol=0.7)
Compare wt control to RAGE KO control.
maplot <- function(de,contrast_name) {
de <- de[which(!is.na(de$padj)),]
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=0.5, col="dark gray",
main=contrast_name, cex.main=1)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=0.5, col="red")
mtext(SUBHEADER,cex = 1)
}
make_volcano <- function(de,name) {
de <- de[which(!is.na(de$padj)),]
de$pvalue[which(de$pvalue==0)] <- 1e-320
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}
ss1 <- subset(ss,tgf==0)
xx1 <- xx[,which(colnames(xx) %in% rownames(ss1) )]
dim(xx1)
## [1] 1234 6
xx1 <- xx1[which(rowMeans(xx1)>10),]
dim(xx1)
## [1] 436 6
dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1 , design = ~ ko )
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE,nsub=nrow(xx1))
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>% kbl(caption = "Top gene expression differences") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
mmu-let-7a-1 | 688836.774 | -6.405851 | 0.1409814 | -45.43755 | 0 | 0 |
mmu-let-7b | 1637945.068 | -6.481798 | 0.1472216 | -44.02751 | 0 | 0 |
mmu-let-7c-1 | 1760492.704 | -6.861097 | 0.1451876 | -47.25676 | 0 | 0 |
mmu-let-7e | 542795.327 | -6.057295 | 0.1478637 | -40.96540 | 0 | 0 |
mmu-let-7k | 6972.734 | -8.746573 | 0.1986122 | -44.03844 | 0 | 0 |
mmu-mir-183 | 10684.981 | 5.936845 | 0.1475355 | 40.24012 | 0 | 0 |
mmu-mir-503 | 7238.823 | -7.025117 | 0.1669449 | -42.08044 | 0 | 0 |
mmu-mir-5099 | 50115.479 | 5.904955 | 0.1485551 | 39.74927 | 0 | 0 |
mmu-mir-21c | 3170.357 | 8.086983 | 0.2186157 | 36.99178 | 0 | 0 |
mmu-mir-181d | 11207.275 | -6.469621 | 0.1763896 | -36.67802 | 0 | 0 |
mmu-mir-10b | 51943.154 | 4.448196 | 0.1251096 | 35.55440 | 0 | 0 |
mmu-mir-181b-2 | 16158.569 | -5.081381 | 0.1431792 | -35.48966 | 0 | 0 |
mmu-let-7a-2 | 23438.059 | -5.314162 | 0.1523568 | -34.87973 | 0 | 0 |
mmu-mir-3074-2 | 12854.203 | 5.334356 | 0.1547168 | 34.47820 | 0 | 0 |
mmu-mir-298 | 2757.555 | -5.573552 | 0.1623121 | -34.33848 | 0 | 0 |
mmu-mir-214 | 5444.600 | 5.150779 | 0.1534990 | 33.55579 | 0 | 0 |
mmu-let-7d | 99318.677 | -4.746768 | 0.1415774 | -33.52772 | 0 | 0 |
mmu-mir-1839 | 17238.902 | -4.554979 | 0.1395659 | -32.63676 | 0 | 0 |
mmu-mir-222 | 125551.730 | -4.952649 | 0.1518771 | -32.60957 | 0 | 0 |
mmu-mir-320 | 103212.812 | -4.961718 | 0.1534058 | -32.34374 | 0 | 0 |
d1up <- rownames(subset(dge,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge,padj <= 0.05 & log2FoldChange < 0))
write.table(dge,file="miRNA_DE_ctrl_vs_rageko.tsv",quote=FALSE,sep="\t")
maplot(dge,"Cont1: Effect of RAGE KO")
make_volcano(dge,"Cont1: Effect of RAGE KO")
#agerdat <- assay(vsd)[grep("Ager",rownames(assay(vsd))),]
#par(mar=c(5,8,3,1))
#barplot(agerdat,horiz=TRUE,las=1,xlab="Normalised Ager expression",xlim=c(0,10))
Make a heatmap of the top 30 genes.
rpm1 <- apply(xx1, 2, function(x){x/sum(x,na.rm=T)}) * 1000000
colfunc <- colorRampPalette(c("blue", "white", "red"))
rpm2 <- rpm1[which(rownames(rpm1) %in% rownames(head(dge,30))),]
heatmap.2(as.matrix(rpm2),margin=c(8, 22),cexRow=0.85,trace="none",
cexCol=0.9,col=colfunc(20),scale="row")
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 kableExtra_1.3.4
## [3] biomaRt_2.52.0 dplyr_1.0.9
## [5] mitch_1.8.0 gplots_3.1.3
## [7] DESeq2_1.36.0 SummarizedExperiment_1.26.1
## [9] Biobase_2.56.0 MatrixGenerics_1.8.0
## [11] matrixStats_0.62.0 GenomicRanges_1.48.0
## [13] GenomeInfoDb_1.32.2 IRanges_2.30.0
## [15] S4Vectors_0.34.0 BiocGenerics_0.42.0
## [17] reshape2_1.4.4
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 XVector_0.36.0
## [4] rstudioapi_0.13 bit64_4.0.5 AnnotationDbi_1.58.0
## [7] fansi_1.0.3 xml2_1.3.3 codetools_0.2-18
## [10] splines_4.2.1 cachem_1.0.6 geneplotter_1.74.0
## [13] knitr_1.39 jsonlite_1.8.0 annotate_1.74.0
## [16] dbplyr_2.2.1 png_0.1-7 shiny_1.7.1
## [19] compiler_4.2.1 httr_1.4.3 assertthat_0.2.1
## [22] Matrix_1.4-1 fastmap_1.1.0 cli_3.3.0
## [25] later_1.3.0 htmltools_0.5.2 prettyunits_1.1.1
## [28] tools_4.2.1 gtable_0.3.0 glue_1.6.2
## [31] GenomeInfoDbData_1.2.8 rappdirs_0.3.3 Rcpp_1.0.8.3
## [34] jquerylib_0.1.4 vctrs_0.4.1 Biostrings_2.64.0
## [37] svglite_2.1.0 xfun_0.31 stringr_1.4.0
## [40] rvest_1.0.2 mime_0.12 lifecycle_1.0.1
## [43] gtools_3.9.2.2 XML_3.99-0.10 zlibbioc_1.42.0
## [46] MASS_7.3-58 scales_1.2.0 hms_1.1.1
## [49] promises_1.2.0.1 parallel_4.2.1 yaml_2.3.5
## [52] curl_4.3.2 memoise_2.0.1 gridExtra_2.3
## [55] ggplot2_3.3.6 sass_0.4.1 reshape_0.8.9
## [58] stringi_1.7.6 RSQLite_2.2.14 highr_0.9
## [61] genefilter_1.78.0 caTools_1.18.2 filelock_1.0.2
## [64] BiocParallel_1.30.3 echarts4r_0.4.4 rlang_1.0.3
## [67] pkgconfig_2.0.3 systemfonts_1.0.4 bitops_1.0-7
## [70] evaluate_0.15 lattice_0.20-45 purrr_0.3.4
## [73] htmlwidgets_1.5.4 bit_4.0.4 tidyselect_1.1.2
## [76] GGally_2.1.2 plyr_1.8.7 magrittr_2.0.3
## [79] R6_2.5.1 generics_0.1.2 DelayedArray_0.22.0
## [82] DBI_1.1.3 pillar_1.7.0 survival_3.4-0
## [85] KEGGREST_1.36.2 RCurl_1.98-1.7 tibble_3.1.7
## [88] crayon_1.5.1 KernSmooth_2.23-20 utf8_1.2.2
## [91] BiocFileCache_2.4.0 rmarkdown_2.14 progress_1.2.2
## [94] locfit_1.5-9.5 grid_4.2.1 blob_1.2.3
## [97] digest_0.6.29 webshot_0.5.3 xtable_1.8-4
## [100] httpuv_1.6.5 munsell_0.5.0 beeswarm_0.4.0
## [103] viridisLite_0.4.0 bslib_0.3.1