Source: https://github.com/markziemann/mesangial_rageko

Introduction

Looking at doing comparison of control and RAGE KO Mouse mesangial cells RNA-seq and miRNA-seq.

Methods

RAGE KO data sequenced at Baker. 61 nt length.
WT data sequenced at BGI with 91 nt length.
Quality trimming with skewer 0.2.2 to ensure 3’ end base quality > 20.
GENECODE mouse transcriptome v30 was downloaded and indexed.
Kallisto 0.46.0 was used for mapping paired end reads to the transcriptome.
Count data was loaded into R, followed by DE analysis with DESeq2.
Enrichment analysis was performed with mitch, using REACTOME pathways.
OS, R and package versions are shown in the session info section at the end of this report.

suppressPackageStartupMessages({
  library("reshape2")
  library("DESeq2")
  library("gplots")
  library("mitch")
  library("dplyr")
  library("biomaRt")
  library("kableExtra")
})

Data

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)

xx <- round(x)
dim(xx)

## [1] 1234   12

head(xx)

##              miR_rageko_ctrl_1 miR_rageko_ctrl_2 miR_rageko_ctrl_3
## mmu-let-7a-1             20363             20727              9947
## mmu-let-7a-2              1543              1441               691
## mmu-let-7b               42713             48743             23034
## mmu-let-7c-1             37266             34769             21005
## mmu-let-7c-2             74907             92800             46649
## mmu-let-7d                8707              9948              4206
##              miR_rageko_TGF_1 miR_rageko_TGF_2 miR_rageko_TGF_3 miR_wt_ctrl_1
## mmu-let-7a-1            16769            12800            12352       1306100
## mmu-let-7a-2             1489              614              608         44116
## mmu-let-7b              22473            15060            13756       3184290
## mmu-let-7c-1            22296             8455             5260       3453180
## mmu-let-7c-2            57307            32425            30454       1240720
## mmu-let-7d               6126             6035             5399        189697
##              miR_wt_ctrl_2 miR_wt_ctrl_3 miR_wt_TGF_1 miR_wt_TGF_2 miR_wt_TGF_3
## mmu-let-7a-1       1348620       1472040      1669340      1426850      1563840
## mmu-let-7a-2         43526         50652        78374        78690        84683
## mmu-let-7b         3163160       3465470      3348090      2859190      3046660
## mmu-let-7c-1       3466790       3664760      3491620      2982230      3103810
## mmu-let-7c-2       1190810       1395550      1427850      1286160      1329270
## mmu-let-7d          195022        196772       242764       212549       217202

write.table(x=xx,file="miRNA_count_matrix.tsv",sep="\t")

Now quantify the top 10 species. Starting with all samples.

rpm <- apply(xx, 2, function(x){x/sum(x,na.rm=T)}) * 1000000

rpm <- rpm[order(rowMeans(rpm) ),]

rpm <- tail(rpm,10)

# create color palette:
library(RColorBrewer)
n=10
pal <- brewer.pal(n, "Paired")

par(mfrow=c(1,2))
par(mar=c(10,5,3,1))
barplot(rpm,col=pal,las=2)
plot.new()
legend("topleft", legend=rev(rownames(rpm)),fill=rev(pal),bg="white",cex=1)

#restore default setting
par(mar=c(5.1, 4.1, 4.1, 2.1))

Now calculate diversity of each sample.

pc <- apply(xx, 2, function(x){x/sum(x,na.rm=T)}) * 100

res <- lapply(1:ncol(pc),function(i){
  dat <- pc[,i]
  dat <- dat[order(-dat)]
  head(cumsum(dat))
})
names(res) <- colnames(pc)
res

## $miR_rageko_ctrl_1
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b  mmu-let-7i 
##    35.04691    50.68179    56.93842    62.08391    65.17234    67.31654 
## 
## $miR_rageko_ctrl_2
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b  mmu-let-7i 
##    33.26619    49.71553    55.91072    60.54882    63.63909    65.95557 
## 
## $miR_rageko_ctrl_3
## mmu-mir-21a mmu-mir-143 mmu-mir-30a mmu-mir-10a mmu-mir-27b mmu-mir-30d 
##    33.03525    50.11035    58.16282    62.36956    65.16494    67.86357 
## 
## $miR_rageko_TGF_1
##    mmu-mir-21a    mmu-mir-143    mmu-mir-30a    mmu-mir-10a    mmu-mir-30d 
##       37.08525       51.32986       58.80983       63.22891       66.24793 
## mmu-mir-199a-1 
##       68.14504 
## 
## $miR_rageko_TGF_2
##    mmu-mir-21a    mmu-mir-143    mmu-mir-30a    mmu-mir-10a mmu-mir-199a-1 
##       45.01689       58.06857       63.68706       65.88644       68.07317 
##     mmu-mir-22 
##       70.25966 
## 
## $miR_rageko_TGF_3
##    mmu-mir-21a    mmu-mir-143    mmu-mir-30a     mmu-mir-22 mmu-mir-199a-1 
##       47.72168       62.21030       66.64633       69.00270       71.20109 
##    mmu-mir-10a 
##       73.29187 
## 
## $miR_wt_ctrl_1
## mmu-let-7c-1   mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7f-2   mmu-let-7e 
##     22.41564     43.08582     51.56412     59.61800     66.69586     73.01094 
## 
## $miR_wt_ctrl_2
## mmu-let-7c-1   mmu-let-7b mmu-let-7a-1 mmu-let-7c-2 mmu-let-7f-2   mmu-let-7e 
##     22.21449     42.48338     51.12507     58.75554     65.99981     72.87769 
## 
## $miR_wt_ctrl_3
## mmu-let-7c-1   mmu-let-7b mmu-let-7a-1 mmu-let-7c-2   mmu-let-7e mmu-let-7f-2 
##     22.04385     42.88896     51.74341     60.13776     67.33343     74.10425 
## 
## $miR_wt_TGF_1
## mmu-let-7c-1   mmu-let-7b   mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2 
##     19.23706     37.68335     47.70534     56.90256     65.38002     73.24676 
## 
## $miR_wt_TGF_2
## mmu-let-7c-1   mmu-let-7b   mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2 
##     19.07475     37.36251     47.31784     56.44416     65.47192     73.69838 
## 
## $miR_wt_TGF_3
## mmu-let-7c-1   mmu-let-7b   mmu-let-7e mmu-let-7a-1 mmu-let-7f-1 mmu-let-7c-2 
##     18.82160     37.29665     47.61448     57.09765     66.01360     74.07433

Samplesheet

#colnames(xx) <- sapply(strsplit(colnames(xx),"_"),"[[",1)
mysamples <- colnames(xx)
ko <- as.numeric(grepl("ko",mysamples))
tgf <- as.numeric(grepl("TGF",mysamples))
ss <- data.frame(mysamples,ko,tgf)
rownames(ss) <- mysamples
ss

##                           mysamples ko tgf
## miR_rageko_ctrl_1 miR_rageko_ctrl_1  1   0
## miR_rageko_ctrl_2 miR_rageko_ctrl_2  1   0
## miR_rageko_ctrl_3 miR_rageko_ctrl_3  1   0
## miR_rageko_TGF_1   miR_rageko_TGF_1  1   1
## miR_rageko_TGF_2   miR_rageko_TGF_2  1   1
## miR_rageko_TGF_3   miR_rageko_TGF_3  1   1
## miR_wt_ctrl_1         miR_wt_ctrl_1  0   0
## miR_wt_ctrl_2         miR_wt_ctrl_2  0   0
## miR_wt_ctrl_3         miR_wt_ctrl_3  0   0
## miR_wt_TGF_1           miR_wt_TGF_1  0   1
## miR_wt_TGF_2           miR_wt_TGF_2  0   1
## miR_wt_TGF_3           miR_wt_TGF_3  0   1

QC

TODO: rRNA carryover.

par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)
grid()

MDS plot

And correlation heat map.

par(mar=c(5,5,3,3))

mds <- cmdscale(dist(t(xx)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n", cex=4 )

text(mds, labels=rownames(mds) ,col="black")

heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap",margin=c(8,8),cexRow=0.7,cexCol=0.7)

Differential expression

Compare wt control to RAGE KO control.

maplot <- function(de,contrast_name) {
  de <- de[which(!is.na(de$padj)),]
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange,
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}
make_volcano <- function(de,name) {
    de <- de[which(!is.na(de$padj)),]
    de$pvalue[which(de$pvalue==0)] <- 1e-320
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}

ss1 <- subset(ss,tgf==0)
xx1 <- xx[,which(colnames(xx) %in% rownames(ss1) )]
dim(xx1)

## [1] 1234    6

xx1 <- xx1[which(rowMeans(xx1)>10),]
dim(xx1)

## [1] 436   6

dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1 , design = ~ ko )

## converting counts to integer mode

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE,nsub=nrow(xx1))

zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>% kbl(caption = "Top gene expression differences") %>% 
  kable_paper("hover", full_width = F)

Top gene expression differences
	baseMean	log2FoldChange	lfcSE	stat
mmu-let-7a-1	688836.774	-6.405851	0.1409814	-45.43755
mmu-let-7b	1637945.068	-6.481798	0.1472216	-44.02751
mmu-let-7c-1	1760492.704	-6.861097	0.1451876	-47.25676
mmu-let-7e	542795.327	-6.057295	0.1478637	-40.96540
mmu-let-7k	6972.734	-8.746573	0.1986122	-44.03844
mmu-mir-183	10684.981	5.936845	0.1475355	40.24012
mmu-mir-503	7238.823	-7.025117	0.1669449	-42.08044
mmu-mir-5099	50115.479	5.904955	0.1485551	39.74927
mmu-mir-21c	3170.357	8.086983	0.2186157	36.99178
mmu-mir-181d	11207.275	-6.469621	0.1763896	-36.67802
mmu-mir-10b	51943.154	4.448196	0.1251096	35.55440
mmu-mir-181b-2	16158.569	-5.081381	0.1431792	-35.48966
mmu-let-7a-2	23438.059	-5.314162	0.1523568	-34.87973
mmu-mir-3074-2	12854.203	5.334356	0.1547168	34.47820
mmu-mir-298	2757.555	-5.573552	0.1623121	-34.33848
mmu-mir-214	5444.600	5.150779	0.1534990	33.55579
mmu-let-7d	99318.677	-4.746768	0.1415774	-33.52772
mmu-mir-1839	17238.902	-4.554979	0.1395659	-32.63676
mmu-mir-222	125551.730	-4.952649	0.1518771	-32.60957
mmu-mir-320	103212.812	-4.961718	0.1534058	-32.34374

d1up <- rownames(subset(dge,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge,padj <= 0.05 & log2FoldChange < 0))
write.table(dge,file="miRNA_DE_ctrl_vs_rageko.tsv",quote=FALSE,sep="\t")

maplot(dge,"Cont1: Effect of RAGE KO")

make_volcano(dge,"Cont1: Effect of RAGE KO")

#agerdat <- assay(vsd)[grep("Ager",rownames(assay(vsd))),]
#par(mar=c(5,8,3,1))
#barplot(agerdat,horiz=TRUE,las=1,xlab="Normalised Ager expression",xlim=c(0,10))

Make a heatmap of the top 30 genes.

rpm1 <- apply(xx1, 2, function(x){x/sum(x,na.rm=T)}) * 1000000

colfunc <- colorRampPalette(c("blue", "white", "red"))

rpm2 <- rpm1[which(rownames(rpm1) %in% rownames(head(dge,30))),]

heatmap.2(as.matrix(rpm2),margin=c(8, 22),cexRow=0.85,trace="none",
    cexCol=0.9,col=colfunc(20),scale="row")

SessionInfo

sessionInfo()

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3          kableExtra_1.3.4           
##  [3] biomaRt_2.52.0              dplyr_1.0.9                
##  [5] mitch_1.8.0                 gplots_3.1.3               
##  [7] DESeq2_1.36.0               SummarizedExperiment_1.26.1
##  [9] Biobase_2.56.0              MatrixGenerics_1.8.0       
## [11] matrixStats_0.62.0          GenomicRanges_1.48.0       
## [13] GenomeInfoDb_1.32.2         IRanges_2.30.0             
## [15] S4Vectors_0.34.0            BiocGenerics_0.42.0        
## [17] reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3       ellipsis_0.3.2         XVector_0.36.0        
##   [4] rstudioapi_0.13        bit64_4.0.5            AnnotationDbi_1.58.0  
##   [7] fansi_1.0.3            xml2_1.3.3             codetools_0.2-18      
##  [10] splines_4.2.1          cachem_1.0.6           geneplotter_1.74.0    
##  [13] knitr_1.39             jsonlite_1.8.0         annotate_1.74.0       
##  [16] dbplyr_2.2.1           png_0.1-7              shiny_1.7.1           
##  [19] compiler_4.2.1         httr_1.4.3             assertthat_0.2.1      
##  [22] Matrix_1.4-1           fastmap_1.1.0          cli_3.3.0             
##  [25] later_1.3.0            htmltools_0.5.2        prettyunits_1.1.1     
##  [28] tools_4.2.1            gtable_0.3.0           glue_1.6.2            
##  [31] GenomeInfoDbData_1.2.8 rappdirs_0.3.3         Rcpp_1.0.8.3          
##  [34] jquerylib_0.1.4        vctrs_0.4.1            Biostrings_2.64.0     
##  [37] svglite_2.1.0          xfun_0.31              stringr_1.4.0         
##  [40] rvest_1.0.2            mime_0.12              lifecycle_1.0.1       
##  [43] gtools_3.9.2.2         XML_3.99-0.10          zlibbioc_1.42.0       
##  [46] MASS_7.3-58            scales_1.2.0           hms_1.1.1             
##  [49] promises_1.2.0.1       parallel_4.2.1         yaml_2.3.5            
##  [52] curl_4.3.2             memoise_2.0.1          gridExtra_2.3         
##  [55] ggplot2_3.3.6          sass_0.4.1             reshape_0.8.9         
##  [58] stringi_1.7.6          RSQLite_2.2.14         highr_0.9             
##  [61] genefilter_1.78.0      caTools_1.18.2         filelock_1.0.2        
##  [64] BiocParallel_1.30.3    echarts4r_0.4.4        rlang_1.0.3           
##  [67] pkgconfig_2.0.3        systemfonts_1.0.4      bitops_1.0-7          
##  [70] evaluate_0.15          lattice_0.20-45        purrr_0.3.4           
##  [73] htmlwidgets_1.5.4      bit_4.0.4              tidyselect_1.1.2      
##  [76] GGally_2.1.2           plyr_1.8.7             magrittr_2.0.3        
##  [79] R6_2.5.1               generics_0.1.2         DelayedArray_0.22.0   
##  [82] DBI_1.1.3              pillar_1.7.0           survival_3.4-0        
##  [85] KEGGREST_1.36.2        RCurl_1.98-1.7         tibble_3.1.7          
##  [88] crayon_1.5.1           KernSmooth_2.23-20     utf8_1.2.2            
##  [91] BiocFileCache_2.4.0    rmarkdown_2.14         progress_1.2.2        
##  [94] locfit_1.5-9.5         grid_4.2.1             blob_1.2.3            
##  [97] digest_0.6.29          webshot_0.5.3          xtable_1.8-4          
## [100] httpuv_1.6.5           munsell_0.5.0          beeswarm_0.4.0        
## [103] viridisLite_0.4.0      bslib_0.3.1

miRNA: Mesangial RAGE KO Transcriptomic Analysis

Mark Ziemann

2022-08-22