suppressPackageStartupMessages({
library("zoo")
library("tidyverse")
library("reshape2")
library("DESeq2")
library("gplots")
library("fgsea")
library("MASS")
library("mitch")
library("eulerr")
library("limma")
library("topconfects")
})
tmp <- read.table("3col.tsv",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
dim(xx)
## [1] 60662 96
rpm <- apply(xx, 2 , function(x) { x / sum(x) } ) * 1000000
rpm <- rpm[rowMeans(rpm) > 1,]
dim(rpm)
## [1] 16852 96
mysd <- apply(rpm,1,sd)
mymean <- apply(rpm,1,mean)
mycv <- mysd/mymean
best <- names(head(mycv[order(mycv)],20))
best_sd <- mysd[which(names(mysd) %in% best)]
best_mean <- mymean[which(names(mymean) %in% best)]
worst <- names(tail(mycv[order(mycv)],20))
plot(mymean,mysd,log="xy", xlab="mean (RPM)",ylab="SD (RPM)",
main="mean expression Vs standard deviation")
mtext("red: most stable genes")
points(best_mean,best_sd,cex=2,col="red")
Now some visualisation. Start with heatmap of most stable and least stable genes.
mxbest <- rpm[rownames(rpm) %in% best,]
heatmap.2(mxbest,trace="none",scale="row",main="most stable", mar=c(5,13),cexRow=0.8)
mxworst <- rpm[rownames(rpm) %in% worst,]
heatmap.2(mxworst,trace="none",scale="row",main="least stable",mar=c(5,13), cexRow=0.8)
Now show as barplots. 20 most stable and 20 least stable.
null <- sapply(best, function(g) {
par(mar=c(5,10,5,2))
dat <- rpm[rownames(rpm) == g,]
sd1 <- signif(sd(dat),4)
mean1 <- signif(mean(dat),4)
cv1 <- signif(sd1/mean1,4)
barplot(dat,horiz=TRUE,las=1,main=g,xlab="RPM",cex.names=0.5)
HEADER=paste("mean=",mean1,"sd=",sd1,"cv=",cv1)
mtext(HEADER)
})
null <- sapply(worst, function(g) {
par(mar=c(5,10,5,2))
dat <- rpm[rownames(rpm) == g,]
sd1 <- signif(sd(dat),4)
mean1 <- signif(mean(dat),4)
cv1 <- signif(sd1/mean1,4)
barplot(dat,horiz=TRUE,las=1,main=g,xlab="RPM",cex.names=0.5)
HEADER=paste("mean=",mean1,"sd=",sd1,"cv=",cv1)
mtext(HEADER)
})
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] topconfects_1.12.0 limma_3.52.1
## [3] eulerr_6.1.1 mitch_1.8.0
## [5] MASS_7.3-58 fgsea_1.22.0
## [7] gplots_3.1.3 DESeq2_1.36.0
## [9] SummarizedExperiment_1.26.1 Biobase_2.56.0
## [11] MatrixGenerics_1.8.0 matrixStats_0.62.0
## [13] GenomicRanges_1.48.0 GenomeInfoDb_1.32.2
## [15] IRanges_2.30.0 S4Vectors_0.34.0
## [17] BiocGenerics_0.42.0 reshape2_1.4.4
## [19] forcats_0.5.1 stringr_1.4.0
## [21] dplyr_1.0.9 purrr_0.3.4
## [23] readr_2.1.2 tidyr_1.2.0
## [25] tibble_3.1.7 ggplot2_3.3.6
## [27] tidyverse_1.3.1 zoo_1.8-10
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 XVector_0.36.0
## [4] fs_1.5.2 rstudioapi_0.13 bit64_4.0.5
## [7] AnnotationDbi_1.58.0 fansi_1.0.3 lubridate_1.8.0
## [10] xml2_1.3.3 codetools_0.2-18 splines_4.2.1
## [13] cachem_1.0.6 knitr_1.39 geneplotter_1.74.0
## [16] jsonlite_1.8.0 broom_0.8.0 annotate_1.74.0
## [19] dbplyr_2.2.1 png_0.1-7 shiny_1.7.1
## [22] compiler_4.2.1 httr_1.4.3 backports_1.4.1
## [25] assertthat_0.2.1 Matrix_1.4-1 fastmap_1.1.0
## [28] cli_3.3.0 later_1.3.0 htmltools_0.5.2
## [31] tools_4.2.1 gtable_0.3.0 glue_1.6.2
## [34] GenomeInfoDbData_1.2.8 fastmatch_1.1-3 Rcpp_1.0.8.3
## [37] jquerylib_0.1.4 cellranger_1.1.0 vctrs_0.4.1
## [40] Biostrings_2.64.0 xfun_0.31 rvest_1.0.2
## [43] mime_0.12 lifecycle_1.0.1 gtools_3.9.2.2
## [46] XML_3.99-0.10 zlibbioc_1.42.0 scales_1.2.0
## [49] hms_1.1.1 promises_1.2.0.1 parallel_4.2.1
## [52] RColorBrewer_1.1-3 yaml_2.3.5 memoise_2.0.1
## [55] gridExtra_2.3 sass_0.4.1 reshape_0.8.9
## [58] stringi_1.7.6 RSQLite_2.2.14 highr_0.9
## [61] genefilter_1.78.0 caTools_1.18.2 BiocParallel_1.30.3
## [64] echarts4r_0.4.4 rlang_1.0.3 pkgconfig_2.0.3
## [67] bitops_1.0-7 evaluate_0.15 lattice_0.20-45
## [70] htmlwidgets_1.5.4 bit_4.0.4 tidyselect_1.1.2
## [73] GGally_2.1.2 plyr_1.8.7 magrittr_2.0.3
## [76] R6_2.5.1 generics_0.1.2 DelayedArray_0.22.0
## [79] DBI_1.1.3 pillar_1.7.0 haven_2.5.0
## [82] withr_2.5.0 survival_3.4-0 KEGGREST_1.36.2
## [85] RCurl_1.98-1.7 modelr_0.1.8 crayon_1.5.1
## [88] KernSmooth_2.23-20 utf8_1.2.2 rmarkdown_2.14
## [91] tzdb_0.3.0 locfit_1.5-9.5 grid_4.2.1
## [94] readxl_1.4.0 data.table_1.14.2 blob_1.2.3
## [97] reprex_2.0.1 digest_0.6.29 xtable_1.8-4
## [100] httpuv_1.6.5 munsell_0.5.0 beeswarm_0.4.0
## [103] bslib_0.3.1