Source: https://github.com/markziemann/phipseq-shinhae/
suppressPackageStartupMessages({
library("tidyverse")
library("reshape2")
library("gplots")
library("eulerr")
})
First read in the data. We can see how many reads were assigned.
# Load the long-format table. The file has no header row, so read.table()
# names the three columns V1, V2 and V3. `FALSE` is spelled out because `F`
# is an ordinary variable that can be reassigned.
tmp <- read.table("3col.tsv", header = FALSE)

# Pivot to wide form: one row per V2 value, one column per V1 value, with V3
# summed wherever the same (V2, V1) pair occurs more than once.
x <- acast(tmp, V2 ~ V1, value.var = "V3", fun.aggregate = sum)

# acast() returns a matrix; keep the result as a data frame from here on.
x <- as.data.frame(x)

# Report the number of rows and columns of the wide table.
dim(x)
## [1] 4321 97
# Preview the first two rows of the wide table.
head(x, n = 2L)
## A1.kal A10.kal A11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 3 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 8 0 0
## A12.kal A2.kal A3.kal A4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 8 226 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 2 10 594 0
## A5.kal A6.kal A7.kal A8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 0 0 0
## A9.kal B1.kal B10.kal B11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 1 1
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 2 0 0
## B12.kal B2.kal B3.kal B4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 242 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 583 0 0 0
## B5.kal B6.kal B7.kal B8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 29 54 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 52 105 0 0
## B9.kal C1.kal C10.kal C11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 262 0 0 2
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 439 0 1 2
## C12.kal C2.kal C3.kal C4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 1 0 1 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 3 0 0 0
## C5.kal C6.kal C7.kal C8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 0 0 0
## C9.kal D1.kal D10.kal D11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 1 0 1
## D12.kal D2.kal D3.kal D4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 28 1 25 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 37 1 43 2
## D5.kal D6.kal D7.kal D8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 195 162 48 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 485 344 88 0
## D9.kal E1.kal E10.kal E11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 75 137 243 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 183 327 496 1
## E12.kal E2.kal E3.kal E4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 186 0 66 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 478 0 155 0
## E5.kal E6.kal E7.kal E8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 0 3
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 0 2 1
## E9.kal F1.kal F10.kal F11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 20 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 44 0 1 0
## F12.kal F2.kal F3.kal F4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 0 1 0
## F5.kal F6.kal F7.kal F8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 0 0 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 1 1 0
## F9.kal G1.kal G10.kal G11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 195 221 1
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 377 459 2
## G12.kal G2.kal G3.kal G4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 1 181 272 190
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 1 365 496 392
## G5.kal G6.kal G7.kal G8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 168 251 411 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 302 428 819 3
## G9.kal H1.kal H10.kal H11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 241 0 3 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 419 0 9 0
## H12.kal H2.kal H3.kal H4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 0 210 48 244
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 0 437 103 407
## H5.kal H6.kal H7.kal H8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 206 273 411 0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 470 523 780 1
## H9.kal Undetermined.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36 194 219
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141 481 376
# Set the plot margins (bottom, left, top, right, in lines of text); the left
# margin is enlarged to 15 for the plots drawn further down.
par(mar = c(5, 15, 5, 5))

# Print the total of each column of the wide table.
colSums(x)
## A1.kal A10.kal A11.kal A12.kal
## 1861 61 289 346
## A2.kal A3.kal A4.kal A5.kal
## 2829 214055 49 124
## A6.kal A7.kal A8.kal A9.kal
## 48 66 82 72
## B1.kal B10.kal B11.kal B12.kal
## 72 59 292 200256
## B2.kal B3.kal B4.kal B5.kal
## 52 174 72 18934
## B6.kal B7.kal B8.kal B9.kal
## 38467 135 112 158456
## C1.kal C10.kal C11.kal C12.kal
## 90 106 370 510
## C2.kal C3.kal C4.kal C5.kal
## 40 132 38 95
## C6.kal C7.kal C8.kal C9.kal
## 62 71 282 136
## D1.kal D10.kal D11.kal D12.kal
## 142 225 234 16741
## D2.kal D3.kal D4.kal D5.kal
## 1894 14411 698 167177
## D6.kal D7.kal D8.kal D9.kal
## 117229 30791 82 61973
## E1.kal E10.kal E11.kal E12.kal
## 113759 195211 306 160851
## E2.kal E3.kal E4.kal E5.kal
## 52 58714 24 148
## E6.kal E7.kal E8.kal E9.kal
## 61 87 957 19183
## F1.kal F10.kal F11.kal F12.kal
## 23 56 233 239
## F2.kal F3.kal F4.kal F5.kal
## 39 49 27 76
## F6.kal F7.kal F8.kal F9.kal
## 52 35 79 80
## G1.kal G10.kal G11.kal G12.kal
## 126204 170169 225 333
## G2.kal G3.kal G4.kal G5.kal
## 130669 181984 144339 128408
## G6.kal G7.kal G8.kal G9.kal
## 178893 309317 76 166663
## H1.kal H10.kal H11.kal H12.kal
## 44 3263 121 231
## H2.kal H3.kal H4.kal H5.kal
## 171237 40095 161437 171444
## H6.kal H7.kal H8.kal H9.kal
## 195853 276806 53 171724
## Undetermined.kal
## 135445
# Strip the literal ".kal" suffix from the column names. The dot is escaped and
# the pattern is anchored with `$`, so only a trailing ".kal" is removed. An
# unescaped ".kal" would match any character followed by "kal", and gsub()
# would delete every such match anywhere in the name.
colnames(x) <- sub("\\.kal$", "", colnames(x))

# Horizontal bar charts of the per-column totals, first on a linear axis and
# then on a log-scaled axis. las = 2 draws the axis labels perpendicular to
# their axis.
barplot(colSums(x), horiz = TRUE, las = 2, main = "assigned reads")
barplot(
  colSums(x),
  horiz = TRUE, las = 2, main = "assigned reads", log = "x"
)
grid()

# Return to R's documented default margins, c(5, 4, 4, 2) + 0.1.
par(mar = c(5.1, 4.1, 4.1, 2.1))

# Distribution of the per-column totals. The argument is kept as the expression
# colSums(x) because hist() builds its default title from it.
hist(colSums(x), breaks = 20, xlab = "No. assigned reads")

# Save the wide table with its row and column names.
# NOTE(review): write.table() separates fields with a single space by default,
# so this file is space-delimited despite the .tsv extension. Pass sep = "\t"
# if downstream readers expect tabs -- confirm against those readers first.
write.table(x, file = "phipseq1.tsv")

# Record the R version and package versions used for this run.
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] eulerr_7.0.0 gplots_3.1.3 reshape2_1.4.4 lubridate_1.9.2
## [5] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
## [9] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [13] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.7 utf8_1.2.3 generics_0.1.3 bitops_1.0-7
## [5] KernSmooth_2.23-22 gtools_3.9.4 stringi_1.7.12 hms_1.1.3
## [9] digest_0.6.33 magrittr_2.0.3 caTools_1.18.2 evaluate_0.21
## [13] grid_4.3.1 timechange_0.2.0 fastmap_1.1.1 plyr_1.8.8
## [17] jsonlite_1.8.7 fansi_1.0.4 scales_1.2.1 jquerylib_0.1.4
## [21] cli_3.6.1 rlang_1.1.1 munsell_0.5.0 withr_2.5.0
## [25] cachem_1.0.8 yaml_2.3.7 tools_4.3.1 tzdb_0.4.0
## [29] colorspace_2.1-0 vctrs_0.6.3 R6_2.5.1 lifecycle_1.0.3
## [33] pkgconfig_2.0.3 pillar_1.9.0 bslib_0.5.0 gtable_0.3.3
## [37] glue_1.6.2 Rcpp_1.0.11 highr_0.10 xfun_0.39
## [41] tidyselect_1.2.0 knitr_1.43 htmltools_0.5.5 rmarkdown_2.23
## [45] compiler_4.3.1