Source: https://github.com/markziemann/phipseq-shinhae/

Introduction

Methods

Libraries

suppressPackageStartupMessages({
  library("tidyverse")
  library("reshape2")
  library("gplots")
  library("eulerr")
})

Read in the data

First, read in the count data and check how many reads were assigned to each sample.

# Load the long-format table. It has no header row, so read.table() names the
# columns V1, V2 and V3 (judging by the output below: sample, peptide, count).
tmp <- read.table("3col.tsv", header = FALSE)
# Pivot to wide form: one row per V2 value, one column per V1 value.
# Duplicate V2/V1 pairs are summed rather than silently counted.
x <- acast(tmp, V2 ~ V1, value.var = "V3", fun.aggregate = sum)
x <- as.data.frame(x)
dim(x)
## [1] 4321   97
# Preview the first two rows of the wide count table.
head(x, n = 2)
##                                                   A1.kal A10.kal A11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         3       0       0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      8       0       0
##                                                   A12.kal A2.kal A3.kal A4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36          0      8    226      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141       2     10    594      0
##                                                   A5.kal A6.kal A7.kal A8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0      0      0      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0      0      0      0
##                                                   A9.kal B1.kal B10.kal B11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0      0       1       1
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0      2       0       0
##                                                   B12.kal B2.kal B3.kal B4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36        242      0      0      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141     583      0      0      0
##                                                   B5.kal B6.kal B7.kal B8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36        29     54      0      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141     52    105      0      0
##                                                   B9.kal C1.kal C10.kal C11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36       262      0       0       2
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    439      0       1       2
##                                                   C12.kal C2.kal C3.kal C4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36          1      0      1      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141       3      0      0      0
##                                                   C5.kal C6.kal C7.kal C8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0      0      0      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0      0      0      0
##                                                   C9.kal D1.kal D10.kal D11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0      0       0       0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0      1       0       1
##                                                   D12.kal D2.kal D3.kal D4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         28      1     25      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      37      1     43      2
##                                                   D5.kal D6.kal D7.kal D8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36       195    162     48      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    485    344     88      0
##                                                   D9.kal E1.kal E10.kal E11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36        75    137     243       0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    183    327     496       1
##                                                   E12.kal E2.kal E3.kal E4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36        186      0     66      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141     478      0    155      0
##                                                   E5.kal E6.kal E7.kal E8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0      0      0      3
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0      0      2      1
##                                                   E9.kal F1.kal F10.kal F11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36        20      0       0       0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141     44      0       1       0
##                                                   F12.kal F2.kal F3.kal F4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36          0      0      0      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141       0      0      1      0
##                                                   F5.kal F6.kal F7.kal F8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0      0      0      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0      1      1      0
##                                                   F9.kal G1.kal G10.kal G11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36         0    195     221       1
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141      0    377     459       2
##                                                   G12.kal G2.kal G3.kal G4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36          1    181    272    190
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141       1    365    496    392
##                                                   G5.kal G6.kal G7.kal G8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36       168    251    411      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    302    428    819      3
##                                                   G9.kal H1.kal H10.kal H11.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36       241      0       3       0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    419      0       9       0
##                                                   H12.kal H2.kal H3.kal H4.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36          0    210     48    244
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141       0    437    103    407
##                                                   H5.kal H6.kal H7.kal H8.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36       206    273    411      0
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    470    523    780      1
##                                                   H9.kal Undetermined.kal
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_0-36       194              219
## BTV1_BTV01IND2010-KRM07_seg2_VP2_ALI51175_105-141    481              376
# Margins are c(bottom, left, top, right); widen the left one for the plots
# that follow.
par(mar = c(5, 15, 5, 5))
# Total count per column (sample).
colSums(x)
##           A1.kal          A10.kal          A11.kal          A12.kal 
##             1861               61              289              346 
##           A2.kal           A3.kal           A4.kal           A5.kal 
##             2829           214055               49              124 
##           A6.kal           A7.kal           A8.kal           A9.kal 
##               48               66               82               72 
##           B1.kal          B10.kal          B11.kal          B12.kal 
##               72               59              292           200256 
##           B2.kal           B3.kal           B4.kal           B5.kal 
##               52              174               72            18934 
##           B6.kal           B7.kal           B8.kal           B9.kal 
##            38467              135              112           158456 
##           C1.kal          C10.kal          C11.kal          C12.kal 
##               90              106              370              510 
##           C2.kal           C3.kal           C4.kal           C5.kal 
##               40              132               38               95 
##           C6.kal           C7.kal           C8.kal           C9.kal 
##               62               71              282              136 
##           D1.kal          D10.kal          D11.kal          D12.kal 
##              142              225              234            16741 
##           D2.kal           D3.kal           D4.kal           D5.kal 
##             1894            14411              698           167177 
##           D6.kal           D7.kal           D8.kal           D9.kal 
##           117229            30791               82            61973 
##           E1.kal          E10.kal          E11.kal          E12.kal 
##           113759           195211              306           160851 
##           E2.kal           E3.kal           E4.kal           E5.kal 
##               52            58714               24              148 
##           E6.kal           E7.kal           E8.kal           E9.kal 
##               61               87              957            19183 
##           F1.kal          F10.kal          F11.kal          F12.kal 
##               23               56              233              239 
##           F2.kal           F3.kal           F4.kal           F5.kal 
##               39               49               27               76 
##           F6.kal           F7.kal           F8.kal           F9.kal 
##               52               35               79               80 
##           G1.kal          G10.kal          G11.kal          G12.kal 
##           126204           170169              225              333 
##           G2.kal           G3.kal           G4.kal           G5.kal 
##           130669           181984           144339           128408 
##           G6.kal           G7.kal           G8.kal           G9.kal 
##           178893           309317               76           166663 
##           H1.kal          H10.kal          H11.kal          H12.kal 
##               44             3263              121              231 
##           H2.kal           H3.kal           H4.kal           H5.kal 
##           171237            40095           161437           171444 
##           H6.kal           H7.kal           H8.kal           H9.kal 
##           195853           276806               53           171724 
## Undetermined.kal 
##           135445
# Strip the ".kal" suffix from the sample names. The dot is escaped and the
# pattern is anchored to the end, so only a literal trailing ".kal" is removed
# (an unescaped "." would match any character, anywhere in the name).
colnames(x) <- sub("\\.kal$", "", colnames(x))

# Horizontal bar chart of the total count per sample, linear scale.
barplot(colSums(x), horiz = TRUE, las = 2, main = "assigned reads")

# Same chart on a log x-axis so that low-count samples remain visible.
barplot(colSums(x), horiz = TRUE, las = 2, main = "assigned reads", log = "x")
grid()

# Restore the default plot margins before drawing the histogram.
par(mar = c(5.1, 4.1, 4.1, 2.1))
# Keep the colSums(x) expression as is: hist() derives its default title from it.
hist(colSums(x), breaks = 20, xlab = "No. assigned reads")

# NOTE(review): write.table() defaults to a space separator with quoted
# strings, so this ".tsv" file is not tab-delimited. Left unchanged in case
# downstream scripts rely on the current format -- confirm before switching
# to sep = "\t", quote = FALSE.
write.table(x, file = "phipseq1.tsv")

Session information

# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] eulerr_7.0.0    gplots_3.1.3    reshape2_1.4.4  lubridate_1.9.2
##  [5] forcats_1.0.0   stringr_1.5.0   dplyr_1.1.2     purrr_1.0.1    
##  [9] readr_2.1.4     tidyr_1.3.0     tibble_3.2.1    ggplot2_3.4.2  
## [13] tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.7         utf8_1.2.3         generics_0.1.3     bitops_1.0-7      
##  [5] KernSmooth_2.23-22 gtools_3.9.4       stringi_1.7.12     hms_1.1.3         
##  [9] digest_0.6.33      magrittr_2.0.3     caTools_1.18.2     evaluate_0.21     
## [13] grid_4.3.1         timechange_0.2.0   fastmap_1.1.1      plyr_1.8.8        
## [17] jsonlite_1.8.7     fansi_1.0.4        scales_1.2.1       jquerylib_0.1.4   
## [21] cli_3.6.1          rlang_1.1.1        munsell_0.5.0      withr_2.5.0       
## [25] cachem_1.0.8       yaml_2.3.7         tools_4.3.1        tzdb_0.4.0        
## [29] colorspace_2.1-0   vctrs_0.6.3        R6_2.5.1           lifecycle_1.0.3   
## [33] pkgconfig_2.0.3    pillar_1.9.0       bslib_0.5.0        gtable_0.3.3      
## [37] glue_1.6.2         Rcpp_1.0.11        highr_0.10         xfun_0.39         
## [41] tidyselect_1.2.0   knitr_1.43         htmltools_0.5.5    rmarkdown_2.23    
## [45] compiler_4.3.1