Here we’re doing an analysis to identify CNV using normalised reads counts across genomic windows.
library("cobs")
library("quantreg")
library("parallel")
library("gplots")
library("dplyr")
library("kableExtra")
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
interpolate_points<-function(row,dat,curve){
MY_X=dat[row,1]
MY_Y=dat[row,2]
VAL1=tail(which(curve[,1]<MY_X),1)
VAL2=VAL1+1
X <- curve[c(VAL1,VAL2),1]
Y <- curve[c(VAL1,VAL2),2]
INTERP_Y=approx(X,Y,xout=MY_X)$y
INTERP_Y
}
x <- read.table("SRP199233.1e6_fmt.tsv",header=T,row.names=1)
x <- x[which(rowSums(x)>=10),]
x <- sweep(x, 2, colSums(x), FUN="/")*1000000
mysd <- apply(x,1,sd)
mean <- apply(x,1,mean)
y <- data.frame(log10(mean),mysd/mean)
colnames(y) = c("logMean","cv")
Rbs.9 <- cobs(y$logMean,y$cv, nknots=10,constraint="none",tau=0.99)
## qbsks2():
## Performing general knot selection ...
##
## Deleting unnecessary knots ...
Rbs.median <- cobs(y$logMean,y$cv,nknots=10,constraint="none",tau=0.5)
## qbsks2():
## Performing general knot selection ...
##
## Deleting unnecessary knots ...
pred <- data.frame(predict(Rbs.9))
res <- mclapply(X=1:nrow(y),function(row) {
interpolate_points(row,y,pred)
},mc.cores=8)
y$interpolated <- unlist(res)
y$diff=y$cv-y$interpolated
yy <- y[which(y$diff>0.05),]
yy <- yy[order(-yy$diff),]
write.table(yy,file="SRP199233.1e6_regions.tsv")
yy %>% kbl() %>% kable_paper("hover", full_width = F)
logMean | cv | interpolated | diff | |
---|---|---|---|---|
7:32000000-33000000 | 1.514337 | 0.3780389 | 0.1594371 | 0.2186018 |
MT:0-16299 | 1.621759 | 0.2621939 | 0.1369478 | 0.1252461 |
9:124000000-124595110 | 1.941474 | 0.1903983 | 0.0882176 | 0.1021807 |
plot(y$logMean,y$cv,pch=18,cex=0.5,xlab="log10(mean)",ylab="CV")
lines(predict(Rbs.9), col = "red", lwd = 1.0)
lines(predict(Rbs.median), col = "blue", lwd = 1.0)
points(yy$logMean,yy$cv)
text(yy$logMean,yy$cv+0.02,labels=rownames(yy),cex=0.8)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
zz<-x[which(rownames(x) %in% rownames(yy)),]
heatmap.2(as.matrix(zz),margin=c(8, 22),cexRow=0.65,trace="none",
cexCol=0.8,col=my_palette,scale="row")
heatmap.2(cor(t(zz)),trace="none",scale="none",margins=c(12,12),
cexRow=0.8, cexCol=0.8)
x<-read.table("SRP199233.1e5_fmt.tsv",header=T,row.names=1)
x<-x[which(rowSums(x)>=10),]
x<-sweep(x, 2, colSums(x), FUN="/")*1000000
mysd<-apply(x,1,sd)
mean<-apply(x,1,mean)
y<-data.frame(log10(mean),mysd/mean)
colnames(y)=c("logMean","cv")
Rbs.9 <- cobs(y$logMean,y$cv, nknots=10,constraint="none",tau=0.99)
## qbsks2():
## Performing general knot selection ...
##
## WARNING! Since the number of 10 knots selected by AIC reached the
## upper bound during general knot selection, you might want to rerun
## cobs with a larger number of knots.
##
## Deleting unnecessary knots ...
##
## WARNING! Since the number of 10 knots selected by AIC reached the
## upper bound during general knot selection, you might want to rerun
## cobs with a larger number of knots.
Rbs.median <- cobs(y$logMean,y$cv,nknots=10,constraint="none",tau=0.5)
## qbsks2():
## Performing general knot selection ...
##
## Deleting unnecessary knots ...
pred<-data.frame(predict(Rbs.9))
res <- mclapply(X=1:nrow(y),function(row) {
interpolate_points(row,y,pred)
},mc.cores=8)
y$interpolated <- unlist(res)
y$diff=y$cv-y$interpolated
yy <- y[which(y$diff>0.05),]
yy <- yy[order(-yy$diff),]
yy <- head(yy,200)
write.table(yy,file="SRP199233.1e5_regions.tsv")
yy %>% kbl() %>% kable_paper("hover", full_width = F)
logMean | cv | interpolated | diff | |
---|---|---|---|---|
7:26000000-26100000 | 0.7328296 | 0.8076524 | 0.3768477 | 0.4308047 |
Y:42900000-43000000 | -0.7795757 | 1.8135944 | 1.4661506 | 0.3474438 |
7:26100000-26200000 | 0.7526029 | 0.6582584 | 0.3678549 | 0.2904035 |
Y:19400000-19500000 | -0.5049940 | 1.4829307 | 1.2101645 | 0.2727662 |
9:124200000-124300000 | 1.4183689 | 0.3858147 | 0.1432172 | 0.2425976 |
7:32300000-32400000 | 0.8291872 | 0.5611718 | 0.3339716 | 0.2272002 |
9:123900000-124000000 | 1.7001917 | 0.3110071 | 0.1010840 | 0.2099232 |
13:119500000-119600000 | 0.9692719 | 0.4388120 | 0.2774641 | 0.1613479 |
MT:0-16299 | 1.6221543 | 0.2621907 | 0.1129953 | 0.1491954 |
Y:12200000-12300000 | -0.6857873 | 1.5206623 | 1.3757258 | 0.1449365 |
Y:24700000-24800000 | -0.6804049 | 1.5066416 | 1.3706704 | 0.1359712 |
7:32200000-32300000 | 0.6757996 | 0.5376975 | 0.4036827 | 0.1340149 |
7:31700000-31800000 | 0.7891310 | 0.4692208 | 0.3514426 | 0.1177782 |
7:32500000-32600000 | 0.5227468 | 0.5736655 | 0.4813127 | 0.0923529 |
14:6900000-7000000 | 1.5638312 | 0.1921610 | 0.1208570 | 0.0713040 |
7:38300000-38400000 | 1.8168547 | 0.1634987 | 0.1030466 | 0.0604521 |
7:39100000-39200000 | 0.9358565 | 0.3461525 | 0.2902758 | 0.0558766 |
14:5900000-6000000 | 1.5713234 | 0.1755526 | 0.1200704 | 0.0554822 |
4:6200000-6300000 | 1.5866750 | 0.1712088 | 0.1184586 | 0.0527503 |
plot(y$logMean,y$cv,pch=18,cex=0.5,xlab="log10(mean)",ylab="CV")
lines(predict(Rbs.9), col = "red", lwd = 1.0)
lines(predict(Rbs.median), col = "blue", lwd = 1.0)
points(yy$logMean,yy$cv)
text(yy$logMean,yy$cv+0.02,labels=rownames(yy),cex=0.8)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
zz<-x[which(rownames(x) %in% rownames(yy)),]
heatmap.2(as.matrix(zz),margin=c(8, 22),cexRow=0.65,trace="none",
cexCol=0.8,col=my_palette,scale="row")
heatmap.2(cor(t(zz)),trace="none",scale="none",margins=c(12,12),
cexRow=0.8, cexCol=0.8)
x<-read.table("SRP199233.1e4_fmt.tsv",header=T,row.names=1)
x<-x[which(rowSums(x)>=10),]
x<-sweep(x, 2, colSums(x), FUN="/")*1000000
mysd<-apply(x,1,sd)
mean<-apply(x,1,mean)
y<-data.frame(log10(mean),mysd/mean)
colnames(y)=c("logMean","cv")
Rbs.9 <- cobs(y$logMean,y$cv, nknots=10,constraint="none",tau=0.99)
## qbsks2():
## Performing general knot selection ...
##
## Deleting unnecessary knots ...
## Warning in cobs(y$logMean, y$cv, nknots = 10, constraint = "none", tau = 0.99):
## The algorithm has not converged after 100 iterations
Rbs.median <- cobs(y$logMean,y$cv,nknots=10,constraint="none",tau=0.5)
## qbsks2():
## Performing general knot selection ...
##
## Deleting unnecessary knots ...
pred<-data.frame(predict(Rbs.9))
res <- mclapply(X=1:nrow(y),function(row) {
interpolate_points(row,y,pred)
},mc.cores=8)
y$interpolated <- unlist(res)
y$diff=y$cv-y$interpolated
yy <- y[which(y$diff>0.2),]
yy <- yy[order(-yy$diff),]
yy <- head(yy,200)
write.table(yy,file="SRP199233.1e4_regions.tsv")
yy %>% kbl() %>% kable_paper("hover", full_width = F)
logMean | cv | interpolated | diff | |
---|---|---|---|---|
7:32240000-32250000 | -0.2373112 | 1.4993115 | 0.9505398 | 0.5487717 |
7:7960000-7970000 | -0.4359363 | 1.6231229 | 1.1993986 | 0.4237243 |
X:30040000-30050000 | -0.7175903 | 2.0028778 | 1.6273673 | 0.3755105 |
9:123930000-123940000 | 1.2312546 | 0.6430982 | 0.2923801 | 0.3507181 |
12:114070000-114080000 | -0.3445578 | 1.3963290 | 1.0795868 | 0.3167422 |
8:52960000-52970000 | -0.5796499 | 1.7137573 | 1.4067886 | 0.3069688 |
7:32540000-32550000 | -0.6211188 | 1.7778557 | 1.4710081 | 0.3068476 |
16:6120000-6130000 | 0.3082189 | 0.7689337 | 0.4908537 | 0.2780800 |
X:90380000-90390000 | -0.1537834 | 1.1316620 | 0.8585531 | 0.2731089 |
9:123920000-123930000 | 1.0217671 | 0.5278489 | 0.2644314 | 0.2634176 |
X:37530000-37540000 | -0.7138833 | 1.8795928 | 1.6211113 | 0.2584815 |
2:177610000-177620000 | -0.7695387 | 1.9725124 | 1.7158171 | 0.2566953 |
2:71360000-71370000 | 0.4755420 | 0.6526598 | 0.4100564 | 0.2426034 |
7:26160000-26170000 | -0.2149329 | 1.1674789 | 0.9250583 | 0.2424206 |
X:25410000-25420000 | 0.1645395 | 0.8147801 | 0.5800721 | 0.2347080 |
9:80520000-80530000 | 0.1234258 | 0.8395154 | 0.6094926 | 0.2300228 |
X:144590000-144600000 | 0.0714536 | 0.8733448 | 0.6496522 | 0.2236926 |
2:50990000-51000000 | -0.1363275 | 1.0625373 | 0.8405647 | 0.2219725 |
X:70050000-70060000 | 0.0597184 | 0.8794734 | 0.6593997 | 0.2200736 |
X:82820000-82830000 | 0.1297576 | 0.8240179 | 0.6048271 | 0.2191908 |
7:26060000-26070000 | -0.2734820 | 1.2111514 | 0.9925381 | 0.2186134 |
3:64530000-64540000 | 0.2562980 | 0.7382075 | 0.5205607 | 0.2176468 |
11:83130000-83140000 | -0.5466603 | 1.5692902 | 1.3571514 | 0.2121388 |
1:66640000-66650000 | 0.6777967 | 0.5315521 | 0.3264069 | 0.2051452 |
X:34280000-34290000 | -1.0512068 | 2.4494897 | 2.2476713 | 0.2018185 |
3:22150000-22160000 | 0.5698086 | 0.5720654 | 0.3702702 | 0.2017951 |
Y:84510000-84520000 | -0.8097790 | 1.9868289 | 1.7861816 | 0.2006473 |
plot(y$logMean,y$cv,pch=18,cex=0.5,xlab="log10(mean)",ylab="CV")
lines(predict(Rbs.9), col = "red", lwd = 1.0)
lines(predict(Rbs.median), col = "blue", lwd = 1.0)
points(yy$logMean,yy$cv)
text(yy$logMean,yy$cv+0.02,labels=rownames(yy),cex=0.8)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
zz<-x[which(rownames(x) %in% rownames(yy)),]
heatmap.2(as.matrix(zz),margin=c(8, 22),cexRow=0.65,trace="none",
cexCol=0.8,col=my_palette,scale="row")
heatmap.2(cor(t(zz)),trace="none",scale="none",margins=c(12,12),
cexRow=0.8, cexCol=0.8)
#```{r,1kbp1, fig.width = 8 ,fig.height = 8}
#x<-read.table(“SRP199233.1e3_fmt.tsv”,header=T,row.names=1) #x<-x[which(rowSums(x)>=10),] #x<-sweep(x, 2, colSums(x), FUN=“/”)*1000000 #mysd<-apply(x,1,sd) #mean<-apply(x,1,mean) #y<-data.frame(log10(mean),mysd/mean) #colnames(y)=c(“logMean”,“cv”) #Rbs.9 <- cobs(y\(logMean,y\)cv, nknots=10,constraint=“none”,tau=0.99) #Rbs.median <- cobs(y\(logMean,y\)cv,nknots=10,constraint=“none”,tau=0.5) #pred<-data.frame(predict(Rbs.9))
#res <- mclapply(X=1:nrow(y),function(row) { # interpolate_points(row,y,pred) # },mc.cores=8) #y$interpolated <- unlist(res)
#y\(diff=y\)cv-y\(interpolated #yy <- y[which(y\)diff>0.2),] #yy <- yy[order(-yy$diff),] #yy <- head(yy,200)
#write.table(yy,file=“SRP199233.1e3_regions.tsv”)
#plot(y\(logMean,y\)cv,pch=18,cex=0.5,xlab=“log10(mean)”,ylab=“CV”) #lines(predict(Rbs.9), col = “red”, lwd = 1.0) #lines(predict(Rbs.median), col = “blue”, lwd = 1.0) #points(yy\(logMean,yy\)cv) #text(yy\(logMean,yy\)cv+0.02,labels=rownames(yy),cex=0.8)
#my_palette <- colorRampPalette(c(“blue”, “white”, “red”))(n = 25) #zz<-x[which(rownames(x) %in% rownames(yy)),] #heatmap.2(as.matrix(zz),margin=c(8, 22),cexRow=0.65,trace=“none”, # cexCol=0.8,col=my_palette,scale=“row”)
#heatmap.2(cor(t(zz)),trace=“none”,scale=“none”,margins=c(12,12), # cexRow=0.8, cexCol=0.8)
#```
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] kableExtra_1.3.4 dplyr_1.0.7 quantreg_5.86 SparseM_1.81
## [5] cobs_1.3-4 gplots_3.1.1
##
## loaded via a namespace (and not attached):
## [1] gtools_3.9.2 tidyselect_1.1.1 xfun_0.25 bslib_0.2.5.1
## [5] purrr_0.3.4 splines_4.1.1 lattice_0.20-44 colorspace_2.0-2
## [9] vctrs_0.3.8 generics_0.1.0 viridisLite_0.4.0 htmltools_0.5.1.1
## [13] yaml_2.2.1 utf8_1.2.2 rlang_0.4.11 jquerylib_0.1.4
## [17] pillar_1.6.2 glue_1.4.2 DBI_1.1.1 matrixStats_0.60.0
## [21] lifecycle_1.0.0 stringr_1.4.0 MatrixModels_0.5-0 munsell_0.5.0
## [25] rvest_1.0.1 caTools_1.18.2 evaluate_0.14 knitr_1.33
## [29] fansi_0.5.0 highr_0.9 Rcpp_1.0.7 KernSmooth_2.23-20
## [33] conquer_1.0.2 scales_1.1.1 webshot_0.5.2 jsonlite_1.7.2
## [37] systemfonts_1.0.2 digest_0.6.27 stringi_1.7.3 grid_4.1.1
## [41] tools_4.1.1 bitops_1.0-7 magrittr_2.0.1 sass_0.4.0
## [45] tibble_3.1.3 crayon_1.4.1 pkgconfig_2.0.3 ellipsis_0.3.2
## [49] Matrix_1.3-4 xml2_1.3.2 assertthat_0.2.1 rmarkdown_2.10
## [53] svglite_2.0.0 httr_1.4.2 rstudioapi_0.13 R6_2.5.0
## [57] compiler_4.1.1