Reorganising the data
df <- data.frame(rowRanges(v75),info(v75)) #taking a look into the content for one of the vcf file.
df <- df[,c("seqnames","start","END")]
head(df)
## seqnames start END
## 1 1 26725045 26727528
## 2 1 39502386 39502399
## 3 1 39740580 39742018
## 4 1 39787083 39787266
## 5 1 39867178 39869113
## 6 1 39875834 39875909
alt <- unlist(as.vector(elementMetadata(v75)$ALT))
svtype <- info(v75)$SVTYPE
gt <- data.frame(g75,g76,g77,g78,g79,g80)
colnames(gt) <- c("g75","g76","g77","g78","g79","g80")
qual <- data.frame(r75$QUAL,r76$QUAL,r77$QUAL,r78$QUAL,r79$QUAL,r80$QUAL)
gt2 <- data.frame(df,alt,svtype,gt,qual)
head(gt2,20)
## seqnames start END alt svtype g75 g76 g77 g78 g79 g80 r75.QUAL
## 1 1 26725045 26727528 <DUP> DUP 1/1 0/0 1/1 1/1 ./. 0/0 17.35
## 2 1 39502386 39502399 <DEL> DEL ./. 0/0 0/0 ./. 0/0 0/0 0.00
## 3 1 39740580 39742018 <DEL> DEL ./. 1/1 ./. 1/1 0/0 1/1 0.00
## 4 1 39787083 39787266 <DEL> DEL ./. 1/1 ./. ./. 0/0 0/0 0.00
## 5 1 39867178 39869113 <DEL> DEL 1/1 ./. ./. ./. 0/0 0/0 31.46
## 6 1 39875834 39875909 <DEL> DEL 0/0 ./. 0/0 0/0 0/0 0/0 2.04
## 7 1 40181383 40187403 <DEL> DEL 0/0 1/1 ./. 1/1 1/1 ./. 4.77
## 8 1 40413280 40413335 <DEL> DEL ./. 0/0 ./. 1/1 1/1 1/1 0.00
## 9 1 40604578 40604663 <DEL> DEL 1/1 1/1 1/1 0/0 0/0 1/1 60.25
## 10 1 40737159 40741612 <DEL> DEL 0/0 0/0 ./. 1/1 0/0 ./. 4.77
## 11 1 42176587 42182958 <DEL> DEL ./. ./. 0/0 0/0 1/1 ./. 0.00
## 12 1 42685262 42687148 <DEL> DEL 1/1 ./. ./. ./. 0/0 ./. 60.25
## 13 1 42838780 42838966 <DEL> DEL 0/0 ./. 0/0 0/0 1/1 0/0 4.77
## 14 1 43069960 43070355 <DEL> DEL ./. 1/1 ./. ./. 1/1 ./. 0.00
## 15 1 43383064 43383173 <DEL> DEL ./. ./. 1/1 0/0 ./. 0/0 0.00
## 16 1 44768031 44769118 <DEL> DEL ./. ./. 0/0 0/0 ./. 0/0 0.00
## 17 1 45722421 45722587 <DEL> DEL 0/0 ./. 1/1 ./. 1/1 1/1 4.77
## 18 1 51290935 51296630 <DEL> DEL 0/0 1/1 ./. ./. 0/0 1/1 4.77
## 19 1 51584071 51585910 <DEL> DEL 1/1 ./. ./. 1/1 ./. ./. 31.46
## 20 1 51691097 51693813 <DEL> DEL ./. 0/0 ./. 1/1 ./. ./. 0.00
## r76.QUAL r77.QUAL r78.QUAL r79.QUAL r80.QUAL
## 1 4.77 17.35 46.54 0.00 3.24
## 2 4.77 4.77 0.00 4.77 4.77
## 3 31.46 0.00 31.46 4.77 236.38
## 4 89.31 0.00 0.00 4.77 4.77
## 5 0.00 0.00 0.00 4.77 4.77
## 6 0.00 4.77 0.07 1.01 2.04
## 7 60.25 0.00 31.46 89.31 0.00
## 8 4.77 0.00 31.46 147.94 60.25
## 9 31.46 60.25 4.77 4.77 31.46
## 10 4.77 0.00 60.25 4.77 0.00
## 11 0.00 4.77 4.77 31.46 0.00
## 12 0.00 0.00 0.00 4.77 0.00
## 13 0.00 4.77 4.77 31.46 4.77
## 14 31.46 0.00 0.00 60.25 0.00
## 15 0.00 31.46 4.77 0.00 4.77
## 16 0.00 4.77 4.77 0.00 4.77
## 17 0.00 31.46 0.00 31.46 31.46
## 18 31.46 0.00 0.00 4.77 31.46
## 19 0.00 0.00 31.46 0.00 0.00
## 20 4.77 0.00 60.25 0.00 0.00
gt3 <- gt2[which(apply(qual,1,min)>=20),]
head(gt3,20)
## seqnames start END alt svtype g75 g76 g77 g78 g79
## 53_1 1 16078454 NA N]1:78585581] BND 1/1 1/1 1/1 1/1 1/1
## 53_2 1 78585581 NA N]1:16078454] BND 1/1 1/1 1/1 1/1 1/1
## 74 1 88198043 88199163 <DEL> DEL 1/1 1/1 1/1 0/1 1/1
## 261_1 2 98493171 NA N]2:98495562] BND 1/1 1/1 1/1 1/1 1/1
## 261_2 2 98495562 NA N]2:98493171] BND 1/1 1/1 1/1 1/1 1/1
## 262 2 98493172 98496582 <DEL> DEL 1/1 1/1 1/1 1/1 1/1
## 269_1 2 98492824 NA N]2:98497676] BND 0/1 0/1 0/1 0/1 0/1
## 269_2 2 98497676 NA N]2:98492824] BND 0/1 0/1 0/1 0/1 0/1
## 274 2 98497402 98497677 <DUP> DUP 1/1 1/1 1/1 1/1 1/1
## 1205 10 128277743 128277834 <DUP> DUP 1/1 1/1 1/1 1/1 1/1
## 1217 11 3148658 3148711 <DEL> DEL 1/1 1/1 1/1 1/1 1/1
## 1418 13 48407019 48423790 <DUP> DUP 1/1 1/1 1/1 1/1 1/1
## 1506 15 73399206 73399288 <DEL> DEL 1/1 1/1 1/1 1/1 1/1
## 1597 17 13524840 13525181 <DUP> DUP 0/1 0/1 1/1 0/1 0/1
## 1893 X 75642359 75642466 <DEL> DEL 0/1 0/1 0/1 0/1 0/1
## 1894 X 75642288 75642741 <DUP> DUP 1/1 1/1 1/1 1/1 1/1
## 1895 X 75642155 75642741 <DUP> DUP 1/1 1/1 1/1 1/1 1/1
## 1896 X 75642466 75642741 <DUP> DUP 1/1 1/1 1/1 1/1 1/1
## 1901 Y 11858017 11858363 <DUP> DUP 0/1 1/1 0/1 1/1 0/1
## 1931 MU069435.1 5791 6073 <DEL> DEL 0/1 0/1 0/1 0/1 0/1
## g80 r75.QUAL r76.QUAL r77.QUAL r78.QUAL r79.QUAL r80.QUAL
## 53_1 1/1 649.93 443.14 590.85 1743.00 1004.44 1093.07
## 53_2 1/1 649.93 443.14 590.85 1743.00 1004.44 1093.07
## 74 0/1 31.46 31.46 60.25 25.33 138.74 75.87
## 261_1 1/1 118.56 147.94 118.56 590.85 295.44 236.38
## 261_2 1/1 118.56 147.94 118.56 590.85 295.44 236.38
## 262 1/1 8655.93 7208.35 6647.05 21034.21 15627.94 16632.39
## 269_1 0/1 1124.42 926.75 880.01 1504.26 1472.22 1169.32
## 269_2 0/1 1124.42 926.75 880.01 1504.26 1472.22 1169.32
## 274 1/1 36091.50 27559.29 24188.83 55896.96 36054.86 46082.68
## 1205 1/1 592.21 304.58 456.86 1641.27 1076.09 864.61
## 1217 1/1 472.68 60.25 89.31 265.90 226.54 147.94
## 1418 1/1 30.30 31.80 31.80 189.40 258.89 106.72
## 1506 1/1 31.46 31.46 31.46 89.31 60.25 31.46
## 1597 0/1 34.58 40.45 91.73 101.63 55.09 65.38
## 1893 0/1 278.72 152.79 83.85 323.30 464.45 221.55
## 1894 1/1 716.11 470.96 525.46 2275.09 1256.35 1501.50
## 1895 1/1 715.75 487.32 487.32 2269.09 1355.36 1629.48
## 1896 1/1 1041.97 654.61 754.57 2713.25 1864.10 1772.50
## 1901 0/1 44.54 91.73 22.06 308.18 126.23 82.97
## 1931 0/1 269.77 113.91 65.95 176.58 104.83 65.81
gt <- gt[which(apply(qual,1,min)>=20),]
head(gt,20)
## g75 g76 g77 g78 g79 g80
## 53_1 1/1 1/1 1/1 1/1 1/1 1/1
## 53_2 1/1 1/1 1/1 1/1 1/1 1/1
## 74 1/1 1/1 1/1 0/1 1/1 0/1
## 261_1 1/1 1/1 1/1 1/1 1/1 1/1
## 261_2 1/1 1/1 1/1 1/1 1/1 1/1
## 262 1/1 1/1 1/1 1/1 1/1 1/1
## 269_1 0/1 0/1 0/1 0/1 0/1 0/1
## 269_2 0/1 0/1 0/1 0/1 0/1 0/1
## 274 1/1 1/1 1/1 1/1 1/1 1/1
## 1205 1/1 1/1 1/1 1/1 1/1 1/1
## 1217 1/1 1/1 1/1 1/1 1/1 1/1
## 1418 1/1 1/1 1/1 1/1 1/1 1/1
## 1506 1/1 1/1 1/1 1/1 1/1 1/1
## 1597 0/1 0/1 1/1 0/1 0/1 0/1
## 1893 0/1 0/1 0/1 0/1 0/1 0/1
## 1894 1/1 1/1 1/1 1/1 1/1 1/1
## 1895 1/1 1/1 1/1 1/1 1/1 1/1
## 1896 1/1 1/1 1/1 1/1 1/1 1/1
## 1901 0/1 1/1 0/1 1/1 0/1 0/1
## 1931 0/1 0/1 0/1 0/1 0/1 0/1
rd <- rowRanges(v75)
rd <- rd[which(apply(qual,1,min)>=20),]
head(rd,20)
## GRanges object with 20 ranges and 5 metadata columns:
## seqnames ranges strand | paramRangeID REF
## <Rle> <IRanges> <Rle> | <factor> <DNAStringSet>
## 53_1 1 16078454 * | NA N
## 53_2 1 78585581 * | NA N
## 74 1 88198043 * | NA N
## 261_1 2 98493171 * | NA N
## 261_2 2 98495562 * | NA N
## ... ... ... ... . ... ...
## 1894 X 75642288 * | NA N
## 1895 X 75642155 * | NA N
## 1896 X 75642466 * | NA N
## 1901 Y 11858017 * | NA N
## 1931 MU069435.1 5791 * | NA N
## ALT QUAL FILTER
## <CharacterList> <numeric> <character>
## 53_1 N]1:78585581] 649.93 .
## 53_2 N]1:16078454] 649.93 .
## 74 <DEL> 31.46 .
## 261_1 N]2:98495562] 118.56 .
## 261_2 N]2:98493171] 118.56 .
## ... ... ... ...
## 1894 <DUP> 716.11 .
## 1895 <DUP> 715.75 .
## 1896 <DUP> 1041.97 .
## 1901 <DUP> 44.54 .
## 1931 <DEL> 269.77 .
## -------
## seqinfo: 34 sequences from GRCm39 genome; no seqlengths
rows <- which(apply(gt,1,function(x){
sd(as.numeric(as.factor(x)))
})!=0)
gt3 <- gt3[rows,]
gt <- gt[rows,]
gtx <- apply(gt,2,function(x) { as.numeric(as.factor(x))})
rownames(gtx) <- paste(gt3$seqnames,gt3$start,gt3$END,gt3$alt)
gtx
## g75 g76 g77 g78 g79 g80
## 1 88198043 88199163 <DEL> 2 2 2 1 2 1
## 17 13524840 13525181 <DUP> 1 1 2 1 1 1
## Y 11858017 11858363 <DUP> 1 2 1 2 1 1
## 4 28176942 NA ]X:75642741]N 1 2 1 1 1 1
## X 75642741 NA N[4:28176942[ 1 2 1 1 1 1
## GL456392.1 19292 NA ]GL456396.1:9019]N 1 2 2 1 1 1
## GL456396.1 9019 NA N[GL456392.1:19292[ 1 2 2 1 1 1
rd <- rd[rows,]
rd
## GRanges object with 7 ranges and 5 metadata columns:
## seqnames ranges strand | paramRangeID REF
## <Rle> <IRanges> <Rle> | <factor> <DNAStringSet>
## 74 1 88198043 * | NA N
## 1597 17 13524840 * | NA N
## 1901 Y 11858017 * | NA N
## 2032_1 4 28176942 * | NA N
## 2032_2 X 75642741 * | NA N
## 2127_1 GL456392.1 19292 * | NA N
## 2127_2 GL456396.1 9019 * | NA N
## ALT QUAL FILTER
## <CharacterList> <numeric> <character>
## 74 <DEL> 31.46 .
## 1597 <DUP> 34.58 .
## 1901 <DUP> 44.54 .
## 2032_1 ]X:75642741]N 204.60 .
## 2032_2 N[4:28176942[ 204.60 .
## 2127_1 ]GL456396.1:9019]N 48.50 .
## 2127_2 N[GL456392.1:19292[ 48.50 .
## -------
## seqinfo: 34 sequences from GRCm39 genome; no seqlengths
colfunc <- colorRampPalette(c("blue", "red"))
library(gplots)
## Registered S3 method overwritten by 'gplots':
## method from
## plot.venn eulerr
##
## Attaching package: 'gplots'
## The following object is masked from 'package:eulerr':
##
## venn
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
heatmap.2(gtx, trace = "none", scale = "none",
dendrogram='none', Rowv=FALSE, Colv=FALSE, col=colfunc(25),
margins = c(5,20), cexRow=.8, cexCol=.9, main="Genetic Variation")