Reorganising the data
df <- data.frame(rowRanges(v75),info(v75)) #taking a look into the content for one of the vcf file.
df <- df[,c("seqnames","start","END")]
head(df)
## seqnames start END
## 1 1 26685964 26688447
## 2 1 39463305 39463318
## 3 1 39701420 39702858
## 4 1 39747923 39748106
## 5 1 39828018 39829953
## 6 1 39836674 39836749
alt <- unlist(as.vector(elementMetadata(v75)$ALT))
svtype <- info(v75)$SVTYPE
gt <- data.frame(g75,g76,g77,g78,g79,g80)
colnames(gt) <- c("g75","g76","g77","g78","g79","g80")
qual <- data.frame(r75$QUAL,r76$QUAL,r77$QUAL,r78$QUAL,r79$QUAL,r80$QUAL)
gt2 <- data.frame(df,alt,svtype,gt,qual)
head(gt2,20)
## seqnames start END alt svtype g75 g76 g77 g78 g79 g80 r75.QUAL
## 1 1 26685964 26688447 <DUP> DUP 1/1 0/0 1/1 1/1 1/1 0/1 19.26
## 2 1 39463305 39463318 <DEL> DEL ./. 0/0 0/0 ./. 0/0 0/0 0.00
## 3 1 39701420 39702858 <DEL> DEL ./. 1/1 ./. 1/1 0/0 1/1 0.00
## 4 1 39747923 39748106 <DEL> DEL ./. 1/1 ./. 0/0 0/0 0/0 0.00
## 5 1 39828018 39829953 <DEL> DEL 1/1 ./. ./. 0/0 0/0 0/0 31.46
## 6 1 39836674 39836749 <DEL> DEL 0/0 ./. 0/0 0/0 0/0 0/0 2.04
## 7 1 40142223 40148243 <DEL> DEL 0/0 1/1 ./. 1/1 1/1 ./. 4.77
## 8 1 40374120 40374175 <DEL> DEL ./. 0/0 ./. 1/1 1/1 1/1 0.00
## 9 1 40565418 40565503 <DEL> DEL 1/1 1/1 1/1 0/0 0/0 1/1 60.25
## 10 1 40697999 40702452 <DEL> DEL 0/0 0/0 ./. 1/1 0/0 ./. 4.77
## 11 1 42137427 42143798 <DEL> DEL ./. ./. 0/0 0/0 1/1 0/0 0.00
## 12 1 42646102 42647988 <DEL> DEL 1/1 ./. ./. ./. 0/0 ./. 60.25
## 13 1 42799620 42799806 <DEL> DEL 0/0 ./. 0/0 0/0 1/1 0/0 4.77
## 14 1 43030800 43031195 <DEL> DEL ./. 1/1 ./. ./. 1/1 ./. 0.00
## 15 1 43343904 43344013 <DEL> DEL ./. ./. 1/1 0/0 ./. 0/0 0.00
## 16 1 44728871 44729958 <DEL> DEL ./. ./. 0/0 0/0 ./. 0/0 0.00
## 17 1 45683261 45683427 <DEL> DEL 0/0 ./. 1/1 ./. 1/1 1/1 4.77
## 18 1 51251776 51257471 <DEL> DEL 0/0 1/1 0/0 0/0 0/0 1/1 4.77
## 19 1 51544912 51546751 <DEL> DEL 1/1 ./. ./. 1/1 ./. 0/0 31.46
## 20 1 51651938 51654654 <DEL> DEL 0/0 0/0 ./. 1/1 0/0 ./. 4.77
## r76.QUAL r77.QUAL r78.QUAL r79.QUAL r80.QUAL
## 1 4.77 19.26 52.10 68.74 14.60
## 2 4.77 4.77 0.00 4.77 4.77
## 3 31.46 0.00 31.46 4.77 236.38
## 4 89.31 0.00 4.77 4.77 4.77
## 5 0.00 0.00 4.77 4.77 4.77
## 6 0.00 4.77 0.07 1.01 2.04
## 7 60.25 0.00 31.46 89.31 0.00
## 8 4.77 0.00 31.46 147.94 60.25
## 9 31.46 60.25 4.77 4.77 31.46
## 10 4.77 0.00 60.25 4.77 0.00
## 11 0.00 4.77 4.77 31.46 4.77
## 12 0.00 0.00 0.00 4.77 0.00
## 13 0.00 4.77 4.77 31.46 4.77
## 14 31.46 0.00 0.00 60.25 0.00
## 15 0.00 31.46 4.77 0.00 4.77
## 16 0.00 4.77 4.77 0.00 4.77
## 17 0.00 31.46 0.00 31.46 31.46
## 18 31.46 4.77 4.77 4.77 31.46
## 19 0.00 0.00 31.46 0.00 4.77
## 20 4.77 0.00 60.25 4.77 0.00
gt3 <- gt2[which(apply(qual,1,min)>=20),]
head(gt3,20)
## seqnames start END alt svtype g75 g76 g77 g78 g79 g80
## 74 1 88270321 88271441 <DEL> DEL 1/1 1/1 1/1 0/1 1/1 0/1
## 260_1 2 98662505 NA [2:98664879[N BND 1/1 1/1 1/1 1/1 1/1 1/1
## 260_2 2 98664879 NA [2:98662505[N BND 1/1 1/1 1/1 1/1 1/1 1/1
## 261_1 2 98662826 NA N]2:98665217] BND 1/1 1/1 1/1 1/1 1/1 1/1
## 261_2 2 98665217 NA N]2:98662826] BND 1/1 1/1 1/1 1/1 1/1 1/1
## 262 2 98662827 98666237 <DEL> DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 269_1 2 98662479 NA N]2:98667331] BND 0/1 0/1 1/1 0/1 0/1 0/1
## 269_2 2 98667331 NA N]2:98662479] BND 0/1 0/1 1/1 0/1 0/1 0/1
## 274 2 98667057 98667332 <DUP> DUP 1/1 1/1 1/1 1/1 1/1 1/1
## 328 2 181917925 181917984 <DEL> DEL 0/1 0/1 0/1 0/1 0/1 0/1
## 330 2 181930977 181931229 <DEL> DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 331 2 181930299 181931242 <DEL> DEL 0/1 0/1 1/1 1/1 1/1 1/1
## 464 4 3068073 3081254 <DEL> DEL 0/1 0/1 0/1 0/1 0/1 0/1
## 806 7 12010843 12015871 <DEL> DEL 1/1 0/1 1/1 1/1 1/1 1/1
## 1207 10 128441874 128441965 <DUP> DUP 1/1 1/1 1/1 1/1 1/1 1/1
## 1219 11 3198658 3198711 <DEL> DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 1261 11 54140307 54140358 <DEL> DEL 1/1 1/1 0/1 1/1 1/1 1/1
## 1415 13 48253543 48270314 <DUP> DUP 1/1 1/1 1/1 1/1 1/1 1/1
## 1503 15 73527357 73527439 <DEL> DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 1881 X 76598753 76598860 <DEL> DEL 0/1 0/1 0/1 0/1 0/1 0/1
## r75.QUAL r76.QUAL r77.QUAL r78.QUAL r79.QUAL r80.QUAL
## 74 31.46 31.46 60.25 25.33 138.74 75.87
## 260_1 60.25 118.56 31.46 462.68 177.38 413.60
## 260_2 60.25 118.56 31.46 462.68 177.38 413.60
## 261_1 118.56 147.94 118.56 531.76 236.38 236.38
## 261_2 118.56 147.94 118.56 531.76 236.38 236.38
## 262 8596.85 7237.89 6647.05 21004.66 15568.86 16514.22
## 269_1 1154.43 920.54 886.29 1447.28 1472.22 1169.32
## 269_2 1154.43 920.54 886.29 1447.28 1472.22 1169.32
## 274 36737.28 29577.39 25062.96 58676.11 37841.79 48576.45
## 328 545.42 329.57 287.61 1309.77 1261.93 950.14
## 330 1408.04 994.45 846.73 3061.53 2077.08 1683.47
## 331 482.78 228.14 570.47 1968.90 1427.14 1535.76
## 464 65.88 35.95 92.94 158.68 83.85 134.82
## 806 147.94 72.15 31.46 315.00 118.56 177.38
## 1207 659.63 339.80 509.69 1828.95 1197.37 962.48
## 1219 472.68 60.25 89.31 265.90 226.54 147.94
## 1261 118.56 60.25 53.14 287.93 147.94 197.14
## 1415 33.04 35.58 35.58 206.13 288.83 119.17
## 1503 31.46 31.46 31.46 89.31 60.25 31.46
## 1881 251.73 146.78 83.85 323.30 380.48 215.53
gt <- gt[which(apply(qual,1,min)>=20),]
head(gt,20)
## g75 g76 g77 g78 g79 g80
## 74 1/1 1/1 1/1 0/1 1/1 0/1
## 260_1 1/1 1/1 1/1 1/1 1/1 1/1
## 260_2 1/1 1/1 1/1 1/1 1/1 1/1
## 261_1 1/1 1/1 1/1 1/1 1/1 1/1
## 261_2 1/1 1/1 1/1 1/1 1/1 1/1
## 262 1/1 1/1 1/1 1/1 1/1 1/1
## 269_1 0/1 0/1 1/1 0/1 0/1 0/1
## 269_2 0/1 0/1 1/1 0/1 0/1 0/1
## 274 1/1 1/1 1/1 1/1 1/1 1/1
## 328 0/1 0/1 0/1 0/1 0/1 0/1
## 330 1/1 1/1 1/1 1/1 1/1 1/1
## 331 0/1 0/1 1/1 1/1 1/1 1/1
## 464 0/1 0/1 0/1 0/1 0/1 0/1
## 806 1/1 0/1 1/1 1/1 1/1 1/1
## 1207 1/1 1/1 1/1 1/1 1/1 1/1
## 1219 1/1 1/1 1/1 1/1 1/1 1/1
## 1261 1/1 1/1 0/1 1/1 1/1 1/1
## 1415 1/1 1/1 1/1 1/1 1/1 1/1
## 1503 1/1 1/1 1/1 1/1 1/1 1/1
## 1881 0/1 0/1 0/1 0/1 0/1 0/1
rd <- rowRanges(v75)
rd <- rd[which(apply(qual,1,min)>=20),]
head(rd,20)
## GRanges object with 20 ranges and 5 metadata columns:
## seqnames ranges strand | paramRangeID REF ALT
## <Rle> <IRanges> <Rle> | <factor> <DNAStringSet> <CharacterList>
## 74 1 88270321 * | NA N <DEL>
## 260_1 2 98662505 * | NA N [2:98664879[N
## 260_2 2 98664879 * | NA N [2:98662505[N
## 261_1 2 98662826 * | NA N N]2:98665217]
## 261_2 2 98665217 * | NA N N]2:98662826]
## ... ... ... ... . ... ... ...
## 1219 11 3198658 * | NA N <DEL>
## 1261 11 54140307 * | NA N <DEL>
## 1415 13 48253543 * | NA N <DUP>
## 1503 15 73527357 * | NA N <DEL>
## 1881 X 76598753 * | NA N <DEL>
## QUAL FILTER
## <numeric> <character>
## 74 31.46 .
## 260_1 60.25 .
## 260_2 60.25 .
## 261_1 118.56 .
## 261_2 118.56 .
## ... ... ...
## 1219 472.68 .
## 1261 118.56 .
## 1415 33.04 .
## 1503 31.46 .
## 1881 251.73 .
## -------
## seqinfo: 39 sequences from mm10 genome; no seqlengths
rows <- which(apply(gt,1,function(x){
sd(as.numeric(as.factor(x)))
})!=0)
gt3 <- gt3[rows,]
gt <- gt[rows,]
gtx <- apply(gt,2,function(x) { as.numeric(as.factor(x))})
rownames(gtx) <- paste(gt3$seqnames,gt3$start,gt3$END,gt3$alt)
gtx
## g75 g76 g77 g78 g79 g80
## 1 88270321 88271441 <DEL> 2 2 2 1 2 1
## 2 98662479 NA N]2:98667331] 1 1 2 1 1 1
## 2 98667331 NA N]2:98662479] 1 1 2 1 1 1
## 2 181930299 181931242 <DEL> 1 1 2 2 2 2
## 7 12010843 12015871 <DEL> 2 1 2 2 2 2
## 11 54140307 54140358 <DEL> 2 2 1 2 2 2
## Y 11858017 11858363 <DUP> 1 2 1 1 1 1
## 1 183299135 NA ]13:99791042]N 1 2 1 1 2 1
## 13 99791042 NA N[1:183299135[ 1 2 1 1 2 1
## 2 181930998 NA N]13:99791043] 1 1 1 2 2 2
## 13 99791043 NA N]2:181930998] 1 1 1 2 2 2
## 4 28176942 NA ]X:76599135]N 1 2 1 1 1 1
## X 76599135 NA N[4:28176942[ 1 2 1 1 1 1
## GL456392.1 19292 NA ]GL456396.1:9019]N 1 2 2 1 1 1
## GL456396.1 9019 NA N[GL456392.1:19292[ 1 2 2 1 1 1
rd <- rd[rows,]
rd
## GRanges object with 15 ranges and 5 metadata columns:
## seqnames ranges strand | paramRangeID REF
## <Rle> <IRanges> <Rle> | <factor> <DNAStringSet>
## 74 1 88270321 * | NA N
## 269_1 2 98662479 * | NA N
## 269_2 2 98667331 * | NA N
## 331 2 181930299 * | NA N
## 806 7 12010843 * | NA N
## ... ... ... ... . ... ...
## 1976_2 13 99791043 * | NA N
## 2024_1 4 28176942 * | NA N
## 2024_2 X 76599135 * | NA N
## 2140_1 GL456392.1 19292 * | NA N
## 2140_2 GL456396.1 9019 * | NA N
## ALT QUAL FILTER
## <CharacterList> <numeric> <character>
## 74 <DEL> 31.46 .
## 269_1 N]2:98667331] 1154.43 .
## 269_2 N]2:98662479] 1154.43 .
## 331 <DEL> 482.78 .
## 806 <DEL> 147.94 .
## ... ... ... ...
## 1976_2 N]2:181930998] 282.67 .
## 2024_1 ]X:76599135]N 204.60 .
## 2024_2 N[4:28176942[ 204.60 .
## 2140_1 ]GL456396.1:9019]N 48.50 .
## 2140_2 N[GL456392.1:19292[ 48.50 .
## -------
## seqinfo: 39 sequences from mm10 genome; no seqlengths
colfunc <- colorRampPalette(c("blue", "red"))
library(gplots)
## Registered S3 method overwritten by 'gplots':
## method from
## plot.venn eulerr
##
## Attaching package: 'gplots'
## The following object is masked from 'package:eulerr':
##
## venn
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
heatmap.2(gtx, trace = "none", scale = "none",
dendrogram='none', Rowv=FALSE, Colv=FALSE, col=colfunc(25),
margins = c(5,20), cexRow=.8, cexCol=.9, main="Genetic Variation")
dels <- subset(gt3,svtype=="DEL")[,1:3]
dels$seqnames <- gsub("^","chr",dels$seqnames)
dups <- subset(gt3,svtype=="DUP")[,1:3]
dups$seqnames <- gsub("^","chr",dups$seqnames)
brks <- subset(gt3,svtype=="BND")[,c(1,2,2)]
brks <- brks[grep("GL",brks$seqnames,invert = TRUE),]
brks$seqnames <- gsub("^","chr",brks$seqnames)
brks
## seqnames start start.1
## 269_1 chr2 98662479 98662479
## 269_2 chr2 98667331 98667331
## 1937_1 chr1 183299135 183299135
## 1937_2 chr13 99791042 99791042
## 1976_1 chr2 181930998 181930998
## 1976_2 chr13 99791043 99791043
## 2024_1 chr4 28176942 28176942
## 2024_2 chrX 76599135 76599135
links <- subset(gt3,svtype=="BND")
links <- links[grep("GL",links$seqnames,invert = TRUE),]
links$alt <- gsub("\\[","\\@",links$alt)
links$alt <- gsub("\\]","\\@",links$alt)
links$alt <- sapply(strsplit(links$alt,"@"),"[[",2)
links$chr2 <- sapply(strsplit(links$alt,":"),"[[",1)
links$start2 <- as.numeric(sapply(strsplit(links$alt,":"),"[[",2))
links <- links[,c("seqnames","start","start","chr2","start2","start2")]
links$seqnames <- gsub("^","chr",links$seqnames)
links$chr2 <- gsub("^","chr",links$chr2)
links
## seqnames start start.1 chr2 start2 start2.1
## 269_1 chr2 98662479 98662479 chr2 98667331 98667331
## 269_2 chr2 98667331 98667331 chr2 98662479 98662479
## 1937_1 chr1 183299135 183299135 chr13 99791042 99791042
## 1937_2 chr13 99791042 99791042 chr1 183299135 183299135
## 1976_1 chr2 181930998 181930998 chr13 99791043 99791043
## 1976_2 chr13 99791043 99791043 chr2 181930998 181930998
## 2024_1 chr4 28176942 28176942 chrX 76599135 76599135
## 2024_2 chrX 76599135 76599135 chr4 28176942 28176942
library("RCircos")
data(UCSC.Mouse.GRCm38.CytoBandIdeogram)
head(UCSC.Mouse.GRCm38.CytoBandIdeogram)
## Chromosome ChromStart ChromEnd Band Stain
## 1 chr1 0 8840440 qA1 gpos100
## 2 chr1 8840440 12278390 qA2 gneg
## 3 chr1 12278390 20136559 qA3 gpos33
## 4 chr1 20136559 22101102 qA4 gneg
## 5 chr1 22101102 30941543 qA5 gpos100
## 6 chr1 30941543 43219933 qB gneg
RCircos.Set.Core.Components(
cyto.info=UCSC.Mouse.GRCm38.CytoBandIdeogram,
tracks.inside=4, tracks.outside=0)
##
## RCircos.Core.Components initialized.
## Type ?RCircos.Reset.Plot.Parameters to see how to modify the core components.
RCircos.Set.Plot.Area()
RCircos.Chromosome.Ideogram.Plot()
RCircos.Tile.Plot(tile.data=dels, track.num=1, side="in")
RCircos.Tile.Plot(tile.data=dups, track.num=2, side="in")
RCircos.Tile.Plot(tile.data=links, track.num=3, side="in")
RCircos.Link.Plot(link.data=links, track.num = 4)
#using the package below to locate and annotate the gene variants without necessary needing the gff file at the moment
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
txdb <- TxDb.Mmusculus.UCSC.mm10.ensGene
gff_file <- "Mus_musculus.GRCm38.98.gff3"
txdb <- makeTxDbFromGFF(gff_file, format="gff3")
## Import genomic features from the file as a GRanges object ... OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .extract_exons_from_GRanges(exon_IDX, gr, mcols0, tx_IDX, feature = "exon", : 6 exons couldn't be linked to a transcript so were dropped:
## seqid start end strand ID Name
## 1 12 113654967 113655244 + <NA> ENSMUSE00000892648
## 2 12 114630680 114630973 + <NA> ENSMUSE00001053179
## 3 6 42980690 42980744 + <NA> ENSMUSE00001000765
## 4 6 42980854 42981150 + <NA> ENSMUSE00000892164
## 5 6 43081518 43081827 - <NA> ENSMUSE00000663119
## 6 6 43082049 43082097 - <NA> ENSMUSE00001046644
## Parent Parent_type
## 1 transcript:ENSMUST00000166255 <NA>
## 2 transcript:ENSMUST00000095364 <NA>
## 3 transcript:ENSMUST00000167638 <NA>
## 4 transcript:ENSMUST00000167638 <NA>
## 5 transcript:ENSMUST00000103301 <NA>
## 6 transcript:ENSMUST00000103301 <NA>
## Warning in .extract_exons_from_GRanges(cds_IDX, gr, mcols0, tx_IDX, feature = "cds", : 6 CDS couldn't be linked to a transcript so were dropped:
## seqid start end strand ID Name
## 1 12 113654967 113655244 + CDS:ENSMUSP00000128354 ENSMUSP00000128354
## 2 12 114630680 114630973 + CDS:ENSMUSP00000093010 ENSMUSP00000093010
## 3 6 42980690 42980744 + CDS:ENSMUSP00000131191 ENSMUSP00000131191
## 4 6 42980854 42981150 + CDS:ENSMUSP00000131191 ENSMUSP00000131191
## 5 6 43081518 43081827 - CDS:ENSMUSP00000100102 ENSMUSP00000100102
## 6 6 43082049 43082097 - CDS:ENSMUSP00000100102 ENSMUSP00000100102
## Parent Parent_type
## 1 transcript:ENSMUST00000166255 <NA>
## 2 transcript:ENSMUST00000095364 <NA>
## 3 transcript:ENSMUST00000167638 <NA>
## 4 transcript:ENSMUST00000167638 <NA>
## 5 transcript:ENSMUST00000103301 <NA>
## 6 transcript:ENSMUST00000103301 <NA>
## OK
Transcript intercept
tx <- transcripts(txdb)
ol <- findOverlaps(rd,tx)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': CHR_MG3251_PATCH, GL456389.1, GL456387.1, GL456393.1, CHR_MG4259_PATCH, GL456394.1, GL456368.1, CHR_CAST_EI_MMCHR11_CTG5, GL456383.1, GL456392.1, GL456396.1, GL456390.1
## - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456219.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456372.1, GL456385.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584303.1
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
ol
## Hits object with 11 hits and 0 metadata columns:
## queryHits subjectHits
## <integer> <integer>
## [1] 1 6096
## [2] 1 6097
## [3] 1 6098
## [4] 1 6100
## [5] 1 6101
## [6] 2 11452
## [7] 8 3967
## [8] 8 3968
## [9] 8 3969
## [10] 8 3970
## [11] 13 138760
## -------
## queryLength: 15 / subjectLength: 142442
var_ol <- rd[queryHits(ol)]
tx_ol <- tx[subjectHits(ol)]
var_ol
## GRanges object with 11 ranges and 5 metadata columns:
## seqnames ranges strand | paramRangeID REF
## <Rle> <IRanges> <Rle> | <factor> <DNAStringSet>
## 74 1 88270321 * | NA N
## 74 1 88270321 * | NA N
## 74 1 88270321 * | NA N
## 74 1 88270321 * | NA N
## 74 1 88270321 * | NA N
## 269_1 2 98662479 * | NA N
## 1937_1 1 183299135 * | NA N
## 1937_1 1 183299135 * | NA N
## 1937_1 1 183299135 * | NA N
## 1937_1 1 183299135 * | NA N
## 2024_2 X 76599135 * | NA N
## ALT QUAL FILTER
## <CharacterList> <numeric> <character>
## 74 <DEL> 31.46 .
## 74 <DEL> 31.46 .
## 74 <DEL> 31.46 .
## 74 <DEL> 31.46 .
## 74 <DEL> 31.46 .
## 269_1 N]2:98667331] 1154.43 .
## 1937_1 ]13:99791042]N 201.04 .
## 1937_1 ]13:99791042]N 201.04 .
## 1937_1 ]13:99791042]N 201.04 .
## 1937_1 ]13:99791042]N 201.04 .
## 2024_2 N[4:28176942[ 204.60 .
## -------
## seqinfo: 39 sequences from mm10 genome; no seqlengths
tx_ol
## GRanges object with 11 ranges and 2 metadata columns:
## seqnames ranges strand | tx_id tx_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## [1] 1 88262471-88277633 - | 6096 ENSMUST00000054674
## [2] 1 88264828-88277557 - | 6097 ENSMUST00000065420
## [3] 1 88264978-88277506 - | 6098 ENSMUST00000147393
## [4] 1 88269337-88273119 - | 6100 ENSMUST00000128532
## [5] 1 88269378-88270947 - | 6101 ENSMUST00000148384
## [6] 2 98662237-98664083 + | 11452 ENSMUST00000099684
## [7] 1 183297049-183325476 + | 3967 ENSMUST00000109166
## [8] 1 183297327-183304714 + | 3968 ENSMUST00000191782
## [9] 1 183297405-183323609 + | 3969 ENSMUST00000193625
## [10] 1 183297570-183318834 + | 3970 ENSMUST00000193959
## [11] X 76582610-76602924 - | 138760 ENSMUST00000114054
## -------
## seqinfo: 45 sequences (1 circular) from an unspecified genome; no seqlengths
vardf <- as.data.frame(var_ol,row.names = 1:nrow(as.data.frame(ranges(var_ol,use.mcols = TRUE))))
head(vardf)
## seqnames start end width strand paramRangeID REF ALT QUAL
## 1 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 2 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 3 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 4 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 5 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 6 2 98662479 98662479 1 * <NA> N N]2:9866.... 1154.43
## FILTER
## 1 .
## 2 .
## 3 .
## 4 .
## 5 .
## 6 .
txdf <- as.data.frame(tx_ol,row.names = 1:nrow(as.data.frame(ranges(tx_ol))))
head(txdf)
## seqnames start end width strand tx_id tx_name
## 1 1 88262471 88277633 15163 - 6096 ENSMUST00000054674
## 2 1 88264828 88277557 12730 - 6097 ENSMUST00000065420
## 3 1 88264978 88277506 12529 - 6098 ENSMUST00000147393
## 4 1 88269337 88273119 3783 - 6100 ENSMUST00000128532
## 5 1 88269378 88270947 1570 - 6101 ENSMUST00000148384
## 6 2 98662237 98664083 1847 + 11452 ENSMUST00000099684
vardf$tx <- txdf$tx_name
head(vardf)
## seqnames start end width strand paramRangeID REF ALT QUAL
## 1 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 2 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 3 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 4 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 5 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 6 2 98662479 98662479 1 * <NA> N N]2:9866.... 1154.43
## FILTER tx
## 1 . ENSMUST00000054674
## 2 . ENSMUST00000065420
## 3 . ENSMUST00000147393
## 4 . ENSMUST00000128532
## 5 . ENSMUST00000148384
## 6 . ENSMUST00000099684
vardf_tx <- vardf
head(vardf_tx)
## seqnames start end width strand paramRangeID REF ALT QUAL
## 1 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 2 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 3 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 4 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 5 1 88270321 88270321 1 * <NA> N <DEL> 31.46
## 6 2 98662479 98662479 1 * <NA> N N]2:9866.... 1154.43
## FILTER tx
## 1 . ENSMUST00000054674
## 2 . ENSMUST00000065420
## 3 . ENSMUST00000147393
## 4 . ENSMUST00000128532
## 5 . ENSMUST00000148384
## 6 . ENSMUST00000099684
dim(vardf_tx)
## [1] 11 11
Gene intercept
gx <- genes(txdb)
ol <- findOverlaps(rd,gx,select = "all")
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': CHR_MG3251_PATCH, GL456389.1, GL456387.1, GL456393.1, CHR_MG4259_PATCH, GL456394.1, GL456368.1, CHR_CAST_EI_MMCHR11_CTG5, GL456383.1, GL456392.1, GL456396.1, GL456390.1
## - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456219.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456372.1, GL456385.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584303.1
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
ol
## Hits object with 4 hits and 0 metadata columns:
## queryHits subjectHits
## <integer> <integer>
## [1] 1 13243
## [2] 2 21026
## [3] 8 12741
## [4] 13 22873
## -------
## queryLength: 15 / subjectLength: 55417
var_ol <- rd[queryHits(ol)]
gx_ol <- gx[subjectHits(ol)]
var_ol
## GRanges object with 4 ranges and 5 metadata columns:
## seqnames ranges strand | paramRangeID REF
## <Rle> <IRanges> <Rle> | <factor> <DNAStringSet>
## 74 1 88270321 * | NA N
## 269_1 2 98662479 * | NA N
## 1937_1 1 183299135 * | NA N
## 2024_2 X 76599135 * | NA N
## ALT QUAL FILTER
## <CharacterList> <numeric> <character>
## 74 <DEL> 31.46 .
## 269_1 N]2:98667331] 1154.43 .
## 1937_1 ]13:99791042]N 201.04 .
## 2024_2 N[4:28176942[ 204.60 .
## -------
## seqinfo: 39 sequences from mm10 genome; no seqlengths
vardf <- as.data.frame(var_ol,row.names = 1:nrow(as.data.frame(ranges(var_ol))))
vardf$ALT <- as.character(GenomicRanges::elementMetadata(var_ol)$ALT)
gxdf <- as.data.frame(gx_ol,row.names = 1:nrow(as.data.frame(ranges(gx_ol))))
vardf$gene <- gxdf$gene_id
vardf
## seqnames start end width strand paramRangeID REF ALT
## 1 1 88270321 88270321 1 * <NA> N <DEL>
## 2 2 98662479 98662479 1 * <NA> N N]2:98667331]
## 3 1 183299135 183299135 1 * <NA> N ]13:99791042]N
## 4 X 76599135 76599135 1 * <NA> N N[4:28176942[
## QUAL FILTER gene
## 1 31.46 . ENSMUSG00000044783
## 2 1154.43 . ENSMUSG00000075015
## 3 201.04 . ENSMUSG00000042901
## 4 204.60 . ENSMUSG00000079525
dim(vardf)
## [1] 4 11
write.table(x=vardf,file="vardf.tsv",quote = FALSE,sep = "\t",row.names = FALSE)