Loading the data

# reading in the GFF files
gff <- read.table("Mus_musculus.GRCm38.98.gff3", sep = "\t", quote = "")

v75 <- readVcf("VCF_SRP199233/SRX5884275_svtyper.vcf", "mm10")
v76 <- readVcf("VCF_SRP199233/SRX5884276_svtyper.vcf", "mm10")
v77 <- readVcf("VCF_SRP199233/SRX5884277_svtyper.vcf", "mm10")
v78 <- readVcf("VCF_SRP199233/SRX5884278_svtyper.vcf", "mm10")
v79 <- readVcf("VCF_SRP199233/SRX5884279_svtyper.vcf", "mm10")
v80 <- readVcf("VCF_SRP199233/SRX5884280_svtyper.vcf", "mm10")

r75 <- rowRanges(v75)
r76 <- rowRanges(v76)
r77 <- rowRanges(v77)
r78 <- rowRanges(v78)
r79 <- rowRanges(v79)
r80 <- rowRanges(v80)

g75 <- geno(v75)$GT
g76 <- geno(v76)$GT
g77 <- geno(v77)$GT
g78 <- geno(v78)$GT
g79 <- geno(v79)$GT
g80 <- geno(v80)$GT

Reorganising the data

df <- data.frame(rowRanges(v75),info(v75))  #taking a look into the content for one of the vcf file.
df <- df[,c("seqnames","start","END")]
head(df)
##   seqnames    start      END
## 1        1 26685964 26688447
## 2        1 39463305 39463318
## 3        1 39701420 39702858
## 4        1 39747923 39748106
## 5        1 39828018 39829953
## 6        1 39836674 39836749
alt <- unlist(as.vector(elementMetadata(v75)$ALT))
svtype <- info(v75)$SVTYPE

gt <- data.frame(g75,g76,g77,g78,g79,g80)
colnames(gt) <- c("g75","g76","g77","g78","g79","g80") 

qual <- data.frame(r75$QUAL,r76$QUAL,r77$QUAL,r78$QUAL,r79$QUAL,r80$QUAL)

gt2 <- data.frame(df,alt,svtype,gt,qual) 

head(gt2,20)
##    seqnames    start      END   alt svtype g75 g76 g77 g78 g79 g80 r75.QUAL
## 1         1 26685964 26688447 <DUP>    DUP 1/1 0/0 1/1 1/1 1/1 0/1    19.26
## 2         1 39463305 39463318 <DEL>    DEL ./. 0/0 0/0 ./. 0/0 0/0     0.00
## 3         1 39701420 39702858 <DEL>    DEL ./. 1/1 ./. 1/1 0/0 1/1     0.00
## 4         1 39747923 39748106 <DEL>    DEL ./. 1/1 ./. 0/0 0/0 0/0     0.00
## 5         1 39828018 39829953 <DEL>    DEL 1/1 ./. ./. 0/0 0/0 0/0    31.46
## 6         1 39836674 39836749 <DEL>    DEL 0/0 ./. 0/0 0/0 0/0 0/0     2.04
## 7         1 40142223 40148243 <DEL>    DEL 0/0 1/1 ./. 1/1 1/1 ./.     4.77
## 8         1 40374120 40374175 <DEL>    DEL ./. 0/0 ./. 1/1 1/1 1/1     0.00
## 9         1 40565418 40565503 <DEL>    DEL 1/1 1/1 1/1 0/0 0/0 1/1    60.25
## 10        1 40697999 40702452 <DEL>    DEL 0/0 0/0 ./. 1/1 0/0 ./.     4.77
## 11        1 42137427 42143798 <DEL>    DEL ./. ./. 0/0 0/0 1/1 0/0     0.00
## 12        1 42646102 42647988 <DEL>    DEL 1/1 ./. ./. ./. 0/0 ./.    60.25
## 13        1 42799620 42799806 <DEL>    DEL 0/0 ./. 0/0 0/0 1/1 0/0     4.77
## 14        1 43030800 43031195 <DEL>    DEL ./. 1/1 ./. ./. 1/1 ./.     0.00
## 15        1 43343904 43344013 <DEL>    DEL ./. ./. 1/1 0/0 ./. 0/0     0.00
## 16        1 44728871 44729958 <DEL>    DEL ./. ./. 0/0 0/0 ./. 0/0     0.00
## 17        1 45683261 45683427 <DEL>    DEL 0/0 ./. 1/1 ./. 1/1 1/1     4.77
## 18        1 51251776 51257471 <DEL>    DEL 0/0 1/1 0/0 0/0 0/0 1/1     4.77
## 19        1 51544912 51546751 <DEL>    DEL 1/1 ./. ./. 1/1 ./. 0/0    31.46
## 20        1 51651938 51654654 <DEL>    DEL 0/0 0/0 ./. 1/1 0/0 ./.     4.77
##    r76.QUAL r77.QUAL r78.QUAL r79.QUAL r80.QUAL
## 1      4.77    19.26    52.10    68.74    14.60
## 2      4.77     4.77     0.00     4.77     4.77
## 3     31.46     0.00    31.46     4.77   236.38
## 4     89.31     0.00     4.77     4.77     4.77
## 5      0.00     0.00     4.77     4.77     4.77
## 6      0.00     4.77     0.07     1.01     2.04
## 7     60.25     0.00    31.46    89.31     0.00
## 8      4.77     0.00    31.46   147.94    60.25
## 9     31.46    60.25     4.77     4.77    31.46
## 10     4.77     0.00    60.25     4.77     0.00
## 11     0.00     4.77     4.77    31.46     4.77
## 12     0.00     0.00     0.00     4.77     0.00
## 13     0.00     4.77     4.77    31.46     4.77
## 14    31.46     0.00     0.00    60.25     0.00
## 15     0.00    31.46     4.77     0.00     4.77
## 16     0.00     4.77     4.77     0.00     4.77
## 17     0.00    31.46     0.00    31.46    31.46
## 18    31.46     4.77     4.77     4.77    31.46
## 19     0.00     0.00    31.46     0.00     4.77
## 20     4.77     0.00    60.25     4.77     0.00
gt3 <- gt2[which(apply(qual,1,min)>=20),]
head(gt3,20)
##       seqnames     start       END           alt svtype g75 g76 g77 g78 g79 g80
## 74           1  88270321  88271441         <DEL>    DEL 1/1 1/1 1/1 0/1 1/1 0/1
## 260_1        2  98662505        NA [2:98664879[N    BND 1/1 1/1 1/1 1/1 1/1 1/1
## 260_2        2  98664879        NA [2:98662505[N    BND 1/1 1/1 1/1 1/1 1/1 1/1
## 261_1        2  98662826        NA N]2:98665217]    BND 1/1 1/1 1/1 1/1 1/1 1/1
## 261_2        2  98665217        NA N]2:98662826]    BND 1/1 1/1 1/1 1/1 1/1 1/1
## 262          2  98662827  98666237         <DEL>    DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 269_1        2  98662479        NA N]2:98667331]    BND 0/1 0/1 1/1 0/1 0/1 0/1
## 269_2        2  98667331        NA N]2:98662479]    BND 0/1 0/1 1/1 0/1 0/1 0/1
## 274          2  98667057  98667332         <DUP>    DUP 1/1 1/1 1/1 1/1 1/1 1/1
## 328          2 181917925 181917984         <DEL>    DEL 0/1 0/1 0/1 0/1 0/1 0/1
## 330          2 181930977 181931229         <DEL>    DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 331          2 181930299 181931242         <DEL>    DEL 0/1 0/1 1/1 1/1 1/1 1/1
## 464          4   3068073   3081254         <DEL>    DEL 0/1 0/1 0/1 0/1 0/1 0/1
## 806          7  12010843  12015871         <DEL>    DEL 1/1 0/1 1/1 1/1 1/1 1/1
## 1207        10 128441874 128441965         <DUP>    DUP 1/1 1/1 1/1 1/1 1/1 1/1
## 1219        11   3198658   3198711         <DEL>    DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 1261        11  54140307  54140358         <DEL>    DEL 1/1 1/1 0/1 1/1 1/1 1/1
## 1415        13  48253543  48270314         <DUP>    DUP 1/1 1/1 1/1 1/1 1/1 1/1
## 1503        15  73527357  73527439         <DEL>    DEL 1/1 1/1 1/1 1/1 1/1 1/1
## 1881         X  76598753  76598860         <DEL>    DEL 0/1 0/1 0/1 0/1 0/1 0/1
##       r75.QUAL r76.QUAL r77.QUAL r78.QUAL r79.QUAL r80.QUAL
## 74       31.46    31.46    60.25    25.33   138.74    75.87
## 260_1    60.25   118.56    31.46   462.68   177.38   413.60
## 260_2    60.25   118.56    31.46   462.68   177.38   413.60
## 261_1   118.56   147.94   118.56   531.76   236.38   236.38
## 261_2   118.56   147.94   118.56   531.76   236.38   236.38
## 262    8596.85  7237.89  6647.05 21004.66 15568.86 16514.22
## 269_1  1154.43   920.54   886.29  1447.28  1472.22  1169.32
## 269_2  1154.43   920.54   886.29  1447.28  1472.22  1169.32
## 274   36737.28 29577.39 25062.96 58676.11 37841.79 48576.45
## 328     545.42   329.57   287.61  1309.77  1261.93   950.14
## 330    1408.04   994.45   846.73  3061.53  2077.08  1683.47
## 331     482.78   228.14   570.47  1968.90  1427.14  1535.76
## 464      65.88    35.95    92.94   158.68    83.85   134.82
## 806     147.94    72.15    31.46   315.00   118.56   177.38
## 1207    659.63   339.80   509.69  1828.95  1197.37   962.48
## 1219    472.68    60.25    89.31   265.90   226.54   147.94
## 1261    118.56    60.25    53.14   287.93   147.94   197.14
## 1415     33.04    35.58    35.58   206.13   288.83   119.17
## 1503     31.46    31.46    31.46    89.31    60.25    31.46
## 1881    251.73   146.78    83.85   323.30   380.48   215.53
gt <- gt[which(apply(qual,1,min)>=20),]
head(gt,20)
##       g75 g76 g77 g78 g79 g80
## 74    1/1 1/1 1/1 0/1 1/1 0/1
## 260_1 1/1 1/1 1/1 1/1 1/1 1/1
## 260_2 1/1 1/1 1/1 1/1 1/1 1/1
## 261_1 1/1 1/1 1/1 1/1 1/1 1/1
## 261_2 1/1 1/1 1/1 1/1 1/1 1/1
## 262   1/1 1/1 1/1 1/1 1/1 1/1
## 269_1 0/1 0/1 1/1 0/1 0/1 0/1
## 269_2 0/1 0/1 1/1 0/1 0/1 0/1
## 274   1/1 1/1 1/1 1/1 1/1 1/1
## 328   0/1 0/1 0/1 0/1 0/1 0/1
## 330   1/1 1/1 1/1 1/1 1/1 1/1
## 331   0/1 0/1 1/1 1/1 1/1 1/1
## 464   0/1 0/1 0/1 0/1 0/1 0/1
## 806   1/1 0/1 1/1 1/1 1/1 1/1
## 1207  1/1 1/1 1/1 1/1 1/1 1/1
## 1219  1/1 1/1 1/1 1/1 1/1 1/1
## 1261  1/1 1/1 0/1 1/1 1/1 1/1
## 1415  1/1 1/1 1/1 1/1 1/1 1/1
## 1503  1/1 1/1 1/1 1/1 1/1 1/1
## 1881  0/1 0/1 0/1 0/1 0/1 0/1
rd <- rowRanges(v75)
rd <- rd[which(apply(qual,1,min)>=20),]
head(rd,20)
## GRanges object with 20 ranges and 5 metadata columns:
##         seqnames    ranges strand | paramRangeID            REF             ALT
##            <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet> <CharacterList>
##      74        1  88270321      * |           NA              N           <DEL>
##   260_1        2  98662505      * |           NA              N   [2:98664879[N
##   260_2        2  98664879      * |           NA              N   [2:98662505[N
##   261_1        2  98662826      * |           NA              N   N]2:98665217]
##   261_2        2  98665217      * |           NA              N   N]2:98662826]
##     ...      ...       ...    ... .          ...            ...             ...
##    1219       11   3198658      * |           NA              N           <DEL>
##    1261       11  54140307      * |           NA              N           <DEL>
##    1415       13  48253543      * |           NA              N           <DUP>
##    1503       15  73527357      * |           NA              N           <DEL>
##    1881        X  76598753      * |           NA              N           <DEL>
##              QUAL      FILTER
##         <numeric> <character>
##      74     31.46           .
##   260_1     60.25           .
##   260_2     60.25           .
##   261_1    118.56           .
##   261_2    118.56           .
##     ...       ...         ...
##    1219    472.68           .
##    1261    118.56           .
##    1415     33.04           .
##    1503     31.46           .
##    1881    251.73           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
rows <- which(apply(gt,1,function(x){
  sd(as.numeric(as.factor(x)))
})!=0)

gt3 <- gt3[rows,]
gt <- gt[rows,]

gtx <- apply(gt,2,function(x) { as.numeric(as.factor(x))})
rownames(gtx) <- paste(gt3$seqnames,gt3$start,gt3$END,gt3$alt)
gtx
##                                        g75 g76 g77 g78 g79 g80
## 1 88270321 88271441 <DEL>                2   2   2   1   2   1
## 2 98662479 NA N]2:98667331]              1   1   2   1   1   1
## 2 98667331 NA N]2:98662479]              1   1   2   1   1   1
## 2 181930299 181931242 <DEL>              1   1   2   2   2   2
## 7 12010843 12015871 <DEL>                2   1   2   2   2   2
## 11 54140307 54140358 <DEL>               2   2   1   2   2   2
## Y 11858017 11858363 <DUP>                1   2   1   1   1   1
## 1 183299135 NA ]13:99791042]N            1   2   1   1   2   1
## 13 99791042 NA N[1:183299135[            1   2   1   1   2   1
## 2 181930998 NA N]13:99791043]            1   1   1   2   2   2
## 13 99791043 NA N]2:181930998]            1   1   1   2   2   2
## 4 28176942 NA ]X:76599135]N              1   2   1   1   1   1
## X 76599135 NA N[4:28176942[              1   2   1   1   1   1
## GL456392.1 19292 NA ]GL456396.1:9019]N   1   2   2   1   1   1
## GL456396.1 9019 NA N[GL456392.1:19292[   1   2   2   1   1   1
rd <- rd[rows,]
rd
## GRanges object with 15 ranges and 5 metadata columns:
##            seqnames    ranges strand | paramRangeID            REF
##               <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##       74          1  88270321      * |           NA              N
##    269_1          2  98662479      * |           NA              N
##    269_2          2  98667331      * |           NA              N
##      331          2 181930299      * |           NA              N
##      806          7  12010843      * |           NA              N
##      ...        ...       ...    ... .          ...            ...
##   1976_2         13  99791043      * |           NA              N
##   2024_1          4  28176942      * |           NA              N
##   2024_2          X  76599135      * |           NA              N
##   2140_1 GL456392.1     19292      * |           NA              N
##   2140_2 GL456396.1      9019      * |           NA              N
##                          ALT      QUAL      FILTER
##              <CharacterList> <numeric> <character>
##       74               <DEL>     31.46           .
##    269_1       N]2:98667331]   1154.43           .
##    269_2       N]2:98662479]   1154.43           .
##      331               <DEL>    482.78           .
##      806               <DEL>    147.94           .
##      ...                 ...       ...         ...
##   1976_2      N]2:181930998]    282.67           .
##   2024_1       ]X:76599135]N    204.60           .
##   2024_2       N[4:28176942[    204.60           .
##   2140_1  ]GL456396.1:9019]N     48.50           .
##   2140_2 N[GL456392.1:19292[     48.50           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
colfunc <- colorRampPalette(c("blue", "red"))
library(gplots)
## Registered S3 method overwritten by 'gplots':
##   method    from  
##   plot.venn eulerr
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:eulerr':
## 
##     venn
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
heatmap.2(gtx, trace = "none", scale = "none", 
  dendrogram='none', Rowv=FALSE, Colv=FALSE, col=colfunc(25),
  margins = c(5,20), cexRow=.8, cexCol=.9,  main="Genetic Variation")

dels <- subset(gt3,svtype=="DEL")[,1:3]
dels$seqnames <- gsub("^","chr",dels$seqnames)
dups <- subset(gt3,svtype=="DUP")[,1:3]
dups$seqnames <- gsub("^","chr",dups$seqnames)
brks <- subset(gt3,svtype=="BND")[,c(1,2,2)]
brks <- brks[grep("GL",brks$seqnames,invert = TRUE),]
brks$seqnames <- gsub("^","chr",brks$seqnames)
brks
##        seqnames     start   start.1
## 269_1      chr2  98662479  98662479
## 269_2      chr2  98667331  98667331
## 1937_1     chr1 183299135 183299135
## 1937_2    chr13  99791042  99791042
## 1976_1     chr2 181930998 181930998
## 1976_2    chr13  99791043  99791043
## 2024_1     chr4  28176942  28176942
## 2024_2     chrX  76599135  76599135
links <- subset(gt3,svtype=="BND")
links <- links[grep("GL",links$seqnames,invert = TRUE),]
links$alt <- gsub("\\[","\\@",links$alt)
links$alt <- gsub("\\]","\\@",links$alt)
links$alt <- sapply(strsplit(links$alt,"@"),"[[",2)
links$chr2 <- sapply(strsplit(links$alt,":"),"[[",1)
links$start2 <- as.numeric(sapply(strsplit(links$alt,":"),"[[",2))
links <- links[,c("seqnames","start","start","chr2","start2","start2")]
links$seqnames <- gsub("^","chr",links$seqnames)
links$chr2 <- gsub("^","chr",links$chr2)
links
##        seqnames     start   start.1  chr2    start2  start2.1
## 269_1      chr2  98662479  98662479  chr2  98667331  98667331
## 269_2      chr2  98667331  98667331  chr2  98662479  98662479
## 1937_1     chr1 183299135 183299135 chr13  99791042  99791042
## 1937_2    chr13  99791042  99791042  chr1 183299135 183299135
## 1976_1     chr2 181930998 181930998 chr13  99791043  99791043
## 1976_2    chr13  99791043  99791043  chr2 181930998 181930998
## 2024_1     chr4  28176942  28176942  chrX  76599135  76599135
## 2024_2     chrX  76599135  76599135  chr4  28176942  28176942
library("RCircos")
data(UCSC.Mouse.GRCm38.CytoBandIdeogram)
head(UCSC.Mouse.GRCm38.CytoBandIdeogram)
##   Chromosome ChromStart ChromEnd Band   Stain
## 1       chr1          0  8840440  qA1 gpos100
## 2       chr1    8840440 12278390  qA2    gneg
## 3       chr1   12278390 20136559  qA3  gpos33
## 4       chr1   20136559 22101102  qA4    gneg
## 5       chr1   22101102 30941543  qA5 gpos100
## 6       chr1   30941543 43219933   qB    gneg
RCircos.Set.Core.Components(
    cyto.info=UCSC.Mouse.GRCm38.CytoBandIdeogram,
    tracks.inside=4, tracks.outside=0)
## 
## RCircos.Core.Components initialized.
## Type ?RCircos.Reset.Plot.Parameters to see how to modify the core components.
RCircos.Set.Plot.Area()
RCircos.Chromosome.Ideogram.Plot()

RCircos.Tile.Plot(tile.data=dels, track.num=1, side="in")

RCircos.Tile.Plot(tile.data=dups, track.num=2, side="in")

RCircos.Tile.Plot(tile.data=links, track.num=3, side="in")

RCircos.Link.Plot(link.data=links, track.num = 4)

#using the package below to locate and annotate the gene variants without necessary needing the gff file at the moment
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
txdb <- TxDb.Mmusculus.UCSC.mm10.ensGene
gff_file <- "Mus_musculus.GRCm38.98.gff3"
txdb <- makeTxDbFromGFF(gff_file, format="gff3")
## Import genomic features from the file as a GRanges object ... OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .extract_exons_from_GRanges(exon_IDX, gr, mcols0, tx_IDX, feature = "exon", : 6 exons couldn't be linked to a transcript so were dropped:
##   seqid     start       end strand   ID               Name
## 1    12 113654967 113655244      + <NA> ENSMUSE00000892648
## 2    12 114630680 114630973      + <NA> ENSMUSE00001053179
## 3     6  42980690  42980744      + <NA> ENSMUSE00001000765
## 4     6  42980854  42981150      + <NA> ENSMUSE00000892164
## 5     6  43081518  43081827      - <NA> ENSMUSE00000663119
## 6     6  43082049  43082097      - <NA> ENSMUSE00001046644
##                          Parent Parent_type
## 1 transcript:ENSMUST00000166255        <NA>
## 2 transcript:ENSMUST00000095364        <NA>
## 3 transcript:ENSMUST00000167638        <NA>
## 4 transcript:ENSMUST00000167638        <NA>
## 5 transcript:ENSMUST00000103301        <NA>
## 6 transcript:ENSMUST00000103301        <NA>
## Warning in .extract_exons_from_GRanges(cds_IDX, gr, mcols0, tx_IDX, feature = "cds", : 6 CDS couldn't be linked to a transcript so were dropped:
##   seqid     start       end strand                     ID               Name
## 1    12 113654967 113655244      + CDS:ENSMUSP00000128354 ENSMUSP00000128354
## 2    12 114630680 114630973      + CDS:ENSMUSP00000093010 ENSMUSP00000093010
## 3     6  42980690  42980744      + CDS:ENSMUSP00000131191 ENSMUSP00000131191
## 4     6  42980854  42981150      + CDS:ENSMUSP00000131191 ENSMUSP00000131191
## 5     6  43081518  43081827      - CDS:ENSMUSP00000100102 ENSMUSP00000100102
## 6     6  43082049  43082097      - CDS:ENSMUSP00000100102 ENSMUSP00000100102
##                          Parent Parent_type
## 1 transcript:ENSMUST00000166255        <NA>
## 2 transcript:ENSMUST00000095364        <NA>
## 3 transcript:ENSMUST00000167638        <NA>
## 4 transcript:ENSMUST00000167638        <NA>
## 5 transcript:ENSMUST00000103301        <NA>
## 6 transcript:ENSMUST00000103301        <NA>
## OK

Transcript intercept

tx <- transcripts(txdb)
ol <- findOverlaps(rd,tx)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': CHR_MG3251_PATCH, GL456389.1, GL456387.1, GL456393.1, CHR_MG4259_PATCH, GL456394.1, GL456368.1, CHR_CAST_EI_MMCHR11_CTG5, GL456383.1, GL456392.1, GL456396.1, GL456390.1
##   - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456219.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456372.1, GL456385.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584303.1
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol
## Hits object with 11 hits and 0 metadata columns:
##        queryHits subjectHits
##        <integer>   <integer>
##    [1]         1        6096
##    [2]         1        6097
##    [3]         1        6098
##    [4]         1        6100
##    [5]         1        6101
##    [6]         2       11452
##    [7]         8        3967
##    [8]         8        3968
##    [9]         8        3969
##   [10]         8        3970
##   [11]        13      138760
##   -------
##   queryLength: 15 / subjectLength: 142442
var_ol <- rd[queryHits(ol)]
tx_ol <- tx[subjectHits(ol)]
var_ol
## GRanges object with 11 ranges and 5 metadata columns:
##          seqnames    ranges strand | paramRangeID            REF
##             <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##       74        1  88270321      * |           NA              N
##       74        1  88270321      * |           NA              N
##       74        1  88270321      * |           NA              N
##       74        1  88270321      * |           NA              N
##       74        1  88270321      * |           NA              N
##    269_1        2  98662479      * |           NA              N
##   1937_1        1 183299135      * |           NA              N
##   1937_1        1 183299135      * |           NA              N
##   1937_1        1 183299135      * |           NA              N
##   1937_1        1 183299135      * |           NA              N
##   2024_2        X  76599135      * |           NA              N
##                      ALT      QUAL      FILTER
##          <CharacterList> <numeric> <character>
##       74           <DEL>     31.46           .
##       74           <DEL>     31.46           .
##       74           <DEL>     31.46           .
##       74           <DEL>     31.46           .
##       74           <DEL>     31.46           .
##    269_1   N]2:98667331]   1154.43           .
##   1937_1  ]13:99791042]N    201.04           .
##   1937_1  ]13:99791042]N    201.04           .
##   1937_1  ]13:99791042]N    201.04           .
##   1937_1  ]13:99791042]N    201.04           .
##   2024_2   N[4:28176942[    204.60           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
tx_ol
## GRanges object with 11 ranges and 2 metadata columns:
##        seqnames              ranges strand |     tx_id            tx_name
##           <Rle>           <IRanges>  <Rle> | <integer>        <character>
##    [1]        1   88262471-88277633      - |      6096 ENSMUST00000054674
##    [2]        1   88264828-88277557      - |      6097 ENSMUST00000065420
##    [3]        1   88264978-88277506      - |      6098 ENSMUST00000147393
##    [4]        1   88269337-88273119      - |      6100 ENSMUST00000128532
##    [5]        1   88269378-88270947      - |      6101 ENSMUST00000148384
##    [6]        2   98662237-98664083      + |     11452 ENSMUST00000099684
##    [7]        1 183297049-183325476      + |      3967 ENSMUST00000109166
##    [8]        1 183297327-183304714      + |      3968 ENSMUST00000191782
##    [9]        1 183297405-183323609      + |      3969 ENSMUST00000193625
##   [10]        1 183297570-183318834      + |      3970 ENSMUST00000193959
##   [11]        X   76582610-76602924      - |    138760 ENSMUST00000114054
##   -------
##   seqinfo: 45 sequences (1 circular) from an unspecified genome; no seqlengths
vardf <- as.data.frame(var_ol,row.names = 1:nrow(as.data.frame(ranges(var_ol,use.mcols = TRUE)))) 
head(vardf)
##   seqnames    start      end width strand paramRangeID REF          ALT    QUAL
## 1        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 2        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 3        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 4        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 5        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 6        2 98662479 98662479     1      *         <NA>   N N]2:9866.... 1154.43
##   FILTER
## 1      .
## 2      .
## 3      .
## 4      .
## 5      .
## 6      .
txdf <- as.data.frame(tx_ol,row.names = 1:nrow(as.data.frame(ranges(tx_ol))))
head(txdf)
##   seqnames    start      end width strand tx_id            tx_name
## 1        1 88262471 88277633 15163      -  6096 ENSMUST00000054674
## 2        1 88264828 88277557 12730      -  6097 ENSMUST00000065420
## 3        1 88264978 88277506 12529      -  6098 ENSMUST00000147393
## 4        1 88269337 88273119  3783      -  6100 ENSMUST00000128532
## 5        1 88269378 88270947  1570      -  6101 ENSMUST00000148384
## 6        2 98662237 98664083  1847      + 11452 ENSMUST00000099684
vardf$tx <- txdf$tx_name
head(vardf)
##   seqnames    start      end width strand paramRangeID REF          ALT    QUAL
## 1        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 2        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 3        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 4        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 5        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 6        2 98662479 98662479     1      *         <NA>   N N]2:9866.... 1154.43
##   FILTER                 tx
## 1      . ENSMUST00000054674
## 2      . ENSMUST00000065420
## 3      . ENSMUST00000147393
## 4      . ENSMUST00000128532
## 5      . ENSMUST00000148384
## 6      . ENSMUST00000099684
vardf_tx <- vardf
head(vardf_tx)
##   seqnames    start      end width strand paramRangeID REF          ALT    QUAL
## 1        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 2        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 3        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 4        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 5        1 88270321 88270321     1      *         <NA>   N        <DEL>   31.46
## 6        2 98662479 98662479     1      *         <NA>   N N]2:9866.... 1154.43
##   FILTER                 tx
## 1      . ENSMUST00000054674
## 2      . ENSMUST00000065420
## 3      . ENSMUST00000147393
## 4      . ENSMUST00000128532
## 5      . ENSMUST00000148384
## 6      . ENSMUST00000099684
dim(vardf_tx)
## [1] 11 11

Gene intercept

gx <- genes(txdb)
ol <- findOverlaps(rd,gx,select = "all")
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': CHR_MG3251_PATCH, GL456389.1, GL456387.1, GL456393.1, CHR_MG4259_PATCH, GL456394.1, GL456368.1, CHR_CAST_EI_MMCHR11_CTG5, GL456383.1, GL456392.1, GL456396.1, GL456390.1
##   - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456219.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456372.1, GL456385.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584303.1
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol
## Hits object with 4 hits and 0 metadata columns:
##       queryHits subjectHits
##       <integer>   <integer>
##   [1]         1       13243
##   [2]         2       21026
##   [3]         8       12741
##   [4]        13       22873
##   -------
##   queryLength: 15 / subjectLength: 55417
var_ol <- rd[queryHits(ol)]
gx_ol <- gx[subjectHits(ol)]
var_ol
## GRanges object with 4 ranges and 5 metadata columns:
##          seqnames    ranges strand | paramRangeID            REF
##             <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##       74        1  88270321      * |           NA              N
##    269_1        2  98662479      * |           NA              N
##   1937_1        1 183299135      * |           NA              N
##   2024_2        X  76599135      * |           NA              N
##                      ALT      QUAL      FILTER
##          <CharacterList> <numeric> <character>
##       74           <DEL>     31.46           .
##    269_1   N]2:98667331]   1154.43           .
##   1937_1  ]13:99791042]N    201.04           .
##   2024_2   N[4:28176942[    204.60           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
vardf <- as.data.frame(var_ol,row.names = 1:nrow(as.data.frame(ranges(var_ol))))
vardf$ALT <- as.character(GenomicRanges::elementMetadata(var_ol)$ALT)
gxdf <- as.data.frame(gx_ol,row.names = 1:nrow(as.data.frame(ranges(gx_ol))))
vardf$gene <- gxdf$gene_id
vardf
##   seqnames     start       end width strand paramRangeID REF            ALT
## 1        1  88270321  88270321     1      *         <NA>   N          <DEL>
## 2        2  98662479  98662479     1      *         <NA>   N  N]2:98667331]
## 3        1 183299135 183299135     1      *         <NA>   N ]13:99791042]N
## 4        X  76599135  76599135     1      *         <NA>   N  N[4:28176942[
##      QUAL FILTER               gene
## 1   31.46      . ENSMUSG00000044783
## 2 1154.43      . ENSMUSG00000075015
## 3  201.04      . ENSMUSG00000042901
## 4  204.60      . ENSMUSG00000079525
dim(vardf)
## [1]  4 11
write.table(x=vardf,file="vardf.tsv",quote = FALSE,sep = "\t",row.names = FALSE)

Session Information

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] TxDb.Mmusculus.UCSC.mm10.ensGene_3.4.0
##  [2] GenomicFeatures_1.44.2                
##  [3] AnnotationDbi_1.54.1                  
##  [4] RCircos_1.2.1                         
##  [5] gplots_3.1.1                          
##  [6] VariantAnnotation_1.38.0              
##  [7] Rsamtools_2.8.0                       
##  [8] SummarizedExperiment_1.22.0           
##  [9] Biobase_2.52.0                        
## [10] MatrixGenerics_1.4.3                  
## [11] matrixStats_0.60.1                    
## [12] eulerr_6.1.1                          
## [13] Biostrings_2.60.2                     
## [14] XVector_0.32.0                        
## [15] R.utils_2.10.1                        
## [16] R.oo_1.24.0                           
## [17] R.methodsS3_1.8.1                     
## [18] GenomicRanges_1.44.0                  
## [19] GenomeInfoDb_1.28.4                   
## [20] IRanges_2.26.0                        
## [21] S4Vectors_0.30.0                      
## [22] BiocGenerics_0.38.0                   
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7             bit64_4.0.5              filelock_1.0.2          
##  [4] progress_1.2.2           httr_1.4.2               tools_4.1.1             
##  [7] bslib_0.3.0              utf8_1.2.2               R6_2.5.1                
## [10] KernSmooth_2.23-20       DBI_1.1.1                tidyselect_1.1.1        
## [13] prettyunits_1.1.1        bit_4.0.4                curl_4.3.2              
## [16] compiler_4.1.1           xml2_1.3.2               DelayedArray_0.18.0     
## [19] rtracklayer_1.52.1       sass_0.4.0               caTools_1.18.2          
## [22] rappdirs_0.3.3           stringr_1.4.0            digest_0.6.27           
## [25] rmarkdown_2.11           pkgconfig_2.0.3          htmltools_0.5.2         
## [28] dbplyr_2.1.1             fastmap_1.1.0            BSgenome_1.60.0         
## [31] highr_0.9                rlang_0.4.11             RSQLite_2.2.8           
## [34] jquerylib_0.1.4          BiocIO_1.2.0             generics_0.1.0          
## [37] jsonlite_1.7.2           BiocParallel_1.26.2      gtools_3.9.2            
## [40] dplyr_1.0.7              RCurl_1.98-1.4           magrittr_2.0.1          
## [43] GenomeInfoDbData_1.2.6   Matrix_1.3-4             Rcpp_1.0.7              
## [46] fansi_0.5.0              lifecycle_1.0.0          stringi_1.7.4           
## [49] yaml_2.2.1               zlibbioc_1.38.0          BiocFileCache_2.0.0     
## [52] grid_4.1.1               blob_1.2.2               crayon_1.4.1            
## [55] lattice_0.20-44          hms_1.1.0                KEGGREST_1.32.0         
## [58] knitr_1.34               pillar_1.6.2             rjson_0.2.20            
## [61] biomaRt_2.48.3           XML_3.99-0.7             glue_1.4.2              
## [64] evaluate_0.14            png_0.1-7                vctrs_0.3.8             
## [67] purrr_0.3.4              assertthat_0.2.1         cachem_1.0.6            
## [70] xfun_0.26                restfulr_0.0.13          tibble_3.1.4            
## [73] GenomicAlignments_1.28.0 memoise_2.0.0            ellipsis_0.3.2