Using the VariantAnnotation package in accordance with the package workflow

Processing the first two vcf file of SRP199233

#reading in the first two VCF file with VariantAnnotation

vcf_x <- readVcf("VCF_SRP199233/SRX5884275_svtyper.vcf", "mm10")

vcf_y <- readVcf("VCF_SRP199233/SRX5884276_svtyper.vcf", "mm10")
#downloading the gff files and installing a package to unzip the gff file.

#download.file("ftp://ftp.ensembl.org/pub/release-98/gff3/mus_musculus/Mus_musculus.GRCm38.98.gff3.gz",destfile = "Mus_musculus.GRCm38.98.gff3.gz")

#library("R.utils")

#gunzip("Mus_musculus.GRCm38.98.gff3.gz")
# reading in the GFF files
gff <- read.table("Mus_musculus.GRCm38.98.gff3", sep = "\t", quote = "")
# this is a better way to look at the contents of the VCF files
rowRanges(vcf_x)
## GRanges object with 2417 ranges and 5 metadata columns:
##            seqnames    ranges strand | paramRangeID            REF
##               <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##        1          1  26685964      * |           NA              N
##        2          1  39463305      * |           NA              N
##        3          1  39701420      * |           NA              N
##        4          1  39747923      * |           NA              N
##        5          1  39828018      * |           NA              N
##      ...        ...       ...    ... .          ...            ...
##   2138_2 GL456392.1      8589      * |           NA              N
##   2139_1 GL456390.1     23732      * |           NA              N
##   2139_2 GL456396.1     10046      * |           NA              N
##   2140_1 GL456392.1     19292      * |           NA              N
##   2140_2 GL456396.1      9019      * |           NA              N
##                          ALT      QUAL      FILTER
##              <CharacterList> <numeric> <character>
##        1               <DUP>     19.26           .
##        2               <DEL>      0.00           .
##        3               <DEL>      0.00           .
##        4               <DEL>      0.00           .
##        5               <DEL>     31.46           .
##      ...                 ...       ...         ...
##   2138_2 N[GL456390.1:21531[      0.13           .
##   2139_1 ]GL456396.1:10046]N     31.46           .
##   2139_2 N[GL456390.1:23732[     31.46           .
##   2140_1  ]GL456396.1:9019]N     48.50           .
##   2140_2 N[GL456392.1:19292[     48.50           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
rd_x <- rowRanges(vcf_x)
info(vcf_x)
## DataFrame with 2417 rows and 19 columns
##             SVTYPE         SVLEN       END         STRANDS IMPRECISE
##        <character> <IntegerList> <integer> <CharacterList> <logical>
## 1              DUP          2483  26688447            -+:5      TRUE
## 2              DEL           -13  39463318            +-:4      TRUE
## 3              DEL         -1438  39702858           +-:12     FALSE
## 4              DEL          -183  39748106            +-:5     FALSE
## 5              DEL         -1935  39829953            +-:5      TRUE
## ...            ...           ...       ...             ...       ...
## 2138_2         BND                      NA            +-:4      TRUE
## 2139_1         BND                      NA           -+:10      TRUE
## 2139_2         BND                      NA           +-:10      TRUE
## 2140_1         BND                      NA           -+:28     FALSE
## 2140_2         BND                      NA           +-:28     FALSE
##                CIPOS         CIEND       CIPOS95       CIEND95          MATEID
##        <IntegerList> <IntegerList> <IntegerList> <IntegerList> <CharacterList>
## 1             -119,9        -9,181         -32,1          0,36                
## 2             -10,11         -12,9         -2,10         -12,1                
## 3               -4,4         -10,9           0,0           0,0                
## 4              -10,9         -10,9           0,0           0,0                
## 5              -10,9         -10,9          -1,1          -1,1                
## ...              ...           ...           ...           ...             ...
## 2138_2         -10,9         -10,8          -1,1          -1,1          2138_1
## 2139_1          -3,2         -10,9          -1,1           0,0          2139_2
## 2139_2         -10,9          -3,2           0,0          -1,1          2139_1
## 2140_1         -10,9          -2,0           0,0           0,0          2140_2
## 2140_2          -2,0         -10,9           0,0           0,0          2140_1
##              EVENT SECONDARY            SU            PE            SR
##        <character> <logical> <IntegerList> <IntegerList> <IntegerList>
## 1               NA     FALSE             5             5             0
## 2               NA     FALSE             4             4             0
## 3               NA     FALSE            12             6             6
## 4               NA     FALSE             5             0             5
## 5               NA     FALSE             5             3             2
## ...            ...       ...           ...           ...           ...
## 2138_2        2138      TRUE             4             2             2
## 2139_1        2139     FALSE            10             5             5
## 2139_2        2139      TRUE            10             5             5
## 2140_1        2140     FALSE            28             3            25
## 2140_2        2140      TRUE            28             3            25
##                   BD              EV                                   PRPOS
##        <IntegerList> <CharacterList>                         <CharacterList>
## 1                                    1.63734e-09,3.84941e-09,6.97546e-09,...
## 2                                    4.02714e-05,9.24947e-05,0.000214025,...
## 3                                     4.2664e-15,4.18489e-11,4.10855e-07,...
## 4                                    9.80198e-21,9.80198e-19,9.80198e-17,...
## 5                                    2.96754e-17,2.96754e-15,2.96754e-13,...
## ...              ...             ...                                     ...
## 2138_2                                4.67913e-16,1.8628e-14,7.41593e-13,...
## 2139_1                                  3.19611e-05,0.00127685,0.0510055,...
## 2139_2                               6.22132e-38,6.22132e-34,6.22132e-30,...
## 2140_1                               9.65672e-101,9.70953e-91,9.7553e-81,...
## 2140_2                                             6.32701e-22,1.00167e-10,1
##                                          PREND
##                                <CharacterList>
## 1      2.41925e-13,9.61778e-12,1.52147e-10,...
## 2            0.0351816,0.0392766,0.0443191,...
## 3      4.61213e-25,1.28089e-22,3.50382e-20,...
## 4      9.80198e-21,9.80198e-19,9.80198e-17,...
## 5       5.80289e-09,3.78972e-08,2.4738e-07,...
## ...                                        ...
## 2138_2 7.81701e-09,4.94801e-08,3.13121e-07,...
## 2139_1 6.22132e-38,6.22132e-34,6.22132e-30,...
## 2139_2    3.19611e-05,0.00127685,0.0510055,...
## 2140_1               6.32701e-22,1.00167e-10,1
## 2140_2 9.65672e-101,9.70953e-91,9.7553e-81,...
info(VariantAnnotation::header(vcf_x))
## DataFrame with 19 rows and 3 columns
##                Number        Type            Description
##           <character> <character>            <character>
## SVTYPE              1      String Type of structural v..
## SVLEN               .     Integer Difference in length..
## END                 1     Integer End position of the ..
## STRANDS             .      String Strand orientation o..
## IMPRECISE           0        Flag Imprecise structural..
## ...               ...         ...                    ...
## SR                  .     Integer Number of split read..
## BD                  .     Integer Amount of BED eviden..
## EV                  .      String Type of LUMPY eviden..
## PRPOS               .      String LUMPY probability cu..
## PREND               .      String LUMPY probability cu..
geno(vcf_x)
## List of length 19
## names(19): GT SU PE SR BD GQ SQ GL DP RO AO QR QA RS AS ASC RP AP AB
geno(VariantAnnotation::header(vcf_x))
## DataFrame with 19 rows and 3 columns
##          Number        Type            Description
##     <character> <character>            <character>
## GT            1      String               Genotype
## SU            1     Integer Number of pieces of ..
## PE            1     Integer Number of paired-end..
## SR            1     Integer Number of split read..
## BD            1     Integer Amount of BED eviden..
## ...         ...         ...                    ...
## AS            A     Integer Alternate allele spl..
## ASC           A     Integer Alternate allele cli..
## RP            1     Integer Reference allele pai..
## AP            A     Integer Alternate allele pai..
## AB            A       Float Allele balance, frac..
header(vcf_x)
## class: VCFHeader 
## samples(1): sample
## meta(3): fileDate fileformat source
## fixed(2): FILTER ALT
## info(19): SVTYPE SVLEN ... PRPOS PREND
## geno(19): GT SU ... AP AB
samples(VariantAnnotation::header(vcf_x))
## [1] "sample"
#determining the genomic positions of the data using the rowRanges function which contains information from the CHROM, POS and ID fields of the Vcf files.
rowRanges(vcf_x)
## GRanges object with 2417 ranges and 5 metadata columns:
##            seqnames    ranges strand | paramRangeID            REF
##               <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##        1          1  26685964      * |           NA              N
##        2          1  39463305      * |           NA              N
##        3          1  39701420      * |           NA              N
##        4          1  39747923      * |           NA              N
##        5          1  39828018      * |           NA              N
##      ...        ...       ...    ... .          ...            ...
##   2138_2 GL456392.1      8589      * |           NA              N
##   2139_1 GL456390.1     23732      * |           NA              N
##   2139_2 GL456396.1     10046      * |           NA              N
##   2140_1 GL456392.1     19292      * |           NA              N
##   2140_2 GL456396.1      9019      * |           NA              N
##                          ALT      QUAL      FILTER
##              <CharacterList> <numeric> <character>
##        1               <DUP>     19.26           .
##        2               <DEL>      0.00           .
##        3               <DEL>      0.00           .
##        4               <DEL>      0.00           .
##        5               <DEL>     31.46           .
##      ...                 ...       ...         ...
##   2138_2 N[GL456390.1:21531[      0.13           .
##   2139_1 ]GL456396.1:10046]N     31.46           .
##   2139_2 N[GL456390.1:23732[     31.46           .
##   2140_1  ]GL456396.1:9019]N     48.50           .
##   2140_2 N[GL456392.1:19292[     48.50           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
head(rowRanges(vcf_x, 3))
## GRanges object with 6 ranges and 5 metadata columns:
##     seqnames    ranges strand | paramRangeID            REF             ALT
##        <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet> <CharacterList>
##   1        1  26685964      * |           NA              N           <DUP>
##   2        1  39463305      * |           NA              N           <DEL>
##   3        1  39701420      * |           NA              N           <DEL>
##   4        1  39747923      * |           NA              N           <DEL>
##   5        1  39828018      * |           NA              N           <DEL>
##   6        1  39836674      * |           NA              N           <DEL>
##          QUAL      FILTER
##     <numeric> <character>
##   1     19.26           .
##   2      0.00           .
##   3      0.00           .
##   4      0.00           .
##   5     31.46           .
##   6      2.04           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
head(rowRanges(vcf_y, 3))
## GRanges object with 6 ranges and 5 metadata columns:
##     seqnames    ranges strand | paramRangeID            REF             ALT
##        <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet> <CharacterList>
##   1        1  26685964      * |           NA              N           <DUP>
##   2        1  39463305      * |           NA              N           <DEL>
##   3        1  39701420      * |           NA              N           <DEL>
##   4        1  39747923      * |           NA              N           <DEL>
##   5        1  39828018      * |           NA              N           <DEL>
##   6        1  39836674      * |           NA              N           <DEL>
##          QUAL      FILTER
##     <numeric> <character>
##   1      4.77           .
##   2      4.77           .
##   3     31.46           .
##   4     89.31           .
##   5      0.00           .
##   6      0.00           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
rd_y <- rowRanges(vcf_y)
#looking into the individuals fields of both vcf file for the first length 9
ref(vcf_x)[1:9]
## DNAStringSet object of length 9:
##     width seq
## [1]     1 N
## [2]     1 N
## [3]     1 N
## [4]     1 N
## [5]     1 N
## [6]     1 N
## [7]     1 N
## [8]     1 N
## [9]     1 N
qual(vcf_x)[1:9]
## [1] 19.26  0.00  0.00  0.00 31.46  2.04  4.77  0.00 60.25
alt(vcf_x)[1:9]
## CharacterList of length 9
## [[1]] <DUP>
## [[2]] <DEL>
## [[3]] <DEL>
## [[4]] <DEL>
## [[5]] <DEL>
## [[6]] <DEL>
## [[7]] <DEL>
## [[8]] <DEL>
## [[9]] <DEL>
ref(vcf_y)[1:9]
## DNAStringSet object of length 9:
##     width seq
## [1]     1 N
## [2]     1 N
## [3]     1 N
## [4]     1 N
## [5]     1 N
## [6]     1 N
## [7]     1 N
## [8]     1 N
## [9]     1 N
qual(vcf_y)[1:9]
## [1]  4.77  4.77 31.46 89.31  0.00  0.00 60.25  4.77 31.46
alt(vcf_y)[1:9]
## CharacterList of length 9
## [[1]] <DUP>
## [[2]] <DEL>
## [[3]] <DEL>
## [[4]] <DEL>
## [[5]] <DEL>
## [[6]] <DEL>
## [[7]] <DEL>
## [[8]] <DEL>
## [[9]] <DEL>
#getting the full list of genotype in both vcf files and describing them in `FORMAT` fields as dataframe and class
geno(vcf_x)
sapply(geno(vcf_x), DataFrame)
sapply(geno(vcf_x), class)


geno(vcf_y)
sapply(geno(vcf_y), DataFrame)
sapply(geno(vcf_y), class)

#I observed lots of differences with the genotype of both vcf when described in dataframe formats
#having a look at the genotype (GT) by analysing the data as a factor 
geno(VariantAnnotation::header(vcf_x))["GT",]
GT_x <- geno(vcf_x)$GT
GT_x <- as.factor(GT_x)
head(GT_x)
str(GT_x)
table(GT_x)


geno(VariantAnnotation::header(vcf_y))["GT",]
GT_y <- geno(vcf_y)$GT
GT_y <- as.factor(GT_y)
head(GT_y)
str(GT_y)
table(GT_y)
#comparing the two animals genotype as a single data frame 
GT <- data.frame(GT_x, GT_y)
head(GT)
GT[,c("GT_x","GT_y")]
str(GT)

#filtering the missing data found within the variants
subset(GT, GT_x != "./." & GT_y != "./.")
GT_f <- subset(GT, GT_x != "./." & GT_y != "./.")
GT_f

#using table and barplot to check the level of heterozygous and homozygous found within the two animals
table(paste(GT_f$GT_x,GT_f$GT_y))
barplot(table(GT_f$GT_x == GT_f$GT_y))

#creating a function that compares two animals together.
compare_vcf <- function(vcf_a,vcf_b) {
    gta <- geno(vcf_a)$GT
    gtb <- geno(vcf_b)$GT
    df <- data.frame(rowRanges(vcf_a),info(vcf_a))
    df <- df[,c("seqnames","start","END","SVTYPE","SVLEN","QUAL")]
    gt <- data.frame(gta,gtb)
    colnames(gt) <- c("a","b")
    gt <- data.frame(df,gt)
    nr <- nrow(gt)
    gt <- subset(gt, a != "./." & b != "./.")
    gtnr <- nrow(gt)
    missing <- nr - gtnr
    gtsame <- length(which(gt$a == gt$b))
    gtdiff <- length(which(gt$a != gt$b))
    barplot(table(gt$a),main="a")
    barplot(table(gt$b),main="b")
    gt2 <- subset(gt, a != "0/1" & b != "0/1")
    aset <- which(gt2$a == "1/1")
    bset <- which(gt2$b == "1/1")
    v1 <- list("a" = aset , "b" = bset)
    print(plot(euler(v1),quantities = TRUE,main="homozygous variants"))
    output=list("gt"=gt, "missing"=missing, "gtsame"=gtsame,"gtdiff"=gtdiff)
    return(output)
}

#function created and to be tried on two different animals read with VariantAnnotation package.
vcf_a <- readVcf("VCF_SRP199233/SRX5884277_svtyper.vcf", "mm10")
rd_a <- rowRanges(vcf_a)

vcf_b <- readVcf("VCF_SRP199233/SRX5884278_svtyper.vcf", "mm10")
rd_b <- rowRanges(vcf_b)

#attempting the new function.
res <- compare_vcf(vcf_a,vcf_b)

#str(res)
#head(res)

Analyzing all 6 Animals together

#reading in the last 2 vcf data.
vcf_d <- readVcf("VCF_SRP199233/SRX5884279_svtyper.vcf", "mm10")
rd_d <- rowRanges(vcf_d)

vcf_e <- readVcf("VCF_SRP199233/SRX5884280_svtyper.vcf", "mm10")
rd_e <- rowRanges(vcf_e)

#analyzing the geno for the 6 animals respectively.

gtx <- geno(vcf_x)$GT
gty <- geno(vcf_y)$GT
gta <- geno(vcf_a)$GT
gtb <- geno(vcf_b)$GT
gtd <- geno(vcf_d)$GT
gte <- geno(vcf_e)$GT

df <- data.frame(rowRanges(vcf_x),info(vcf_x))  #taking a look into the content for one of the vcf file.
head(df)
##   seqnames    start      end width strand paramRangeID REF   ALT  QUAL FILTER
## 1        1 26685964 26685964     1      *         <NA>   N <DUP> 19.26      .
## 2        1 39463305 39463305     1      *         <NA>   N <DEL>  0.00      .
## 3        1 39701420 39701420     1      *         <NA>   N <DEL>  0.00      .
## 4        1 39747923 39747923     1      *         <NA>   N <DEL>  0.00      .
## 5        1 39828018 39828018     1      *         <NA>   N <DEL> 31.46      .
## 6        1 39836674 39836674     1      *         <NA>   N <DEL>  2.04      .
##   SVTYPE SVLEN      END STRANDS IMPRECISE   CIPOS   CIEND CIPOS95 CIEND95
## 1    DUP  2483 26688447    -+:5      TRUE -119, 9 -9, 181  -32, 1   0, 36
## 2    DEL   -13 39463318    +-:4      TRUE -10, 11  -12, 9  -2, 10  -12, 1
## 3    DEL -1438 39702858   +-:12     FALSE   -4, 4  -10, 9    0, 0    0, 0
## 4    DEL  -183 39748106    +-:5     FALSE  -10, 9  -10, 9    0, 0    0, 0
## 5    DEL -1935 39829953    +-:5      TRUE  -10, 9  -10, 9   -1, 1   -1, 1
## 6    DEL   -75 39836749    +-:4      TRUE -10, 73  -20, 9  -3, 34  -16, 2
##   MATEID EVENT SECONDARY SU PE SR BD EV        PRPOS        PREND
## 1         <NA>     FALSE  5  5  0       1.63734e.... 2.41925e....
## 2         <NA>     FALSE  4  4  0       4.02714e.... 0.035181....
## 3         <NA>     FALSE 12  6  6       4.2664e-.... 4.61213e....
## 4         <NA>     FALSE  5  0  5       9.80198e.... 9.80198e....
## 5         <NA>     FALSE  5  3  2       2.96754e.... 5.80289e....
## 6         <NA>     FALSE  4  4  0       1.16408e.... 0.002455....
df <- df[,c("seqnames","start","END","SVTYPE","SVLEN","QUAL")]
gt <- data.frame(gtx,gty,gta,gtb,gtd,gte)
gt <- data.frame(df,gt) # this gives us the quality check of each variants for the 6 animals and from here i will need to copy objects gt into gt2 and then subset gt2 for the variants with highest confidence using the QUAL which should be > 30.
gt2 <- gt

colnames(gt) <- c("x","y","a","b","d","e") #continue working on gt objects to get the true Homozygous variants for all animals and visualize it on a venn diagram
gt <- subset(gt != "./." & gt != "0/1")
head(gt)
##      x    y    a    b    d    e  <NA>  <NA>  <NA>  <NA> <NA>  <NA>
## 1 TRUE TRUE TRUE TRUE TRUE TRUE  TRUE  TRUE  TRUE  TRUE TRUE FALSE
## 2 TRUE TRUE TRUE TRUE TRUE TRUE FALSE  TRUE  TRUE FALSE TRUE  TRUE
## 3 TRUE TRUE TRUE TRUE TRUE TRUE FALSE  TRUE FALSE  TRUE TRUE  TRUE
## 4 TRUE TRUE TRUE TRUE TRUE TRUE FALSE  TRUE FALSE  TRUE TRUE  TRUE
## 5 TRUE TRUE TRUE TRUE TRUE TRUE  TRUE FALSE FALSE  TRUE TRUE  TRUE
## 6 TRUE TRUE TRUE TRUE TRUE TRUE  TRUE FALSE  TRUE  TRUE TRUE  TRUE
table(gt) #having a tabular look of the TRUE Homozygous variants for all 6 animals.
## gt
## FALSE  TRUE 
##  5257 23193
a_set <- which(gta == "1/1")
b_set <- which(gtb == "1/1")
x_set <- which(gtx == "1/1")
y_set <- which(gty == "1/1")
d_set <- which(gtd == "1/1")
e_set <- which(gte == "1/1")

v1 <- list("a" = a_set, "b" = b_set, "x" = x_set, "y" = y_set, "d" = d_set, "e" = e_set)
print(plot(euler(v1),quantities = TRUE,main="homozygous variants"))

gt2 <- subset(gt2, gt2$QUAL>30)

gt3 <- apply(gt2[,c(7:12)], 2, function(x){
  as.numeric(as.factor(x))-2
})

gt23 <- cbind(gt2,gt3)
head(gt23)
##    seqnames    start      END SVTYPE SVLEN  QUAL sample sample.1 sample.2
## 5         1 39828018 39829953    DEL -1935 31.46    1/1      ./.      ./.
## 9         1 40565418 40565503    DEL   -85 60.25    1/1      1/1      1/1
## 12        1 42646102 42647988    DEL -1886 60.25    1/1      ./.      ./.
## 19        1 51544912 51546751    DEL -1839 31.46    1/1      ./.      ./.
## 26        1 63886209 63893328    DEL -7119 31.46    1/1      0/0      1/1
## 27        1 64151297 64153186    DEL -1889 31.46    1/1      ./.      ./.
##    sample.3 sample.4 sample.5 sample sample.1 sample.2 sample.3 sample.4
## 5       0/0      0/0      0/0      0       -1       -1        0        0
## 9       0/0      0/0      1/1      0        2        2        0        0
## 12      ./.      0/0      ./.      0       -1       -1       -1        0
## 19      1/1      ./.      0/0      0       -1       -1        2       -1
## 26      ./.      0/0      ./.      0        0        2       -1        0
## 27      1/1      1/1      ./.      0       -1       -1        2        2
##    sample.5
## 5         0
## 9         2
## 12       -1
## 19        0
## 26       -1
## 27       -1
gt2[gt2 == -1] <- NA

library(gplots)
## Registered S3 method overwritten by 'gplots':
##   method    from  
##   plot.venn eulerr
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:eulerr':
## 
##     venn
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
rownames(gt3) <- paste(gt2$seqnames,gt2$start,sep=":")
head(rownames(gt3))
## [1] "1:39828018" "1:40565418" "1:42646102" "1:51544912" "1:63886209"
## [6] "1:64151297"
colfunc <- colorRampPalette(c("yellow", "blue", "red"))
heatmap.2(gt3, trace = "none", scale = "none", dendrogram='none', Rowv=FALSE, Colv=FALSE, col=colfunc(25), margins = c(5,10), cexRow=.8, cexCol=.9,  main="Genetic Variation")

#from my little investigation, i could tell that variants with high confidence can only be identify through there quality check
#using the package below to locate and annotate the gene variants without necessary needing the gff file at the moment
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
#txdb <- TxDb.Mmusculus.UCSC.mm10.ensGene


#myseqlevels <- gsub("chrUn_","",seqlevels(txdb))
#myseqlevels <- gsub("_random","",myseqlevels)
#strReverse <- function(x) { sapply(lapply(strsplit(x, NULL), rev), paste, collapse="") }
#myseqlevels <- strReverse(myseqlevels)
#myseqlevels <- sapply(strsplit(myseqlevels , "_"),"[[",1)
#myseqlevels <- strReverse(myseqlevels)
#myseqlevels <- gsub("chr","",myseqlevels)
#myseqlevels <- gsub("M","MT",myseqlevels)
#myseqlevels


# why are the variants in rd only 1bp size?
#loc <- locateVariants(rd, txdb, CodingVariants())

#all <- locateVariants(rd, txdb, AllVariants())
gff_file <- "Mus_musculus.GRCm38.98.gff3"
txdb <- makeTxDbFromGFF(gff_file, format="gff3")
## Import genomic features from the file as a GRanges object ... OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .extract_exons_from_GRanges(exon_IDX, gr, mcols0, tx_IDX, feature = "exon", : 6 exons couldn't be linked to a transcript so were dropped:
##   seqid     start       end strand   ID               Name
## 1    12 113654967 113655244      + <NA> ENSMUSE00000892648
## 2    12 114630680 114630973      + <NA> ENSMUSE00001053179
## 3     6  42980690  42980744      + <NA> ENSMUSE00001000765
## 4     6  42980854  42981150      + <NA> ENSMUSE00000892164
## 5     6  43081518  43081827      - <NA> ENSMUSE00000663119
## 6     6  43082049  43082097      - <NA> ENSMUSE00001046644
##                          Parent Parent_type
## 1 transcript:ENSMUST00000166255        <NA>
## 2 transcript:ENSMUST00000095364        <NA>
## 3 transcript:ENSMUST00000167638        <NA>
## 4 transcript:ENSMUST00000167638        <NA>
## 5 transcript:ENSMUST00000103301        <NA>
## 6 transcript:ENSMUST00000103301        <NA>
## Warning in .extract_exons_from_GRanges(cds_IDX, gr, mcols0, tx_IDX, feature = "cds", : 6 CDS couldn't be linked to a transcript so were dropped:
##   seqid     start       end strand                     ID               Name
## 1    12 113654967 113655244      + CDS:ENSMUSP00000128354 ENSMUSP00000128354
## 2    12 114630680 114630973      + CDS:ENSMUSP00000093010 ENSMUSP00000093010
## 3     6  42980690  42980744      + CDS:ENSMUSP00000131191 ENSMUSP00000131191
## 4     6  42980854  42981150      + CDS:ENSMUSP00000131191 ENSMUSP00000131191
## 5     6  43081518  43081827      - CDS:ENSMUSP00000100102 ENSMUSP00000100102
## 6     6  43082049  43082097      - CDS:ENSMUSP00000100102 ENSMUSP00000100102
##                          Parent Parent_type
## 1 transcript:ENSMUST00000166255        <NA>
## 2 transcript:ENSMUST00000095364        <NA>
## 3 transcript:ENSMUST00000167638        <NA>
## 4 transcript:ENSMUST00000167638        <NA>
## 5 transcript:ENSMUST00000103301        <NA>
## 6 transcript:ENSMUST00000103301        <NA>
## OK
vcfd <- vcf_d #copied vcf_d file to vcfd, so as to use vcfd to have a clear look into how to annotate and locate the gene variants without altering the vcf_d files.

rd <- rowRanges(vcfd)

rd
## GRanges object with 2417 ranges and 5 metadata columns:
##            seqnames    ranges strand | paramRangeID            REF
##               <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##        1          1  26685964      * |           NA              N
##        2          1  39463305      * |           NA              N
##        3          1  39701420      * |           NA              N
##        4          1  39747923      * |           NA              N
##        5          1  39828018      * |           NA              N
##      ...        ...       ...    ... .          ...            ...
##   2138_2 GL456392.1      8589      * |           NA              N
##   2139_1 GL456390.1     23732      * |           NA              N
##   2139_2 GL456396.1     10046      * |           NA              N
##   2140_1 GL456392.1     19292      * |           NA              N
##   2140_2 GL456396.1      9019      * |           NA              N
##                          ALT      QUAL      FILTER
##              <CharacterList> <numeric> <character>
##        1               <DUP>     68.74           .
##        2               <DEL>      4.77           .
##        3               <DEL>      4.77           .
##        4               <DEL>      4.77           .
##        5               <DEL>      4.77           .
##      ...                 ...       ...         ...
##   2138_2 N[GL456390.1:21531[      0.13           .
##   2139_1 ]GL456396.1:10046]N     41.83           .
##   2139_2 N[GL456390.1:23732[     41.83           .
##   2140_1  ]GL456396.1:9019]N    110.90           .
##   2140_2 N[GL456392.1:19292[    110.90           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
rd <- rd[elementMetadata(rd)[,4]>30]


txdb
## TxDb object:
## # Db type: TxDb
## # Supporting package: GenomicFeatures
## # Data source: Mus_musculus.GRCm38.98.gff3
## # Organism: NA
## # Taxonomy ID: NA
## # miRBase build ID: NA
## # Genome: NA
## # Nb of transcripts: 142442
## # Db created by: GenomicFeatures package from Bioconductor
## # Creation time: 2021-08-23 12:59:17 +1000 (Mon, 23 Aug 2021)
## # GenomicFeatures version at creation time: 1.44.0
## # RSQLite version at creation time: 2.2.7
## # DBSCHEMAVERSION: 1.2
seqlevels(vcfd)
##  [1] "1"                        "2"                       
##  [3] "3"                        "4"                       
##  [5] "5"                        "6"                       
##  [7] "7"                        "8"                       
##  [9] "9"                        "10"                      
## [11] "11"                       "12"                      
## [13] "13"                       "14"                      
## [15] "15"                       "16"                      
## [17] "17"                       "18"                      
## [19] "19"                       "X"                       
## [21] "Y"                        "MT"                      
## [23] "CHR_MG3251_PATCH"         "JH584304.1"              
## [25] "GL456216.1"               "GL456239.1"              
## [27] "GL456389.1"               "GL456381.1"              
## [29] "GL456387.1"               "GL456393.1"              
## [31] "CHR_MG4259_PATCH"         "GL456394.1"              
## [33] "GL456368.1"               "JH584292.1"              
## [35] "CHR_CAST_EI_MMCHR11_CTG5" "GL456383.1"              
## [37] "GL456392.1"               "GL456396.1"              
## [39] "GL456390.1"
seqlevels(txdb)
##  [1] "1"          "2"          "3"          "4"          "5"         
##  [6] "6"          "7"          "8"          "9"          "10"        
## [11] "11"         "12"         "13"         "14"         "15"        
## [16] "16"         "17"         "18"         "19"         "X"         
## [21] "Y"          "MT"         "GL456210.1" "GL456211.1" "GL456212.1"
## [26] "GL456216.1" "GL456219.1" "GL456221.1" "GL456233.1" "GL456239.1"
## [31] "GL456350.1" "GL456354.1" "GL456372.1" "GL456381.1" "GL456385.1"
## [36] "JH584292.1" "JH584293.1" "JH584294.1" "JH584295.1" "JH584296.1"
## [41] "JH584297.1" "JH584298.1" "JH584299.1" "JH584303.1" "JH584304.1"
intersect(seqlevels(rd),seqlevels(txdb))
##  [1] "1"          "2"          "3"          "4"          "5"         
##  [6] "6"          "7"          "8"          "9"          "10"        
## [11] "11"         "12"         "13"         "14"         "15"        
## [16] "16"         "17"         "18"         "19"         "X"         
## [21] "Y"          "MT"         "JH584304.1" "GL456216.1" "GL456239.1"
## [26] "GL456381.1" "JH584292.1"

transcript intercept

tx <- transcripts(txdb)
ol <- findOverlaps(rd,tx)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': CHR_MG3251_PATCH, GL456389.1, GL456387.1, GL456393.1, CHR_MG4259_PATCH, GL456394.1, GL456368.1, CHR_CAST_EI_MMCHR11_CTG5, GL456383.1, GL456392.1, GL456396.1, GL456390.1
##   - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456219.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456372.1, GL456385.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584303.1
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol
## Hits object with 1086 hits and 0 metadata columns:
##          queryHits subjectHits
##          <integer>   <integer>
##      [1]         1        4630
##      [2]         6         682
##      [3]        16        5642
##      [4]        16        5643
##      [5]        16        5644
##      ...       ...         ...
##   [1082]       784      138837
##   [1083]       784      138838
##   [1084]       785      134530
##   [1085]       786      136646
##   [1086]       786      136648
##   -------
##   queryLength: 792 / subjectLength: 142442
var_ol <- rd[queryHits(ol)]
tx_ol <- tx[subjectHits(ol)]
var_ol
## GRanges object with 1086 ranges and 5 metadata columns:
##          seqnames    ranges strand | paramRangeID            REF
##             <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##        1        1  26685964      * |           NA              N
##       14        1  43030800      * |           NA              N
##       44        1  73977363      * |           NA              N
##       44        1  73977363      * |           NA              N
##       44        1  73977363      * |           NA              N
##      ...      ...       ...    ... .          ...            ...
##   2130_2        X  87238421      * |           NA              N
##   2130_2        X  87238421      * |           NA              N
##   2132_1       19  45650267      * |           NA              N
##   2132_2        X 112028707      * |           NA              N
##   2132_2        X 112028707      * |           NA              N
##                      ALT      QUAL      FILTER
##          <CharacterList> <numeric> <character>
##        1           <DUP>     68.74           .
##       14           <DEL>     60.25           .
##       44           <DEL>     31.46           .
##       44           <DEL>     31.46           .
##       44           <DEL>     31.46           .
##      ...             ...       ...         ...
##   2130_2  N]18:85698118]     41.98           .
##   2130_2  N]18:85698118]     41.98           .
##   2132_1  N[X:112028707[     60.25           .
##   2132_2  ]19:45650267]N     60.25           .
##   2132_2  ]19:45650267]N     60.25           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
tx_ol
## GRanges object with 1086 ranges and 2 metadata columns:
##          seqnames              ranges strand |     tx_id            tx_name
##             <Rle>           <IRanges>  <Rle> | <integer>        <character>
##      [1]        1   26681814-26687460      - |      4630 ENSMUST00000097801
##      [2]        1   42952963-43035456      + |       682 ENSMUST00000179766
##      [3]        1   73910231-74124447      - |      5642 ENSMUST00000169786
##      [4]        1   73910232-74091351      - |      5643 ENSMUST00000187584
##      [5]        1   73910232-74124449      - |      5644 ENSMUST00000191104
##      ...      ...                 ...    ... .       ...                ...
##   [1082]        X   86747242-87890235      - |    138837 ENSMUST00000078875
##   [1083]        X   86769118-88115645      - |    138838 ENSMUST00000113964
##   [1084]       19   45578254-45660312      - |    134530 ENSMUST00000046869
##   [1085]        X 111891389-112240590      + |    136646 ENSMUST00000207962
##   [1086]        X 112011007-112093089      + |    136648 ENSMUST00000131304
##   -------
##   seqinfo: 45 sequences (1 circular) from an unspecified genome; no seqlengths
vardf <- as.data.frame(var_ol,row.names = 1:nrow(as.data.frame(ranges(var_ol,use.mcols = TRUE)))) #read up more on the tab options to get the ALT.
head(vardf)
##   seqnames    start      end width strand paramRangeID REF   ALT  QUAL FILTER
## 1        1 26685964 26685964     1      *         <NA>   N <DUP> 68.74      .
## 2        1 43030800 43030800     1      *         <NA>   N <DEL> 60.25      .
## 3        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 4        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 5        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 6        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
txdf <- as.data.frame(tx_ol,row.names = 1:nrow(as.data.frame(ranges(tx_ol))))
head(txdf)
##   seqnames    start      end  width strand tx_id            tx_name
## 1        1 26681814 26687460   5647      -  4630 ENSMUST00000097801
## 2        1 42952963 43035456  82494      +   682 ENSMUST00000179766
## 3        1 73910231 74124447 214217      -  5642 ENSMUST00000169786
## 4        1 73910232 74091351 181120      -  5643 ENSMUST00000187584
## 5        1 73910232 74124449 214218      -  5644 ENSMUST00000191104
## 6        1 73914093 73995372  81280      -  5646 ENSMUST00000185702
vardf$tx <- txdf$tx_name
head(vardf)
##   seqnames    start      end width strand paramRangeID REF   ALT  QUAL FILTER
## 1        1 26685964 26685964     1      *         <NA>   N <DUP> 68.74      .
## 2        1 43030800 43030800     1      *         <NA>   N <DEL> 60.25      .
## 3        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 4        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 5        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 6        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
##                   tx
## 1 ENSMUST00000097801
## 2 ENSMUST00000179766
## 3 ENSMUST00000169786
## 4 ENSMUST00000187584
## 5 ENSMUST00000191104
## 6 ENSMUST00000185702
vardf_tx <- vardf
head(vardf_tx)
##   seqnames    start      end width strand paramRangeID REF   ALT  QUAL FILTER
## 1        1 26685964 26685964     1      *         <NA>   N <DUP> 68.74      .
## 2        1 43030800 43030800     1      *         <NA>   N <DEL> 60.25      .
## 3        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 4        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 5        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 6        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
##                   tx
## 1 ENSMUST00000097801
## 2 ENSMUST00000179766
## 3 ENSMUST00000169786
## 4 ENSMUST00000187584
## 5 ENSMUST00000191104
## 6 ENSMUST00000185702

gene intercept

gx <- genes(txdb)
ol <- findOverlaps(rd,gx,select = "all")
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': CHR_MG3251_PATCH, GL456389.1, GL456387.1, GL456393.1, CHR_MG4259_PATCH, GL456394.1, GL456368.1, CHR_CAST_EI_MMCHR11_CTG5, GL456383.1, GL456392.1, GL456396.1, GL456390.1
##   - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456219.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456372.1, GL456385.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584303.1
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol
## Hits object with 374 hits and 0 metadata columns:
##         queryHits subjectHits
##         <integer>   <integer>
##     [1]         1       20515
##     [2]         6       12401
##     [3]        16       15917
##     [4]        18       20495
##     [5]        19        5445
##     ...       ...         ...
##   [370]       775       27968
##   [371]       784       15270
##   [372]       785       12095
##   [373]       786        9717
##   [374]       786       46608
##   -------
##   queryLength: 792 / subjectLength: 55417
var_ol <- rd[queryHits(ol)]
gx_ol <- gx[subjectHits(ol)]
var_ol
## GRanges object with 374 ranges and 5 metadata columns:
##          seqnames    ranges strand | paramRangeID            REF
##             <Rle> <IRanges>  <Rle> |     <factor> <DNAStringSet>
##        1        1  26685964      * |           NA              N
##       14        1  43030800      * |           NA              N
##       44        1  73977363      * |           NA              N
##       55        1  79751457      * |           NA              N
##       56        1  79853964      * |           NA              N
##      ...      ...       ...    ... .          ...            ...
##   2118_1       16  35981692      * |           NA              N
##   2130_2        X  87238421      * |           NA              N
##   2132_1       19  45650267      * |           NA              N
##   2132_2        X 112028707      * |           NA              N
##   2132_2        X 112028707      * |           NA              N
##                      ALT      QUAL      FILTER
##          <CharacterList> <numeric> <character>
##        1           <DUP>     68.74           .
##       14           <DEL>     60.25           .
##       44           <DEL>     31.46           .
##       55           <DEL>     60.25           .
##       56           <DEL>     60.25           .
##      ...             ...       ...         ...
##   2118_1  [17:70963682[N     48.50           .
##   2130_2  N]18:85698118]     41.98           .
##   2132_1  N[X:112028707[     60.25           .
##   2132_2  ]19:45650267]N     60.25           .
##   2132_2  ]19:45650267]N     60.25           .
##   -------
##   seqinfo: 39 sequences from mm10 genome; no seqlengths
vardf <- as.data.frame(var_ol,row.names = 1:nrow(as.data.frame(ranges(var_ol))))
vardf$ALT <- as.character(GenomicRanges::elementMetadata(var_ol)$ALT)
head(vardf)
##   seqnames    start      end width strand paramRangeID REF   ALT  QUAL FILTER
## 1        1 26685964 26685964     1      *         <NA>   N <DUP> 68.74      .
## 2        1 43030800 43030800     1      *         <NA>   N <DEL> 60.25      .
## 3        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 4        1 79751457 79751457     1      *         <NA>   N <DEL> 60.25      .
## 5        1 79853964 79853964     1      *         <NA>   N <DEL> 60.25      .
## 6        1 80706070 80706070     1      *         <NA>   N <DEL> 31.46      .
gxdf <- as.data.frame(gx_ol,row.names = 1:nrow(as.data.frame(ranges(gx_ol))))
head(gxdf)
##   seqnames    start      end  width strand            gene_id
## 1        1 26681814 26687460   5647      - ENSMUSG00000073722
## 2        1 42952872 43035456  82585      + ENSMUSG00000041907
## 3        1 73910231 74124449 214219      - ENSMUSG00000055322
## 4        1 79702262 79776143  73882      - ENSMUSG00000073643
## 5        1 79794197 79861180  66984      - ENSMUSG00000026249
## 6        1 80501073 80758527 257455      - ENSMUSG00000038608
vardf$gene <- gxdf$gene_id

head(vardf)
##   seqnames    start      end width strand paramRangeID REF   ALT  QUAL FILTER
## 1        1 26685964 26685964     1      *         <NA>   N <DUP> 68.74      .
## 2        1 43030800 43030800     1      *         <NA>   N <DEL> 60.25      .
## 3        1 73977363 73977363     1      *         <NA>   N <DEL> 31.46      .
## 4        1 79751457 79751457     1      *         <NA>   N <DEL> 60.25      .
## 5        1 79853964 79853964     1      *         <NA>   N <DEL> 60.25      .
## 6        1 80706070 80706070     1      *         <NA>   N <DEL> 31.46      .
##                 gene
## 1 ENSMUSG00000073722
## 2 ENSMUSG00000041907
## 3 ENSMUSG00000055322
## 4 ENSMUSG00000073643
## 5 ENSMUSG00000026249
## 6 ENSMUSG00000038608

locating and merging the gene names

genenames <- read.table("Mus_musculus.GRCm38.98.gff3.genenames.tsv", header = FALSE, sep = "\t", quote = "")
colnames(genenames) <- c("Ensembl ID","Gene Name")
head(genenames)
##           Ensembl ID     Gene Name
## 1 ENSMUSG00000102693 4933401J01Rik
## 2 ENSMUSG00000051951          Xkr4
## 3 ENSMUSG00000103377       Gm37180
## 4 ENSMUSG00000104017       Gm37363
## 5 ENSMUSG00000103025       Gm37686
## 6 ENSMUSG00000103201       Gm37329
gene_variants <- merge(genenames, vardf,by.x="Ensembl ID", by.y="gene")
head(gene_variants)
##           Ensembl ID Gene Name seqnames     start       end width strand
## 1 ENSMUSG00000000197     Nalcn       14 123401210 123401210     1      *
## 2 ENSMUSG00000000275    Trim25       11  89009389  89009389     1      *
## 3 ENSMUSG00000000305      Cdh4        2 179446174 179446174     1      *
## 4 ENSMUSG00000001313      Rnd2       11 101469662 101469662     1      *
## 5 ENSMUSG00000001630    Stk38l        6 146764296 146764296     1      *
## 6 ENSMUSG00000001768      Rin2        2 145691244 145691244     1      *
##   paramRangeID REF   ALT  QUAL FILTER
## 1         <NA>   N <DEL> 31.46      .
## 2         <NA>   N <DEL> 60.25      .
## 3         <NA>   N <DEL> 60.25      .
## 4         <NA>   N <DEL> 31.46      .
## 5         <NA>   N <DEL> 89.31      .
## 6         <NA>   N <DEL> 89.31      .

Circos plot

Subset from the gene variants objects to get the 3 types of variation to show on the circos plot: ie Deletions, Duplications and Break-ends.

library("RCircos")

Session Information

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] RCircos_1.2.1                         
##  [2] TxDb.Mmusculus.UCSC.mm10.ensGene_3.4.0
##  [3] GenomicFeatures_1.44.0                
##  [4] AnnotationDbi_1.54.1                  
##  [5] gplots_3.1.1                          
##  [6] VariantAnnotation_1.38.0              
##  [7] Rsamtools_2.8.0                       
##  [8] SummarizedExperiment_1.22.0           
##  [9] Biobase_2.52.0                        
## [10] MatrixGenerics_1.4.2                  
## [11] matrixStats_0.60.0                    
## [12] eulerr_6.1.0                          
## [13] Biostrings_2.60.2                     
## [14] XVector_0.32.0                        
## [15] R.utils_2.10.1                        
## [16] R.oo_1.24.0                           
## [17] R.methodsS3_1.8.1                     
## [18] GenomicRanges_1.44.0                  
## [19] GenomeInfoDb_1.28.1                   
## [20] IRanges_2.26.0                        
## [21] S4Vectors_0.30.0                      
## [22] BiocGenerics_0.38.0                   
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7             bit64_4.0.5              filelock_1.0.2          
##  [4] progress_1.2.2           httr_1.4.2               tools_4.1.0             
##  [7] bslib_0.2.5.1            utf8_1.2.2               R6_2.5.0                
## [10] KernSmooth_2.23-20       DBI_1.1.1                tidyselect_1.1.1        
## [13] prettyunits_1.1.1        bit_4.0.4                curl_4.3.2              
## [16] compiler_4.1.0           xml2_1.3.2               DelayedArray_0.18.0     
## [19] rtracklayer_1.52.0       sass_0.4.0               caTools_1.18.2          
## [22] rappdirs_0.3.3           stringr_1.4.0            digest_0.6.27           
## [25] rmarkdown_2.10           pkgconfig_2.0.3          htmltools_0.5.1.1       
## [28] dbplyr_2.1.1             fastmap_1.1.0            BSgenome_1.60.0         
## [31] highr_0.9                rlang_0.4.11             RSQLite_2.2.7           
## [34] jquerylib_0.1.4          BiocIO_1.2.0             generics_0.1.0          
## [37] jsonlite_1.7.2           BiocParallel_1.26.1      gtools_3.9.2            
## [40] dplyr_1.0.7              RCurl_1.98-1.3           magrittr_2.0.1          
## [43] GenomeInfoDbData_1.2.6   Matrix_1.3-4             Rcpp_1.0.7              
## [46] fansi_0.5.0              lifecycle_1.0.0          stringi_1.7.3           
## [49] yaml_2.2.1               zlibbioc_1.38.0          BiocFileCache_2.0.0     
## [52] grid_4.1.0               blob_1.2.2               crayon_1.4.1            
## [55] lattice_0.20-44          hms_1.1.0                KEGGREST_1.32.0         
## [58] polylabelr_0.2.0         knitr_1.33               pillar_1.6.2            
## [61] rjson_0.2.20             biomaRt_2.48.2           XML_3.99-0.6            
## [64] glue_1.4.2               evaluate_0.14            png_0.1-7               
## [67] vctrs_0.3.8              purrr_0.3.4              polyclip_1.10-0         
## [70] assertthat_0.2.1         cachem_1.0.5             xfun_0.25               
## [73] restfulr_0.0.13          tibble_3.1.3             GenomicAlignments_1.28.0
## [76] memoise_2.0.0            ellipsis_0.3.2