Source: https://github.com/markziemann/dftd

Introduction

Here I’m reading in some SNP files from the cancer cell article: https://www.cell.com/cancer-cell/fulltext/S1535-6108(18)30117-X to identify SNPs in exons that could be used to distinguish the lines.

The two files contain SNPs that are present in the DFTD1 and DFTD2 cell types respectively. I’m not sure whether these SNP sets are mutually exclusive; they may overlap, which I will test.

There are 8 samples which are of interest to us.

Sample type Sample label
DFT1 86T
DFT1 88T
DFT2 202T2
DFT2 203T3
Normal 91H
Normal 31H
Normal 202H1
Normal 203H
suppressPackageStartupMessages({
  library("eulerr")
  library("kableExtra")
  library("GenomicRanges")
  library("readxl")
})

Load data

Here I’m loading the data in and only selecting the DFTD lines.

# Import the DFT1 SNV table; the first line of the file supplies column names.
snv_path <- "DFT1_SNVs.txt"
d1 <- read.table(file = snv_path, header = TRUE)
# Preview the first rows to check the import.
head(d1)
##                     SCAFFOLD     POS REF ALT  X86T  X88T X202T2 X203T3 X202H1
## 1 Chr1_supercontig_000000000  442678   T   C  0/70 32/58   0/31   0/36   0/39
## 2 Chr1_supercontig_000000000  482371   G   A  0/82 48/82   0/26   0/53   0/55
## 3 Chr1_supercontig_000000000  699656   C   T 36/80  0/72   0/41   0/29   0/53
## 4 Chr1_supercontig_000000000  708655   G   T  0/65 29/61   0/30   0/33   0/42
## 5 Chr1_supercontig_000000000 1515865   C   T 33/77  0/65   0/37   0/44   0/56
## 6 Chr1_supercontig_000000000 1801031   C   T  1/75 25/55   0/30   0/36   0/36
##   X203H X91H X31H X110H X112H X115H1 X117H X119H X11H X122H1 X124H X133H X134H
## 1  0/44 0/68  0/9   0/2   0/1    0/0   0/2   0/2  0/1    0/2   0/1   0/1   0/3
## 2  0/71 0/70 0/15   0/3   0/1    0/1   0/1   0/3  0/0    0/3   0/0   0/3   0/1
## 3  0/39 1/73 0/21   0/2   0/1    0/1   0/3   0/0  0/0    0/0   0/0   0/5   0/1
## 4  0/68 0/55 0/21   0/2   0/1    0/1   0/0   0/1  0/1    0/2   0/1   0/1   0/1
## 5  0/41 0/64 0/23   0/2   0/0    0/1   0/2   0/2  0/1    0/0   0/3   0/0   0/5
## 6  0/37 0/51 0/24   0/1   0/0    0/1   0/1   0/3  0/0    0/1   0/0   0/0   0/3
##   X238H X244H X264H X265H X266H X267H X268H X269H X270H X271H X317H X347H X379H
## 1   0/6   0/0   0/2   0/0   0/0   0/2   0/1   0/1   0/0   0/1   0/0   0/0   0/1
## 2   0/2   0/3   0/1   0/3   0/0   0/0   0/2   0/3   0/2   0/3   0/3   0/0   0/1
## 3   0/2   0/1   0/1   0/2   0/2   0/1   0/3   0/2   0/2   0/0   0/5   0/1   0/2
## 4   0/0   0/2   0/0   0/0   0/1   0/2   0/0   0/1   0/0   0/3   0/6   0/0   0/1
## 5   0/2   0/2   0/1   0/2   0/3   0/2   0/1   0/3   0/2   0/3   0/3   0/0   0/2
## 6   0/0   0/2   0/0   0/0   0/2   0/2   0/1   0/0   0/1   0/2   0/3   0/0   0/1
##   X398H X420H X442H X443H X444H X63H1 X811H ERS1202857 ERS1202858 ERS1202859
## 1   0/1   0/2   0/1   0/1   0/1   0/0   0/1        0/6       0/15        0/8
## 2   0/4   0/1   0/0   0/2   0/0   0/3   0/2        0/7       0/14       0/14
## 3   0/0   0/0   0/2   0/3   0/1   0/2   0/2        0/3       0/12        0/7
## 4   0/1   0/3   0/1   0/6   0/0   0/2   0/1        0/7       0/13       0/14
## 5   0/0   0/4   0/1   0/0   0/0   0/3   0/1        0/2       0/11        0/9
## 6   0/3   0/2   0/1   0/1   0/0   0/0   0/1        0/4        0/6       0/10
##   ERS1202860 ERS1202861 ERS682204 ERS682205 ERS682206 ERS682207 ERS682208
## 1        0/9        0/5       0/6      0/11       0/7      0/14       0/8
## 2        0/6        0/8      0/16      0/14       0/6      0/19      0/23
## 3        0/4       0/10      0/16      0/18       0/5      0/19      0/14
## 4        0/5       0/10      0/15      0/14       0/7      0/18      0/17
## 5        0/8       0/10      0/22      0/15       0/9      0/13       0/9
## 6        0/8       0/12       0/7      0/11      0/11      0/12      0/16
##   ERS682209 ERS682210
## 1      0/13       0/5
## 2      0/10      0/19
## 3      0/14      0/13
## 4      0/16      0/11
## 5       0/8      0/21
## 6      0/12      0/12
str(d1)
## 'data.frame':    9608 obs. of  54 variables:
##  $ SCAFFOLD  : chr  "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" ...
##  $ POS       : int  442678 482371 699656 708655 1515865 1801031 1815761 1858650 2116270 2122511 ...
##  $ REF       : chr  "T" "G" "C" "G" ...
##  $ ALT       : chr  "C" "A" "T" "T" ...
##  $ X86T      : chr  "0/70" "0/82" "36/80" "0/65" ...
##  $ X88T      : chr  "32/58" "48/82" "0/72" "29/61" ...
##  $ X202T2    : chr  "0/31" "0/26" "0/41" "0/30" ...
##  $ X203T3    : chr  "0/36" "0/53" "0/29" "0/33" ...
##  $ X202H1    : chr  "0/39" "0/55" "0/53" "0/42" ...
##  $ X203H     : chr  "0/44" "0/71" "0/39" "0/68" ...
##  $ X91H      : chr  "0/68" "0/70" "1/73" "0/55" ...
##  $ X31H      : chr  "0/9" "0/15" "0/21" "0/21" ...
##  $ X110H     : chr  "0/2" "0/3" "0/2" "0/2" ...
##  $ X112H     : chr  "0/1" "0/1" "0/1" "0/1" ...
##  $ X115H1    : chr  "0/0" "0/1" "0/1" "0/1" ...
##  $ X117H     : chr  "0/2" "0/1" "0/3" "0/0" ...
##  $ X119H     : chr  "0/2" "0/3" "0/0" "0/1" ...
##  $ X11H      : chr  "0/1" "0/0" "0/0" "0/1" ...
##  $ X122H1    : chr  "0/2" "0/3" "0/0" "0/2" ...
##  $ X124H     : chr  "0/1" "0/0" "0/0" "0/1" ...
##  $ X133H     : chr  "0/1" "0/3" "0/5" "0/1" ...
##  $ X134H     : chr  "0/3" "0/1" "0/1" "0/1" ...
##  $ X238H     : chr  "0/6" "0/2" "0/2" "0/0" ...
##  $ X244H     : chr  "0/0" "0/3" "0/1" "0/2" ...
##  $ X264H     : chr  "0/2" "0/1" "0/1" "0/0" ...
##  $ X265H     : chr  "0/0" "0/3" "0/2" "0/0" ...
##  $ X266H     : chr  "0/0" "0/0" "0/2" "0/1" ...
##  $ X267H     : chr  "0/2" "0/0" "0/1" "0/2" ...
##  $ X268H     : chr  "0/1" "0/2" "0/3" "0/0" ...
##  $ X269H     : chr  "0/1" "0/3" "0/2" "0/1" ...
##  $ X270H     : chr  "0/0" "0/2" "0/2" "0/0" ...
##  $ X271H     : chr  "0/1" "0/3" "0/0" "0/3" ...
##  $ X317H     : chr  "0/0" "0/3" "0/5" "0/6" ...
##  $ X347H     : chr  "0/0" "0/0" "0/1" "0/0" ...
##  $ X379H     : chr  "0/1" "0/1" "0/2" "0/1" ...
##  $ X398H     : chr  "0/1" "0/4" "0/0" "0/1" ...
##  $ X420H     : chr  "0/2" "0/1" "0/0" "0/3" ...
##  $ X442H     : chr  "0/1" "0/0" "0/2" "0/1" ...
##  $ X443H     : chr  "0/1" "0/2" "0/3" "0/6" ...
##  $ X444H     : chr  "0/1" "0/0" "0/1" "0/0" ...
##  $ X63H1     : chr  "0/0" "0/3" "0/2" "0/2" ...
##  $ X811H     : chr  "0/1" "0/2" "0/2" "0/1" ...
##  $ ERS1202857: chr  "0/6" "0/7" "0/3" "0/7" ...
##  $ ERS1202858: chr  "0/15" "0/14" "0/12" "0/13" ...
##  $ ERS1202859: chr  "0/8" "0/14" "0/7" "0/14" ...
##  $ ERS1202860: chr  "0/9" "0/6" "0/4" "0/5" ...
##  $ ERS1202861: chr  "0/5" "0/8" "0/10" "0/10" ...
##  $ ERS682204 : chr  "0/6" "0/16" "0/16" "0/15" ...
##  $ ERS682205 : chr  "0/11" "0/14" "0/18" "0/14" ...
##  $ ERS682206 : chr  "0/7" "0/6" "0/5" "0/7" ...
##  $ ERS682207 : chr  "0/14" "0/19" "0/19" "0/18" ...
##  $ ERS682208 : chr  "0/8" "0/23" "0/14" "0/17" ...
##  $ ERS682209 : chr  "0/13" "0/10" "0/14" "0/16" ...
##  $ ERS682210 : chr  "0/5" "0/19" "0/13" "0/11" ...
# Keep the variant coordinates/alleles plus the four tumour samples
# (86T, 88T, 202T2, 203T3); every other sample column is dropped.
tumour_cols <- c(
  "SCAFFOLD", "POS", "REF", "ALT",
  "X86T", "X88T", "X202T2", "X203T3"
)
d1 <- d1[, tumour_cols]
head(d1)
##                     SCAFFOLD     POS REF ALT  X86T  X88T X202T2 X203T3
## 1 Chr1_supercontig_000000000  442678   T   C  0/70 32/58   0/31   0/36
## 2 Chr1_supercontig_000000000  482371   G   A  0/82 48/82   0/26   0/53
## 3 Chr1_supercontig_000000000  699656   C   T 36/80  0/72   0/41   0/29
## 4 Chr1_supercontig_000000000  708655   G   T  0/65 29/61   0/30   0/33
## 5 Chr1_supercontig_000000000 1515865   C   T 33/77  0/65   0/37   0/44
## 6 Chr1_supercontig_000000000 1801031   C   T  1/75 25/55   0/30   0/36

Cleaning data

Now look at the SNPs which are common to the two DFTD1 lines, 86 and 88.

First need to parse out the data into numerical format.

# Convert the "alt/total" read-count strings of the four tumour samples into a
# numeric matrix with one <sample>_alt and one <sample>_total column each.
# Columns are selected by name so this step does not depend on column position.
sample_cols <- c(x86 = "X86T", x88 = "X88T", x202 = "X202T2", x203 = "X203T3")

# Parse one character vector of "alt/total" strings into an n x 2 numeric
# matrix (alt, total). Building the matrix explicitly keeps two columns even
# for zero or one input rows, where apply(m, 2, as.numeric) would drop
# dimensions.
parse_counts <- function(counts) {
  parts <- strsplit(counts, "/", fixed = TRUE)
  # [1:2] pads malformed/NA entries with NA instead of recycling values
  t(vapply(parts, function(p) as.numeric(p)[1:2], numeric(2)))
}

g1 <- do.call(cbind, unname(lapply(d1[sample_cols], parse_counts)))

# x86_alt, x86_total, x88_alt, ... in the same order as sample_cols
colnames(g1) <- paste0(
  rep(names(sample_cols), each = 2L),
  c("_alt", "_total")
)
head(g1)
##      x86_alt x86_total x88_alt x88_total x202_alt x202_total x203_alt
## [1,]       0        70      32        58        0         31        0
## [2,]       0        82      48        82        0         26        0
## [3,]      36        80       0        72        0         41        0
## [4,]       0        65      29        61        0         30        0
## [5,]      33        77       0        65        0         37        0
## [6,]       1        75      25        55        0         30        0
##      x203_total
## [1,]         36
## [2,]         53
## [3,]         29
## [4,]         33
## [5,]         44
## [6,]         36
# Append the parsed numeric count columns to the right of the SNV table,
# keeping the column names exactly as they are.
x1 <- data.frame(d1, g1, check.names = FALSE)
str(x1)
## 'data.frame':    9608 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" ...
##  $ POS       : int  442678 482371 699656 708655 1515865 1801031 1815761 1858650 2116270 2122511 ...
##  $ REF       : chr  "T" "G" "C" "G" ...
##  $ ALT       : chr  "C" "A" "T" "T" ...
##  $ X86T      : chr  "0/70" "0/82" "36/80" "0/65" ...
##  $ X88T      : chr  "32/58" "48/82" "0/72" "29/61" ...
##  $ X202T2    : chr  "0/31" "0/26" "0/41" "0/30" ...
##  $ X203T3    : chr  "0/36" "0/53" "0/29" "0/33" ...
##  $ x86_alt   : num  0 0 36 0 33 1 0 0 0 0 ...
##  $ x86_total : num  70 82 80 65 77 75 80 94 98 101 ...
##  $ x88_alt   : num  32 48 0 29 0 25 31 33 35 33 ...
##  $ x88_total : num  58 82 72 61 65 55 61 64 81 81 ...
##  $ x202_alt  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ x202_total: num  31 26 41 30 37 30 32 30 37 40 ...
##  $ x203_alt  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ x203_total: num  36 53 29 33 44 36 43 37 35 43 ...

Filtering

Now we can filter further.

Use a 25% allele frequency filter to identify the SNPs for each DFTD line.

# Alt-allele frequency (alt reads / total reads) of every SNV in each line.
# A site with zero total reads gives NaN, which which() discards below.
af_86 <- x1$x86_alt / x1$x86_total
af_88 <- x1$x88_alt / x1$x88_total
af_202 <- x1$x202_alt / x1$x202_total
af_203 <- x1$x203_alt / x1$x203_total

# Retain the SNVs whose alt-allele frequency exceeds 25% in the given line.
x86 <- x1[which(af_86 > 0.25), ]
x88 <- x1[which(af_88 > 0.25), ]
x202 <- x1[which(af_202 > 0.25), ]
x203 <- x1[which(af_203 > 0.25), ]
str(x86)
## 'data.frame':    5678 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000002" "Chr1_supercontig_000000002" ...
##  $ POS       : int  699656 1515865 323555 357826 703114 188150 732837 972351 1112036 1377188 ...
##  $ REF       : chr  "C" "C" "T" "G" ...
##  $ ALT       : chr  "T" "T" "G" "A" ...
##  $ X86T      : chr  "36/80" "33/77" "39/86" "30/66" ...
##  $ X88T      : chr  "0/72" "0/65" "0/54" "20/41" ...
##  $ X202T2    : chr  "0/41" "0/37" "0/33" "0/28" ...
##  $ X203T3    : chr  "0/29" "0/44" "0/45" "0/33" ...
##  $ x86_alt   : num  36 33 39 30 38 42 30 34 34 29 ...
##  $ x86_total : num  80 77 86 66 71 68 57 76 73 54 ...
##  $ x88_alt   : num  0 0 0 20 0 0 20 27 0 25 ...
##  $ x88_total : num  72 65 54 41 66 55 42 52 77 50 ...
##  $ x202_alt  : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ x202_total: num  41 37 33 28 31 25 23 19 62 24 ...
##  $ x203_alt  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ x203_total: num  29 44 45 33 41 33 26 31 78 24 ...
str(x88)
## 'data.frame':    6231 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" "Chr1_supercontig_000000000" ...
##  $ POS       : int  442678 482371 708655 1801031 1815761 1858650 2116270 2122511 332336 357826 ...
##  $ REF       : chr  "T" "G" "G" "C" ...
##  $ ALT       : chr  "C" "A" "T" "T" ...
##  $ X86T      : chr  "0/70" "0/82" "0/65" "1/75" ...
##  $ X88T      : chr  "32/58" "48/82" "29/61" "25/55" ...
##  $ X202T2    : chr  "0/31" "0/26" "0/30" "0/30" ...
##  $ X203T3    : chr  "0/36" "0/53" "0/33" "0/36" ...
##  $ x86_alt   : num  0 0 0 1 0 0 0 0 0 30 ...
##  $ x86_total : num  70 82 65 75 80 94 98 101 79 66 ...
##  $ x88_alt   : num  32 48 29 25 31 33 35 33 32 20 ...
##  $ x88_total : num  58 82 61 55 61 64 81 81 62 41 ...
##  $ x202_alt  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ x202_total: num  31 26 30 30 32 30 37 40 25 28 ...
##  $ x203_alt  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ x203_total: num  36 53 33 36 43 37 35 43 33 33 ...
str(x202)
## 'data.frame':    6 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr3_supercontig_000000088" "Chr3_supercontig_000001581" "Chr3_supercontig_000001581" "Chr4_supercontig_000000337" ...
##  $ POS       : int  3752 18502 18503 62371 146017 42492
##  $ REF       : chr  "T" "C" "A" "C" ...
##  $ ALT       : chr  "C" "G" "C" "T" ...
##  $ X86T      : chr  "6/6" "6/12" "6/12" "9/23" ...
##  $ X88T      : chr  "3/3" "0/7" "0/7" "5/15" ...
##  $ X202T2    : chr  "4/4" "3/8" "4/8" "4/11" ...
##  $ X203T3    : chr  "4/7" "2/6" "2/6" "4/16" ...
##  $ x86_alt   : num  6 6 6 9 7 7
##  $ x86_total : num  6 12 12 23 7 47
##  $ x88_alt   : num  3 0 0 5 0 1
##  $ x88_total : num  3 7 7 15 0 26
##  $ x202_alt  : num  4 3 4 4 4 3
##  $ x202_total: num  4 8 8 11 4 10
##  $ x203_alt  : num  4 2 2 4 0 0
##  $ x203_total: num  7 6 6 16 0 12
str(x203)
## 'data.frame':    7 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr2_supercontig_000000157" "Chr2_supercontig_000000457" "Chr3_supercontig_000000088" "Chr3_supercontig_000001581" ...
##  $ POS       : int  1561315 2526161 3752 18502 18503 719185 40142
##  $ REF       : chr  "T" "A" "T" "C" ...
##  $ ALT       : chr  "G" "T" "C" "G" ...
##  $ X86T      : chr  "5/23" "5/5" "6/6" "6/12" ...
##  $ X88T      : chr  "3/11" "2/2" "3/3" "0/7" ...
##  $ X202T2    : chr  "1/11" "0/1" "4/4" "3/8" ...
##  $ X203T3    : chr  "3/9" "4/7" "4/7" "2/6" ...
##  $ x86_alt   : num  5 5 6 6 6 8 5
##  $ x86_total : num  23 5 6 12 12 50 8
##  $ x88_alt   : num  3 2 3 0 0 3 2
##  $ x88_total : num  11 2 3 7 7 53 19
##  $ x202_alt  : num  1 0 4 3 4 2 0
##  $ x202_total: num  11 1 4 8 8 19 7
##  $ x203_alt  : num  3 4 4 2 2 4 1
##  $ x203_total: num  9 7 7 6 6 13 3

Importantly, roughly 6000 SNPs passed the filter in each DFTD1 line (5678 in 86T and 6231 in 88T), but only 6 and 7 passed in the DFTD2 lines (202T2 and 203T3 respectively).

Let’s see whether these are specific to one cell line.

# SNVs called in 86T (frequency > 25%) that stay below 25% in both DFT2 lines.
# which() keeps only rows where the combined condition is TRUE, so rows whose
# condition evaluates to NA (e.g. zero total reads in a sample) are dropped.
called_in_86 <- x1$x86_alt / x1$x86_total > 0.25
below_in_202 <- x1$x202_alt / x1$x202_total < 0.25
below_in_203 <- x1$x203_alt / x1$x203_total < 0.25
x86_2 <- x1[which(called_in_86 & below_in_202 & below_in_203), ]
dim(x86)
## [1] 5678   16
dim(x86_2)
## [1] 5667   16
# SNVs called in 88T (frequency > 25%) that stay below 25% in both DFT2 lines.
# which() keeps only rows where the combined condition is TRUE, so rows whose
# condition evaluates to NA (e.g. zero total reads in a sample) are dropped.
called_in_88 <- x1$x88_alt / x1$x88_total > 0.25
below_in_202 <- x1$x202_alt / x1$x202_total < 0.25
below_in_203 <- x1$x203_alt / x1$x203_total < 0.25
x88_2 <- x1[which(called_in_88 & below_in_202 & below_in_203), ]
dim(x88)
## [1] 6231   16
dim(x88_2)
## [1] 6222   16

This shows that of the 5678 SNPs detected in x86, 5667 were absent from both DFTD2 lines (x202 and x203).

Of the 6231 SNPs detected in x88, 6222 were absent from both DFTD2 lines (x202 and x203).

This means that 5000-6000 SNPs can be used to distinguish genomes.

# SNVs exceeding 25% alt-allele frequency in both DFT1 lines (86T and 88T).
in_86 <- x1$x86_alt / x1$x86_total > 0.25
in_88 <- x1$x88_alt / x1$x88_total > 0.25
d1_snps <- x1[which(in_86 & in_88), ]
str(d1_snps)
## 'data.frame':    2745 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr1_supercontig_000000002" "Chr1_supercontig_000000004" "Chr1_supercontig_000000005" "Chr1_supercontig_000000005" ...
##  $ POS       : int  357826 732837 972351 1377188 3665329 291757 293015 866865 855884 1593561 ...
##  $ REF       : chr  "G" "G" "A" "G" ...
##  $ ALT       : chr  "A" "A" "C" "A" ...
##  $ X86T      : chr  "30/66" "30/57" "34/76" "29/54" ...
##  $ X88T      : chr  "20/41" "20/42" "27/52" "25/50" ...
##  $ X202T2    : chr  "0/28" "0/23" "0/19" "1/24" ...
##  $ X203T3    : chr  "0/33" "0/26" "0/31" "0/24" ...
##  $ x86_alt   : num  30 30 34 29 41 26 45 40 39 26 ...
##  $ x86_total : num  66 57 76 54 75 57 79 79 81 55 ...
##  $ x88_alt   : num  20 20 27 25 35 23 33 28 33 22 ...
##  $ x88_total : num  41 42 52 50 55 50 65 55 59 44 ...
##  $ x202_alt  : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ x202_total: num  28 23 19 24 29 26 37 38 61 53 ...
##  $ x203_alt  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ x203_total: num  33 26 31 24 34 24 36 33 70 57 ...
# SNVs exceeding 25% alt-allele frequency in both DFT2 lines (202T2, 203T3).
in_202 <- x1$x202_alt / x1$x202_total > 0.25
in_203 <- x1$x203_alt / x1$x203_total > 0.25
d2_snps <- x1[which(in_202 & in_203), ]
str(d2_snps)
## 'data.frame':    3 obs. of  16 variables:
##  $ SCAFFOLD  : chr  "Chr3_supercontig_000000088" "Chr3_supercontig_000001581" "Chr3_supercontig_000001581"
##  $ POS       : int  3752 18502 18503
##  $ REF       : chr  "T" "C" "A"
##  $ ALT       : chr  "C" "G" "C"
##  $ X86T      : chr  "6/6" "6/12" "6/12"
##  $ X88T      : chr  "3/3" "0/7" "0/7"
##  $ X202T2    : chr  "4/4" "3/8" "4/8"
##  $ X203T3    : chr  "4/7" "2/6" "2/6"
##  $ x86_alt   : num  6 6 6
##  $ x86_total : num  6 12 12
##  $ x88_alt   : num  3 0 0
##  $ x88_total : num  3 7 7
##  $ x202_alt  : num  4 3 4
##  $ x202_total: num  4 8 8
##  $ x203_alt  : num  4 2 2
##  $ x203_total: num  7 6 6

This indicates 2745 SNPs which are common to the DFTD1 cell lines, while only 3 SNPs were common to DFTD2 cell lines.

Given that there are only 2745 SNPs which are common to both DFTD1 lines, we had better focus on the 6222 which can distinguish x88.

Exonic SNPs

Next, let’s look at the 6222 SNPs that are present in x88 and not in the DFTD2 lines and see how many are located in exons.

The exon bedfile was generated with the gff2bed.sh script. That script extracts CDS coordinates from the GTF file. We use the GTF as input because it has the gene ID number which can be matched to the RNA-seq.

# Load the CDS intervals (headerless, four columns) and label the columns.
cds_path <- "../gene_annotation/Sarcophilus_harrisii.DEVIL7.0.90.gtf.cds.bed"
cds <- setNames(
  read.table(cds_path),
  c("seqname", "start", "end", "gene_name")
)

# GRanges()
# Build the ranges with the gene ID attached as a metadata column.
# NOTE(review): BED starts are conventionally 0-based, whereas IRanges is
# 1-based and closed; confirm which convention gff2bed.sh writes.
cdsr <- GRanges(
  seqnames = cds$seqname,
  ranges = IRanges(start = cds$start, end = cds$end),
  gene_name = cds$gene_name
)
head(cdsr)
## GRanges object with 6 ranges and 1 metadata column:
##         seqnames        ranges strand |          gene_name
##            <Rle>     <IRanges>  <Rle> |        <character>
##   [1] GL834412.1 183603-183680      * | ENSSHAG00000005817
##   [2] GL834412.1 184756-184957      * | ENSSHAG00000005817
##   [3] GL834412.1 189295-189467      * | ENSSHAG00000005817
##   [4] GL834412.1 189884-189945      * | ENSSHAG00000005817
##   [5] GL834412.1 191912-191969      * | ENSSHAG00000005817
##   [6] GL834412.1 193138-193190      * | ENSSHAG00000005817
##   -------
##   seqinfo: 3620 sequences from an unspecified genome; no seqlengths

SNP Exon Overlap

The problem is that exons have this sort of seq name (GL834412.1) while the variants have this type (Chr1_supercontig_000000000)

The correspondence map of these naming conventions is given here: https://plos.figshare.com/articles/dataset/Genome_coordinates_/13283417/1

head(x88_2)
##                     SCAFFOLD     POS REF ALT X86T  X88T X202T2 X203T3 x86_alt
## 1 Chr1_supercontig_000000000  442678   T   C 0/70 32/58   0/31   0/36       0
## 2 Chr1_supercontig_000000000  482371   G   A 0/82 48/82   0/26   0/53       0
## 4 Chr1_supercontig_000000000  708655   G   T 0/65 29/61   0/30   0/33       0
## 6 Chr1_supercontig_000000000 1801031   C   T 1/75 25/55   0/30   0/36       1
## 7 Chr1_supercontig_000000000 1815761   T   A 0/80 31/61   0/32   0/43       0
## 8 Chr1_supercontig_000000000 1858650   T   G 0/94 33/64   0/30   0/37       0
##   x86_total x88_alt x88_total x202_alt x202_total x203_alt x203_total
## 1        70      32        58        0         31        0         36
## 2        82      48        82        0         26        0         53
## 4        65      29        61        0         30        0         33
## 6        75      25        55        0         30        0         36
## 7        80      31        61        0         32        0         43
## 8        94      33        64        0         30        0         37
# Lookup table pairing Ensembl scaffold IDs with TCG supercontig names;
# the first line of the file is used as the header.
map_path <- "../gene_annotation/supercontig2gl.tsv"
map_tbl <- read.table(file = map_path, header = TRUE)
head(map_tbl)
##   ENSEMBL_ID                     TCG_ID
## 1 GL834412.1 Chr1_supercontig_000000000
## 2 GL834413.1 Chr1_supercontig_000000001
## 3 GL834414.1 Chr1_supercontig_000000002
## 4 GL834415.1 Chr1_supercontig_000000003
## 5 GL834416.1 Chr1_supercontig_000000004
## 6 GL834417.1 Chr1_supercontig_000000005
# Translate the TCG supercontig names into Ensembl scaffold IDs so the SNVs
# use the same sequence names as the CDS annotation.
scaffold_idx <- match(x88_2$SCAFFOLD, map_tbl$TCG_ID)

# match() returns NA for names missing from the table; report them explicitly
# rather than letting NA scaffold names flow into the overlap step.
unmapped <- unique(x88_2$SCAFFOLD[is.na(scaffold_idx)])
if (length(unmapped) > 0) {
  stop(
    "No Ensembl ID found for scaffold(s): ",
    paste(unmapped, collapse = ", "),
    call. = FALSE
  )
}

x88_2$SCAFFOLD <- map_tbl$ENSEMBL_ID[scaffold_idx]
head(x88_2)
##     SCAFFOLD     POS REF ALT X86T  X88T X202T2 X203T3 x86_alt x86_total x88_alt
## 1 GL834412.1  442678   T   C 0/70 32/58   0/31   0/36       0        70      32
## 2 GL834412.1  482371   G   A 0/82 48/82   0/26   0/53       0        82      48
## 4 GL834412.1  708655   G   T 0/65 29/61   0/30   0/33       0        65      29
## 6 GL834412.1 1801031   C   T 1/75 25/55   0/30   0/36       1        75      25
## 7 GL834412.1 1815761   T   A 0/80 31/61   0/32   0/43       0        80      31
## 8 GL834412.1 1858650   T   G 0/94 33/64   0/30   0/37       0        94      33
##   x88_total x202_alt x202_total x203_alt x203_total
## 1        58        0         31        0         36
## 2        82        0         26        0         53
## 4        61        0         30        0         33
## 6        55        0         30        0         36
## 7        61        0         32        0         43
## 8        64        0         30        0         37

Now we can intersect

# Represent every SNV as a single-base range at its position. A two-base
# range (POS to POS + 1) would also overlap a CDS that starts one base
# downstream of the variant and so report a non-coding SNV as exonic.
x88_r <- GRanges(
  seqnames = x88_2$SCAFFOLD,
  ranges = IRanges(start = x88_2$POS, width = 1L)
)

x88_r
## GRanges object with 6222 ranges and 0 metadata columns:
##            seqnames          ranges strand
##               <Rle>       <IRanges>  <Rle>
##      [1] GL834412.1   442678-442679      *
##      [2] GL834412.1   482371-482372      *
##      [3] GL834412.1   708655-708656      *
##      [4] GL834412.1 1801031-1801032      *
##      [5] GL834412.1 1815761-1815762      *
##      ...        ...             ...    ...
##   [6218] GL868375.1       6686-6687      *
##   [6219] GL868600.1       1630-1631      *
##   [6220] GL868618.1       2752-2753      *
##   [6221] GL868784.1       1362-1363      *
##   [6222] GL869864.1       3928-3929      *
##   -------
##   seqinfo: 2255 sequences from an unspecified genome; no seqlengths
# Pair each SNV range (query) with every CDS interval (subject) it overlaps.
# The call warns because each object has sequence levels the other lacks.
ol <- findOverlaps(x88_r,cdsr)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': GL834431.1, GL834434.1, GL834449.1, GL834478.1, GL834499.1, GL834515.1, GL834518.1, GL834519.1, GL834529.1, GL834530.1, GL834532.1, GL834534.1, GL834536.1, GL834559.1, GL834599.1, GL834601.1, GL834616.1, GL834634.1, GL834663.1, GL834664.1, GL834675.1, GL834681.1, GL834700.1, GL834737.1, GL834738.1, GL834754.1, GL834755.1, GL834756.1, GL834758.1, GL834786.1, GL834791.1, GL834796.1, GL834827.1, GL834828.1, GL834832.1, GL834837.1, GL834846.1, GL834847.1, GL834851.1, GL834855.1, GL834866.1, GL834880.1, GL834908.1, GL834910.1, GL834911.1, GL834918.1, GL834924.1, GL834925.1, GL834930.1, GL834938.1, GL834941.1, GL834947.1, GL834972.1, GL834977.1, GL834994.1, GL834998.1, GL835005.1, GL835008.1, GL835032.1, GL835035.1, GL835036.1, GL835041.1, GL835042.1, GL835071.1, GL835073.1, GL835077.1, GL835081.1, GL835085.1, GL835086.1, GL835095.1, GL835106.1, GL835119.1, GL835126.1, GL835136.1, GL835137.1, GL835170.1, GL835177.1, GL835179.1, GL835201.1, GL835203.1, GL835208.1, GL835211.1, GL835216.1, GL835231.1, GL835242.1, GL835259.1, GL835262.1, GL835263.1, GL835271.1, GL835272.1, GL835279.1, GL835282.1, GL835283.1, GL835287.1, GL835289.1, GL835305.1, GL835306.1, GL835308.1, GL835344.1, GL835374.1, GL835388.1, GL835401.1, GL835403.1, GL835409.1, GL835419.1, GL835428.1, GL835435.1, GL835466.1, GL835476.1, GL835487.1, GL835495.1, GL835517.1, GL835524.1, GL835537.1, GL835544.1, GL835567.1, GL835578.1, GL835586.1, GL835603.1, GL835621.1, GL835630.1, GL835647.1, GL835669.1, GL835684.1, GL835706.1, GL835710.1, GL835718.1, GL835730.1, GL835752.1, GL835767.1, GL835783.1, GL835787.1, GL835808.1, GL835878.1, GL835913.1, GL835915.1, GL835961.1, GL836333.1, GL836800.1, GL836875.1, GL837065.1, GL837081.1, GL837084.1, GL837343.1, GL837493.1, GL838201.1, GL838268.1, GL839052.1, GL839418.1, GL839478.1, GL839531.1, GL839782.1, GL840005.1, GL840161.1, GL840390.1, GL840392.1, GL841144.1, GL841149.1, GL841152.1, GL841154.1, GL841168.1, GL841203.1, GL841215.1, GL841216.1, GL841217.1, 
GL841219.1, GL841224.1, GL841225.1, GL841226.1, GL841228.1, GL841229.1, GL841230.1, GL841233.1, GL841236.1, GL841238.1, GL841270.1, GL841276.1, GL841280.1, GL841281.1, GL841282.1, GL841292.1, GL841319.1, GL841320.1, GL841321.1, GL841322.1, GL841328.1, GL841335.1, GL841361.1, GL841383.1, GL841464.1, GL841477.1, GL841479.1, GL841508.1, GL841509.1, GL841531.1, GL841534.1, GL841542.1, GL841567.1, GL841574.1, GL841576.1, GL841604.1, GL841641.1, GL841652.1, GL841655.1, GL841666.1, GL841671.1, GL841674.1, GL841677.1, GL841678.1, GL841679.1, GL841688.1, GL841693.1, GL841695.1, GL841697.1, GL841700.1, GL841707.1, GL841711.1, GL841714.1, GL841715.1, GL841720.1, GL841721.1, GL841722.1, GL841728.1, GL841743.1, GL841745.1, GL841753.1, GL841757.1, GL841761.1, GL841763.1, GL841767.1, GL841773.1, GL841777.1, GL841778.1, GL841786.1, GL841789.1, GL841791.1, GL841800.1, GL841801.1, GL841813.1, GL841838.1, GL841842.1, GL841844.1, GL841858.1, GL841868.1, GL841869.1, GL841870.1, GL841872.1, GL841893.1, GL841899.1, GL841900.1, GL841901.1, GL841902.1, GL841903.1, GL841905.1, GL841913.1, GL841919.1, GL841937.1, GL841941.1, GL841948.1, GL841950.1, GL841955.1, GL841970.1, GL841973.1, GL841975.1, GL841977.1, GL841986.1, GL841990.1, GL841991.1, GL842001.1, GL842003.1, GL842010.1, GL842013.1, GL842024.1, GL842027.1, GL842029.1, GL842039.1, GL842051.1, GL842057.1, GL842069.1, GL842074.1, GL842076.1, GL842091.1, GL842096.1, GL842100.1, GL842104.1, GL842105.1, GL842107.1, GL842108.1, GL842112.1, GL842115.1, GL842118.1, GL842119.1, GL842123.1, GL842131.1, GL842132.1, GL842152.1, GL842157.1, GL842160.1, GL842178.1, GL842198.1, GL842212.1, GL842213.1, GL842216.1, GL842227.1, GL842232.1, GL842243.1, GL842271.1, GL842279.1, GL842305.1, GL842311.1, GL842315.1, GL842335.1, GL842344.1, GL842350.1, GL842351.1, GL842352.1, GL842364.1, GL842377.1, GL842390.1, GL842395.1, GL842404.1, GL842413.1, GL842419.1, GL842468.1, GL842482.1, GL842489.1, GL842501.1, GL842503.1, GL842507.1, GL842539.1, GL842549.1, 
GL842560.1, GL842584.1, GL842596.1, GL842597.1, GL842603.1, GL842604.1, GL842607.1, GL842608.1, GL842648.1, GL842660.1, GL842664.1, GL842678.1, GL842686.1, GL842692.1, GL842702.1, GL842730.1, GL842734.1, GL842736.1, GL842749.1, GL842775.1, GL842776.1, GL842779.1, GL842808.1, GL842818.1, GL842834.1, GL842846.1, GL842853.1, GL842858.1, GL842885.1, GL842889.1, GL842903.1, GL842930.1, GL842939.1, GL842944.1, GL843001.1, GL843004.1, GL843030.1, GL843090.1, GL843110.1, GL843111.1, GL843130.1, GL843209.1, GL843237.1, GL843441.1, GL844235.1, GL844391.1, GL844497.1, GL844930.1, GL845152.1, GL845848.1, GL846800.1, GL847029.1, GL847103.1, GL847114.1, GL847156.1, GL847467.1, GL847594.1, GL848031.1, GL848944.1, GL849027.1, GL849144.1, GL849542.1, GL849551.1, GL849552.1, GL849553.1, GL849554.1, GL849561.1, GL849576.1, GL849595.1, GL849599.1, GL849605.1, GL849613.1, GL849615.1, GL849617.1, GL849618.1, GL849637.1, GL849642.1, GL849643.1, GL849646.1, GL849649.1, GL849651.1, GL849669.1, GL849670.1, GL849671.1, GL849672.1, GL849674.1, GL849675.1, GL849682.1, GL849689.1, GL849700.1, GL849718.1, GL849745.1, GL849796.1, GL849800.1, GL849802.1, GL849859.1, GL849862.1, GL849866.1, GL849874.1, GL849875.1, GL849877.1, GL849880.1, GL849882.1, GL849883.1, GL849884.1, GL849886.1, GL849908.1, GL849938.1, GL849942.1, GL849952.1, GL849959.1, GL849969.1, GL849997.1, GL850008.1, GL850011.1, GL850038.1, GL850045.1, GL850054.1, GL850099.1, GL850104.1, GL850107.1, GL850111.1, GL850114.1, GL850115.1, GL850121.1, GL850126.1, GL850134.1, GL850140.1, GL850144.1, GL850147.1, GL850177.1, GL850184.1, GL850193.1, GL850203.1, GL850207.1, GL850216.1, GL850226.1, GL850227.1, GL850230.1, GL850236.1, GL850240.1, GL850242.1, GL850274.1, GL850276.1, GL850279.1, GL850292.1, GL850304.1, GL850314.1, GL850320.1, GL850330.1, GL850340.1, GL850344.1, GL850350.1, GL850360.1, GL850362.1, GL850380.1, GL850393.1, GL850394.1, GL850398.1, GL850399.1, GL850435.1, GL850446.1, GL850452.1, GL850477.1, GL850479.1, GL850485.1, 
GL850488.1, GL850495.1, GL850496.1, GL850524.1, GL850530.1, GL850555.1, GL850572.1, GL850606.1, GL850618.1, GL850619.1, GL850626.1, GL850633.1, GL850641.1, GL850664.1, GL850680.1, GL850698.1, GL850700.1, GL850715.1, GL850720.1, GL850722.1, GL850726.1, GL850734.1, GL850755.1, GL850758.1, GL850770.1, GL850774.1, GL850807.1, GL850811.1, GL850819.1, GL850830.1, GL850851.1, GL850862.1, GL850871.1, GL850877.1, GL850889.1, GL850893.1, GL850925.1, GL850955.1, GL850960.1, GL850967.1, GL850979.1, GL851012.1, GL851032.1, GL851057.1, GL851162.1, GL851178.1, GL851189.1, GL851199.1, GL851230.1, GL851244.1, GL851245.1, GL851251.1, GL851264.1, GL851315.1, GL851692.1, GL852120.1, GL852631.1, GL852749.1, GL852794.1, GL852826.1, GL853644.1, GL853781.1, GL854030.1, GL854076.1, GL854353.1, GL854697.1, GL855472.1, GL855609.1, GL855816.1, GL855896.1, GL856222.1, GL856346.1, GL856356.1, GL856759.1, GL856760.1, GL856762.1, GL856765.1, GL856766.1, GL856768.1, GL856769.1, GL856834.1, GL856835.1, GL856838.1, GL856891.1, GL856903.1, GL856904.1, GL856927.1, GL856930.1, GL856931.1, GL856943.1, GL856957.1, GL856983.1, GL856985.1, GL856986.1, GL856987.1, GL857026.1, GL857036.1, GL857040.1, GL857051.1, GL857052.1, GL857073.1, GL857086.1, GL857103.1, GL857112.1, GL857114.1, GL857122.1, GL857123.1, GL857124.1, GL857130.1, GL857140.1, GL857160.1, GL857166.1, GL857171.1, GL857175.1, GL857195.1, GL857209.1, GL857216.1, GL857231.1, GL857233.1, GL857235.1, GL857248.1, GL857269.1, GL857280.1, GL857288.1, GL857290.1, GL857295.1, GL857318.1, GL857322.1, GL857345.1, GL857357.1, GL857363.1, GL857364.1, GL857377.1, GL857402.1, GL857403.1, GL857414.1, GL857419.1, GL857421.1, GL857440.1, GL857448.1, GL857454.1, GL857467.1, GL857470.1, GL857481.1, GL857487.1, GL857497.1, GL857515.1, GL857517.1, GL857523.1, GL857524.1, GL857537.1, GL857580.1, GL857592.1, GL857599.1, GL857602.1, GL857615.1, GL857623.1, GL857680.1, GL857694.1, GL857696.1, GL857704.1, GL857728.1, GL857735.1, GL857744.1, GL857756.1, GL857775.1, 
GL857800.1, GL857805.1, GL857828.1, GL857953.1, GL858154.1, GL858667.1, GL859091.1, GL859142.1, GL859307.1, GL859566.1, GL860155.1, GL860925.1, GL861043.
x88_r[queryHits(ol)]
## GRanges object with 50 ranges and 0 metadata columns:
##          seqnames          ranges strand
##             <Rle>       <IRanges>  <Rle>
##    [1] GL834436.1 3231122-3231123      *
##    [2] GL834447.1   146798-146799      *
##    [3] GL834592.1   128581-128582      *
##    [4] GL834651.1   321559-321560      *
##    [5] GL834658.1   599931-599932      *
##    ...        ...             ...    ...
##   [46] GL865099.1   155927-155928      *
##   [47] GL867574.1   180868-180869      *
##   [48] GL867603.1   899665-899666      *
##   [49] GL867628.1   176037-176038      *
##   [50] GL867787.1       1746-1747      *
##   -------
##   seqinfo: 2255 sequences from an unspecified genome; no seqlengths
# One row per SNV/CDS overlap: the CDS interval that was hit and its gene ID.
cds_vars <- as.data.frame(cdsr[subjectHits(ol)])

# Render the hits as a hover-highlighted table that is not stretched to the
# full page width. FALSE is spelled out because F can be reassigned.
cds_vars %>%
  kbl(caption = "x88 vars in protein coding genes") %>%
  kable_paper("hover", full_width = FALSE)
x88 vars in protein coding genes
seqnames start end width strand gene_name
GL834436.1 3230924 3231135 212
ENSSHAG00000017948
GL834447.1 146291 147111 821
ENSSHAG00000005062
GL834592.1 128564 128651 88
ENSSHAG00000003861
GL834651.1 321536 321682 147
ENSSHAG00000007420
GL834658.1 599327 600064 738
ENSSHAG00000009840
GL834710.1 2881110 2881308 199
ENSSHAG00000017639
GL835252.1 102723 103018 296
ENSSHAG00000004398
GL841244.1 176971 177163 193
ENSSHAG00000005246
GL841260.1 1800897 1802400 1504
ENSSHAG00000013921
GL841325.1 205627 205880 254
ENSSHAG00000005425
GL841340.1 89790 89861 72
ENSSHAG00000003834
GL841442.1 623897 624021 125
ENSSHAG00000009381
GL841548.1 191897 192798 902
ENSSHAG00000005938
GL841556.1 374273 374576 304
ENSSHAG00000005525
GL841565.1 688585 688800 216
ENSSHAG00000010397
GL849532.1 855008 855991 984
ENSSHAG00000011474
GL849632.1 3110392 3110584 193
ENSSHAG00000017921
GL849666.1 81513 81787 275
ENSSHAG00000003707
GL849739.1 1251257 1252816 1560
ENSSHAG00000013516
GL849739.1 1622711 1623676 966
ENSSHAG00000014901
GL849767.1 110423 110706 284
ENSSHAG00000003933
GL849767.1 157218 158168 951
ENSSHAG00000005419
GL849767.1 241259 242188 930
ENSSHAG00000006573
GL849812.1 2017203 2017295 93
ENSSHAG00000016061
GL849867.1 283244 284017 774
ENSSHAG00000007080
GL849904.1 2927757 2927868 112
ENSSHAG00000017604
GL850418.1 116798 117655 858
ENSSHAG00000004695
GL850503.1 40321 40836 516
ENSSHAG00000002737
GL856719.1 1404119 1404242 124
ENSSHAG00000014128
GL856802.1 488812 489133 322
ENSSHAG00000008973
GL856821.1 1533257 1533424 168
ENSSHAG00000014561
GL856823.1 1120979 1121125 147
ENSSHAG00000012836
GL856827.1 1771717 1771938 222
ENSSHAG00000015345
GL856948.1 3601270 3601373 104
ENSSHAG00000018440
GL856961.1 231866 232046 181
ENSSHAG00000002179
GL856995.1 3322665 3323738 1074
ENSSHAG00000018182
GL857007.1 350522 351686 1165
ENSSHAG00000007648
GL857012.1 1388544 1388720 177
ENSSHAG00000013882
GL857020.1 899522 900780 1259
ENSSHAG00000011731
GL857297.1 136533 136734 202
ENSSHAG00000005049
GL861544.1 312210 312292 83
ENSSHAG00000007070
GL861691.1 824962 825450 489
ENSSHAG00000011266
GL861723.1 796121 796171 51
ENSSHAG00000011079
GL862090.1 81064 82227 1164
ENSSHAG00000003936
GL864774.1 1676513 1677382 870
ENSSHAG00000015070
GL865099.1 155640 156072 433
ENSSHAG00000005327
GL867574.1 180857 180968 112
ENSSHAG00000005778
GL867603.1 899431 900660 1230
ENSSHAG00000011738
GL867628.1 176033 176076 44
ENSSHAG00000005392
GL867787.1 657 2000 1344
ENSSHAG00000000367
# Count the distinct gene identifiers among the overlapping CDS ranges.
length(unique(cds_vars[["gene_name"]]))
## [1] 50

This result shows that from the 6222 variants that can distinguish x88, only 50 are in protein-coding genes (0.8%).

Each of these genes contained only 1 variant, so there are only 50 genes which can be distinguished.

This is consistent with only 1.1% of the genome being in expressed exons, and with the expectation that exons accumulate variants more slowly than non-exonic regions because of their essential function.

Which genes are expressed

Of these 50 genes, not all will be expressed in the cells. We can look at the list of genes which are expressed in RNA-seq to give a realistic proportion of how many can be distinguished by RNA-seq.

# Read the RNA-seq table from the spreadsheet. skip = 1 discards the first row
# so that the following row supplies the column names.
rnaseq <- readxl::read_xlsx(
  "../oncotarget-09-15895-s002.xlsx",
  col_names = TRUE,
  skip = 1
)
rnaseq <- as.data.frame(rnaseq)

# Keep the rows whose mean over the columns named "...counts..." exceeds 10.
# drop = FALSE keeps the selection two-dimensional, so rowMeans() still works
# when only a single column name matches. which() discards rows whose mean
# is NA instead of turning them into all-NA rows.
count_cols <- grep("counts", colnames(rnaseq))
mean_counts <- rowMeans(rnaseq[, count_cols, drop = FALSE])
rnaseq_expressed <- rnaseq[which(mean_counts > 10), ]
dim(rnaseq_expressed)
## [1] 12842    12
# Preview the first few gene identifiers that passed the expression filter.
head(rnaseq_expressed$gene_id)
## [1] "ENSSHAG00000019817" "ENSSHAG00000019651" "ENSSHAG00000020414"
## [4] "ENSSHAG00000020043" "ENSSHAG00000020375" "ENSSHAG00000019115"
# For each variant-containing gene, report whether its identifier also appears
# among the expressed RNA-seq genes (one TRUE/FALSE per row of cds_vars).
cds_vars$gene_name %in% rnaseq_expressed$gene_id
##  [1]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
## [13] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [37]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [49]  TRUE FALSE
# List the variant-containing genes that are also in the expressed set.
# %in% never returns NA, so the logical mask can be used to index directly.
cds_vars$gene_name[cds_vars$gene_name %in% rnaseq_expressed$gene_id]
##  [1] "ENSSHAG00000017948" "ENSSHAG00000005062" "ENSSHAG00000003861"
##  [4] "ENSSHAG00000009840" "ENSSHAG00000017639" "ENSSHAG00000013921"
##  [7] "ENSSHAG00000005425" "ENSSHAG00000003834" "ENSSHAG00000009381"
## [10] "ENSSHAG00000005525" "ENSSHAG00000010397" "ENSSHAG00000017921"
## [13] "ENSSHAG00000003707" "ENSSHAG00000016061" "ENSSHAG00000002737"
## [16] "ENSSHAG00000014128" "ENSSHAG00000014561" "ENSSHAG00000012836"
## [19] "ENSSHAG00000015345" "ENSSHAG00000018440" "ENSSHAG00000002179"
## [22] "ENSSHAG00000007648" "ENSSHAG00000013882" "ENSSHAG00000011731"
## [25] "ENSSHAG00000007070" "ENSSHAG00000011079" "ENSSHAG00000003936"
## [28] "ENSSHAG00000005778" "ENSSHAG00000005392"
# Count the variant-containing genes that are expressed: summing the logical
# mask counts its TRUE entries.
sum(cds_vars$gene_name %in% rnaseq_expressed$gene_id)
## [1] 29

In conclusion, there are only 29 genes where we will be able to use SNP variants to distinguish cells in co-culture.

This work supports the idea that cells need to be sorted prior to transcriptome analysis.

Session information

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] readxl_1.3.1         GenomicRanges_1.44.0 GenomeInfoDb_1.28.4 
## [4] IRanges_2.26.0       S4Vectors_0.30.2     BiocGenerics_0.38.0 
## [7] kableExtra_1.3.4     eulerr_6.1.1        
## 
## loaded via a namespace (and not attached):
##  [1] xfun_0.29              bslib_0.3.1            colorspace_2.0-2      
##  [4] vctrs_0.3.8            htmltools_0.5.2        viridisLite_0.4.0     
##  [7] yaml_2.2.1             utf8_1.2.2             rlang_0.4.12          
## [10] jquerylib_0.1.4        pillar_1.6.4           glue_1.6.0            
## [13] GenomeInfoDbData_1.2.6 lifecycle_1.0.1        stringr_1.4.0         
## [16] zlibbioc_1.38.0        munsell_0.5.0          cellranger_1.1.0      
## [19] rvest_1.0.2            evaluate_0.14          knitr_1.37            
## [22] fastmap_1.1.0          fansi_1.0.0            highr_0.9             
## [25] Rcpp_1.0.7             scales_1.1.1           webshot_0.5.2         
## [28] jsonlite_1.7.2         XVector_0.32.0         systemfonts_1.0.3     
## [31] digest_0.6.29          stringi_1.7.6          grid_4.1.2            
## [34] cli_3.1.0              tools_4.1.2            bitops_1.0-7          
## [37] magrittr_2.0.1         sass_0.4.0             RCurl_1.98-1.5        
## [40] tibble_3.1.6           crayon_1.4.2           pkgconfig_2.0.3       
## [43] ellipsis_0.3.2         xml2_1.3.3             rmarkdown_2.11        
## [46] svglite_2.0.0          httr_1.4.2             rstudioapi_0.13       
## [49] R6_2.5.1               compiler_4.1.2