Source: TBC

Introduction

Here we are looking at a new batch of 10X scRNA-seq data and seeing if any HIV reads can be mapped because it looks like cell ranger didn’t pick up any.

library("reshape2")
library("kableExtra")

Import read counts

Importing RNA-seq data

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)

colSums(x)
## 1-17032023-GEX 2-09032023-GEX 3-13032023-GEX 
##      394709619      481880950      522625586
x[which(rownames(x)=="NC_001802.1"),]
##             1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## NC_001802.1              4              0              0
x[which(rownames(x)=="NC_001802.1"),] / colSums(x) * 1e6
##             1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## NC_001802.1     0.01013403              0              0
# gene table (for aggregation)
gt <- read.table("ref/Homo_sapiens.GRCh38.cdna+ncrna.genetable.tsv",sep="\t")

m <- merge(gt,x,by.x="V1",by.y=0)
m$V1=NULL
xx <- aggregate(. ~ V2,m,sum)
rownames(xx) <- xx$V2
xx$V2=NULL

dim(xx)
## [1] 69573     3
dim(m)
## [1] 275921      4
cor(xx)
##                1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## 1-17032023-GEX      1.0000000      0.9774365      0.9666938
## 2-09032023-GEX      0.9774365      1.0000000      0.9911065
## 3-13032023-GEX      0.9666938      0.9911065      1.0000000
rpm <- apply(xx,2,function(x) {x / sum(x) * 1e6} )
head(rpm[order(-rowSums(rpm)),])
##                           1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## ENSG00000251562.11 MALAT1      40282.173      35745.784      36994.144
## ENSG00000210082.2 MT-RNR2      17396.612      26559.465      27372.942
## ENSG00000198899.2 MT-ATP6       9378.566      10271.998       8209.969
## ENSG00000273686.4 B2M           9311.148       8289.703       7794.812
## ENSG00000167996.16 FTH1         7657.547       7263.132       9866.081
## ENSG00000198804.2 MT-CO1        9237.449       7925.858       7540.255