Source: TBC
Here we are looking at a new batch of 10X scRNA-seq data and seeing if any HIV reads can be mapped because it looks like cell ranger didn’t pick up any.
library("reshape2")
library("kableExtra")
Importing RNA-seq data
tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
colSums(x)
## 1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## 394709619 481880950 522625586
x[which(rownames(x)=="NC_001802.1"),]
## 1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## NC_001802.1 4 0 0
x[which(rownames(x)=="NC_001802.1"),] / colSums(x) * 1e6
## 1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## NC_001802.1 0.01013403 0 0
# gene table (for aggregation)
gt <- read.table("ref/Homo_sapiens.GRCh38.cdna+ncrna.genetable.tsv",sep="\t")
m <- merge(gt,x,by.x="V1",by.y=0)
m$V1=NULL
xx <- aggregate(. ~ V2,m,sum)
rownames(xx) <- xx$V2
xx$V2=NULL
dim(xx)
## [1] 69573 3
dim(m)
## [1] 275921 4
cor(xx)
## 1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## 1-17032023-GEX 1.0000000 0.9774365 0.9666938
## 2-09032023-GEX 0.9774365 1.0000000 0.9911065
## 3-13032023-GEX 0.9666938 0.9911065 1.0000000
rpm <- apply(xx,2,function(x) {x / sum(x) * 1e6} )
head(rpm[order(-rowSums(rpm)),])
## 1-17032023-GEX 2-09032023-GEX 3-13032023-GEX
## ENSG00000251562.11 MALAT1 40282.173 35745.784 36994.144
## ENSG00000210082.2 MT-RNR2 17396.612 26559.465 27372.942
## ENSG00000198899.2 MT-ATP6 9378.566 10271.998 8209.969
## ENSG00000273686.4 B2M 9311.148 8289.703 7794.812
## ENSG00000167996.16 FTH1 7657.547 7263.132 9866.081
## ENSG00000198804.2 MT-CO1 9237.449 7925.858 7540.255