Description of the samples TODO.
suppressPackageStartupMessages({
library("plyr")
library("Seurat")
library("hdf5r")
library("SingleCellExperiment")
library("parallel")
library("stringi")
library("beeswarm")
library("muscat")
library("DESeq2")
library("mitch")
library("vioplot")
library("beeswarm")
})
Load the h5 matrices.
CSP1_counts ./1-CSP/outs/filtered_feature_bc_matrix.h5
GEX1_counts ./1-GEX/outs/filtered_feature_bc_matrix.h5
CSP2_counts ./2-CSP/outs/filtered_feature_bc_matrix.h5
GEX2_counts ./2-GEX/outs/filtered_feature_bc_matrix.h5
M239_N239_Alv_counts ./M239-N239-Alv/outs/filtered_feature_bc_matrix.h5
N239Alv_react ./N239Alv-react/outs/filtered_feature_bc_matrix.h5
P239_Alv_and_MDM ./P239-Alv-and-MDM/outs/filtered_feature_bc_matrix.h5
CSP1_counts <- Read10X_h5("./1-CSP/outs/filtered_feature_bc_matrix.h5")
colnames(CSP1_counts) <- gsub("-1","",colnames(CSP1_counts))
dim(CSP1_counts)
## [1] 36603 1887
ncol(CSP1_counts)
## [1] 1887
summary(colSums(CSP1_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 390 1021 1530 1624 2118 5422
CSP1_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCAAGTGCAACG', 'AAACCCACAATGACCT', 'AAACCCAGTTTCCCAC' ... ]]
##
## gene-HIV1-1 . . . . . . . . . . 1 2 . . . . . . . . . . . . . . . . . .
## gene-HIV1-2 . . . 9 2 . . . 12 . 6 12 . 6 . . . 7 . . . 8 . . . 2 1 . . .
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
GEX1_counts <- Read10X_h5("./1-GEX/outs/filtered_feature_bc_matrix.h5")
colnames(GEX1_counts) <- gsub("-1","",colnames(GEX1_counts))
ncol(GEX1_counts)
## [1] 11943
summary(colSums(GEX1_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 5514 29095 29074 46098 166793
GEX1_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCAAGAGGGCGA', 'AAACCCAAGGACAACC', 'AAACCCAAGGTAGTCG' ... ]]
##
## gene-HIV1-1 . . . 1 . . . . 22 . . 1 . . . . . . . . . . 1 . . 4 . . . .
## gene-HIV1-2 . . . 8 . . 3 1 145 1 2 2 . . 1 6 42 1 3 12 . 5 11 1 7 26 . . 1 2
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
CSP2_counts <- Read10X_h5("./2-CSP/outs/filtered_feature_bc_matrix.h5")
colnames(CSP2_counts) <- gsub("-1","",colnames(CSP2_counts))
ncol(CSP2_counts)
## [1] 6266
summary(colSums(CSP2_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 26.00 33.00 54.44 60.00 372.00
CSP2_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCAAGAGGGCGA', 'AAACCCAAGTGCAACG', 'AAACCCACAATGACCT' ... ]]
##
## gene-HIV1-1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## gene-HIV1-2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
GEX2_counts <- Read10X_h5("./2-GEX/outs/filtered_feature_bc_matrix.h5")
colnames(GEX2_counts) <- gsub("-1","",colnames(GEX2_counts))
ncol(GEX2_counts)
## [1] 2715
summary(colSums(GEX2_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 711 12815 52245 55394 87348 259921
GEX2_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCAAGTGCAACG', 'AAACCCACAATGACCT', 'AAACCCAGTGACATCT' ... ]]
##
## gene-HIV1-1 . . . . . . 10 . . . . . . 3 . 2 . 12 14 . 12 . . . . . 2
## gene-HIV1-2 . . . 2 4 . 220 98 2 . . . . 205 . 37 . 215 255 . 152 . . . . . 17
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . 2 . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . .
##
## gene-HIV1-1 . . .
## gene-HIV1-2 1 164 21
## MIR1302-2HG . . .
## FAM138A . . .
## OR4F5 . . .
## AL627309.1 . . .
## AL627309.3 . . .
## AL627309.2 . . .
## AL627309.5 . . .
## AL627309.4 . . .
M239_N239_Alv_counts <- Read10X_h5("./M239-N239-Alv/outs/filtered_feature_bc_matrix.h5")
colnames(M239_N239_Alv_counts) <- gsub("-1","",colnames(M239_N239_Alv_counts))
ncol(M239_N239_Alv_counts)
## [1] 10096
summary(colSums(M239_N239_Alv_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 17449 29574 29758 40389 134943
M239_N239_Alv_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCACACTGAATC', 'AAACCCACAGTCAGAG', 'AAACCCAGTACCCAGC' ... ]]
##
## gene-HIV1-1 14 . . . 87 . . . . . . 158 1 . . . . . . . . . . . . . . . . .
## gene-HIV1-2 135 . 1 . 215 1 1 1 . . 2 1619 3 1 . . 1 . . . 1 1 . 4 4 1 2 . 5 .
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . 1 . . . . . . . . . 1 . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
N239Alv_react_counts <- Read10X_h5("./N239Alv-react/outs/filtered_feature_bc_matrix.h5")
colnames(N239Alv_react_counts) <- gsub("-1","",colnames(N239Alv_react_counts))
ncol(N239Alv_react_counts)
## [1] 3098
summary(colSums(N239Alv_react_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 542 25090 33558 32742 41771 93107
N239Alv_react_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCATCTCGCTCA', 'AAACGAACAGTGAGCA', 'AAACGAATCACTGGTA' ... ]]
##
## gene-HIV1-1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## gene-HIV1-2 . . . . 1 . . . . . . 2 . . . . . . . . 1 . . . . . . . . .
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
P239_Alv_MDM_counts <- Read10X_h5("./P239-Alv-and-MDM/outs/filtered_feature_bc_matrix.h5")
colnames(P239_Alv_MDM_counts) <- gsub("-1","",colnames(P239_Alv_MDM_counts))
ncol(P239_Alv_MDM_counts)
## [1] 9004
summary(colSums(P239_Alv_MDM_counts))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 506 15076 22383 23960 31715 120277
P239_Alv_MDM_counts[1:10,1:30]
## 10 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCACACAATGAA', 'AAACCCACACGCCACA', 'AAACCCACACGCGTGT' ... ]]
##
## gene-HIV1-1 . . . . . . . . . 20 . . 1 8 . 36 . . . 2 1 . . . 46 . . . 1
## gene-HIV1-2 . . . 3 1 . 1 . 4 233 . 66 2 11 8 145 1 4 . 63 16 . . 1 92 4 . 2 3
## MIR1302-2HG . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## FAM138A . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## OR4F5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.3 . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## AL627309.4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
##
## gene-HIV1-1 .
## gene-HIV1-2 .
## MIR1302-2HG .
## FAM138A .
## OR4F5 .
## AL627309.1 .
## AL627309.3 .
## AL627309.2 .
## AL627309.5 .
## AL627309.4 .
CSP1_counts ./1-CSP/outs/filtered_feature_bc_matrix.h5
GEX1_counts ./1-GEX/outs/filtered_feature_bc_matrix.h5
CSP2_counts ./2-CSP/outs/filtered_feature_bc_matrix.h5
GEX2_counts ./2-GEX/outs/filtered_feature_bc_matrix.h5
M239_N239_Alv_counts ./M239-N239-Alv/outs/filtered_feature_bc_matrix.h5
N239Alv_react_counts ./N239Alv-react/outs/filtered_feature_bc_matrix.h5
P239_Alv_MDM_counts ./P239-Alv-and-MDM/outs/filtered_feature_bc_matrix.h5
if (! file.exists("CSP1_cell_barcodes.txt") ) {
CSP1_bc <- colnames(CSP1_counts)
writeLines(CSP1_bc,con="CSP1_cell_barcodes.txt")
}
if (! file.exists("GEX1_cell_barcodes.txt") ) {
GEX1_bc <- colnames(GEX1_counts)
writeLines(GEX1_bc,con="GEX1_cell_barcodes.txt")
}
if (! file.exists("CSP2_cell_barcodes.txt") ) {
CSP2_bc <- colnames(CSP2_counts)
writeLines(CSP2_bc,con="CSP2_cell_barcodes.txt")
}
if (! file.exists("GEX2_cell_barcodes.txt") ) {
GEX2_bc <- colnames(GEX2_counts)
writeLines(GEX2_bc,con="GEX2_cell_barcodes.txt")
}
if (! file.exists("M239_N239_Alv_cell_barcodes.txt") ) {
M239_N239_Alv_bc <- colnames(M239_N239_Alv_counts)
writeLines(M239_N239_Alv_bc,con="M239_N239_Alv_cell_barcodes.txt")
}
if (! file.exists("N239Alv_react_cell_barcodes.txt") ) {
N239Alv_react_bc <- colnames(N239Alv_react_counts)
writeLines(N239Alv_react_bc,con="N239Alv_react_cell_barcodes.txt")
}
if (! file.exists("P239_Alv_and_MDM_cell_barcodes.txt") ) {
P239_Alv_and_MDM_bc <- colnames(P239_Alv_MDM_counts)
writeLines(P239_Alv_and_MDM_bc,con="P239_Alv_and_MDM_cell_barcodes.txt")
}
Now read HTO data.
gex1_hto <- Read10X("1-CSP_hto/read_count/", gene.column=1)
#gex1_hto_trash <- Read10X("trash/1-CSP_hto/read_count/", gene.column=1)
summary(colSums(gex1_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16 3580 7222 9093 12782 51772
dim(gex1_hto)
## [1] 7 11266
gex2_hto <- Read10X("2-CSP_hto/read_count/", gene.column=1)
summary(colSums(gex2_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27 4110 13737 15216 24504 64861
dim(gex2_hto)
## [1] 7 1689
M239_N239_Alv_hto <- Read10X("M239-N239-Alv_hto/read_count/", gene.column=1)
summary(colSums(M239_N239_Alv_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1420 2897 4551 6469 40064
dim(M239_N239_Alv_hto)
## [1] 7 10083
P239_Alv_MDM_hto <- Read10X("P239-Alv-and-MDM_hto/read_count/", gene.column=1)
summary(colSums(P239_Alv_MDM_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 2072 5122 6850 10136 53637
dim(P239_Alv_MDM_hto)
## [1] 7 8872
str(which(colnames(GEX1_counts) %in% colnames(gex1_hto)))
## int [1:11266] 1 2 3 4 5 6 7 8 9 10 ...
str(which(colnames(GEX2_counts) %in% colnames(gex2_hto)))
## int [1:1689] 3 4 5 6 7 8 9 10 11 12 ...
str(which(colnames(M239_N239_Alv_counts) %in% colnames(M239_N239_Alv_hto)))
## int [1:10083] 1 2 3 4 5 6 7 8 9 10 ...
str(which(colnames(P239_Alv_MDM_counts) %in% colnames(P239_Alv_MDM_hto)))
## int [1:8872] 1 2 3 5 6 7 8 9 10 11 ...
summary(colSums(gex1_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16 3580 7222 9093 12782 51772
table(colSums(gex1_hto)>=100)
##
## FALSE TRUE
## 213 11053
gex1_hto <- gex1_hto[,which(colSums(gex1_hto)>=100)]
summary(colSums(gex1_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 101 3764 7356 9267 12923 51772
summary(colSums(gex2_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27 4110 13737 15216 24504 64861
table(colSums(gex2_hto)>=100)
##
## FALSE TRUE
## 1 1688
gex2_hto <- gex2_hto[,which(colSums(gex2_hto)>=100)]
summary(colSums(gex2_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 145 4110 13746 15225 24512 64861
summary(colSums(M239_N239_Alv_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1420 2897 4551 6469 40064
table(colSums(M239_N239_Alv_hto)>=100)
##
## FALSE TRUE
## 308 9775
M239_N239_Alv_hto <- M239_N239_Alv_hto[,which(colSums(M239_N239_Alv_hto)>=100)]
summary(colSums(M239_N239_Alv_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 104 1530 3039 4694 6620 40064
summary(colSums(P239_Alv_MDM_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 2072 5122 6850 10136 53637
table(colSums(P239_Alv_MDM_hto)>=100)
##
## FALSE TRUE
## 242 8630
P239_Alv_MDM_hto <- P239_Alv_MDM_hto[,which(colSums(P239_Alv_MDM_hto)>=100)]
summary(colSums(P239_Alv_MDM_hto))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 105 2253 5345 7042 10302 53637
summary(apply(gex1_hto,2,function(x) {x[6]/sum(x) } ) )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 0.000e+00 0.000e+00 8.014e-05 9.345e-05 7.634e-03
summary(apply(gex2_hto,2,function(x) {x[6]/sum(x) } ) )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 0.000e+00 0.000e+00 8.596e-05 7.020e-05 1.285e-02
summary(apply(M239_N239_Alv_hto,2,function(x) {x[6]/sum(x) } ) )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001816 0.031254 0.085117 0.304038 0.656792 0.976466
summary(apply(P239_Alv_MDM_hto,2,function(x) {x[6]/sum(x) } ) )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0009302 0.0228943 0.0751835 0.3774595 0.9449641 0.9855072
For each cell barcode, calculate the ratio of top BC to 2nd BC.
getratio <- function(mx){
res <- lapply(1:ncol(mx), function(i) {
cnt <- mx[,i]
top1 <- cnt[order(-cnt)][1]+1
top2 <- cnt[order(-cnt)][2]+1
top1/top2
})
return(unlist(res))
}
gex1_hto_ratio <- getratio(gex1_hto)
summary(unlist(gex1_hto_ratio))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.569 13.970 18.889 30.167 287.357
table(gex1_hto_ratio>3)
##
## FALSE TRUE
## 1527 9526
gex1_hto <- gex1_hto[,which(gex1_hto_ratio>3)]
gex2_hto_ratio <- getratio(gex2_hto)
summary(unlist(gex2_hto_ratio))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 6.643 23.788 22.126 35.101 60.985
table(gex2_hto_ratio>3)
##
## FALSE TRUE
## 201 1487
gex2_hto <- gex2_hto[,which(gex2_hto_ratio>3)]
M239_N239_Alv_hto_ratio <- getratio(M239_N239_Alv_hto)
summary(unlist(M239_N239_Alv_hto_ratio))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 5.849 18.830 19.447 31.135 81.322
table(M239_N239_Alv_hto_ratio>3)
##
## FALSE TRUE
## 1492 8283
M239_N239_Alv_hto <- M239_N239_Alv_hto[,which(M239_N239_Alv_hto_ratio>3)]
P239_Alv_MDM_hto_ratio <- getratio(P239_Alv_MDM_hto)
summary(unlist(P239_Alv_MDM_hto_ratio))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.70 29.37 25.73 33.78 95.67
table(P239_Alv_MDM_hto_ratio>3)
##
## FALSE TRUE
## 591 8039
P239_Alv_MDM_hto <- P239_Alv_MDM_hto[,which(P239_Alv_MDM_hto_ratio>3)]
gex1_itx <- intersect(colnames(GEX1_counts),colnames(gex1_hto))
dim(GEX1_counts)
## [1] 36603 11943
GEX1_counts <- GEX1_counts[,gex1_itx]
dim(GEX1_counts)
## [1] 36603 9526
gex1_hto <- gex1_hto[,gex1_itx]
dim(gex1_hto)
## [1] 7 9526
gex2_itx <- intersect(colnames(GEX2_counts),colnames(gex2_hto))
dim(GEX2_counts)
## [1] 36603 2715
GEX2_counts <- GEX2_counts[,gex2_itx]
dim(GEX2_counts)
## [1] 36603 1487
gex2_hto <- gex2_hto[,gex2_itx]
dim(gex2_hto)
## [1] 7 1487
M239_N239_Alv_itx <- intersect(colnames(M239_N239_Alv_counts),colnames(M239_N239_Alv_hto))
dim(M239_N239_Alv_counts)
## [1] 36603 10096
M239_N239_Alv_counts <- M239_N239_Alv_counts[,M239_N239_Alv_itx]
dim(M239_N239_Alv_counts)
## [1] 36603 8283
M239_N239_Alv_hto <- M239_N239_Alv_hto[,M239_N239_Alv_itx]
dim(M239_N239_Alv_hto)
## [1] 7 8283
P239_Alv_MDM_itx <- intersect(colnames(P239_Alv_MDM_counts),colnames(P239_Alv_MDM_hto))
dim(P239_Alv_MDM_counts)
## [1] 36603 9004
P239_Alv_MDM_counts <- P239_Alv_MDM_counts[,P239_Alv_MDM_itx]
dim(P239_Alv_MDM_counts)
## [1] 36603 8039
P239_Alv_MDM_hto <- P239_Alv_MDM_hto[,P239_Alv_MDM_itx]
dim(P239_Alv_MDM_hto)
## [1] 7 8039
Hash tagging sheet.
Library | HTO | Sample |
---|---|---|
gex1 | 1 | mock (donor P and V) |
gex1 | 2 | gfp_neg (donor P and V) |
gex1 | 4 | gfp_pos (donor P and V) |
gex2 | 1 | mock donor O |
gex2 | 2 | gfp_neg donor O |
gex2 | 4 | gfp_pos donor O |
M239_N239_Alv | 1 | AlvM239_mock |
M239_N239_Alv | 2 | AlvM239_gfp_neg |
M239_N239_Alv | 3 | AlvM239_gfp_pos |
M239_N239_Alv | 4 | AlvN239_mock |
M239_N239_Alv | 5 | AlvN239_gfp_neg |
M239_N239_Alv | 6 | AlvN239_gfp_pos |
P239_Alv_MDM_hto | 1 | MDM_mock |
P239_Alv_MDM_hto | 2 | MDM_gfp_neg |
P239_Alv_MDM_hto | 3 | MDM_gfp_pos |
P239_Alv_MDM_hto | 4 | Alv_mock |
P239_Alv_MDM_hto | 5 | Alv_gfp_neg |
P239_Alv_MDM_hto | 6 | Alv_gfp_pos |
Input counts:
GEX1_counts gex1_hto
GEX2_counts gex2_hto
M239_N239_Alv M239_N239_Alv_hto
P239_Alv_MDM P239_Alv_MDM_hto
table(apply(gex1_hto,2,function(x) { order(-x) } )[1,] )
##
## 1 2 4 7
## 1532 1448 6030 516
gex1_idx <- apply(gex1_hto,2,function(x) { order(-x) } )[1,]
gex1_h1 <- GEX1_counts[,which(gex1_idx==1)] # mock (donor P and V)
gex1_h2 <- GEX1_counts[,which(gex1_idx==2)] # gfp_neg (donor P and V)
gex1_h4 <- GEX1_counts[,which(gex1_idx==4)] # gfp_pos (donor P and V)
table(apply(gex2_hto,2,function(x) { order(-x) } )[1,] )
##
## 1 2 4 7
## 177 421 888 1
gex2_idx <- apply(gex2_hto,2,function(x) { order(-x) } )[1,]
gex2_h1 <- GEX2_counts[,which(gex2_idx==1)] # mock donor O
gex2_h2 <- GEX2_counts[,which(gex2_idx==2)] # gfp_neg donor O
gex2_h4 <- GEX2_counts[,which(gex2_idx==4)] # gfp_pos donor O
table(apply(M239_N239_Alv_hto,2,function(x) { order(-x) } )[1,] )
##
## 1 2 3 4 5 6
## 804 621 2335 1354 713 2456
M239_N239_Alv_idx <- apply(M239_N239_Alv_hto,2,function(x) { order(-x) } )[1,]
M239_N239_Alv_h1 <- M239_N239_Alv_counts[,which(M239_N239_Alv_idx==1)] # AlvM239_mock
M239_N239_Alv_h2 <- M239_N239_Alv_counts[,which(M239_N239_Alv_idx==2)] # AlvM239_gfp_neg
M239_N239_Alv_h3 <- M239_N239_Alv_counts[,which(M239_N239_Alv_idx==3)] # AlvM239_gfp_pos
M239_N239_Alv_h4 <- M239_N239_Alv_counts[,which(M239_N239_Alv_idx==4)] # AlvN239_mock
M239_N239_Alv_h5 <- M239_N239_Alv_counts[,which(M239_N239_Alv_idx==5)] # AlvN239_gfp_neg
M239_N239_Alv_h6 <- M239_N239_Alv_counts[,which(M239_N239_Alv_idx==6)] # AlvN239_gfp_pos
table(apply(P239_Alv_MDM_hto,2,function(x) { order(-x) } )[1,] )
##
## 1 2 3 4 5 6
## 815 436 1811 974 945 3058
P239_Alv_MDM_idx <- apply(P239_Alv_MDM_hto,2,function(x) { order(-x) } )[1,]
P239_Alv_MDM_h1 <- P239_Alv_MDM_counts[,which(P239_Alv_MDM_idx==1)] # MDM_mock
P239_Alv_MDM_h2 <- P239_Alv_MDM_counts[,which(P239_Alv_MDM_idx==2)] # MDM_gfp_neg
P239_Alv_MDM_h3 <- P239_Alv_MDM_counts[,which(P239_Alv_MDM_idx==3)] # MDM_gfp_pos
P239_Alv_MDM_h4 <- P239_Alv_MDM_counts[,which(P239_Alv_MDM_idx==4)] # Alv_mock
P239_Alv_MDM_h5 <- P239_Alv_MDM_counts[,which(P239_Alv_MDM_idx==5)] # Alv_gfp_neg
P239_Alv_MDM_h6 <- P239_Alv_MDM_counts[,which(P239_Alv_MDM_idx==6)] # Alv_gfp_pos
Data was previously genotyped and called using cellSNP-lite and vireo.
snp <- read.table("1-GEX_vireo/donor_ids.tsv",header=TRUE)
donor0 <- subset(snp,donor_id=="donor0")$cell
donor0 <- gsub("-1","",donor0)
donor1 <- subset(snp,donor_id=="donor1")$cell
donor1 <- gsub("-1","",donor1)
gex1_h1_d0 <- gex1_h1[,colnames(gex1_h1) %in% donor0]
dim(gex1_h1_d0)
## [1] 36603 796
gex1_h1_d1 <- gex1_h1[,colnames(gex1_h1) %in% donor1]
dim(gex1_h1_d1)
## [1] 36603 659
gex1_h2_d0 <- gex1_h2[,colnames(gex1_h2) %in% donor0]
dim(gex1_h2_d0)
## [1] 36603 880
gex1_h2_d1 <- gex1_h2[,colnames(gex1_h2) %in% donor1]
dim(gex1_h2_d1)
## [1] 36603 479
gex1_h4_d0 <- gex1_h4[,colnames(gex1_h4) %in% donor0]
dim(gex1_h4_d0)
## [1] 36603 2666
gex1_h4_d1 <- gex1_h4[,colnames(gex1_h4) %in% donor1]
dim(gex1_h4_d1)
## [1] 36603 2585
Let’s look at the expression of genes on sex cheomosomes.
BTW here’s the bash script to get the chr2gene tsv file:
zcat genes.gtf.gz \
| grep ^chr \
| cut -f1,9 \
| sed 's/; /\n/g' \
| egrep '(chr|gene_name)' \
| sed 's/gene_name "//' \
| tr -d '"' \
| cut -f1 \
| paste - - \
| uniq \
| sort -u > gene_chr.tsv
gene_chr <- read.table("ref_combined1/hum_hiv/genes/gene_chr.tsv")
head(gene_chr)
## V1 V2
## 1 chr10 A1CF
## 2 chr10 ABCC2
## 3 chr10 ABI1
## 4 chr10 ABLIM1
## 5 chr10 ABRAXAS2
## 6 chr10 AC005383.1
chrx <- subset(gene_chr,V1=="chrX")$V2
chry <- subset(gene_chr,V1=="chrY")$V2
chrm <- subset(gene_chr,V1=="chrM")$V2
chra <- gene_chr$V2[!gene_chr$V2 %in% c(chrx,chry,chrm)]
str(gene_chr)
## 'data.frame': 36562 obs. of 2 variables:
## $ V1: chr "chr10" "chr10" "chr10" "chr10" ...
## $ V2: chr "A1CF" "ABCC2" "ABI1" "ABLIM1" ...
str(chrx)
## chr [1:1146] "ABCB7" "ABCD1" "AC000113.1" "AC002072.1" "AC002368.1" ...
str(chry)
## chr [1:111] "AC006040.1" "AC006157.1" "AC007244.1" "AC007359.1" ...
str(chrm)
## chr [1:13] "MT-ATP6" "MT-ATP8" "MT-CO1" "MT-CO2" "MT-CO3" "MT-CYB" ...
d0y <- median(colSums(gex1_h4_d0[which(rownames(gex1_h4_d0) %in% chry),]))
d0x <- median(colSums(gex1_h4_d0[which(rownames(gex1_h4_d0) %in% chrx),]))
d0m <- median(colSums(gex1_h4_d0[which(rownames(gex1_h4_d0) %in% chrm),]))
d0a <- median(colSums(gex1_h4_d0[which(rownames(gex1_h4_d0) %in% chra),]))
d1y <- median(colSums(gex1_h4_d1[which(rownames(gex1_h4_d1) %in% chry),]))
d1x <- median(colSums(gex1_h4_d1[which(rownames(gex1_h4_d1) %in% chrx),]))
d1m <- median(colSums(gex1_h4_d1[which(rownames(gex1_h4_d1) %in% chrm),]))
d1a <- median(colSums(gex1_h4_d1[which(rownames(gex1_h4_d1) %in% chra),]))
message(paste("Donor 0 chrY:",signif(d0y/d0a * 100,3),"%"))
## Donor 0 chrY: 0 %
message(paste("Donor 0 chrX:",signif(d0x/d0a * 100,3),"%"))
## Donor 0 chrX: 3.33 %
message(paste("Donor 0 chrM:",signif(d0m/d0a * 100,3),"%"))
## Donor 0 chrM: 5.7 %
message(paste("Donor 1 chrY:",signif(d1y/d1a * 100,3),"%"))
## Donor 1 chrY: 0.068 %
message(paste("Donor 1 chrX:",signif(d1x/d1a * 100,3),"%"))
## Donor 1 chrX: 3.52 %
message(paste("Donor 1 chrM:",signif(d1m/d1a * 100,3),"%"))
## Donor 1 chrM: 11.2 %
Therefore donor 0 is female and 1 is male.
hiv <- mean(colSums(gex1_h1_d0[1:2,])) / mean(colSums(gex1_h1_d0[3:nrow(gex1_h1_d0),]))
message(paste("Mock Donor 0 HIV:",signif(hiv,3),"%"))
## Mock Donor 0 HIV: 1.99e-05 %
hiv <- mean(colSums(gex1_h1_d1[1:2,])) / mean(colSums(gex1_h1_d1[3:nrow(gex1_h1_d1),]))
message(paste("Mock Donor 1 HIV:",signif(hiv,3),"%"))
## Mock Donor 1 HIV: 2.69e-05 %
hiv <- mean(colSums(gex1_h2_d0[1:2,])) / mean(colSums(gex1_h2_d0[3:nrow(gex1_h2_d0),]))
message(paste("GFP- Donor 0 HIV:",signif(hiv,3),"%"))
## GFP- Donor 0 HIV: 0.00249 %
hiv <- mean(colSums(gex1_h2_d1[1:2,])) / mean(colSums(gex1_h2_d1[3:nrow(gex1_h2_d1),]))
message(paste("GFP- Donor 1 HIV:",signif(hiv,3),"%"))
## GFP- Donor 1 HIV: 0.00219 %
hiv <- mean(colSums(gex1_h4_d0[1:2,])) / mean(colSums(gex1_h4_d0[3:nrow(gex1_h4_d0),]))
message(paste("GFP+ Donor 0 HIV:",signif(hiv,3),"%"))
## GFP+ Donor 0 HIV: 9.52e-05 %
hiv <- mean(colSums(gex1_h4_d1[1:2,])) / mean(colSums(gex1_h4_d1[3:nrow(gex1_h4_d1),]))
message(paste("GFP+ Donor 1 HIV:",signif(hiv,3),"%"))
## GFP+ Donor 1 HIV: 0.000104 %
hiv <- mean(colSums(gex2_h1[1:2,])) / mean(colSums(gex2_h1[3:nrow(gex2_h1),]))
message(paste("Mock Donor 2 HIV:",signif(hiv,3),"%"))
## Mock Donor 2 HIV: 1.36e-05 %
hiv <- mean(colSums(gex2_h2[1:2,])) / mean(colSums(gex2_h2[3:nrow(gex2_h2),]))
message(paste("GFP- Donor 2 HIV:",signif(hiv,3),"%"))
## GFP- Donor 2 HIV: 0.00297 %
hiv <- mean(colSums(gex2_h4[1:2,])) / mean(colSums(gex2_h4[3:nrow(gex2_h4),]))
message(paste("GFP+ Donor 2 HIV:",signif(hiv,3),"%"))
## GFP+ Donor 2 HIV: 0.000111 %
hiv <- mean(colSums(P239_Alv_MDM_h1[1:2,])) / mean(colSums(P239_Alv_MDM_h1[3:nrow(P239_Alv_MDM_h1),]))
message(paste("Mock Donor 3 HIV:",signif(hiv,3),"%"))
## Mock Donor 3 HIV: 5.87e-05 %
hiv <- mean(colSums(P239_Alv_MDM_h2[1:2,])) / mean(colSums(P239_Alv_MDM_h2[3:nrow(P239_Alv_MDM_h2),]))
message(paste("GFP+ Donor 3 HIV:",signif(hiv,3),"%"))
## GFP+ Donor 3 HIV: 0.00338 %
hiv <- mean(colSums(P239_Alv_MDM_h3[1:2,])) / mean(colSums(P239_Alv_MDM_h3[3:nrow(P239_Alv_MDM_h3),]))
message(paste("GPF- Donor 3 HIV:",signif(hiv,3),"%"))
## GPF- Donor 3 HIV: 6.72e-05 %
hiv <- mean(colSums(P239_Alv_MDM_h4[1:2,])) / mean(colSums(P239_Alv_MDM_h4[3:nrow(P239_Alv_MDM_h4),]))
message(paste("Mock Donor 4 HIV:",signif(hiv,3),"%"))
## Mock Donor 4 HIV: 3.99e-05 %
hiv <- mean(colSums(P239_Alv_MDM_h5[1:2,])) / mean(colSums(P239_Alv_MDM_h5[3:nrow(P239_Alv_MDM_h5),]))
message(paste("GFP+ Donor 4 HIV:",signif(hiv,3),"%"))
## GFP+ Donor 4 HIV: 0.00501 %
hiv <- mean(colSums(P239_Alv_MDM_h6[1:2,])) / mean(colSums(P239_Alv_MDM_h6[3:nrow(P239_Alv_MDM_h6),]))
message(paste("GFP- Donor 4 HIV:",signif(hiv,3),"%"))
## GFP- Donor 4 HIV: 0.000213 %
hiv <- mean(colSums(M239_N239_Alv_h1[1:2,])) / mean(colSums(M239_N239_Alv_h1[3:nrow(M239_N239_Alv_h1),]))
message(paste("Mock Donor 5 HIV:",signif(hiv,3),"%"))
## Mock Donor 5 HIV: 6.94e-05 %
hiv <- mean(colSums(M239_N239_Alv_h2[1:2,])) / mean(colSums(M239_N239_Alv_h2[3:nrow(M239_N239_Alv_h2),]))
message(paste("Mock Donor 5 HIV:",signif(hiv,3),"%"))
## Mock Donor 5 HIV: 0.00456 %
Yes it looks like GFP minus and positive were swapped
From now on we will rename them: “mock”, “latent”, “active”
mock0 -> mdm_mock1 mock1 -> mdm_mock2 mock2 -> mdm_mock3 mock3 -> mdm_mock4
mock4 -> alv_mock1 mock5 -> alv_mock3 mock6 -> alv_mock4
active0 -> mdm_active1 active1 -> mdm_active2 active2 -> mdm_active3 active3 -> mdm_active4
active4 -> alv_active1 active5 -> alv_active2 active6 -> alv_active3
latent0 -> mdm_latent1 latent1 -> mdm_latent2 latent2 -> mdm_latent3
latent3 -> alv_latent1 latent4 -> alv_latent2 latent5 -> alv_latent3 latent6 -> alv_latent4
bystander0 -> mdm_bystander1 bystander1 -> mdm_bystander2 bystander2 -> mdm_bystander3
bystander3 -> alv_bystander1 bystander4 -> alv_bystander2 bystander5 -> alv_bystander3 bystander6 -> alv_bystander4
mdm_mock1 <- gex1_h1_d0 #MDMP236 MDMmock
mdm_mock2 <- gex1_h1_d1 #MDMV236 MDMmock
mdm_mock3<- gex2_h1 #MDMO236 MDMmock
mdm_mock4 <- P239_Alv_MDM_h1 #MDMP239 MDMmock
alv_mock1 <- P239_Alv_MDM_h4 #AlvP239 AlvMDMmock
alv_mock2 <- M239_N239_Alv_h1 #AlvM239 AlvMDMmock
alv_mock3 <- M239_N239_Alv_h4 #AlvN239 AlvMDMmock
mdm_active1 <- gex1_h2_d0 #MDMP236 MDMactive
mdm_active2 <- gex1_h2_d1 #MDMV236 MDMactive
mdm_active3 <- gex2_h2 #MDMO236 MDMactive
mdm_active4 <- P239_Alv_MDM_h2 #MDMP239 MDMactive
alv_active1 <- P239_Alv_MDM_h5 #AlvP239 AlvMDMactive
alv_active2 <- M239_N239_Alv_h2 #AlvM239 AlvMDMactive
alv_active3 <- M239_N239_Alv_h5 #AlvN239 AlvMDMactive
mdm_latent1 <- gex1_h4_d0[,which(colSums(gex1_h4_d0[1:2,])/colSums(gex1_h4_d0)*1e6>=10)] #MDMP236 latent
mdm_bystander1 <- gex1_h4_d0[,which(colSums(gex1_h4_d0[1:2,])/colSums(gex1_h4_d0)*1e6<10)] #MDMP236 bystander
mdm_latent2 <- gex1_h4_d1[,which(colSums(gex1_h4_d1[1:2,])/colSums(gex1_h4_d1)*1e6>=10)] #MDMV236 latent
mdm_bystander2 <- gex1_h4_d1[,which(colSums(gex1_h4_d1[1:2,])/colSums(gex1_h4_d1)*1e6<10)] #MDMV236 bystander
mdm_latent3 <- gex2_h4[,which(colSums(gex2_h4[1:2,])/colSums(gex2_h4)*1e6>=10)] #MDMO236 latent
mdm_bystander3 <- gex2_h4[,which(colSums(gex2_h4[1:2,])/colSums(gex2_h4)*1e6<10)] #MDMO236 bystander
alv_latent1 <- P239_Alv_MDM_h3[,which(colSums(P239_Alv_MDM_h3[1:2,])/colSums(P239_Alv_MDM_h3)*1e6>=10)] #MDMP239 latent
alv_bystander1 <- P239_Alv_MDM_h3[,which(colSums(P239_Alv_MDM_h3[1:2,])/colSums(P239_Alv_MDM_h3)*1e6<10)] #MDMP239 bystander
alv_latent2 <- P239_Alv_MDM_h6[,which(colSums(P239_Alv_MDM_h6[1:2,])/colSums(P239_Alv_MDM_h6)*1e6>=10)] #AlvP239 latent
alv_bystander2 <- P239_Alv_MDM_h6[,which(colSums(P239_Alv_MDM_h6[1:2,])/colSums(P239_Alv_MDM_h6)*1e6<10)] #AlvP239 bystander
alv_latent3 <- M239_N239_Alv_h3[,which(colSums(M239_N239_Alv_h3[1:2,])/colSums(M239_N239_Alv_h3)*1e6>=10)] #AlvM239 latent
alv_bystander3 <- M239_N239_Alv_h3[,which(colSums(M239_N239_Alv_h3[1:2,])/colSums(M239_N239_Alv_h3)*1e6<10)] #AlvM239 bystander
alv_latent4 <- M239_N239_Alv_h6[,which(colSums(M239_N239_Alv_h6[1:2,])/colSums(M239_N239_Alv_h6)*1e6>=10)] #AlvN239 latent
alv_bystander4 <- M239_N239_Alv_h6[,which(colSums(M239_N239_Alv_h6[1:2,])/colSums(M239_N239_Alv_h6)*1e6<10)] #AlvN239 bystander
react6 <- N239Alv_react_counts #AlvN239
colnames(mdm_mock1) <- paste("mdm_mock1|",colnames(mdm_mock1),sep="")
colnames(mdm_mock2) <- paste("mdm_mock2|",colnames(mdm_mock2),sep="")
colnames(mdm_mock3) <- paste("mdm_mock3|",colnames(mdm_mock3),sep="")
colnames(mdm_mock4) <- paste("mdm_mock4|",colnames(mdm_mock4),sep="")
colnames(alv_mock1) <- paste("alv_mock1|",colnames(alv_mock1),sep="")
colnames(alv_mock2) <- paste("alv_mock2|",colnames(alv_mock2),sep="")
colnames(alv_mock3) <- paste("alv_mock3|",colnames(alv_mock3),sep="")
colnames(mdm_active1) <- paste("mdm_active1|",colnames(mdm_active1),sep="")
colnames(mdm_active2) <- paste("mdm_active2|",colnames(mdm_active2),sep="")
colnames(mdm_active3) <- paste("mdm_active3|",colnames(mdm_active3),sep="")
colnames(mdm_active4) <- paste("mdm_active4|",colnames(mdm_active4),sep="")
colnames(alv_active1) <- paste("alv_active1|",colnames(alv_active1),sep="")
colnames(alv_active2) <- paste("alv_active2|",colnames(alv_active2),sep="")
colnames(alv_active3) <- paste("alv_active3|",colnames(alv_active3),sep="")
colnames(mdm_bystander1) <- paste("mdm_bystander1|",colnames(mdm_bystander1),sep="")
colnames(mdm_bystander2) <- paste("mdm_bystander2|",colnames(mdm_bystander2),sep="")
colnames(mdm_bystander3) <- paste("mdm_bystander3|",colnames(mdm_bystander3),sep="")
colnames(alv_bystander1) <- paste("alv_bystander1|",colnames(alv_bystander1),sep="")
colnames(alv_bystander2) <- paste("alv_bystander2|",colnames(alv_bystander2),sep="")
colnames(alv_bystander3) <- paste("alv_bystander3|",colnames(alv_bystander3),sep="")
colnames(alv_bystander4) <- paste("alv_bystander4|",colnames(alv_bystander4),sep="")
colnames(mdm_latent1) <- paste("mdm_latent1|",colnames(mdm_latent1),sep="")
colnames(mdm_latent2) <- paste("mdm_latent2|",colnames(mdm_latent2),sep="")
colnames(mdm_latent3) <- paste("mdm_latent3|",colnames(mdm_latent3),sep="")
colnames(alv_latent1) <- paste("alv_latent1|",colnames(alv_latent1),sep="")
colnames(alv_latent2) <- paste("alv_latent2|",colnames(alv_latent2),sep="")
colnames(alv_latent3) <- paste("alv_latent3|",colnames(alv_latent3),sep="")
colnames(alv_latent4) <- paste("alv_latent4|",colnames(alv_latent4),sep="")
colnames(react6) <- paste("react6|",colnames(react6),sep="")
mylist <- list(mdm_mock1,mdm_mock2,mdm_mock3,mdm_mock4,alv_mock1,alv_mock2,alv_mock3,
mdm_active1,mdm_active2,mdm_active3,mdm_active4,alv_active1,alv_active2,alv_active3,
mdm_latent1,mdm_latent2,mdm_latent3,alv_latent1,alv_latent2,alv_latent3,alv_latent4,
mdm_bystander1,mdm_bystander2,mdm_bystander3,alv_bystander1,alv_bystander2,alv_bystander3,alv_bystander4,
react6)
names(mylist) <- c("mdm_mock1","mdm_mock2","mdm_mock3","mdm_mock4","alv_mock1","alv_mock2","alv_mock3",
"mdm_active1","mdm_active2","mdm_active3","mdm_active4","alv_active1","alv_active2","alv_active3",
"mdm_latent1","mdm_latent2","mdm_latent3","alv_latent1","alv_latent2","mdm_latent3","alv_latent4",
"mdm_bystander1","mdm_bystander2","mdm_bystander3","alv_bystander1","alv_bystander2","alv_bystander3","alv_bystander4",
"react6")
comb <- do.call(cbind,mylist)
rev(sapply(mylist,ncol))
## react6 alv_bystander4 alv_bystander3 alv_bystander2 alv_bystander1
## 3098 581 504 1122 1059
## mdm_bystander3 mdm_bystander2 mdm_bystander1 alv_latent4 mdm_latent3
## 540 1334 1494 1875 1831
## alv_latent2 alv_latent1 mdm_latent3 mdm_latent2 mdm_latent1
## 1936 752 348 1251 1172
## alv_active3 alv_active2 alv_active1 mdm_active4 mdm_active3
## 713 621 945 436 421
## mdm_active2 mdm_active1 alv_mock3 alv_mock2 alv_mock1
## 479 880 1354 804 974
## mdm_mock4 mdm_mock3 mdm_mock2 mdm_mock1
## 815 177 659 796
barplot(rev(sapply(mylist,ncol)),horiz=TRUE,las=1,xlab="no. cells")
rev(sapply(mylist,function(mx) {sum(colSums(mx))}))
## react6 alv_bystander4 alv_bystander3 alv_bystander2 alv_bystander1
## 101434715 13581660 14197319 22576981 20899809
## mdm_bystander3 mdm_bystander2 mdm_bystander1 alv_latent4 mdm_latent3
## 16347008 37007345 40706682 52551196 57038454
## alv_latent2 alv_latent1 mdm_latent3 mdm_latent2 mdm_latent1
## 43828374 16683014 15146215 39470469 35367506
## alv_active3 alv_active2 alv_active1 mdm_active4 mdm_active3
## 24294045 28767640 30368943 13952159 26354854
## mdm_active2 mdm_active1 alv_mock3 alv_mock2 alv_mock1
## 22949672 31058301 35198714 25658289 20524272
## mdm_mock4 mdm_mock3 mdm_mock2 mdm_mock1
## 20766932 7574030 21671028 29204285
par(mar = c(5.1, 10.1, 4.1, 2.1))
barplot(rev(sapply(mylist,function(mx) {sum(colSums(mx))})),horiz=TRUE,las=1,xlab="no. counts")
res <- rev(sapply(mylist,function(mx) {
mean(colSums(mx[1:2,])) / mean(colSums(mx[3:nrow(mx),])) *100
}))
res
## react6 alv_bystander4 alv_bystander3 alv_bystander2 alv_bystander1
## 8.335119e-03 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## mdm_bystander3 mdm_bystander2 mdm_bystander1 alv_latent4 mdm_latent3
## 3.670398e-05 0.000000e+00 0.000000e+00 5.531000e-02 2.802404e-02
## alv_latent2 alv_latent1 mdm_latent3 mdm_latent2 mdm_latent1
## 3.228175e-02 1.513145e-02 2.300113e-02 2.010515e-02 2.047779e-02
## alv_active3 alv_active2 alv_active1 mdm_active4 mdm_active3
## 4.908506e-01 4.561895e-01 5.008139e-01 3.379896e-01 2.969919e-01
## mdm_active2 mdm_active1 alv_mock3 alv_mock2 alv_mock1
## 2.189171e-01 2.493880e-01 2.206249e-02 6.941709e-03 3.990557e-03
## mdm_mock4 mdm_mock3 mdm_mock2 mdm_mock1
## 5.870254e-03 1.359929e-03 2.694915e-03 1.986049e-03
barplot(rev(res),horiz=TRUE,las=1,xlab="% HIV reads")
par(mar = c(5.1, 4.1, 4.1, 2.1))
chrmx <- sapply(mylist,function(mx) {
rsum <- as.data.frame(rowSums(mx))
m <- merge(gene_chr,rsum,by.x="V2",by.y=0)
m$V2=NULL
ag <- aggregate(. ~ V1 , m , sum)
out <- ag[,2]
names(out) <- ag[,1]
return(out)
})
chryx <- chrmx["chrY",] / chrmx["chrX",] * 100
chryx
## mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 alv_mock1
## 0.031172346 2.182391164 0.002422481 1.295832427 1.540775049
## alv_mock2 alv_mock3 mdm_active1 mdm_active2 mdm_active3
## 1.611251947 0.073153613 0.041594896 2.102213034 0.002521683
## mdm_active4 alv_active1 alv_active2 alv_active3 mdm_latent1
## 1.440742401 1.576667523 1.677336931 0.040418557 0.048474051
## mdm_latent2 mdm_latent3 alv_latent1 alv_latent2 mdm_latent3
## 2.338805483 0.002960918 1.359138677 1.627231785 1.618820278
## alv_latent4 mdm_bystander1 mdm_bystander2 mdm_bystander3 alv_bystander1
## 0.106820775 0.032885588 2.341440052 0.002211842 1.311121761
## alv_bystander2 alv_bystander3 alv_bystander4 react6
## 1.586910279 1.642328742 0.057817427 0.825632571
par(mar = c(5.1, 10.1, 4.1, 2.1))
barplot(rev(chryx),horiz=TRUE,las=1,xlab="Ratio ChrY to ChrX x100")
These checks look consistent, except for the react6 which appears as more male but that is not consistent with mock6, active6 and latent6.
chrmt <- chrmx["chrM",] / colSums(chrmx[1:22,]) *100
chrmt
## mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 alv_mock1
## 5.640547 12.170054 13.618687 4.525319 8.676031
## alv_mock2 alv_mock3 mdm_active1 mdm_active2 mdm_active3
## 11.244711 13.267361 6.309643 7.977511 9.660507
## mdm_active4 alv_active1 alv_active2 alv_active3 mdm_latent1
## 3.423388 5.698696 5.857497 8.241454 6.837145
## mdm_latent2 mdm_latent3 alv_latent1 alv_latent2 mdm_latent3
## 12.548767 15.863535 5.450106 7.388422 9.886849
## alv_latent4 mdm_bystander1 mdm_bystander2 mdm_bystander3 alv_bystander1
## 10.559073 6.134851 11.734885 12.933701 5.862922
## alv_bystander2 alv_bystander3 alv_bystander4 react6
## 7.932224 10.203284 11.634663 5.610831
barplot(rev(chrmt),horiz=TRUE,las=1,xlab="% chrM reads")
Let’s take a look at mtDNA transcripts at the level of individual cells.
mt <- lapply(mylist, function(x) {
xmt <- x[grep("MT-",rownames(x)),]
sapply(1:ncol(x), function(j) { sum(xmt[,j])/sum(x[,j]) } )
} )
vioplot(mt)
boxplot(mt,horizontal=TRUE,cex=0,col="white",las=1,
main="mtDNA gene proportion")
barplot(sapply(mt, function(x) { length(which(x>0.15))/length(x) } ),horiz=TRUE,las=1)
barplot(sapply(mt, function(x) { length(which(x>0.15))/length(x) } ),horiz=TRUE,las=1, main="Proportion of cells with >15% mito reads")
fmt <- lapply(mylist, function(x) {
xmt <- x[grep("MT-",rownames(x)),]
v <- sapply(1:ncol(x), function(j) { sum(xmt[,j])/sum(x[,j]) } )
f <- x[,which(v<0.15)]
return(f)
} )
lapply(fmt,dim)
## $mdm_mock1
## [1] 36603 690
##
## $mdm_mock2
## [1] 36603 516
##
## $mdm_mock3
## [1] 36603 124
##
## $mdm_mock4
## [1] 36603 772
##
## $alv_mock1
## [1] 36603 870
##
## $alv_mock2
## [1] 36603 632
##
## $alv_mock3
## [1] 36603 971
##
## $mdm_active1
## [1] 36603 593
##
## $mdm_active2
## [1] 36603 410
##
## $mdm_active3
## [1] 36603 302
##
## $mdm_active4
## [1] 36603 401
##
## $alv_active1
## [1] 36603 767
##
## $alv_active2
## [1] 36603 543
##
## $alv_active3
## [1] 36603 526
##
## $mdm_latent1
## [1] 36603 831
##
## $mdm_latent2
## [1] 36603 968
##
## $mdm_latent3
## [1] 36603 203
##
## $alv_latent1
## [1] 36603 640
##
## $alv_latent2
## [1] 36603 1749
##
## $mdm_latent3
## [1] 36603 1587
##
## $alv_latent4
## [1] 36603 1565
##
## $mdm_bystander1
## [1] 36603 1167
##
## $mdm_bystander2
## [1] 36603 1075
##
## $mdm_bystander3
## [1] 36603 355
##
## $alv_bystander1
## [1] 36603 873
##
## $alv_bystander2
## [1] 36603 986
##
## $alv_bystander3
## [1] 36603 417
##
## $alv_bystander4
## [1] 36603 435
##
## $react6
## [1] 36603 2820
saveRDS(fmt,"macrophage_counts.rds")
For reproducibility.
save.image("macrophage_dataprep.Rdata")
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] vioplot_0.5.0 zoo_1.8-12
## [3] sm_2.2-6.0 mitch_1.19.3
## [5] DESeq2_1.44.0 muscat_1.18.0
## [7] beeswarm_0.4.0 stringi_1.8.4
## [9] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [11] Biobase_2.64.0 GenomicRanges_1.56.2
## [13] GenomeInfoDb_1.40.1 IRanges_2.38.1
## [15] S4Vectors_0.42.1 BiocGenerics_0.50.0
## [17] MatrixGenerics_1.16.0 matrixStats_1.4.1
## [19] hdf5r_1.3.11 Seurat_5.1.0
## [21] SeuratObject_5.0.2 sp_2.1-4
## [23] plyr_1.8.9
##
## loaded via a namespace (and not attached):
## [1] spatstat.sparse_3.1-0 bitops_1.0-9
## [3] httr_1.4.7 RColorBrewer_1.1-3
## [5] doParallel_1.0.17 numDeriv_2016.8-1.1
## [7] tools_4.4.1 sctransform_0.4.1
## [9] backports_1.5.0 utf8_1.2.4
## [11] R6_2.5.1 lazyeval_0.2.2
## [13] uwot_0.2.2 mgcv_1.9-1
## [15] GetoptLong_1.0.5 GGally_2.2.1
## [17] prettyunits_1.2.0 gridExtra_2.3
## [19] progressr_0.15.1 cli_3.6.3
## [21] spatstat.explore_3.3-3 fastDummies_1.7.4
## [23] sass_0.4.9 mvtnorm_1.3-2
## [25] spatstat.data_3.1-4 blme_1.0-6
## [27] ggridges_0.5.6 pbapply_1.7-2
## [29] systemfonts_1.1.0 R.utils_2.12.3
## [31] svglite_2.1.3 scater_1.32.1
## [33] parallelly_1.39.0 limma_3.60.6
## [35] rstudioapi_0.17.1 generics_0.1.3
## [37] shape_1.4.6.1 gtools_3.9.5
## [39] ica_1.0-3 spatstat.random_3.3-2
## [41] dplyr_1.1.4 Matrix_1.7-1
## [43] ggbeeswarm_0.7.2 fansi_1.0.6
## [45] abind_1.4-8 R.methodsS3_1.8.2
## [47] lifecycle_1.0.4 yaml_2.3.10
## [49] edgeR_4.2.2 gplots_3.2.0
## [51] SparseArray_1.4.8 Rtsne_0.17
## [53] grid_4.4.1 promises_1.3.1
## [55] crayon_1.5.3 miniUI_0.1.1.1
## [57] lattice_0.22-6 echarts4r_0.4.5
## [59] beachmat_2.20.0 cowplot_1.1.3
## [61] knitr_1.49 pillar_1.9.0
## [63] ComplexHeatmap_2.20.0 rjson_0.2.23
## [65] boot_1.3-31 corpcor_1.6.10
## [67] future.apply_1.11.3 codetools_0.2-20
## [69] leiden_0.4.3.1 glue_1.8.0
## [71] spatstat.univar_3.1-1 data.table_1.16.2
## [73] vctrs_0.6.5 png_0.1-8
## [75] spam_2.11-0 Rdpack_2.6.2
## [77] gtable_0.3.6 cachem_1.1.0
## [79] xfun_0.49 rbibutils_2.3
## [81] S4Arrays_1.4.1 mime_0.12
## [83] reformulas_0.4.0 survival_3.7-0
## [85] iterators_1.0.14 statmod_1.5.0
## [87] fitdistrplus_1.2-1 ROCR_1.0-11
## [89] nlme_3.1-166 pbkrtest_0.5.3
## [91] bit64_4.5.2 progress_1.2.3
## [93] EnvStats_3.0.0 RcppAnnoy_0.0.22
## [95] bslib_0.8.0 TMB_1.9.15
## [97] irlba_2.3.5.1 vipor_0.4.7
## [99] KernSmooth_2.23-24 colorspace_2.1-1
## [101] tidyselect_1.2.1 bit_4.5.0
## [103] compiler_4.4.1 BiocNeighbors_1.22.0
## [105] xml2_1.3.6 DelayedArray_0.30.1
## [107] plotly_4.10.4 scales_1.3.0
## [109] caTools_1.18.3 remaCor_0.0.18
## [111] lmtest_0.9-40 stringr_1.5.1
## [113] digest_0.6.37 goftest_1.2-3
## [115] spatstat.utils_3.1-1 minqa_1.2.8
## [117] rmarkdown_2.29 variancePartition_1.34.0
## [119] aod_1.3.3 XVector_0.44.0
## [121] RhpcBLASctl_0.23-42 htmltools_0.5.8.1
## [123] pkgconfig_2.0.3 lme4_1.1-35.5
## [125] sparseMatrixStats_1.16.0 fastmap_1.2.0
## [127] rlang_1.1.4 GlobalOptions_0.1.2
## [129] htmlwidgets_1.6.4 UCSC.utils_1.0.0
## [131] shiny_1.9.1 DelayedMatrixStats_1.26.0
## [133] jquerylib_0.1.4 farver_2.1.2
## [135] jsonlite_1.8.9 BiocParallel_1.38.0
## [137] R.oo_1.27.0 BiocSingular_1.20.0
## [139] magrittr_2.0.3 kableExtra_1.4.0
## [141] scuttle_1.14.0 GenomeInfoDbData_1.2.12
## [143] dotCall64_1.2 patchwork_1.3.0
## [145] munsell_0.5.1 Rcpp_1.0.13-1
## [147] viridis_0.6.5 reticulate_1.40.0
## [149] zlibbioc_1.50.0 MASS_7.3-61
## [151] ggstats_0.7.0 listenv_0.9.1
## [153] ggrepel_0.9.6 deldir_2.0-4
## [155] splines_4.4.1 tensor_1.5
## [157] hms_1.1.3 circlize_0.4.16
## [159] locfit_1.5-9.10 igraph_2.1.1
## [161] spatstat.geom_3.3-4 RcppHNSW_0.6.0
## [163] reshape2_1.4.4 ScaledMatrix_1.12.0
## [165] evaluate_1.0.1 nloptr_2.1.1
## [167] foreach_1.5.2 httpuv_1.6.15
## [169] RANN_2.6.2 tidyr_1.3.1
## [171] purrr_1.0.2 polyclip_1.10-7
## [173] future_1.34.0 clue_0.3-66
## [175] scattermore_1.2 ggplot2_3.5.1
## [177] rsvd_1.0.5 broom_1.0.7
## [179] xtable_1.8-4 fANCOVA_0.6-1
## [181] RSpectra_0.16-2 later_1.4.0
## [183] viridisLite_0.4.2 tibble_3.2.1
## [185] lmerTest_3.1-3 glmmTMB_1.1.10
## [187] cluster_2.1.6 globals_0.16.3