Introduction

Ee are interested in comparing DEGs between the following comparison groups:

  • Latently- vs productively-infected cells (groups 3 vs 4).

  • Latently-infected vs bystander cells (2 vs 3).

  • Productively-infected vs mock (4 vs 1)

  • Mock vs bystander (1 vs 2).

As discussed, I think we will do these comparisons separately for both MDM and AlvMDM samples initially. Then, it would be of interest to compare DEGs for MDM vs AlvMDM for each of the four infection groups.

suppressPackageStartupMessages({
  library("kableExtra")
  library("ggplot2")
  library("plyr")
  library("Seurat")
  library("hdf5r")
  library("SingleCellExperiment")
  library("parallel")
  library("stringi")
  library("beeswarm")
  library("muscat")
  library("DESeq2")
  library("mitch")
  library("harmony")
  library("celldex")
  library("SingleR")
  library("limma")
  library("gplots")
})

Load data

Sample Patient Group
mock0 MDMP236 mock
mock1 MDMV236 mock
mock2 MDMO236 mock
mock3 MDMP239 mock
mock4 AlvP239 mock
mock5 AlvM239 mock
mock6 AlvN239 mock
active0 MDMP236 active
active1 MDMV236 active
active2 MDMO236 active
active3 MDMP239 active
active4 AlvP239 active
active5 AlvM239 active
active6 AlvN239 active
latent0 MDMP236 latent
latent1 MDMV236 latent
latent2 MDMO236 latent
latent3 MDMP239 latent
latent4 AlvP239 latent
latent5 AlvM239 latent
latent6 AlvN239 latent
bystander0 MDMP236 bystander
bystander1 MDMV236 bystander
bystander2 MDMO236 bystander
bystander3 MDMP239 bystander
bystander4 AlvP239 bystander
bystander5 AlvM239 bystander
bystander6 AlvN239 bystander
react6 AlvN239 reactivated

exclude react6

ss <- read.table("samplesheet.tsv",header=TRUE,row.names=1)

ss %>% kbl(caption="sample sheet") %>% kable_paper("hover", full_width = F)
sample sheet
Patient Group
mock0 MDMP236 mock
mock1 MDMV236 mock
mock2 MDMO236 mock
mock3 MDMP239 mock
mock4 AlvP239 mock
mock5 AlvM239 mock
mock6 AlvN239 mock
active0 MDMP236 active
active1 MDMV236 active
active2 MDMO236 active
active3 MDMP239 active
active4 AlvP239 active
active5 AlvM239 active
active6 AlvN239 active
latent0 MDMP236 latent
latent1 MDMV236 latent
latent2 MDMO236 latent
latent3 MDMP239 latent
latent4 AlvP239 latent
latent5 AlvM239 latent
latent6 AlvN239 latent
bystander0 MDMP236 bystander
bystander1 MDMV236 bystander
bystander2 MDMO236 bystander
bystander3 MDMP239 bystander
bystander4 AlvP239 bystander
bystander5 AlvM239 bystander
bystander6 AlvN239 bystander
react6 AlvN239 reactivated
mylist <- readRDS("macrophage_counts.rds")

Make single cell experiment object

comb <- do.call(cbind,mylist)
sce <- SingleCellExperiment(list(counts=comb))
sce
## class: SingleCellExperiment 
## dim: 36622 24311 
## metadata(0):
## assays(1): counts
## rownames(36622): HIV_LTRR HIV_LTRU5 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(24311): mdm_mock1|AAACGAATCACATACG mdm_mock1|AAACGCTCATCAGCGC
##   ... react6|TTTGTTGTCTGAACGT react6|TTTGTTGTCTTGGCTC
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Normalise data

cellmetadata <- data.frame(colnames(comb) ,sapply(strsplit(colnames(comb),"\\|"),"[[",1))
colnames(cellmetadata) <- c("cell","sample")
comb <- CreateSeuratObject(comb, project = "mac", assay = "RNA", meta.data = cellmetadata)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
comb <- NormalizeData(comb)
## Normalizing layer: counts
comb <- FindVariableFeatures(comb, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb <- ScaleData(comb)
## Centering and scaling data matrix

PCA and Cluster

comb <- RunPCA(comb, features = VariableFeatures(object = comb))
## PC_ 1 
## Positive:  GAPDH, FABP3, TXN, IFI30, S100A10, PRDX6, TUBA1B, BLVRB, OTOA, S100A9 
##     FAH, C15orf48, CYSTM1, GCHFR, CARD16, GSTP1, HAMP, PSMA7, CSTA, FABP4 
##     ACTG1, CTSB, H2AFZ, LDHB, LINC01827, CFD, TUBA1A, MMP9, LINC02244, SELENOW 
## Negative:  ARL15, DOCK3, FTX, NEAT1, EXOC4, MALAT1, DPYD, LRMDA, RASAL2, JMJD1C 
##     TMEM117, PLXDC2, VPS13B, FHIT, ATG7, TRIO, TPRG1, ZNF438, ZFAND3, MAML2 
##     MITF, COP1, ZEB2, ELMO1, DENND4C, MED13L, TCF12, ERC1, JARID2, FMNL2 
## PC_ 2 
## Positive:  HLA-DRB1, CD74, HLA-DRA, HLA-DPA1, GCLC, HLA-DPB1, LYZ, RCBTB2, KCNMA1, MRC1 
##     SPRED1, C1QA, FGL2, AC020656.1, SLCO2B1, CYP1B1, AIF1, HLA-DRB5, PTGDS, S100A4 
##     VAMP5, LINC02345, CA2, CRIP1, CAMK1, ALOX5AP, RTN1, HLA-DQB1, MX1, TGFBI 
## Negative:  CYSTM1, CD164, PSAT1, FAH, FDX1, GDF15, ATP6V1D, BCAT1, SAT1, CCPG1 
##     PHGDH, PSMA7, HEBP2, SLAMF9, RETREG1, GARS, HES2, TXN, TCEA1, RHOQ 
##     RILPL2, B4GALT5, CLGN, NUPR1, CSTA, SPTBN1, HSD17B12, SNHG5, STMN1, PTER 
## PC_ 3 
## Positive:  NCAPH, CRABP2, RGCC, CHI3L1, TM4SF19, DUSP2, GAL, CCL22, AC015660.2, ACAT2 
##     LINC01010, TMEM114, MGST1, RGS20, TRIM54, LINC02244, MREG, NUMB, TCTEX1D2, GPC4 
##     CCND1, POLE4, SYNGR1, SLC20A1, SERTAD2, IL1RN, GCLC, CLU, PLEK, AC092353.2 
## Negative:  MARCKS, CD14, BTG1, MS4A6A, TGFBI, CTSC, FPR3, HLA-DQA1, AIF1, MPEG1 
##     MEF2C, CD163, IFI30, TIMP1, HLA-DPB1, ALDH2, SELENOP, NUPR1, NAMPT, HLA-DQB1 
##     HIF1A, C1QC, MS4A7, FUCA1, EPB41L3, HLA-DQA2, RNASE1, ARL4C, ZNF331, TCF4 
## PC_ 4 
## Positive:  ACTG1, TPM4, CTSB, CCL3, TUBA1B, CSF1, DHCR24, CYTOR, LGMN, INSIG1 
##     GAPDH, TUBB, CD36, HAMP, C1QA, CCL7, AIF1, MGLL, LIMA1, TYMS 
##     C1QC, HSP90B1, CCL2, C1QB, TNFSF13, PCLAF, C15orf48, CLSPN, CAMK1, TK1 
## Negative:  PTGDS, LINC02244, CLU, CSTA, CCPG1, MGST1, SYNGR1, EPHB1, LINC01010, ALDH2 
##     LY86-AS1, AC015660.2, GAS5, NCF1, BX664727.3, S100P, AP000331.1, TMEM91, SNHG5, CLEC12A 
##     APOD, PDE4D, C1QTNF4, VAMP5, DIXDC1, LYZ, AC073359.2, ARHGAP15, RCBTB2, CFD 
## PC_ 5 
## Positive:  TYMS, PCLAF, TK1, MKI67, MYBL2, RRM2, CENPM, BIRC5, CEP55, CLSPN 
##     CDK1, DIAPH3, SHCBP1, NUSAP1, CENPF, CENPK, PRC1, TOP2A, NCAPG, ESCO2 
##     KIF11, ANLN, CCNA2, TPX2, ASPM, FAM111B, MAD2L1, RAD51AP1, GTSE1, HMMR 
## Negative:  HIV-BaLEnv, HIV-LTRU5, HIV-Polprot, HIV-Gagp17, HIV-Nef, HIV-TatEx1, HIV-Polp15p31, HIV-LTRR, HIV-Vif, HIV-Gagp1Pol 
##     HIV-TatEx2Rev, HIV-Gagp2p7, HIV-EnvStart, HIV-Vpu, HIV-Vpr, HIV-EGFP, CTSB, MMP19, IL6R-AS1, CSF1 
##     IL1RN, CCL3, MGLL, INSIG1, AL157912.1, SDS, TCTEX1D2, TNFRSF9, LGMN, PHLDA1
comb <- RunHarmony(comb,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
DimHeatmap(comb, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb)

comb <- JackStraw(comb, num.replicate = 100)
comb <- FindNeighbors(comb, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb <- FindClusters(comb, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 24311
## Number of edges: 745637
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8948
## Number of communities: 14
## Elapsed time: 2 seconds

UMAP

comb <- RunUMAP(comb, dims = 1:10)
## 14:47:31 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:47:31 Read 24311 rows and found 10 numeric columns
## 14:47:31 Using Annoy for neighbor search, n_neighbors = 30
## 14:47:31 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:47:33 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc331addb2
## 14:47:33 Searching Annoy index using 1 thread, search_k = 3000
## 14:47:39 Annoy recall = 100%
## 14:47:40 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:47:42 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:47:42 Commencing optimization for 200 epochs, with 972676 positive edges
## 14:47:49 Optimization finished
DimPlot(comb, reduction = "umap")

Assign names to clusters

ADGRE1, CCR2, CD169, CX3CR1, CD206, CD163, LYVE1, CD9, TREM2

HLA-DP, HLA-DM, HLA-DOA, HLA-DOB, HLA-DQ, and HLA-DR.

message("macrophage markers")
## macrophage markers
FeaturePlot(comb, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

That’s pretty useless. Let’s use celldex pkg to annotate cells and get the macs.

ref <- celldex::MonacoImmuneData()

DefaultAssay(comb) <- "RNA"
comb2 <- as.SingleCellExperiment(comb)

lc <- logcounts(comb2)

pred_imm_broad <- SingleR(test=comb2, ref=ref,
                          labels=ref$label.main)

head(pred_imm_broad)
## DataFrame with 6 rows and 4 columns
##                                                    scores      labels
##                                                  <matrix> <character>
## mdm_mock1|AAACGAATCACATACG 0.306472:0.325537:0.166567:...   Monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.311527:0.281167:0.189514:...   Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.316271:0.276472:0.170736:...   Monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.290547:0.291036:0.154976:...   Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.290907:0.279384:0.181857:...   Monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.242404:0.241021:0.117564:...   Monocytes
##                            delta.next pruned.labels
##                             <numeric>   <character>
## mdm_mock1|AAACGAATCACATACG  0.1823977     Monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.0338547     Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.1301308     Monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1794308     Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.0952702     Monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1508974     Monocytes
table(pred_imm_broad$pruned.labels)
## 
##       Basophils Dendritic cells       Monocytes 
##               1              86           23423
cellmetadata$label <- pred_imm_broad$pruned.labels

pred_imm_fine <- SingleR(test=comb2, ref=ref,
                          labels=ref$label.fine)
head(pred_imm_fine)
## DataFrame with 6 rows and 4 columns
##                                                    scores              labels
##                                                  <matrix>         <character>
## mdm_mock1|AAACGAATCACATACG 0.180057:0.485292:0.202974:... Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.195973:0.430960:0.226764:... Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.170594:0.441313:0.186890:... Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.156243:0.415082:0.167816:... Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.185006:0.431679:0.205883:... Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.125917:0.383407:0.146835:... Classical monocytes
##                            delta.next       pruned.labels
##                             <numeric>         <character>
## mdm_mock1|AAACGAATCACATACG  0.0675290 Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.1150706 Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.0651352 Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1076301 Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.1521533 Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1282183 Classical monocytes
table(pred_imm_fine$pruned.labels)
## 
##          Classical monocytes       Intermediate monocytes 
##                        20826                         2648 
##      Low-density neutrophils      Myeloid dendritic cells 
##                            1                           90 
##      Non classical monocytes Plasmacytoid dendritic cells 
##                           11                            6
cellmetadata$finelabel <- pred_imm_fine$pruned.labels

col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")

annot_df <- data.frame(
  barcodes = rownames(pred_imm_broad),
  monaco_broad_annotation = pred_imm_broad$labels,
  monaco_broad_pruned_labels = pred_imm_broad$pruned.labels,
  monaco_fine_annotation = pred_imm_fine$labels,
  monaco_fine_pruned_labels = pred_imm_fine$pruned.labels
)

meta_inf <- comb@meta.data
meta_inf$cell_barcode <- colnames(comb)

meta_inf <- meta_inf %>% dplyr::left_join(y = annot_df,
                                          by = c("cell_barcode" = "barcodes"))
rownames(meta_inf) <- colnames(lc)

meta_inf$line <- sapply(strsplit(meta_inf$sample,"_"),"[[",1)


comb@meta.data <- meta_inf

DimPlot(comb, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

Make MDM object

mdmlist <- mylist[grep("mdm",names(mylist))]
comb1 <- do.call(cbind,mdmlist)
sce1 <- SingleCellExperiment(list(counts=comb1))
sce1
## class: SingleCellExperiment 
## dim: 36622 10269 
## metadata(0):
## assays(1): counts
## rownames(36622): HIV_LTRR HIV_LTRU5 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(10269): mdm_mock1|AAACGAATCACATACG mdm_mock1|AAACGCTCATCAGCGC
##   ... mdm_bystander4|TTTGTTGAGAACGCGT mdm_bystander4|TTTGTTGCAAATGCGG
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
cellmetadata1 <- data.frame(colnames(comb1) ,sapply(strsplit(colnames(comb1),"\\|"),"[[",1))
colnames(cellmetadata1) <- c("cell","sample")
comb1 <- CreateSeuratObject(comb1, project = "mac", assay = "RNA", meta.data = cellmetadata1)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
comb1 <- NormalizeData(comb1)
## Normalizing layer: counts
comb1 <- FindVariableFeatures(comb1, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb1 <- ScaleData(comb1)
## Centering and scaling data matrix
comb1 <- RunPCA(comb1, features = VariableFeatures(object = comb1))
## PC_ 1 
## Positive:  S100A10, TXN, COX5B, PRDX6, FABP3, C15orf48, BCL2A1, CALM3, PSME2, TUBA1B 
##     CYSTM1, SPP1, CHI3L1, TUBA1A, CRABP2, ACTB, ACTG1, MGST1, ACAT2, FBP1 
##     IFI30, FABP4, GAL, HAMP, RGCC, MMP9, AKR7A2, LILRA1, CTSL, LDHA 
## Negative:  ARL15, FTX, EXOC4, NEAT1, DPYD, FHIT, JMJD1C, RAD51B, VPS13B, ZFAND3 
##     MALAT1, PLXDC2, TRIO, LRMDA, ZEB2, DOCK3, COP1, MBD5, TCF12, ATXN1 
##     RASAL2, MAML2, ATG7, ZSWIM6, FNDC3B, DOCK4, ELMO1, ZNF438, SPIDR, ARHGAP15 
## PC_ 2 
## Positive:  TM4SF19, ANO5, GPC4, CYSTM1, FNIP2, TXNRD1, BCL2A1, SPP1, SNX10, PSD3 
##     RETREG1, RGS20, TXN, TCTEX1D2, SLC28A3, MGST1, EPB41L1, FABP3, RGCC, CALM3 
##     NIBAN1, TGM2, ATP6V0D2, ACAT2, CCL22, CCDC26, LINC01010, CHI3L1, MREG, CRABP2 
## Negative:  HLA-DPB1, HLA-DRA, CD74, HLA-DPA1, TGFBI, MS4A6A, AIF1, HLA-DQB1, C1QA, HLA-DQA1 
##     HLA-DRB1, CEBPD, FPR3, C1QC, MS4A7, CD163, CD14, MPEG1, LYZ, TIMP1 
##     ST8SIA4, LILRB2, FOS, EPB41L3, TCF4, MAFB, HLA-DRB5, RNASE1, SELENOP, FCN1 
## PC_ 3 
## Positive:  CCPG1, NUPR1, HES2, PSAT1, S100P, CLGN, CARD16, TCEA1, PHGDH, SUPV3L1 
##     BTG1, NIBAN1, G0S2, BEX2, NMB, PDE4D, PLEKHA5, RAB6B, STMN1, XIST 
##     ME1, CLEC4A, CLEC4E, RETREG1, IFI6, CYSTM1, GDF15, CXCR4, DUSP1, SEL1L3 
## Negative:  ACTB, CALR, SLC35F1, TIMP3, TUBA1B, FBP1, ACTG1, LINC01091, HSP90B1, GSN 
##     MGLL, IL1RN, GLIPR1, INSIG1, LPL, GCLC, PLEK, PDIA4, MADD, RGCC 
##     LDHA, MANF, ALCAM, HLA-DRB1, IGSF6, TMEM176B, DHCR24, CSF1, TUBA1A, CYP1B1 
## PC_ 4 
## Positive:  PTGDS, NCAPH, CLU, BX664727.3, LINC02244, SYNGR1, COX5B, RCBTB2, CRABP2, AL136317.2 
##     MT-ATP6, SSBP3, RARRES1, ADRA2B, LINC01010, AC015660.2, S100A4, CRIP1, MT-ND2, LY86-AS1 
##     RNASE6, HLA-C, S100A8, VAMP5, MT-CO3, MT-CYB, CCL22, CPE, CSRP2, TMEM176B 
## Negative:  HIV-Gagp17, HIV-BaLEnv, HIV-Polprot, HIV-Polp15p31, HIV-LTRU5, HIV-Vif, HIV-Nef, HIV-TatEx1, HIV-LTRR, HIV-Gagp1Pol 
##     HIV-Gagp2p7, HIV-TatEx2Rev, MARCKS, CCL3, HIV-Vpu, HIV-EGFP, TPM4, UGCG, SNCA, HIV-EnvStart 
##     HIV-Vpr, CD36, LGMN, G0S2, HES4, B4GALT5, CLEC4A, BCAT1, TNFRSF9, SDS 
## PC_ 5 
## Positive:  TYMS, PCLAF, BIRC5, MKI67, CEP55, CENPF, CENPM, TK1, CDKN3, PRC1 
##     CDK1, DIAPH3, MYBL2, SHCBP1, NUSAP1, DLGAP5, RRM2, CENPK, HMMR, TPX2 
##     ASPM, NCAPG, CCNA2, MAD2L1, PTTG1, TOP2A, CLSPN, KIF4A, CIT, KIF11 
## Negative:  GCLC, TIMP3, TMEM117, TMEM176B, AC067751.1, CRABP2, NUMB, LINC01091, CHI3L1, LY86-AS1 
##     LINC00278, TNFSF14, RGCC, KCNJ1, IGSF6, SH3RF3, AC015660.2, IL1RN, DUSP2, MADD 
##     KCNA2, DOCK3, FLT1, RPS4Y1, TTTY14, TNS3, GADD45G, NCAPH, AL157886.1, TM4SF19-AS1
comb1 <- RunHarmony(comb1,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony converged after 5 iterations
DimHeatmap(comb1, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb1)

comb1 <- JackStraw(comb1, num.replicate = 100)
comb1 <- FindNeighbors(comb1, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb1 <- FindClusters(comb1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10269
## Number of edges: 322519
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8766
## Number of communities: 12
## Elapsed time: 0 seconds
comb1 <- RunUMAP(comb1, dims = 1:10)
## 14:51:38 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:51:38 Read 10269 rows and found 10 numeric columns
## 14:51:38 Using Annoy for neighbor search, n_neighbors = 30
## 14:51:38 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:51:38 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc1b8d915
## 14:51:38 Searching Annoy index using 1 thread, search_k = 3000
## 14:51:41 Annoy recall = 100%
## 14:51:42 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:51:43 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:51:44 Commencing optimization for 200 epochs, with 405542 positive edges
## 14:51:47 Optimization finished
DimPlot(comb1, reduction = "umap")

message("macrophage markers")
## macrophage markers
FeaturePlot(comb1, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb1, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

ref <- celldex::MonacoImmuneData()
DefaultAssay(comb1) <- "RNA"
comb21 <- as.SingleCellExperiment(comb1)
lc1 <- logcounts(comb21)
pred_imm_broad1 <- SingleR(test=comb21, ref=ref,labels=ref$label.main)
head(pred_imm_broad1)
## DataFrame with 6 rows and 4 columns
##                                                    scores      labels
##                                                  <matrix> <character>
## mdm_mock1|AAACGAATCACATACG 0.306472:0.325537:0.166567:...   Monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.311527:0.281167:0.189514:...   Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.316271:0.276472:0.170736:...   Monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.290547:0.291036:0.154976:...   Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.290907:0.279384:0.181857:...   Monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.242404:0.241021:0.117564:...   Monocytes
##                            delta.next pruned.labels
##                             <numeric>   <character>
## mdm_mock1|AAACGAATCACATACG  0.1823977     Monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.0338547     Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.1301308     Monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1794308     Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.0952702     Monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1508974     Monocytes
table(pred_imm_broad1$pruned.labels)
## 
##       Basophils Dendritic cells       Monocytes 
##               1              71            9629
cellmetadata1$label <- pred_imm_broad1$pruned.labels
pred_imm_fine1 <- SingleR(test=comb21, ref=ref, labels=ref$label.fine)
head(pred_imm_fine1)
## DataFrame with 6 rows and 4 columns
##                                                    scores              labels
##                                                  <matrix>         <character>
## mdm_mock1|AAACGAATCACATACG 0.180057:0.485292:0.202974:... Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.195973:0.430960:0.226764:... Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.170594:0.441313:0.186890:... Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.156243:0.415082:0.167816:... Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.185006:0.431679:0.205883:... Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.125917:0.383407:0.146835:... Classical monocytes
##                            delta.next       pruned.labels
##                             <numeric>         <character>
## mdm_mock1|AAACGAATCACATACG  0.0675290 Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.1150706 Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.0651352 Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1076301 Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.1521533 Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1282183 Classical monocytes
table(pred_imm_fine1$pruned.labels)
## 
##          Classical monocytes       Intermediate monocytes 
##                         8841                          823 
##      Low-density neutrophils      Myeloid dendritic cells 
##                            1                           51 
##      Non classical monocytes Plasmacytoid dendritic cells 
##                            8                            6
cellmetadata1$finelabel <- pred_imm_fine1$pruned.labels
col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")
annot_df1 <- data.frame(
  barcodes = rownames(pred_imm_broad1),
  monaco_broad_annotation = pred_imm_broad1$labels,
  monaco_broad_pruned_labels = pred_imm_broad1$pruned.labels,
  monaco_fine_annotation = pred_imm_fine1$labels,
  monaco_fine_pruned_labels = pred_imm_fine1$pruned.labels)

meta_inf1 <- comb1@meta.data
meta_inf1$cell_barcode <- colnames(comb1)
meta_inf1 <- meta_inf1 %>% dplyr::left_join(y = annot_df1, by = c("cell_barcode" = "barcodes"))
rownames(meta_inf1) <- colnames(lc1)
comb1@meta.data <- meta_inf1
DimPlot(comb1, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb1, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

message("extract mono")
## extract mono
mono <- comb1[,which(meta_inf1$monaco_broad_annotation == "Monocytes")]
mono_metainf1 <- meta_inf1[which(meta_inf1$monaco_broad_annotation == "Monocytes"),]
mono_metainf1 <- mono_metainf1[grep("monocytes",mono_metainf1$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf1))]
mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: AL358779.1, CSTA, RALA, PLAUR, ACP5,
## ALDH1A1, NABP1, PTPRJ, S100A9, ZCCHC7, LCP1, GATAD2B, FDX1, RPS20, EML1, RAB10,
## AL669970.1, RPS6KA2, RASGEF1B, FAM13B, SLC11A1, AC002429.2.
## PC_ 1 
## Positive:  S100A10, TXN, COX5B, PRDX6, C15orf48, FABP3, BCL2A1, PSME2, TUBA1B, CALM3 
##     ACTB, CYSTM1, CHI3L1, ACTG1, TUBA1A, CRABP2, ACAT2, MGST1, IFI30, SPP1 
##     FBP1, RGCC, LDHA, MMP9, CTSL, HAMP, AKR7A2, ANXA1, LILRA1, HLA-C 
## Negative:  ARL15, FTX, EXOC4, NEAT1, DPYD, FHIT, RAD51B, MALAT1, VPS13B, JMJD1C 
##     ZFAND3, MBD5, LRMDA, TRIO, ZEB2, TCF12, DOCK4, COP1, DOCK3, ZSWIM6 
##     SPIDR, ARHGAP15, ELMO1, PLXDC2, MAML2, RERE, SBF2, ATP9B, MED13L, ATG7 
## PC_ 2 
## Positive:  TM4SF19, ANO5, GPC4, CYSTM1, FNIP2, TXNRD1, BCL2A1, SPP1, PSD3, SNX10 
##     RETREG1, RGS20, TCTEX1D2, SLC28A3, TXN, EPB41L1, NIBAN1, MGST1, CALM3, FABP3 
##     RGCC, TGM2, CCL22, ATP6V0D2, CCDC26, LINC01010, AC092353.2, ACAT2, LINC01135, HES2 
## Negative:  HLA-DPB1, HLA-DRA, CD74, HLA-DPA1, TGFBI, AIF1, HLA-DQB1, MS4A6A, C1QA, HLA-DQA1 
##     HLA-DRB1, CEBPD, C1QC, FPR3, MS4A7, CD163, CD14, MPEG1, TIMP1, LYZ 
##     ST8SIA4, FOS, EPB41L3, MAFB, TCF4, HLA-DRB5, SELENOP, FCN1, RNASE1, ARL4C 
## PC_ 3 
## Positive:  CCPG1, NUPR1, HES2, PSAT1, CARD16, CLGN, S100P, TCEA1, BTG1, SUPV3L1 
##     PHGDH, NIBAN1, G0S2, BEX2, NMB, STMN1, IFI6, CLEC4A, CLEC4E, PLEKHA5 
##     RAB6B, DUSP1, GDF15, CYSTM1, ME1, PDE4D, CXCR4, RETREG1, QPCT, XIST 
## Negative:  ACTB, CALR, SLC35F1, TIMP3, LINC01091, TUBA1B, FBP1, ACTG1, IL1RN, HSP90B1 
##     GSN, INSIG1, MGLL, LPL, GLIPR1, GCLC, MADD, PDIA4, ALCAM, PLEK 
##     MANF, RGCC, CSF1, DHCR24, LDHA, GADD45G, TMEM176B, HLA-DRB1, DUSP2, TNS3 
## PC_ 4 
## Positive:  HIV-Gagp17, HIV-BaLEnv, HIV-Polprot, HIV-Polp15p31, HIV-LTRU5, HIV-Vif, HIV-Nef, HIV-TatEx1, HIV-LTRR, HIV-Gagp1Pol 
##     HIV-Gagp2p7, HIV-TatEx2Rev, HIV-Vpu, HIV-EGFP, MARCKS, CCL3, HIV-EnvStart, TPM4, SNCA, UGCG 
##     HIV-Vpr, G0S2, CD36, LGMN, HES4, B4GALT5, TNFRSF9, CLEC4A, BCAT1, SDS 
## Negative:  PTGDS, CLU, NCAPH, BX664727.3, LINC02244, SYNGR1, COX5B, RCBTB2, MT-ATP6, CRABP2 
##     AL136317.2, RARRES1, SSBP3, LINC01010, ADRA2B, AC015660.2, MT-ND2, S100A4, CRIP1, MT-CYB 
##     LY86-AS1, RNASE6, MT-CO3, S100A8, HLA-C, VAMP5, CCL22, CPE, CSRP2, TMEM176B 
## PC_ 5 
## Positive:  TYMS, BIRC5, MKI67, PCLAF, CEP55, CENPF, CENPM, TK1, PRC1, CDKN3 
##     DIAPH3, CDK1, MYBL2, SHCBP1, NUSAP1, DLGAP5, RRM2, CENPK, HMMR, ASPM 
##     TPX2, NCAPG, CCNA2, MAD2L1, TOP2A, CIT, KIF4A, CLSPN, KIF11, PTTG1 
## Negative:  RGCC, TMEM176B, CRABP2, IGSF6, GCLC, TIMP3, IFI30, AC005280.2, GSN, CCND1 
##     NUMB, TNFSF14, PLEK, BCL2A1, NCAPH, KCNJ1, GPAT3, MGLL, AC015660.2, MREG 
##     PTGDS, RPS4Y1, RASSF4, TMEM117, CFD, CHI3L1, HLA-DRB1, DUSP2, ACTB, AC067751.1
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono <- RunUMAP(mono, dims = 1:4)
## 14:52:25 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:52:25 Read 9669 rows and found 4 numeric columns
## 14:52:25 Using Annoy for neighbor search, n_neighbors = 30
## 14:52:25 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:52:25 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc25ded006
## 14:52:25 Searching Annoy index using 1 thread, search_k = 3000
## 14:52:28 Annoy recall = 100%
## 14:52:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:52:31 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:52:31 Commencing optimization for 500 epochs, with 332446 positive edges
## 14:52:37 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Make Alv object

alvlist <- mylist[grep("alv",names(mylist))]

comb1 <- do.call(cbind,alvlist)
sce1 <- SingleCellExperiment(list(counts=comb1))
sce1
## class: SingleCellExperiment 
## dim: 36622 11212 
## metadata(0):
## assays(1): counts
## rownames(36622): HIV_LTRR HIV_LTRU5 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(11212): alv_mock1|AAACCCAGTGCTGCAC alv_mock1|AAAGGATAGCATGAAT
##   ... alv_bystander3|TTTGGTTCAGGTTCCG alv_bystander3|TTTGTTGTCGCGTTTC
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
cellmetadata1 <- data.frame(colnames(comb1) ,sapply(strsplit(colnames(comb1),"\\|"),"[[",1))
colnames(cellmetadata1) <- c("cell","sample")
comb1 <- CreateSeuratObject(comb1, project = "mac", assay = "RNA", meta.data = cellmetadata1)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
comb1 <- NormalizeData(comb1)
## Normalizing layer: counts
comb1 <- FindVariableFeatures(comb1, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb1 <- ScaleData(comb1)
## Centering and scaling data matrix
comb1 <- RunPCA(comb1, features = VariableFeatures(object = comb1))
## PC_ 1 
## Positive:  S100A6, GAPDH, LGALS1, DBI, MIF, LGALS3, PRDX6, PSME2, CSTB, GSTO1 
##     LINC02244, PTGDS, CALM3, CYSTM1, ELOC, TXN, TMEM176B, GSTP1, CLU, MGST1 
##     CRIP1, MMP9, CHI3L1, SYNGR1, FAH, H2AFZ, ACTB, TMEM176A, TUBA1A, LDHA 
## Negative:  DOCK3, ARL15, MALAT1, RASAL2, LRMDA, TMEM117, DPYD, PLXDC2, EXOC4, ASAP1 
##     FTX, ATG7, NEAT1, MITF, TPRG1, JMJD1C, VPS13B, FHIT, ELMO1, UBE2E2 
##     MAML2, ZNF438, ZFAND3, FMNL2, FRMD4B, LPP, COP1, TRIO, ZEB2, DENND4C 
## PC_ 2 
## Positive:  HLA-DPA1, HLA-DRA, CD74, HLA-DPB1, LYZ, AIF1, MRC1, HLA-DRB1, TGFBI, CTSC 
##     C1QA, VAMP5, RCBTB2, SAMSN1, HMOX1, FOS, CLEC7A, SLCO2B1, FCGR2A, C1QC 
##     FGL2, SPRED1, SLC8A1, RBPJ, SELENOP, PDGFC, CLEC4A, ME1, FCGR3A, CD14 
## Negative:  TM4SF19, GAL, CCL22, CYSTM1, ATP6V1D, GM2A, CD164, FDX1, SCD, ACAT2 
##     CSTB, TGM2, CIR1, IARS, TCTEX1D2, RHOF, BCAT1, CYTOR, NCAPH, EPB41L1 
##     DCSTAMP, SLC20A1, GOLGA7B, LGALS1, CSF1, SNHG32, ADCY3, DUSP13, NRIP3, MREG 
## PC_ 3 
## Positive:  PTGDS, TMEM176B, LINC02244, CLU, LINC01800, RGS20, LGALS3, TMEM176A, MGST1, KCNMA1 
##     SERTAD2, NCAPH, CRIP1, AC067751.1, SYNGR1, GPC4, GCLC, C2orf92, NOS1AP, TRIM54 
##     S100A6, LINC01010, FCMR, SLC35F1, LY86-AS1, NCF1, FGL2, ST5, NRCAM, CT69 
## Negative:  CTSZ, SLC11A1, MS4A7, AIF1, MRC1, FCER1G, CTSB, LGMN, ID3, MSR1 
##     FCGR3A, TPM4, CLEC7A, FPR3, C1QA, CTSC, CAMK1, CTSL, HLA-DRB5, CCL3 
##     S100A9, C1QC, HAMP, CSTB, HLA-DQA1, HLA-DQB1, MARCO, MARCKS, SLA, PLAU 
## PC_ 4 
## Positive:  TYMS, PCLAF, CLSPN, TK1, DIAPH3, MYBL2, RRM2, ESCO2, CENPM, MKI67 
##     FAM111B, TCF19, SHCBP1, CDK1, HELLS, CEP55, CENPK, BIRC5, CENPU, ATAD2 
##     DTL, KIF11, NCAPG, NUSAP1, MCM10, TOP2A, PRC1, GINS2, ANLN, TPX2 
## Negative:  GCHFR, XIST, HLA-DRB5, GPX3, SAT1, SLC11A1, MS4A7, MSR1, QPCT, AC020656.1 
##     GPRIN3, MARCO, NMB, PAX8-AS1, FRMD4A, ST6GAL1, AL035446.2, FDX1, SERINC2, CTSZ 
##     S100A9, STX4, FUCA1, RARRES1, SASH1, AC008591.1, LINC01500, CCDC26, GM2A, C22orf34 
## PC_ 5 
## Positive:  AC020656.1, NIPAL2, LINC02244, GCHFR, RARRES1, TDRD3, BX664727.3, FDX1, XIST, AL136317.2 
##     LINC01010, TDRD9, OSBP2, QPCT, GJB2, CFD, LYZ, S100A9, GAPLINC, TMTC1 
##     PKD1L1, PRSS21, SLC6A16, CCDC26, GM2A, HES2, CTSK, PLEKHA5, HLA-DRB5, ANO5 
## Negative:  HIV-Gagp17, HIV-BaLEnv, HIV-LTRU5, HIV-TatEx1, HIV-Polprot, MIF, HIV-Nef, HIV-LTRR, HIV-Polp15p31, HIV-Vif 
##     HIV-Gagp1Pol, IL1RN, PLEK, HIV-EnvStart, HIV-TatEx2Rev, HIV-Vpu, HIV-Gagp2p7, SLC35F1, ACTB, HIV-Vpr 
##     TMEM176A, CYTOR, ACTG1, HIV-EGFP, PSME2, TUBA1A, CTSB, MARCKS, MYL9, PHLDA1
comb1 <- RunHarmony(comb1,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
DimHeatmap(comb1, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb1)

comb1 <- JackStraw(comb1, num.replicate = 100)
comb1 <- FindNeighbors(comb1, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb1 <- FindClusters(comb1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11212
## Number of edges: 344775
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8624
## Number of communities: 10
## Elapsed time: 0 seconds
comb1 <- RunUMAP(comb1, dims = 1:10)
## 14:55:51 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:55:51 Read 11212 rows and found 10 numeric columns
## 14:55:51 Using Annoy for neighbor search, n_neighbors = 30
## 14:55:51 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:55:52 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc1042baed
## 14:55:52 Searching Annoy index using 1 thread, search_k = 3000
## 14:55:55 Annoy recall = 100%
## 14:55:56 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:55:57 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:55:57 Commencing optimization for 200 epochs, with 450140 positive edges
## 14:56:01 Optimization finished
DimPlot(comb1, reduction = "umap")

message("macrophage markers")
## macrophage markers
FeaturePlot(comb1, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb1, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

ref <- celldex::MonacoImmuneData()
DefaultAssay(comb1) <- "RNA"
comb21 <- as.SingleCellExperiment(comb1)
lc1 <- logcounts(comb21)
pred_imm_broad1 <- SingleR(test=comb21, ref=ref,labels=ref$label.main)
head(pred_imm_broad1)
## DataFrame with 6 rows and 4 columns
##                                                    scores      labels
##                                                  <matrix> <character>
## alv_mock1|AAACCCAGTGCTGCAC 0.273560:0.272344:0.161242:...   Monocytes
## alv_mock1|AAAGGATAGCATGAAT 0.318711:0.307377:0.191663:...   Monocytes
## alv_mock1|AAAGGATAGTCAGGGT 0.294485:0.275673:0.182914:...   Monocytes
## alv_mock1|AAAGGATAGTTCCGGC 0.294678:0.294725:0.183945:...   Monocytes
## alv_mock1|AAAGGATTCACCATCC 0.278966:0.268607:0.190627:...   Monocytes
## alv_mock1|AAAGGGCCATGACGTT 0.284776:0.293739:0.171230:...   Monocytes
##                            delta.next pruned.labels
##                             <numeric>   <character>
## alv_mock1|AAACCCAGTGCTGCAC   0.134488     Monocytes
## alv_mock1|AAAGGATAGCATGAAT   0.104645     Monocytes
## alv_mock1|AAAGGATAGTCAGGGT   0.140018     Monocytes
## alv_mock1|AAAGGATAGTTCCGGC   0.128407     Monocytes
## alv_mock1|AAAGGATTCACCATCC   0.147810     Monocytes
## alv_mock1|AAAGGGCCATGACGTT   0.166792     Monocytes
table(pred_imm_broad1$pruned.labels)
## 
## Dendritic cells       Monocytes 
##              12           11014
cellmetadata1$label <- pred_imm_broad1$pruned.labels
pred_imm_fine1 <- SingleR(test=comb21, ref=ref, labels=ref$label.fine)
head(pred_imm_fine1)
## DataFrame with 6 rows and 4 columns
##                                                    scores              labels
##                                                  <matrix>         <character>
## alv_mock1|AAACCCAGTGCTGCAC 0.163017:0.398232:0.176698:... Classical monocytes
## alv_mock1|AAAGGATAGCATGAAT 0.180234:0.435146:0.208731:... Classical monocytes
## alv_mock1|AAAGGATAGTCAGGGT 0.169967:0.389207:0.187751:... Classical monocytes
## alv_mock1|AAAGGATAGTTCCGGC 0.166462:0.422480:0.189466:... Classical monocytes
## alv_mock1|AAAGGATTCACCATCC 0.184520:0.383707:0.203877:... Classical monocytes
## alv_mock1|AAAGGGCCATGACGTT 0.173873:0.439659:0.198357:... Classical monocytes
##                            delta.next       pruned.labels
##                             <numeric>         <character>
## alv_mock1|AAACCCAGTGCTGCAC  0.1433756 Classical monocytes
## alv_mock1|AAAGGATAGCATGAAT  0.1213924 Classical monocytes
## alv_mock1|AAAGGATAGTCAGGGT  0.0502055 Classical monocytes
## alv_mock1|AAAGGATAGTTCCGGC  0.0994518 Classical monocytes
## alv_mock1|AAAGGATTCACCATCC  0.0283404 Classical monocytes
## alv_mock1|AAAGGGCCATGACGTT  0.0687853 Classical monocytes
table(pred_imm_fine1$pruned.labels)
## 
##     Classical monocytes  Intermediate monocytes Myeloid dendritic cells 
##                    9702                    1332                      25 
## Non classical monocytes 
##                       3
cellmetadata1$finelabel <- pred_imm_fine1$pruned.labels
col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")
annot_df1 <- data.frame(
  barcodes = rownames(pred_imm_broad1),
  monaco_broad_annotation = pred_imm_broad1$labels,
  monaco_broad_pruned_labels = pred_imm_broad1$pruned.labels,
  monaco_fine_annotation = pred_imm_fine1$labels,
  monaco_fine_pruned_labels = pred_imm_fine1$pruned.labels)

meta_inf1 <- comb1@meta.data
meta_inf1$cell_barcode <- colnames(comb1)
meta_inf1 <- meta_inf1 %>% dplyr::left_join(y = annot_df1, by = c("cell_barcode" = "barcodes"))
rownames(meta_inf1) <- colnames(lc1)
comb1@meta.data <- meta_inf1
DimPlot(comb1, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb1, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

message("extract mono")
## extract mono
mono <- comb1[,which(meta_inf1$monaco_broad_annotation == "Monocytes")]
mono_metainf1 <- meta_inf1[which(meta_inf1$monaco_broad_annotation == "Monocytes"),]
mono_metainf1 <- mono_metainf1[grep("monocytes",mono_metainf1$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf1))]
mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: AC074099.1, MBOAT4, CYP1B1, AC022035.1,
## MS4A4A, IFI30, PSMA7, ZCCHC7, CPEB2-DT, ZNF609, CEP85L, AC023194.3, GLUL,
## LINC01951, LINC02643, DIAPH3-AS1, F2RL1, AC108860.2, DNAI1, HULC, AL135818.3,
## KIF16B, TESK2, HCAR3, LIMCH1, PGBD5, DOP1B, YJEFN3, LINC02732.
## PC_ 1 
## Positive:  S100A6, GAPDH, LGALS1, DBI, MIF, LGALS3, PSME2, PRDX6, CSTB, GSTO1 
##     LINC02244, PTGDS, CYSTM1, ELOC, CALM3, TXN, GSTP1, TMEM176B, MMP9, CRIP1 
##     MGST1, CLU, CHI3L1, H2AFZ, FAH, TUBA1A, LDHA, TMEM176A, SYNGR1, S100A4 
## Negative:  DOCK3, ARL15, MALAT1, RASAL2, LRMDA, TMEM117, DPYD, PLXDC2, FTX, EXOC4 
##     ASAP1, TPRG1, ATG7, MITF, NEAT1, JMJD1C, VPS13B, FHIT, ELMO1, MAML2 
##     UBE2E2, ZNF438, COP1, FMNL2, LPP, ZFAND3, TRIO, FRMD4B, ZEB2, MED13L 
## PC_ 2 
## Positive:  TM4SF19, CCL22, GAL, CYSTM1, ATP6V1D, GM2A, CD164, FDX1, SCD, ACAT2 
##     CSTB, TGM2, IARS, CIR1, TCTEX1D2, RHOF, BCAT1, CYTOR, NCAPH, EPB41L1 
##     DCSTAMP, SLC20A1, GOLGA7B, CSF1, LGALS1, ADCY3, SNHG32, DUSP13, NRIP3, MREG 
## Negative:  HLA-DPA1, HLA-DRA, CD74, HLA-DPB1, AIF1, LYZ, HLA-DRB1, MRC1, CTSC, TGFBI 
##     VAMP5, C1QA, RCBTB2, SAMSN1, HMOX1, FOS, CLEC7A, SLCO2B1, FCGR2A, C1QC 
##     FGL2, SPRED1, SLC8A1, SELENOP, RBPJ, PDGFC, CLEC4A, ME1, FCGR3A, CD14 
## PC_ 3 
## Positive:  PTGDS, TMEM176B, LINC02244, CLU, LINC01800, LGALS3, TMEM176A, RGS20, MGST1, KCNMA1 
##     CRIP1, NCAPH, SERTAD2, SYNGR1, AC067751.1, GPC4, GCLC, TRIM54, C2orf92, NOS1AP 
##     S100A6, FCMR, SLC35F1, LINC01010, NCF1, LY86-AS1, FGL2, ST5, PLEK, MX1 
## Negative:  CTSZ, SLC11A1, MS4A7, AIF1, MRC1, FCER1G, LGMN, CTSB, MSR1, FCGR3A 
##     ID3, TPM4, CLEC7A, FPR3, CAMK1, C1QA, CTSC, HLA-DRB5, CTSL, CCL3 
##     S100A9, HAMP, C1QC, CSTB, HLA-DQA1, MARCO, HLA-DQB1, FMN1, SLA, MARCKS 
## PC_ 4 
## Positive:  GCHFR, XIST, SAT1, GPX3, HLA-DRB5, QPCT, MS4A7, SLC11A1, AC020656.1, MSR1 
##     GPRIN3, NMB, MARCO, PAX8-AS1, FRMD4A, ST6GAL1, FDX1, AL035446.2, SERINC2, CTSZ 
##     FUCA1, S100A9, STX4, RARRES1, SASH1, AC008591.1, LINC01500, CCDC26, GM2A, CFD 
## Negative:  TYMS, PCLAF, CLSPN, TK1, MYBL2, DIAPH3, RRM2, ESCO2, CENPM, FAM111B 
##     MKI67, TCF19, SHCBP1, HELLS, CDK1, CENPU, CEP55, CENPK, DTL, BIRC5 
##     ATAD2, NCAPG, KIF11, MCM10, GINS2, NUSAP1, TOP2A, PRC1, TPX2, ANLN 
## PC_ 5 
## Positive:  AC020656.1, NIPAL2, LINC02244, GCHFR, RARRES1, TDRD3, BX664727.3, XIST, FDX1, AL136317.2 
##     LINC01010, GJB2, CFD, QPCT, OSBP2, TDRD9, LYZ, S100A9, GAPLINC, TMTC1 
##     PRSS21, CTSK, SLC6A16, PKD1L1, GM2A, HES2, CCDC26, PLEKHA5, HLA-DRB5, ANO5 
## Negative:  HIV-Gagp17, HIV-LTRU5, HIV-BaLEnv, HIV-TatEx1, HIV-Polprot, MIF, HIV-Nef, HIV-LTRR, HIV-Polp15p31, HIV-Vif 
##     HIV-Gagp1Pol, IL1RN, PLEK, HIV-EnvStart, HIV-Vpu, HIV-TatEx2Rev, HIV-Gagp2p7, ACTB, SLC35F1, HIV-Vpr 
##     CYTOR, ACTG1, TMEM176A, HIV-EGFP, PSME2, CTSB, MARCKS, TUBA1A, MYL9, PHLDA1
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono <- RunUMAP(mono, dims = 1:4)
## 14:56:43 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:56:43 Read 11036 rows and found 4 numeric columns
## 14:56:43 Using Annoy for neighbor search, n_neighbors = 30
## 14:56:43 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:56:44 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc4917d956
## 14:56:44 Searching Annoy index using 1 thread, search_k = 3000
## 14:56:47 Annoy recall = 100%
## 14:56:48 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:56:50 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:56:50 Commencing optimization for 200 epochs, with 374004 positive edges
## 14:56:53 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Extract monocytes MDM only

mono <- comb[,which(meta_inf$monaco_broad_annotation == "Monocytes")]
mono_metainf <- meta_inf[which(meta_inf$monaco_broad_annotation == "Monocytes"),]
mono_metainf1 <- mono_metainf[grep("monocytes",mono_metainf$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf))]
mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: ADGRF1, AP000812.1, ACTB, AC005740.5,
## AC138123.1, HIF1A-AS3, PRH1, AL592494.2.
## PC_ 1 
## Positive:  GAPDH, FABP3, TXN, IFI30, S100A10, PRDX6, TUBA1B, BLVRB, OTOA, S100A9 
##     FAH, C15orf48, GCHFR, CYSTM1, CARD16, GSTP1, HAMP, PSMA7, CTSB, CSTA 
##     ACTG1, FABP4, H2AFZ, LDHB, LINC01827, CFD, TUBA1A, MMP9, SELENOW, LINC02244 
## Negative:  ARL15, DOCK3, FTX, NEAT1, EXOC4, MALAT1, DPYD, LRMDA, RASAL2, JMJD1C 
##     TMEM117, PLXDC2, VPS13B, FHIT, TPRG1, TRIO, ATG7, ZNF438, MAML2, ZFAND3 
##     MITF, COP1, ZEB2, ELMO1, MED13L, DENND4C, TCF12, ERC1, JARID2, FMNL2 
## PC_ 2 
## Positive:  HLA-DRB1, CD74, HLA-DRA, HLA-DPA1, GCLC, HLA-DPB1, LYZ, RCBTB2, MRC1, KCNMA1 
##     SPRED1, C1QA, FGL2, AC020656.1, SLCO2B1, CYP1B1, AIF1, HLA-DRB5, PTGDS, S100A4 
##     VAMP5, LINC02345, CA2, CRIP1, CAMK1, ALOX5AP, RTN1, HLA-DQB1, MX1, TGFBI 
## Negative:  CYSTM1, CD164, PSAT1, FAH, FDX1, GDF15, ATP6V1D, BCAT1, SAT1, CCPG1 
##     PHGDH, PSMA7, HEBP2, SLAMF9, RETREG1, GARS, HES2, TCEA1, TXN, RHOQ 
##     RILPL2, B4GALT5, CLGN, NUPR1, CSTA, SPTBN1, HSD17B12, STMN1, SNHG5, PTER 
## PC_ 3 
## Positive:  MARCKS, CD14, BTG1, MS4A6A, TGFBI, CTSC, FPR3, HLA-DQA1, AIF1, MPEG1 
##     MEF2C, CD163, IFI30, TIMP1, HLA-DPB1, ALDH2, SELENOP, NUPR1, NAMPT, HLA-DQB1 
##     HIF1A, C1QC, MS4A7, FUCA1, EPB41L3, HLA-DQA2, RNASE1, ARL4C, ZNF331, TCF4 
## Negative:  NCAPH, CRABP2, RGCC, CHI3L1, TM4SF19, DUSP2, GAL, AC015660.2, CCL22, ACAT2 
##     LINC01010, TMEM114, MGST1, RGS20, TRIM54, LINC02244, MREG, NUMB, TCTEX1D2, GPC4 
##     CCND1, POLE4, SYNGR1, SLC20A1, SERTAD2, IL1RN, GCLC, CLU, PLEK, AC092353.2 
## PC_ 4 
## Positive:  ACTG1, TPM4, CCL3, CTSB, TUBA1B, CSF1, DHCR24, CYTOR, LGMN, INSIG1 
##     GAPDH, TUBB, CD36, HAMP, CCL7, C1QA, AIF1, MGLL, TYMS, LIMA1 
##     C1QC, PCLAF, CCL2, HSP90B1, CLSPN, C1QB, TNFSF13, TK1, C15orf48, CAMK1 
## Negative:  PTGDS, LINC02244, CLU, CSTA, CCPG1, MGST1, SYNGR1, LINC01010, EPHB1, ALDH2 
##     AC015660.2, LY86-AS1, GAS5, NCF1, BX664727.3, S100P, TMEM91, SNHG5, CLEC12A, AP000331.1 
##     APOD, PDE4D, C1QTNF4, VAMP5, LYZ, CFD, RCBTB2, DIXDC1, AC073359.2, ARHGAP15 
## PC_ 5 
## Positive:  TYMS, PCLAF, TK1, MKI67, MYBL2, RRM2, CENPM, BIRC5, CEP55, CLSPN 
##     CDK1, DIAPH3, SHCBP1, NUSAP1, CENPF, CENPK, PRC1, TOP2A, NCAPG, ESCO2 
##     KIF11, ANLN, CCNA2, TPX2, ASPM, FAM111B, MAD2L1, RAD51AP1, GTSE1, HMMR 
## Negative:  HIV-BaLEnv, HIV-LTRU5, HIV-Polprot, HIV-Gagp17, HIV-Nef, HIV-TatEx1, HIV-Polp15p31, HIV-LTRR, HIV-Vif, HIV-Gagp1Pol 
##     HIV-TatEx2Rev, HIV-Gagp2p7, HIV-EnvStart, HIV-Vpu, HIV-Vpr, HIV-EGFP, CTSB, MMP19, IL6R-AS1, CSF1 
##     CCL3, MGLL, IL1RN, INSIG1, AL157912.1, SDS, LGMN, TCTEX1D2, TNFRSF9, PHLDA1
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono <- RunUMAP(mono, dims = 1:4)
## 14:57:35 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:57:35 Read 24224 rows and found 4 numeric columns
## 14:57:35 Using Annoy for neighbor search, n_neighbors = 30
## 14:57:35 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:57:37 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc368e808c
## 14:57:37 Searching Annoy index using 1 thread, search_k = 3000
## 14:57:44 Annoy recall = 100%
## 14:57:45 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:57:47 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:57:47 Commencing optimization for 200 epochs, with 793206 positive edges
## 14:57:53 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Cell counts

Most cells are classified as monocytes.

cc <- table(meta_inf[,c("sample","monaco_broad_annotation")])

cc %>% kbl(caption="cell counts") %>% kable_paper("hover", full_width = F)
cell counts
Basophils Dendritic cells Monocytes
alv_active1 0 0 774
alv_active2 0 3 543
alv_active3 0 1 540
alv_bystander1 0 1 2458
alv_bystander2 0 0 1948
alv_bystander3 0 1 1936
alv_latent1 0 1 301
alv_latent2 0 0 99
alv_latent3 0 1 137
alv_mock1 0 1 883
alv_mock2 0 0 649
alv_mock3 0 3 1031
mdm_active1 0 3 599
mdm_active2 0 0 414
mdm_active3 0 2 305
mdm_active4 0 0 401
mdm_bystander1 0 12 1845
mdm_bystander2 0 8 1957
mdm_bystander3 0 19 490
mdm_bystander4 0 1 1495
mdm_latent1 1 11 160
mdm_latent2 0 8 187
mdm_latent3 0 3 72
mdm_latent4 0 0 23
mdm_mock1 0 1 691
mdm_mock2 0 1 549
mdm_mock3 0 2 135
mdm_mock4 0 0 775
react6 0 3 2827
tcc <- t(cc)

pctcc <- apply(tcc,2,function(x) { x/sum(x)*100} )

pctcc %>% kbl(caption="cell proportions") %>% kable_paper("hover", full_width = F)
cell proportions
alv_active1 alv_active2 alv_active3 alv_bystander1 alv_bystander2 alv_bystander3 alv_latent1 alv_latent2 alv_latent3 alv_mock1 alv_mock2 alv_mock3 mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_bystander4 mdm_latent1 mdm_latent2 mdm_latent3 mdm_latent4 mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 react6
Basophils 0 0.0000000 0.0000000 0.0000000 0 0.0000000 0.0000000 0 0.0000000 0.0000000 0 0.0000000 0.0000000 0 0.0000000 0 0.0000000 0.0000000 0.000000 0.0000000 0.5813953 0.000000 0 0 0.0000000 0.0000000 0.000000 0 0.0000000
Dendritic cells 0 0.5494505 0.1848429 0.0406669 0 0.0516262 0.3311258 0 0.7246377 0.1131222 0 0.2901354 0.4983389 0 0.6514658 0 0.6462036 0.4071247 3.732809 0.0668449 6.3953488 4.102564 4 0 0.1445087 0.1818182 1.459854 0 0.1060071
Monocytes 100 99.4505495 99.8151571 99.9593331 100 99.9483738 99.6688742 100 99.2753623 99.8868778 100 99.7098646 99.5016611 100 99.3485342 100 99.3537964 99.5928753 96.267191 99.9331551 93.0232558 95.897436 96 100 99.8554913 99.8181818 98.540146 100 99.8939929

Focus on MOCK only

Need a UMAP with AlvMDM vs MDM (for only mock cells, and donors/samples combined for each group)

Need to recall metadata from meta_inf, modify then save as , so that the UMAP labeling will work.

focus <- meta_inf[grep("mock",rownames(meta_inf)),]
focus <- focus[which(focus$monaco_broad_annotation=="Monocytes"),]
mono_focus <- mono[,which(colnames(mono) %in% rownames(focus))]

focus$line <- sapply(strsplit(focus$sample,"_"),"[[",1)
mono_focus@meta.data <- focus

# mono_focus
mono_focus <- FindVariableFeatures(mono_focus, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono_focus <- RunPCA(mono_focus, features = VariableFeatures(object = mono_focus))
## Warning: The following 38 features requested have zero variance; running
## reduction without them: MT1E, AC004704.1, CKAP2L, GLIS3, ZNF804A, KIF23, FCN1,
## CPA6, LINC02392, NLGN4Y, AC110995.1, VTI1A, PTER, NIBAN1, CTNND2, VIPR1,
## AL096794.1, HIV-Vpu, MICAL3, KLRD1, ACMSD, GYPC, MMP10, AC093307.1, AL603756.1,
## UPK1A-AS1, KIF16B, STRBP, TULP4, LINC02853, MIR3142HG, AC005041.4, AJUBA, GRM4,
## LARP6, AC104809.2, MX1, FKBP5
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: CH25H, AL009177.1, CARD11, CREM, TMEM71,
## IFI6, PAWR, RNF212, C5AR2, TNFRSF4, SPOCK1, LINC00964, TTK, TENM1, UBE2T,
## SLC9A2, DUSP16, AC089983.1, CRISPLD2, CD28, GLT1D1, IL6, PCBP3, GK, SPOCK3,
## AC078923.1, HSF2BP, DACH1, UBXN10, AC079193.2, AC013799.1, AC019117.2, COL27A1,
## LINC02585, SRGN, AC093766.1, RELN, CNTNAP5, MAP1B, BUB1, C16orf89, CD93, GRID2,
## TACC3, AC005740.5, RFTN2, BCL2A1, RPS4Y1, LINC00885, BFSP2, GPR155, LINC01891,
## AC137770.1, AL354733.2, PTPRB, JAKMIP3, TEAD1, INKA2-AS1, NRG1, ATP1A4,
## AC008443.9, AL031658.1, GGH, COX5B, AC107081.2, AC079313.2, PPP1R16B, ANGPT1,
## CPXM2, AL445584.2, TMEM236, STK32B, CNGB1, ALPK2, LINC01054, POF1B, SHANK2,
## SIK2, CFAP69, GRIK5, PCLO, AC117383.1, SMS, NPTX2, FDXACB1, CENPU, AC007389.1,
## TUBB3, RXFP1, BRCA1, CCNB2, LMNB1-DT, TEKT5, TFRC, GRM7, RFX8, NEGR1, MCAM,
## TLCD4, LINC00511, FBLN5, AP001496.1, LINC02112, LGALS1, EXO1, TRIM2, BARD1,
## LHCGR, PDCD1, EPHB4, HPYR1, AL162411.1, IL17RB, ASAP3, AL592295.3, MAP3K15,
## AC010201.2, WASF3, C4BPA, AC009229.3, ADCY5, AC093725.2, PF4, LINC01187,
## STEAP4, AC103794.1, SAA4, IGHA1, KRT19, TCF4-AS2, AC105094.2, AC011462.2,
## NUDT10, AC010175.1, PDE11A, GPAT3, STS, EPHA6, MLPH, PAX8-AS1, AC012533.1,
## AC233296.1, AC009435.1, AC025263.1, AHR, DOK5, GNLY, LINC02466, HIST1H3F, PKP2,
## LILRB2, STXBP6, AGT, CFI, IGSF23, AL359220.1, COBL, AC068228.3, PPEF1, TUBB4B,
## GINS2, PYGL, AKAP6, AP000812.1, AL158068.2, KDR, NABP1, KIAA1755, AL360015.1,
## AC099552.1, MCM7, AL008730.1, AC008033.3, AC125421.1, KCNH6, AOC1, EDIL3, QKI,
## LINC01857, NDRG2, C11orf45, OPCML, AL358334.1, ZNF827, VNN2, CHD5, AC073091.4,
## MAATS1, AC083836.1, DBI, HSPB1, KCNJ1, TNIK, GM2A, EMP1, OSBPL6, ZWINT,
## PKIA-AS1, PCDH15, KIF6, NUAK1, PARD3, AC073050.1, SPSB1, CHD9, GRHL2, ACKR3,
## CEP112, LINC02805, DUXA, LINC01121, LINC00500, OLAH, SOAT2, AC092718.1, DPF1,
## RNF150, ST3GAL1, AC079163.2, CPQ, GK-AS1, CLEC7A, HIVEP1, AL138694.1, STXBP5L,
## NRIP3, KIF20B, DENND2A, PHEX, TDRD9, CNIH3, SVEP1, AC016074.2, AC090630.1,
## AC108206.1, MRO, NXF3, ZNF385D-AS1, AC083837.1, RBPJ, CD72, FHOD3, ITGA4, KCP,
## LINC01800, TREM1, NR1H3, SRL, JAKMIP2-AS1, LINC00350, FBXO5, ERAP2, TMEM246,
## PAX5, FRMD6-AS1, CALM3, CENPA, MEGF9, PTX3, CTXND1, LINC01572, APLP2, MRC2,
## AC011346.1, CDT1, SGO1, SOS1, SLA, NFIA-AS1, KIF21A, COL25A1, ACTB, IGSF6,
## ELOC, LINC00378, SLC7A14-AS1, TFPI, SPAG5, GNG4, LINC01414, LAMA3, TREM2, NWD1,
## SPTLC3, Z95118.2, AL358232.2, LINC00867, MMS22L, CCDC34, KIAA0825, SLC35F3,
## AC010834.2, PLXDC1, HSP90AA1, KPNA2, AC124852.1, AC079742.1, RCAN1, SCFD2,
## HIVEP3, EML4, ZNF609, SESN2, MYH15, AL355388.1, HTRA4, AC010809.2, ADSSL1,
## LINC00886, FSD1, ZPLD1, AC007001.1, MARCH1, CNGA4, COL8A2, AC135050.3,
## AC068587.4, PCP4L1, TOM1L2, LYPD1, EZH2, LRRK2, NRCAM, AL035446.1, NFKB1,
## ITPR2, FAM49A, HSPA9, MCM3, STPG2, RFX3, ANOS1, PSMA8, CD248, SLC6A7, BASP1,
## FAM110D, LINC02851, BCL2L11, TSGA13, SEMA6D, ATP1B2, AC112493.1, AC013287.1,
## SLC4A8, AC246817.1, FAM110B, GALNT18, SESN3, AC114763.1, SLC44A5, FBXO43,
## IKZF3, LINC01358, ADAMTS10, AC019068.1, KLF12, CLEC10A, CLMN, MT-ND2, ORC6,
## AL353576.1, FAM107B, CACNA2D3, TSEN34, SHROOM4, RFC3, RTKN2, CDCA5, LINC02742,
## RNF39, AC006449.3, SSPO, LUZP2, AC009509.1, SGO2, TMPO, ATP9A, AC005808.1,
## SYT10, DDHD1, ICAM2, ADGRL4, AL157702.2, SFTPD-AS1, ACAN, NLRP7, RAB6D,
## ANKRD22, LCP1, PIWIL2, TMEM72-AS1, ZFP36L1, AHCYL2, BAAT, PDLIM4, AC015908.2,
## AL390067.1, LINC01900, SETBP1, PLBD1, SOCS3, CCDC85A, AC036176.1, DUT,
## AC104435.2, NBAT1, GYS2, FOXM1, CCDC168, LINC01862, AC084740.1, GPX8, MYADM,
## INPP4B, ITGB1, ABCA1, AC008691.1, AL357153.2, NQO1, RASSF4, SCRN1, RHOBTB3,
## ESRRG, MEP1A, PLS3, SH3GL2, MGAT5, AC092535.4, AL031728.1, FOXO6, MSGN1,
## AC012355.1, AC012442.1, AC092958.4, MINDY4B, SPTSSB, AL512308.1, BVES-AS1,
## TRDN-AS1, LHFPL3-AS1, CELF2-AS2, AP000941.1, AL161716.1, AL158196.1, LOXL1,
## NPIPB9, AC007952.6, AC011446.3, NLRP9, PROX1, AC084026.1, GNA14-AS1, SPON1-AS1,
## AC087639.2, TEKT3, STUM, PCED1B-AS1, AC117386.2, HPN, MCTP2, POU2F2, IGF1R,
## TMEM156, LPAR1, BICD1, ARHGEF3, PRH1, CRIM1, GAS1RR, WIPF3, NR6A1, HCAR3,
## LDLRAD4, OSBPL10, PRKG2, HDX, SUCNR1, MAP1A, CLDN23, LINC00519, LINC01999,
## LINC01917, RPS6KA6, TNR, ARHGAP6, CAPG, CLSTN2, LINC00589, TXK, NES, EDA,
## LGALSL, LPP, RCSD1, AL358944.1, KCNA2, XRCC2, SCIN, GNG2, CCNH, ADAM28, SVOP,
## AC092490.1, ARRDC3-AS1, UVRAG-DT, DPEP3, AC024084.1, AP001347.1, AL049828.1,
## UHRF1, PEAK1, STAP2, LINC00571, FRRS1, PKD2, TROAP, EEF1A1, MTMR1, IGFBP7,
## SCN8A, DAPK2, EMP3, IRAK3, SMC2, AC239860.4, LINC02355, AC027627.1, AC002472.1,
## AC007128.2, CLEC4C, AC076968.2, TAT-AS1, TEKT1, AC008555.2, SDC2, SERPINA1,
## GALNT14, CDCA8, RBM47, TUBB4A, IPCEF1, KIAA1217, SPINK8, CR1, CDH12, ATP6V1G1,
## ZHX2.
## PC_ 1 
## Positive:  GAPDH, S100A10, PRDX6, TXN, FABP3, CYSTM1, PSMA7, FAH, BLVRB, C15orf48 
##     TUBA1B, GSTP1, SELENOW, FABP4, LDHB, S100A9, TUBA1A, IFI30, SLAMF9, CRABP2 
##     LILRA1, MMP9, CSTA, GAL, H2AFZ, FDX1, ACTG1, GDF15, TUBA1C, HAMP 
## Negative:  ARL15, FTX, EXOC4, LRMDA, DOCK3, DPYD, ATG7, ZEB2, ELMO1, DLEU2 
##     DOCK4, RASAL2, SPIDR, PLXDC2, JMJD1C, VPS13B, ZSWIM6, MAML2, TCF12, TMEM117 
##     TPRG1, MED13L, ZNF438, TRIO, COP1, FMNL2, MALAT1, ZFAND3, ARHGAP15, MITF 
## PC_ 2 
## Positive:  TM4SF19, FDX1, ATP6V1D, GPC4, MREG, PSD3, FNIP2, ACAT2, TXNRD1, ANO5 
##     CYSTM1, RGS20, TGM2, CD164, SLC28A3, NCAPH, TCTEX1D2, TMEM114, GAL, FAH 
##     AC005280.2, CCL22, CSF1, NUMB, EPB41L1, SPP1, SCD, PRDX6, CRABP2, PSMA7 
## Negative:  TGFBI, AIF1, HLA-DRA, MS4A6A, CEBPD, CD14, HLA-DPB1, HLA-DPA1, CD74, CTSC 
##     MS4A7, EPB41L3, MAFB, RNASE1, TCF4, FPR3, SAMSN1, FOS, BTG1, VMO1 
##     SELENOP, LYZ, MEF2C, ARL4C, CD163, HLA-DRB1, ST8SIA4, SEMA4A, SSBP2, MPEG1 
## PC_ 3 
## Positive:  GCLC, DUSP2, NCAPH, PTGDS, KCNMA1, CHI3L1, CRIP1, AC020656.1, CYP1B1, CRABP2 
##     RCBTB2, CDKN2A, RNF128, SPRED1, RGCC, LINC01010, TIMP3, AC015660.2, ALOX5AP, CA2 
##     S100A4, MMP7, CCDC102B, ZNF366, LINC02345, AC067751.1, GJB2, STAC, FAIM2, ID2 
## Negative:  SAT1, STMN1, CTSL, MARCKS, NUPR1, BCAT1, BTG1, SLAMF9, CD48, PLEKHA5 
##     SDS, FABP4, RILPL2, FAH, SELENOW, PLIN2, OLFML2B, PSAT1, HIF1A, B4GALT5 
##     MARCO, CCPG1, TPM4, SOD2, CP, CSTA, GSTP1, FLNB, ADAMDEC1, TCEA1 
## PC_ 4 
## Positive:  TYMS, CLSPN, MKI67, PCLAF, SHCBP1, CEP55, TK1, CENPK, CENPF, DIAPH3 
##     RRM2, MYBL2, ESCO2, CDK1, CCNA2, BIRC5, NCAPG, CENPM, HMMR, HELLS 
##     NUSAP1, MAD2L1, PRC1, CIT, KIF4A, KNL1, TPX2, KIF11, FAM111B, TOP2A 
## Negative:  HES2, PDE4D, CCDC26, CSTA, RETREG1, CCPG1, SAT1, TDRD3, AP000331.1, AC073359.2 
##     LINC02244, NMB, LY86-AS1, RAB6B, ANO5, FDX1, XIST, CPD, AL136317.2, AC015660.2 
##     CIR1, NUPR1, STX18-AS1, LINC02320, BX664727.3, SLC28A3, GPC4, EPHB1, CYSTM1, AC008591.1 
## PC_ 5 
## Positive:  ACTG1, TPM4, ALCAM, TUBA1B, AC078850.1, CSF1, TIMP3, SLC35F1, CTSB, NRP1 
##     CD36, HSP90B1, LGMN, PHLDA1, LIMA1, FBP1, GAPDH, CADM1, CTSL, CCL3 
##     LNCAROD, HSPA5, CYTOR, TUBB, IL18BP, ATP13A3, CCL7, AIF1, DUSP6, TUBA1A 
## Negative:  LINC02244, CLU, BX664727.3, PTGDS, SYNGR1, GPX3, CSTA, LINC01010, AL136317.2, SEL1L3 
##     CPD, CXCR4, TDRD3, CCPG1, XIST, HES2, AC015660.2, CLEC12A, CCL22, NIPAL2 
##     HMGB2, MKI67, CLEC4E, SNHG32, AC012668.3, NUSAP1, CDKN1C, CEBPD, SIPA1L1, AC008591.1
DimHeatmap(mono_focus, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono_focus, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono_focus)

mono_focus <- FindNeighbors(mono_focus, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono_focus <- FindClusters(mono_focus, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono_focus <- RunUMAP(mono_focus, dims = 1:4)
## 14:58:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:58:04 Read 4713 rows and found 4 numeric columns
## 14:58:04 Using Annoy for neighbor search, n_neighbors = 30
## 14:58:04 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:58:04 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc44e3d22a
## 14:58:04 Searching Annoy index using 1 thread, search_k = 3000
## 14:58:05 Annoy recall = 100%
## 14:58:06 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:58:08 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:58:08 Commencing optimization for 500 epochs, with 166226 positive edges
## 14:58:12 Optimization finished
DimPlot(mono_focus, reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="sample" , reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="line" , reduction = "umap", label=TRUE)

Focus on mock vs active differences and mock vs latent

Need to plot all cells (mock, active, latent, bystander), but highlight mock and active.

Need separate UMAPS for MDM and AlvMDM.

MDM mock and active.

focus <- meta_inf
focus <- focus[which(focus$line=="mdm"),]
focus <- focus[which(focus$monaco_broad_annotation=="Monocytes"),]
focus$samplegroup <- gsub('[[:digit:]]+', '', focus$sample)
mono_focus <- mono[,which(colnames(mono) %in% rownames(focus))]
mono_focus@meta.data <- focus

mono_focus <- FindVariableFeatures(mono_focus, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono_focus <- RunPCA(mono_focus, features = VariableFeatures(object = mono_focus))
## Warning: The following 7 features requested have zero variance; running
## reduction without them: TSBP1, ELL2, AC023503.1, AL365259.1, CDKN1C, PSAT1,
## GRXCR1
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: AC097515.1, LINC02432, GRID1, PCBP3,
## HIST1H3J, HIST1H3F, SSBP3, HIST1H2BJ, ROBO1, PPP6R2, AGBL4, MARCH1, CCND2,
## DUSP16, SYT10, HORMAD2, MT-ND5, FAM118B, ATP6V0A2, HLA-C, PDGFD, AKR7A2,
## AL355388.3, TVP23A, GPR155, SYT4, POF1B, C16orf89, TENM1, MT-ND2, ZBTB41,
## SF3B3, AC096570.1, NEGR1, ZNF276, AP003472.1, KLF12, AL355388.1, BTBD2,
## TTC21B-AS1, ASTL, AC100849.2, SRRM2-AS1, MAP4K1, LPL, LINC01208, AL445584.2,
## ACTB, AC013287.1, AC092598.1, FSTL4, SCAI, AC034213.1, AC007364.1, RPS4Y1,
## AL353759.1, SRGN, FHOD3, DOK5, C5AR2, PCLO, AL031005.2, CENPA, AC093766.1,
## LINC02641, CPE, AC079062.1, NYX, GNG4, DAPK2, HIF1A-AS3, AC011346.1, PFKFB3,
## FILIP1L, LINC00885, IPCEF1, MT-ATP6, NCALD, SRGAP3, OASL, AC108134.2,
## AC016074.2, TFRC, MAP3K15, PRTG, CREM, AC005753.2, AL161935.3, UBASH3B,
## LMCD1-AS1, ASAP3, LINC02853, AGPAT5, COX5B, TNFRSF4, AC107982.2, CLIC5,
## AL358944.1, AL162394.1, RASSF4, AC008115.2, SPOCK3, PIWIL2, PARD3, LRRK2,
## MGAT5, AC133550.2, IFI6, OVCH1, AL157911.1, CKMT1A, MLLT3, EPHA6, TTK, FAM214A,
## RALYL, AC006511.6, AC079584.1, AL132775.1, AKR1C3, ROS1, AC089987.2, SOAT2,
## LINC01081, AC105411.1, TMIGD3, FANCM, AC078850.2, FAM163A, CFB, LINC02336,
## AL009177.1, AP002793.1, AC108879.1, STPG2, AC246817.1, AL078602.1, AC138123.1,
## AL590814.1, LINC01788, AC099552.1, LRRIQ4, TNFSF18, LINC02621, KLHL4, GPR78,
## AC023590.1, MPDZ, MAPK8, MYO18B, TENT5A, SIK2, RORA, MRC2, TNS3, CALR, ZNF608,
## ZBED9, AC073569.1, AC137770.1, KIF26B, SGMS1, OBI1-AS1, SESN3, LINC02421,
## AC009159.2, CNIH3, SPTLC3, AANAT, TUBB3, LINC02137, LYPD1, NR2F1-AS1, LRRC9,
## AP000829.1, SPIB, AC105001.1, MT-CYB, ADRA2B, SOS1, ZHX2, DMXL2, RHAG,
## AL138689.2, PSME2, AL031658.1, MT-ND1, SCFD2, ARHGEF4, CCDC7, RERE, AL110292.1,
## AC092490.1, TFEC, TEX15, KDM6A, LINC00987, ZNF385D-AS1, CXCL13, NFE4,
## AC004160.1, CWC22, EPHA4, MANF, TMEM72-AS1, STAU2, CCNB2, GSN, HIVEP3,
## MAPT-AS1, RAP1GDS1, AL360015.1, AC026333.3, TMEM45A, QKI, AFF1, ZZEF1, SAMD11,
## STXBP5, P2RY13, FBXO39, SETD2, GAS1RR, OR8D1, MYZAP, UBE2U, LINC00326,
## AC005632.5, GSTM5, RFX3, IFT80, STK17B, CEP112, SH3GL1, NRG2, BDNF-AS, TSPAN33,
## GPAT3, AC025262.1, MAP1A, PDIA4, GTSF1, HIST1H2AC, P2RY12, NR4A2, AC003035.1,
## TMEM131L, SPIRE1, IGF1R, CFAP52, GRM7, PCA3, ALPK1, CALM3, KIAA0825, LPP,
## RASIP1, CU638689.5, SLC16A2, TSPAN8, TNRC6C, WDFY3, GSG1L, ENG, IGSF6, BICD1,
## LINC00649, NWD1, BCL2A1, SLC22A23, LGALSL, AC024901.1, AL356379.2, STAG1,
## SLC16A1-AS1, AC013799.1, ATP6V0D2, AL049828.1, MT-CO3, SETBP1, AC064805.2,
## AC106793.1, XKR6, KIAA1217, CRYM, LINC00571, LINC01701, TRIM71, PPP1R12B,
## AC078923.1, LINC02398, PEAK1, LDHA, CD72, ZNF609, ESR2, STXBP1, AL645929.2,
## AC011476.3, TRIM37, PRDM1, OSBPL6, AL359987.1, NR6A1, PRRX2, RAP2C-AS1,
## AL357153.2, MCTP2, ZMYM2, KIF21A, SUSD4, GLUL, AC114763.1, AC007091.1, EIF2B3,
## Z84484.1, AC008555.2, ITPRID1, ANKRD36C, HCAR2, FAM184A, RNF180, HCRTR2, HHAT,
## LPAR1, CHRM3, PKIA-AS1, SLC4A8, AC019117.1, LINC01811, CSRNP3, AL449363.2,
## ARLNC1, AC104459.1, TASP1, GAS7, LINC02851, KANSL1, SLC1A3, VNN2, SCAPER,
## DZIP1, MIR548A1HG, RGS18, ZFP36L1, SLC7A8, PACS1, GLIPR1, RREB1, AL590822.3,
## AL451042.1, AL158839.1, AL589765.6, AL590666.1, C4BPA, AC106053.1, AC073987.1,
## AC073257.2, LINC01923, AC104667.1, AQP12B, AC063944.3, C3orf85, AC026329.1,
## AC097105.1, AC021220.2, PF4, ETNPPL, AC107222.2, AC139792.1, RASGRF2-AS1,
## AC011374.3, AC008703.1, AL132775.2, AL662884.1, GSTA1, SPACA1, AL109920.1,
## AC005100.2, ASB4, AC091185.1, AC084262.2, CNTFR-AS1, HSD17B3, PPP3R2,
## AL390962.1, AL356481.3, OR13A1, FAM170B-AS1, OR10A4, AC013714.1, AC004672.2,
## AC007552.2, WNT10B, AC137590.1, AL512652.1, SMAD9-IT1, CLYBL-AS2, GAS6-DT,
## EDDM3A, CMA1, CCDC177, AC021483.1, AC138932.2, AC108125.1, AC007952.6,
## AC079336.2, AC132938.1, AP001120.5, AC090377.1, AC092192.1, LINC01858,
## AC022150.2, AL121845.4, ERVH48-1, LINC01679, AC004832.5, GLCCI1, AL357873.1,
## RALGAPA2, ABCA1, AURKC, CPQ, FGGY, CDK19, FBXW7, AC008691.1, C10orf67, DPT,
## ELOVL2, SMC2-AS1, CARMIL3, LAMP5, TNIK, SCLT1, C1orf143, ITPR2, PRH1, FABP6,
## MEGF10, BMP7, SNX10, OR1L8, AC087627.1, C20orf203, CPLX1, MIR3142HG, SLC44A5,
## SPATA5, FBXL2, GADD45G, AC117386.2, AL121839.2, LINC02274, PWRN4, AC007529.2,
## LNX1, AC007389.1, LINC02015, HDX, SULT6B1, AC021546.1, AP000446.1, ZNF827,
## PAWR, ULK4, PTPRJ, SIRPB2, PLXNC1, TSPAN10, AL592494.2, AC107021.1, GCM2,
## NUPR2, HMX3, AP003170.4, AC129507.2, AC006237.1, AC008738.1, CU634019.5,
## SMIM10L2A, TPTE2, AC024084.1, CARD11, AC072022.2, SVOP, ZCCHC7, KCNJ1, SNED1,
## AC020743.2, SIGLEC10, LILRB2, MAEL, AL133538.1, NLRP4, AC112493.1, PAX8-AS1,
## KIAA0319, KCNA2, SNAP25-AS1, ALDH1A1, SULF2, EDA, KLRG1.
## PC_ 1 
## Positive:  PRDX6, FABP3, S100A10, TXN, C15orf48, TUBA1B, CHI3L1, CRABP2, SPP1, FBP1 
##     TUBA1A, MGST1, ACTG1, ACAT2, RGCC, CYSTM1, LILRA1, SLC35F1, GAL, HAMP 
##     MMP9, UCHL1, FABP4, IFI30, MYL9, TUBA1C, LINC02244, LILRA2, ANXA1, AC078850.1 
## Negative:  FTX, ARL15, FHIT, RAD51B, EXOC4, NEAT1, FMN1, AL035446.2, SLC22A15, ZFAND3 
##     MBD5, FNDC3B, DOCK4, PDE4D, VPS13B, DENND1A, COP1, GAB2, TRIO, SBF2 
##     GMDS-DT, MALAT1, VTI1A, JMJD1C, ZEB2, SLC8A1, ZBTB20, LRMDA, SNTB1, COG5 
## PC_ 2 
## Positive:  CYSTM1, TM4SF19, ANO5, TXNRD1, RETREG1, GDF15, GPC4, NIBAN1, HES2, TXN 
##     CCPG1, CLGN, AC073359.2, SPP1, SUPV3L1, FNIP2, CCDC26, S100P, BEX2, LINC01135 
##     CSRP2, TCEA1, B4GALT5, FABP3, BCAT1, RAB6B, AC073359.1, STX18-AS1, NMB, S100A10 
## Negative:  CD74, HLA-DRA, HLA-DPB1, HLA-DPA1, TGFBI, HLA-DRB1, CD163, HLA-DQB1, MS4A6A, HLA-DQA1 
##     CEBPD, LYZ, ST8SIA4, MPEG1, AIF1, FPR3, FCN1, MS4A7, TCF4, TIMP1 
##     ARL4C, SEMA4A, HLA-DQA2, EPB41L3, FOS, HLA-DOA, CD14, MAFB, C1QA, RNASE1 
## PC_ 3 
## Positive:  BTG1, NUPR1, G0S2, MARCKS, STMN1, CCPG1, TCEA1, CLGN, SUPV3L1, GDF15 
##     ADAMDEC1, CD14, SLAMF9, CLEC4E, S100P, HLA-DQA2, CLEC4A, TIMP1, CTSL, CXCR4 
##     NAMPT, BEX2, OLFML2B, DNAJC5B, ARL4C, HIF1A, IFI30, CST7, SDS, CYSTM1 
## Negative:  NCAPH, GCLC, CYP1B1, RGCC, DUSP2, CRABP2, CHI3L1, S100A4, TIMP3, LINC01010 
##     RAPGEF1, ASAP1, CRIP1, CA2, AC015660.2, MYO1E, RASAL2, ZNF366, SERTAD2, KCNMA1 
##     CDKN2A, PTGDS, IL1RN, RCBTB2, LRCH1, PLXDC2, ATG7, OCSTAMP, MREG, NUMB 
## PC_ 4 
## Positive:  ACTG1, TPM4, AC078850.1, TUBA1B, MGLL, CD36, PHLDA1, SLC35F1, CSF1, CCL3 
##     INSIG1, MACC1, TUBB, C3, PLEK, CADM1, LGMN, IL18BP, HSP90B1, LIMA1 
##     CCL7, LINC01091, ATP13A3, TNFRSF9, B4GALT5, MMP19, FBP1, DUSP6, C15orf48, ALCAM 
## Negative:  PTGDS, LINC02244, CLU, BX664727.3, LINC01010, SYNGR1, RAB6B, CCPG1, HES2, S100P 
##     TDRD3, EPHB1, MT1X, MT1G, MEIKIN, ANG, CSRP2, AL136317.2, SEL1L3, AC015660.2 
##     MT1H, CLEC12A, NCAPH, RNASE6, RCBTB2, OXT, FOLR2, LY86-AS1, PCDH19, RNASE4 
## PC_ 5 
## Positive:  FBP1, AC078850.1, PRDX6, PLAUR, MMP9, CTSL, RAB6B, TUBB, PCLAF, ACTG1 
##     TUBA1B, CSRP2, AC073359.1, STMN1, TUBA1C, CTSK, TMEM114, PALLD, AC073359.2, HSP90B1 
##     FABP4, CCNA1, ANXA1, ACAT2, LILRA1, PLEKHA5, FOLR2, RGS1, TUBA1A, ADAMDEC1 
## Negative:  HIV-Polp15p31, HIV-Gagp17, HIV-Vif, HIV-Polprot, HIV-BaLEnv, HIV-LTRU5, HIV-Gagp2p7, HIV-Gagp1Pol, HIV-Vpu, HIV-TatEx1 
##     HIV-TatEx2Rev, HIV-EGFP, HIV-LTRR, HIV-Nef, HIV-EnvStart, HIV-Vpr, HES4, G0S2, HES1, BEX2 
##     ST6GALNAC3, PLA2G12A, NMRK2, PECR, MDM2, SPRED1, CLEC4A, CDKN1A, NIBAN1, QPCT
DimHeatmap(mono_focus, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono_focus, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono_focus)

mono_focus <- FindNeighbors(mono_focus, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono_focus <- FindClusters(mono_focus, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono_focus <- RunUMAP(mono_focus, dims = 1:4)
## 14:58:30 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:58:30 Read 10098 rows and found 4 numeric columns
## 14:58:30 Using Annoy for neighbor search, n_neighbors = 30
## 14:58:30 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:58:31 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc4d8401bf
## 14:58:31 Searching Annoy index using 1 thread, search_k = 3000
## 14:58:34 Annoy recall = 100%
## 14:58:35 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:58:36 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:58:36 Commencing optimization for 200 epochs, with 339240 positive edges
## 14:58:39 Optimization finished
DimPlot(mono_focus, reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="sample" , reduction = "umap", label=TRUE)

# MDM mock + active
DimPlot(mono_focus, group.by="samplegroup" , reduction = "umap", label=TRUE,
  cols=c("brown1","gray","gray","cornflowerblue"))

#MDM mock and latent.
DimPlot(mono_focus, group.by="samplegroup" , reduction = "umap", label=TRUE,
  cols=c("gray","gray","darkorange2","cornflowerblue"))

Alv MDM mock and active.

focus <- meta_inf
focus <- focus[which(focus$line=="alv"),]
focus <- focus[which(focus$monaco_broad_annotation=="Monocytes"),]
focus$samplegroup <- gsub('[[:digit:]]+', '', focus$sample)
mono_focus <- mono[,which(colnames(mono) %in% rownames(focus))]
mono_focus@meta.data <- focus

mono_focus <- FindVariableFeatures(mono_focus, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono_focus <- RunPCA(mono_focus, features = VariableFeatures(object = mono_focus))
## Warning: The following 19 features requested have zero variance; running
## reduction without them: CCL22, ISG20, LINC00520, BCAT1, HP, HLA-DQB1, MYO1D,
## LNCOG, GBE1, SH3RF3, C11orf96, ARL4D, TNR, C3orf52, RNLS, EMP1, GABRR3,
## LINC02439, SBF2
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: SMS, NME5, CD28, TMIGD1, CSTB, SPINK6,
## FYB2, DCSTAMP, LGALS1, AC098617.1, AC007317.1, EXO1, MIF, AC105940.1, CEP152,
## CLSTN2, AC022031.2, MRPS6, AHCTF1, LINC00964, CFAP74, ACVR2A, PPP1R1C, ADCY3,
## GLT1D1, CTSZ, IQCA1, AC011396.2, BRMS1L, LINC01862, AC076968.2, C11orf49,
## ZC3H8, GRHL2, TMEM236, GNLY, AC034114.2, GM2A, PTPN2, ITGA2, EMP3, ERAP2,
## CPNE8, IL6, SLC7A14-AS1, RSPO3, LYPD1, TRDN, ASMT, LINC01080, DNAJC9, NRIP3,
## TEK, LINC00589, PPEF1, CLEC7A, LINC00501, TCF7L1, SLC30A8, AC026391.1, CHM,
## AC083837.1, SYT12, MX2, CENPU, TEKT5, HCAR2, AP002075.1, URB1, PHACTR1,
## IGFBP7-AS1, TMC5, AL138767.1, IPCEF1, LPCAT1, GOLGA7B, AC135050.3, PASK,
## ANKRD22, AC078864.1, AL096799.1, AC011124.1, AL513166.1, GDPD4, CCDC85A,
## DYNC1H1, LINC01151, ACTB, SLC35F3, LCP1, XDH, ACTN2, SPACA3, PPP2R2C,
## LINC00535, AC005740.5, CCR6, GCH1, UHRF1, AC068587.4, CNTNAP2, RAB7B, WDR93,
## NUP210L, IGF2R, AL645933.3, MARCKSL1, GAPLINC, CTXND1, GTF2IRD2, SLC6A16,
## AC104574.2, PDE1C, WIPF3, LINC02740, NPTX2, AL606923.2, MS4A5, SGCD, LINC01414,
## RIMS2, AC063944.1, AL023495.1, AC090709.1, AC079313.2, PAX8-AS1, LINC01641,
## FAM92B, AC207130.1, AKAP6, AC092801.1, LINC00461, TENM4, NDRG1, GATA2,
## AC007001.1, MLIP, AC006441.4, LINC02269, EGFLAM, AHRR, UNC13C, DNAJC1, SHROOM4,
## COL27A1, CH25H, FCRLB, SLC2A5, RHBG, NWD1, GPAT3, LINC02068, AC007785.1, MECP2,
## ATP1B1, IARS, HSF2BP, CAPSL, LINC00842, LINC01775, SULT6B1, EFHD1, ZAN,
## AC013799.1, AC109454.3, SLC16A9, SERINC2, AC013391.2, AC026689.1, AC104532.2,
## LINC02844, LINC02540, AC084809.2, BTNL8, HLTF-AS1, AC020584.1, ABCA6,
## AL096794.1, LINC01818, EML4, FCMR, SLC23A2, ELOVL5, AP001496.1, IL17RB,
## LINC02558, AC023825.2, RBPJ, MIR2052HG, TMEM108, HPSE2, LINC01344, TDRD9,
## COL25A1, AL161646.1, AC117569.2, TRIM2, LINC02774, ELOC, GINS2, AC233296.1,
## BRCA2, PCDH15, NR1H3, TUBB4A, RFTN2, AL354733.2, EDIL3, NBAT1, KIF6, SVEP1,
## C22orf34, SPNS2, FRMD6-AS2, RNF212, GLIPR1, MIR3142HG, SLC6A7, AC079163.2,
## BRCA1, KCP, SHCBP1L, NCAPG2, AOC1, KIF21A, SDC2, AL713998.1, AL672032.1,
## AC093001.2, EFCAB7, AC103796.1, PLAAT3, SCIN, COBL, DNAH12, LINC00511,
## PROSER2-AS1, LINC02775, NNMT, LINC00609, GNGT1, LDOC1, TNIK, INPP4B, GNRHR,
## LINC01748, RASSF4, HIST1H2BG, SH3BP5, PDE3A, NECAB1, RELN, LINC01358, HTRA4,
## PPM1H, LINC02805, AL731563.2, AC021613.1, SPINK5, CHRNB3, LPP, SLC4A8,
## LINC00299, SNAI3, TNFRSF12A, LDLRAD4, FAF1, CATSPERB, AC007529.2, AP000812.1,
## AC108066.1, ZNF99, AC092957.1, RBPJL, LINC02224, RBM47, FAM110D, PDLIM4, GBP4,
## HIV-EnvEnd, SPSB1, AC084734.1, INO80, NOX3, AC023794.1, WASF3-AS1, DLGAP1-AS3,
## NCAN, AC005264.1, BMPR1B, ULBP2, SVOP, ANOS1, SLA, FCGR2A, CST6, DSG2, RIMS1,
## LINC00350, LINC02752, CAMK2D, LGALS3, MRC2, ADAM22, COL8A2, S100A6, LIPG,
## AC092821.3, AC084740.1, KIAA0825, DOCK2, LINC02789, AIF1L, RHOD, NABP1, CDC45,
## GFOD1, AL592431.2, SOAT2, SH3PXD2B, AL353072.2, AC024597.1, AC090099.1, OR10H1,
## RFC3, MB21D2, POU2F2, TMEM37, PTX3, FRMPD3, CRIM1, ATP1B4, ABLIM1, PRKCG,
## C1orf143, EZH2, ITPR2, SDSL, AL162411.1, AC120498.4, SPATA3, VIPR1, MIR4300HG,
## SEPTIN14, NES, HMOX1, CR1, CHD9, IGSF6, ARMC8, RFX3-AS1, AC246817.1, DPH6,
## SLC9A2, AC245014.3, HNRNPM, KLHL1, TFRC, LINC02073, AC104248.1, STPG4, MSRB3,
## EML2-AS1, MARCH3, LINC01779, AC239860.4, LEMD1-DT, WNT10A, LIFR-AS1, LHX6,
## AC090629.1, LINC02136, DLGAP1-AS4, C22orf31, STUM, STPG2, AL592295.3,
## AC012409.2, AL049828.1, AC104596.1, NANOS1, SMCHD1, LINC02777, AC011346.1,
## MCM7, XKR9, AL353596.1, TTLL9, AHCYL2, CNIH3, RYR2, LPAR1, TCF19, AKAP5, MID2,
## SCFD2, ARHGAP6, TROAP, KCNA2, ROBO4, GALNT13, OLAH, ACAN, PRH1, BICD1, MFSD11,
## MOSPD1, IL3RA, BARD1, MIR3945HG, DBI, ELF5, WDFY4, PSME2, FOXB1, WDR90, RP1L1,
## AMPD3, AL122003.2, LHCGR, AL157702.2, SRGAP3, ABCG2, NEURL3, AC104041.1, STX4,
## AC074099.1, LINC00690, CEACAM20, AC009623.1, DUXA, CACNA2D1, ARRDC3-AS1, EGFL7,
## EDA, SIK2, NCMAP, LHFPL6, AC116563.1, CD5L, LZTS1, AL359220.1, SRL, ARHGAP44,
## CFI, LINC00840, LINC02444, ASTN2, ZNF431, LDHA, SCFD1, AC008443.9, ASPH, IGF1R,
## AC073475.1, STS, PITPNC1, LINC01572, SLC44A4, LINC00894, RASL10A, AC024901.1,
## LINC01397, AC084030.1, ADGRF1, MARCH10-DT, AL050349.1, KCNJ1, AC008691.1, RBL1,
## AGPAT4, FRRS1, LINC00571, MIR155HG, CPQ, FCER1G, AC124852.1, GRIK5, FSD1,
## CD226, PRIM2, LMNB1-DT, QKI, PTGES, AC012078.2, CALM3, RAP1GAP2, ABCA1,
## GALNT14, AL662884.3, ASAP3, ICAM1, ZNF157, PTPRJ, SLC1A3, SSH2, TMEM131L, TOX,
## PTPN3, MEP1A, AC138123.1, AC145146.1, SFTPD-AS1, IGSF23, AC025430.1, SPRY4-AS1,
## PTPRB, ATP2B4, NRCAM, NSG2, ATP6V1H, XKR6, AC087280.2, FKBP5, NCAM2,
## AP002793.1, GSTO1, AL078602.1, LINC01800, MCM4, GRID2, MS4A4A, LIN28B-AS1,
## LINC02466, LINC01350, F2RL1, LINC01951, AC108860.2, DNAI1, LINC02643,
## DIAPH3-AS1.
## PC_ 1 
## Positive:  GAPDH, TXN, CYSTM1, GSTP1, BLVRB, SAT1, FAH, PRDX6, H2AFZ, FABP4 
##     NUPR1, CARD16, STMN1, SNHG5, GDF15, SLAMF9, S100A9, NMB, PSAT1, MARCKS 
##     SNHG32, HES2, PHGDH, CTSL, GYPC, CD300A, GCHFR, HEBP2, TUBA1B, C15orf48 
## Negative:  TMEM117, RASAL2, DPYD, DOCK3, TPRG1, MAML2, MITF, ATG7, ASAP1, ELMO1 
##     ARL15, LRMDA, PLXDC2, VWA8, ZNF804A, AC092353.2, CPEB2, EXOC4, FMNL2, JMJD1C 
##     NFATC3, ARHGAP10, FTX, LRCH1, TTC28, ATP9B, ADK, UBE2E2, ZNF438, MEF2A 
## PC_ 2 
## Positive:  VAMP5, RCBTB2, FGL2, LYZ, MRC1, HLA-DPA1, HLA-DRA, SPRED1, RTN1, CD74 
##     HLA-DRB1, PTGDS, HLA-DPB1, KCNMA1, MX1, SOBP, C1QTNF4, LGALS2, GCLC, AIF1 
##     CAMK1, PDGFC, XYLT1, FOS, SLCO2B1, CFD, RAMP1, ALOX5AP, RNF128, CTSC 
## Negative:  GAL, TM4SF19, HIV-Nef, TCTEX1D2, FDX1, ATP6V1D, SCD, HIV-BaLEnv, HIV-TatEx1, CYSTM1 
##     HIV-LTRU5, CYTOR, HIV-Gagp17, CD164, HIV-LTRR, HIV-Polprot, CIR1, CSF1, DUSP13, ACAT2 
##     EPB41L1, SLC20A1, MIR4435-2HG, AL157912.1, HIV-Polp15p31, RHOF, TGM2, IL6R-AS1, PLXNA2, PSD3 
## PC_ 3 
## Positive:  CTSL, MARCKS, SLC11A1, CTSC, MSR1, LGMN, FCGR3A, AIF1, MS4A7, MARCO 
##     SLC8A1, FMN1, STMN1, NUPR1, MRC1, FABP4, BLVRB, FPR3, CD36, TPM4 
##     PLAU, S100A9, CLEC4A, COLEC12, CTSB, UGCG, HIF1A, HAMP, SAT1, C1QA 
## Negative:  NCAPH, CLU, TRIM54, DUSP2, CRIP1, PTGDS, MX1, MMP7, TMEM176B, IL1RN 
##     SERTAD2, SYNGR1, GCLC, C2orf92, IFIT3, ISG15, C1QTNF4, LINC02244, AC067751.1, KCNMA1 
##     GAL, TMEM176A, MGST1, PLEK, RGS20, RGCC, CCND1, LINC01010, ALOX5AP, RNF128 
## PC_ 4 
## Positive:  CLSPN, TYMS, TK1, DIAPH3, PCLAF, RRM2, MYBL2, FAM111B, SHCBP1, MKI67 
##     ESCO2, HELLS, CENPM, CENPK, ATAD2, CDK1, KIF11, CEP55, MCM10, DTL 
##     NCAPG, TOP2A, BIRC5, MIR924HG, POLQ, ANLN, CCNA2, TPX2, NUSAP1, CIT 
## Negative:  AC008591.1, XIST, SAT1, PDE4D, HIV-Nef, LINC01708, HIV-Polp15p31, GCHFR, HIV-TatEx1, LINC01340 
##     HIV-BaLEnv, HIV-Polprot, LIX1-AS1, HIV-LTRU5, MIR646HG, AL035446.2, HIV-Vif, KCNMB2-AS1, AC012668.3, PRKN 
##     SAMD4A, UBA6-AS1, LINC02320, HIV-Gagp17, AC034195.1, HIV-Vpr, HIV-LTRR, ST6GAL1, HIV-Gagp1Pol, LY86-AS1 
## PC_ 5 
## Positive:  NIPAL2, LINC02244, TDRD3, OSBP2, BX664727.3, AC020656.1, GJB2, AL136317.2, FDX1, ANO5 
##     RARRES1, XIST, AC012668.3, PKD1L1, PRSS21, LINC01010, CCDC26, TMEM114, TMTC1, QPCT 
##     CFD, GCHFR, CTSK, AC005280.2, PSD3, LYZ, GPAT2, HES2, S100A9, AC116534.1 
## Negative:  HIV-Nef, HIV-TatEx1, HIV-LTRR, HIV-LTRU5, HIV-BaLEnv, HIV-Polprot, HIV-Gagp17, HIV-EnvStart, HIV-Polp15p31, HIV-Vpr 
##     HIV-Gagp1Pol, HIV-Vif, HIV-TatEx2Rev, HIV-Vpu, HIV-Gagp2p7, IL1RN, HIV-EGFP, CTSB, IL6R-AS1, AIF1 
##     ALCAM, ISG15, AL157912.1, GBP1, SPRED1, STAC, TNFSF13B, LINC02345, IFI44L, EPSTI1
DimHeatmap(mono_focus, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono_focus, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono_focus)

mono_focus <- FindNeighbors(mono_focus, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono_focus <- FindClusters(mono_focus, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono_focus <- RunUMAP(mono_focus, dims = 1:4)
## 14:59:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:59:01 Read 11299 rows and found 4 numeric columns
## 14:59:01 Using Annoy for neighbor search, n_neighbors = 30
## 14:59:01 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:59:01 Writing NN index file to temp file /tmp/RtmpF3qYLw/file32fffc7b607c0b
## 14:59:01 Searching Annoy index using 1 thread, search_k = 3000
## 14:59:05 Annoy recall = 100%
## 14:59:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:59:07 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:59:07 Commencing optimization for 200 epochs, with 383718 positive edges
## 14:59:10 Optimization finished
DimPlot(mono_focus, reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono_focus, group.by="sample" , reduction = "umap", label=TRUE)

# MDM mock + active
DimPlot(mono_focus, group.by="samplegroup" , reduction = "umap", label=TRUE,
  cols=c("brown1","gray","gray","cornflowerblue"))

#MDM mock and latent.
DimPlot(mono_focus, group.by="samplegroup" , reduction = "umap", label=TRUE,
  cols=c("gray","gray","darkorange2","cornflowerblue"))

Differential expression

We are going to use muscat for pseudobulk analysis. First need to convert seurat obj to singlecellexperiment object. Then summarise counts to pseudobulk level.

sce <- Seurat::as.SingleCellExperiment(comb, assay = "RNA")

head(colData(sce),2)
## DataFrame with 2 rows and 14 columns
##                            orig.ident nCount_RNA nFeature_RNA
##                              <factor>  <numeric>    <integer>
## mdm_mock1|AAACGAATCACATACG        mac      72761         7103
## mdm_mock1|AAACGCTCATCAGCGC        mac      49143         6282
##                                              cell      sample RNA_snn_res.0.5
##                                       <character> <character>        <factor>
## mdm_mock1|AAACGAATCACATACG mdm_mock1|AAACGAATCA..   mdm_mock1              5 
## mdm_mock1|AAACGCTCATCAGCGC mdm_mock1|AAACGCTCAT..   mdm_mock1              12
##                            seurat_clusters           cell_barcode
##                                   <factor>            <character>
## mdm_mock1|AAACGAATCACATACG              5  mdm_mock1|AAACGAATCA..
## mdm_mock1|AAACGCTCATCAGCGC              12 mdm_mock1|AAACGCTCAT..
##                            monaco_broad_annotation monaco_broad_pruned_labels
##                                        <character>                <character>
## mdm_mock1|AAACGAATCACATACG               Monocytes                  Monocytes
## mdm_mock1|AAACGCTCATCAGCGC               Monocytes                  Monocytes
##                            monaco_fine_annotation monaco_fine_pruned_labels
##                                       <character>               <character>
## mdm_mock1|AAACGAATCACATACG    Classical monocytes       Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC    Classical monocytes       Classical monocytes
##                                   line    ident
##                            <character> <factor>
## mdm_mock1|AAACGAATCACATACG         mdm       5 
## mdm_mock1|AAACGCTCATCAGCGC         mdm       12
colnames(colData(sce))
##  [1] "orig.ident"                 "nCount_RNA"                
##  [3] "nFeature_RNA"               "cell"                      
##  [5] "sample"                     "RNA_snn_res.0.5"           
##  [7] "seurat_clusters"            "cell_barcode"              
##  [9] "monaco_broad_annotation"    "monaco_broad_pruned_labels"
## [11] "monaco_fine_annotation"     "monaco_fine_pruned_labels" 
## [13] "line"                       "ident"
#muscat library
pb <- aggregateData(sce,
    assay = "counts", fun = "sum",
    by = c("monaco_broad_annotation", "sample"))

# one sheet per subpopulation
assayNames(pb)
## [1] "Basophils"       "Dendritic cells" "Monocytes"
t(head(assay(pb)))
##                HIV-LTRR HIV-LTRU5 HIV-Gagp17 HIV-Gagp24 HIV-Gagp2p7
## alv_active1           0         0          0          0           0
## alv_active2           0         0          0          0           0
## alv_active3           0         0          0          0           0
## alv_bystander1        0         0          0          0           0
## alv_bystander2        0         0          0          0           0
## alv_bystander3        0         0          0          0           0
## alv_latent1           0         0          0          0           0
## alv_latent2           0         0          0          0           0
## alv_latent3           0         0          0          0           0
## alv_mock1             0         0          0          0           0
## alv_mock2             0         0          0          0           0
## alv_mock3             0         0          0          0           0
## mdm_active1           0         0          0          0           0
## mdm_active2           0         0          0          0           0
## mdm_active3           0         0          0          0           0
## mdm_active4           0         0          0          0           0
## mdm_bystander1        0         0          0          0           0
## mdm_bystander2        0         0          0          0           0
## mdm_bystander3        0         0          0          0           0
## mdm_bystander4        0         0          0          0           0
## mdm_latent1           0         0          0          0           0
## mdm_latent2           0         0          0          0           0
## mdm_latent3           0         0          0          0           0
## mdm_latent4           0         0          0          0           0
## mdm_mock1             0         0          0          0           0
## mdm_mock2             0         0          0          0           0
## mdm_mock3             0         0          0          0           0
## mdm_mock4             0         0          0          0           0
## react6                0         0          0          0           0
##                HIV-Gagp1Pol
## alv_active1               0
## alv_active2               0
## alv_active3               0
## alv_bystander1            0
## alv_bystander2            0
## alv_bystander3            0
## alv_latent1               0
## alv_latent2               0
## alv_latent3               0
## alv_mock1                 0
## alv_mock2                 0
## alv_mock3                 0
## mdm_active1               0
## mdm_active2               0
## mdm_active3               0
## mdm_active4               0
## mdm_bystander1            0
## mdm_bystander2            0
## mdm_bystander3            0
## mdm_bystander4            0
## mdm_latent1               0
## mdm_latent2               0
## mdm_latent3               0
## mdm_latent4               0
## mdm_mock1                 0
## mdm_mock2                 0
## mdm_mock3                 0
## mdm_mock4                 0
## react6                    0
plotMDS(assays(pb)[[3]], main="Monocytes" )

par(mfrow=c(2,3))

dump <- lapply(1:length(assays(pb)) , function(i) {
  cellname=names(assays(pb))[i]
  plotMDS(assays(pb)[[i]],cex=1,pch=19,main=paste(cellname),labels=colnames(assays(pb)[[1]]))
})
## Warning in plotMDS.default(assays(pb)[[i]], cex = 1, pch = 19, main =
## paste(cellname), : dimension 2 is degenerate or all zero
par(mfrow=c(1,1))

DE Prep

pbmono <- assays(pb)[["Monocytes"]]

head(pbmono)
##              alv_active1 alv_active2 alv_active3 alv_bystander1 alv_bystander2
## HIV-LTRR            4391        3316        2601            111            134
## HIV-LTRU5         162741      127498      102452           2085           2885
## HIV-Gagp17         32789       27176       17079            106            162
## HIV-Gagp24             0           0           0              0              0
## HIV-Gagp2p7         1201        1242         744             16             26
## HIV-Gagp1Pol        2100        2334        1592             26             50
##              alv_bystander3 alv_latent1 alv_latent2 alv_latent3 alv_mock1
## HIV-LTRR                138         249         261         411        31
## HIV-LTRU5              2848       10994       10224       17207       753
## HIV-Gagp17              183        2306        1784        2576       106
## HIV-Gagp24                0           0           0           0         0
## HIV-Gagp2p7              17          69         104         121         2
## HIV-Gagp1Pol             42         129         163         210         6
##              alv_mock2 alv_mock3 mdm_active1 mdm_active2 mdm_active3
## HIV-LTRR            52       200        2251        1482        1476
## HIV-LTRU5         1311      7206      100998       71868       94108
## HIV-Gagp17         178      1530       38092       23541       40568
## HIV-Gagp24           0         0           0           0           0
## HIV-Gagp2p7          7        52        1462        1021        2365
## HIV-Gagp1Pol        21        94        2021        1375        3032
##              mdm_active4 mdm_bystander1 mdm_bystander2 mdm_bystander3
## HIV-LTRR            1648            103            147             26
## HIV-LTRU5          56478           1425           1946            449
## HIV-Gagp17         15110            255            254             57
## HIV-Gagp24             0              0              0              0
## HIV-Gagp2p7          414             23             32              3
## HIV-Gagp1Pol         785             20             61             16
##              mdm_bystander4 mdm_latent1 mdm_latent2 mdm_latent3 mdm_latent4
## HIV-LTRR                 41         119         142          37          48
## HIV-LTRU5               725        5359        6710        2150        1644
## HIV-Gagp17               61        1927        2077         566         534
## HIV-Gagp24                0           0           0           0           0
## HIV-Gagp2p7               8          51         108          37          12
## HIV-Gagp1Pol             10          83         129          63          24
##              mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 react6
## HIV-LTRR            30        37         9        39    256
## HIV-LTRU5          486       685       106      1119   7461
## HIV-Gagp17         117       253        37       159    596
## HIV-Gagp24           0         0         0         0      0
## HIV-Gagp2p7          5         8         1         5     21
## HIV-Gagp1Pol         7        14         3        13     39
dim(pbmono)
## [1] 36622    29
hiv <- pbmono[1:2,]
pbmono <- pbmono[3:nrow(pbmono),]

Gene sets

Gene ontology.

#go <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")
#names(go) <- gsub("_"," ",names(go))

#wget https://ziemann-lab.net/public/tmp/go_2024-11.gmt
go <- gmt_import("go_2024-11.gmt")

DE1 Latently- vs productively-infected cells (groups 3 vs 4).

MDM group.

pbmdm <- pbmono[,grep("mdm",colnames(pbmono))]

pb1m <- pbmdm[,c(grep("active",colnames(pbmdm)),grep("latent",colnames(pbmdm)))]

head(pb1m)
##               mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_latent1
## HIV-Gagp17          38092       23541       40568       15110        1927
## HIV-Gagp24              0           0           0           0           0
## HIV-Gagp2p7          1462        1021        2365         414          51
## HIV-Gagp1Pol         2021        1375        3032         785          83
## HIV-Polprot         27388       18583       44857        9126        1383
## HIV-Polp15p31       75686       55267      105649       14984        3589
##               mdm_latent2 mdm_latent3 mdm_latent4
## HIV-Gagp17           2077         566         534
## HIV-Gagp24              0           0           0
## HIV-Gagp2p7           108          37          12
## HIV-Gagp1Pol          129          63          24
## HIV-Polprot          1587         877         250
## HIV-Polp15p31        5077        2425         441
pb1mf <- pb1m[which(rowMeans(pb1m)>=10),]
head(pb1mf)
##               mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_latent1
## HIV-Gagp17          38092       23541       40568       15110        1927
## HIV-Gagp2p7          1462        1021        2365         414          51
## HIV-Gagp1Pol         2021        1375        3032         785          83
## HIV-Polprot         27388       18583       44857        9126        1383
## HIV-Polp15p31       75686       55267      105649       14984        3589
## HIV-Vif              5276        4254        7255        1109         221
##               mdm_latent2 mdm_latent3 mdm_latent4
## HIV-Gagp17           2077         566         534
## HIV-Gagp2p7           108          37          12
## HIV-Gagp1Pol          129          63          24
## HIV-Polprot          1587         877         250
## HIV-Polp15p31        5077        2425         441
## HIV-Vif               317         146          25
colSums(pb1mf)
## mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_latent1 mdm_latent2 
##    29512873    22423101    25226765    13842115     2508756     3993617 
## mdm_latent3 mdm_latent4 
##     2428015      582852
des1m <- as.data.frame(grepl("active",colnames(pb1mf)))
colnames(des1m) <- "case"

plot(cmdscale(dist(t(pb1mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb1mf))), labels=colnames(pb1mf) )

dds <- DESeqDataSetFromMatrix(countData = pb1mf , colData = des1m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1mf <- de
write.table(de1mf,"de1mf.tsv",sep="\t")

nrow(subset(de1mf,padj<0.05 & log2FoldChange>0))
## [1] 30
nrow(subset(de1mf,padj<0.05 & log2FoldChange<0))
## [1] 128
head(subset(de1mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active MDM cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-EnvStart 106.40798 1.541035 0.2458193 6.268976 0.00e+00 0.0000007
HIV-TatEx2Rev 171.30897 1.206074 0.2125159 5.675220 0.00e+00 0.0000148
HIV-BaLEnv 8035.68066 1.268309 0.2402470 5.279190 1.00e-07 0.0001018
HIV-Vpr 76.19161 1.394455 0.2683166 5.197049 2.00e-07 0.0001401
HIV-EGFP 179.06534 1.453434 0.3000391 4.844148 1.30e-06 0.0006503
TNFRSF9 79.53884 2.370885 0.5090863 4.657138 3.20e-06 0.0014506
HIV-Gagp1Pol 362.34737 1.372549 0.2994366 4.583772 4.60e-06 0.0018364
PLA2G7 4673.01516 0.603530 0.1362746 4.428777 9.50e-06 0.0027966
MACC1 113.76132 1.709692 0.3956601 4.321113 1.55e-05 0.0038045
HIV-Polprot 4907.03552 1.400361 0.3245336 4.314996 1.60e-05 0.0038315
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in active MDM cells") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells
baseMean log2FoldChange lfcSE stat pvalue padj
PDE4B 111.80062 -3.208542 0.4331447 -7.407552 0 0.00e+00
TGFBI 281.12990 -3.575563 0.5417002 -6.600630 0 2.00e-07
FCN1 82.63210 -3.079582 0.4665686 -6.600492 0 2.00e-07
VCAN 16.54526 -4.633638 0.7147412 -6.482960 0 3.00e-07
PDE7B 32.06724 -3.870351 0.6058452 -6.388351 0 4.00e-07
MS4A6A 293.46672 -3.013062 0.4889141 -6.162764 0 1.10e-06
SESN3 60.20373 -2.120561 0.3444238 -6.156836 0 1.10e-06
TLR8 31.13299 -2.376698 0.3944600 -6.025196 0 2.20e-06
SSBP2 60.95534 -2.996618 0.5011017 -5.980061 0 2.60e-06
RCBTB2 217.40602 -1.320024 0.2372682 -5.563422 0 2.59e-05
m1m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 13336
## Note: no. genes in output = 13336
## Note: estimated proportion of input genes in output = 1
mres1m <- mitch_calc(m1m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres1m$enrichment_result
mitchtbl <- mres1m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de1mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_latent_vs_active.html") ) {
  mitch_report(mres1m,outfile="MDM_latent_vs_active.html")
}

Alv cells.

pbalv <- pbmono[,grep("alv",colnames(pbmono))]

pb1a <- pbalv[,c(grep("active",colnames(pbalv)),grep("latent",colnames(pbalv)))]
head(pb1a)
##               alv_active1 alv_active2 alv_active3 alv_latent1 alv_latent2
## HIV-Gagp17          32789       27176       17079        2306        1784
## HIV-Gagp24              0           0           0           0           0
## HIV-Gagp2p7          1201        1242         744          69         104
## HIV-Gagp1Pol         2100        2334        1592         129         163
## HIV-Polprot         23710       30544       21871        1465        2065
## HIV-Polp15p31       38437       59592       41124        2414        4070
##               alv_latent3
## HIV-Gagp17           2576
## HIV-Gagp24              0
## HIV-Gagp2p7           121
## HIV-Gagp1Pol          210
## HIV-Polprot          3280
## HIV-Polp15p31        5631
pb1af <- pb1a[which(rowMeans(pb1a)>=10),]
head(pb1af)
##               alv_active1 alv_active2 alv_active3 alv_latent1 alv_latent2
## HIV-Gagp17          32789       27176       17079        2306        1784
## HIV-Gagp2p7          1201        1242         744          69         104
## HIV-Gagp1Pol         2100        2334        1592         129         163
## HIV-Polprot         23710       30544       21871        1465        2065
## HIV-Polp15p31       38437       59592       41124        2414        4070
## HIV-Vif              3140        4489        3034         173         322
##               alv_latent3
## HIV-Gagp17           2576
## HIV-Gagp2p7           121
## HIV-Gagp1Pol          210
## HIV-Polprot          3280
## HIV-Polp15p31        5631
## HIV-Vif               423
colSums(pb1af)
## alv_active1 alv_active2 alv_active3 alv_latent1 alv_latent2 alv_latent3 
##    29715862    28360869    23446102     7231016     4200851     5268032
des1a <- as.data.frame(grepl("active",colnames(pb1af)))
colnames(des1a) <- "case"

plot(cmdscale(dist(t(pb1af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb1af))), labels=colnames(pb1af) )

dds <- DESeqDataSetFromMatrix(countData = pb1af , colData = des1a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1af <- de
write.table(de1af,"de1af.tsv",sep="\t")

nrow(subset(de1af,padj<0.05 & log2FoldChange>0))
## [1] 6
nrow(subset(de1af,padj<0.05 & log2FoldChange<0))
## [1] 24
head(subset(de1af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
CCL2 187.46326 1.9545059 0.3689124 5.298022 0.0000001 0.0003419
HIV-Gagp17 8100.42425 1.1820302 0.2317335 5.100818 0.0000003 0.0006585
HIV-EnvStart 304.44628 1.2626580 0.2904376 4.347433 0.0000138 0.0114934
NAV2 211.50743 0.9938714 0.2500926 3.974013 0.0000707 0.0358951
HIV-Gagp1Pol 641.13400 1.2041681 0.3090865 3.895894 0.0000978 0.0423309
CCL8 25.45343 2.6059285 0.6716333 3.879987 0.0001045 0.0435830
head(subset(de1af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
CHST13 187.27152 -1.4773377 0.2486695 -5.940969 0.00e+00 0.0000331
TSPAN33 323.56673 -1.3232152 0.2340738 -5.652984 0.00e+00 0.0000727
NDRG2 130.11646 -1.6777233 0.2983223 -5.623861 0.00e+00 0.0000727
APOA1 43.38261 -3.1838678 0.6142038 -5.183732 2.00e-07 0.0005081
CEBPD 247.53318 -1.0655607 0.2256494 -4.722195 2.30e-06 0.0038937
FGL2 290.45945 -2.0460415 0.4371137 -4.680799 2.90e-06 0.0041728
SPARC 3366.09610 -0.8261922 0.1780691 -4.639727 3.50e-06 0.0045283
TXLNB 89.27259 -2.1494827 0.4686497 -4.586545 4.50e-06 0.0052644
LYZ 16640.38839 -0.7462134 0.1641027 -4.547234 5.40e-06 0.0057726
CCDC88C 105.67588 -1.0843534 0.2463761 -4.401212 1.08e-05 0.0097664
m1a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14501
## Note: no. genes in output = 14501
## Note: estimated proportion of input genes in output = 1
mres1a <- mitch_calc(m1a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres1a$enrichment_result

mitchtbl <- mres1a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de1af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_latent_vs_active.html") ) {
  mitch_report(mres1a,outfile="Alv_latent_vs_active.html")
}

Combined.

mm1 <- merge(m1a,m1m,by=0)

head(mm1)
##   Row.names        x.x         x.y
## 1      A1BG  0.3846188  0.02587676
## 2  A1BG-AS1 -1.1387385 -0.52043018
## 3       A2M  0.5406778 -0.02830157
## 4    A4GALT  1.2345220 -1.06580803
## 5      AAAS  0.2017214 -0.18837628
## 6      AACS  2.5390654  1.27744097
rownames(mm1) <- mm1[,1]
mm1[,1]=NULL
colnames(mm1) <- c("Alv","MDM")
plot(mm1)
mylm <- lm(mm1)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2519 -0.5490  0.0058  0.5606  5.5456 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.003153   0.007727   0.408    0.683    
## MDM         0.299448   0.006301  47.521   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8839 on 13111 degrees of freedom
## Multiple R-squared:  0.1469, Adjusted R-squared:  0.1469 
## F-statistic:  2258 on 1 and 13111 DF,  p-value: < 2.2e-16
cor.test(mm1$Alv,mm1$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm1$Alv and mm1$MDM
## t = 47.521, df = 13111, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3686193 0.3978230
## sample estimates:
##      cor 
## 0.383317
mm1r <- as.data.frame(apply(mm1,2,rank))
plot(mm1r,cex=0.3)
mylm <- lm(mm1r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm1r)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8602.8 -3014.9   -27.6  2953.0  8629.1 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.295e+03  6.206e+01   69.20   <2e-16 ***
## MDM         3.450e-01  8.197e-03   42.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3553 on 13111 degrees of freedom
## Multiple R-squared:  0.119,  Adjusted R-squared:  0.119 
## F-statistic:  1771 on 1 and 13111 DF,  p-value: < 2.2e-16
cor.test(mm1r$Alv,mm1r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm1r$Alv and mm1r$MDM
## t = 42.088, df = 13111, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3298364 0.3599950
## sample estimates:
##       cor 
## 0.3450048

Mitch 2D

head(mm1)
##                 Alv         MDM
## A1BG      0.3846188  0.02587676
## A1BG-AS1 -1.1387385 -0.52043018
## A2M       0.5406778 -0.02830157
## A4GALT    1.2345220 -1.06580803
## AAAS      0.2017214 -0.18837628
## AACS      2.5390654  1.27744097
mm1res <- mitch_calc(mm1,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
res <- mm1res$enrichment_result
write.table(res,"de1_mm.tsv",row.names = FALSE, quote=FALSE,sep="\t")
res <- subset(mm1res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE1: latent vs productive (top effect size)")

mm1res <- mitch_calc(mm1,genesets=go,minsetsize=5,cores=4,priority="SD")
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
res <- subset(mm1res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE1: latent vs productive (top discordant)")

DE2 Latently-infected vs bystander cells

MDM group.

pb2m <- pbmdm[,c(grep("bystander",colnames(pbmdm)),grep("latent",colnames(pbmdm)))]
head(pb2m)
##               mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_bystander4
## HIV-Gagp17               255            254             57             61
## HIV-Gagp24                 0              0              0              0
## HIV-Gagp2p7               23             32              3              8
## HIV-Gagp1Pol              20             61             16             10
## HIV-Polprot              331            492            181             81
## HIV-Polp15p31           1033           1505            413            181
##               mdm_latent1 mdm_latent2 mdm_latent3 mdm_latent4
## HIV-Gagp17           1927        2077         566         534
## HIV-Gagp24              0           0           0           0
## HIV-Gagp2p7            51         108          37          12
## HIV-Gagp1Pol           83         129          63          24
## HIV-Polprot          1383        1587         877         250
## HIV-Polp15p31        3589        5077        2425         441
pb2mf <- pb2m[which(rowMeans(pb2m)>=10),]
head(pb2mf)
##               mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_bystander4
## HIV-Gagp17               255            254             57             61
## HIV-Gagp2p7               23             32              3              8
## HIV-Gagp1Pol              20             61             16             10
## HIV-Polprot              331            492            181             81
## HIV-Polp15p31           1033           1505            413            181
## HIV-Vif                   87             73             29             16
##               mdm_latent1 mdm_latent2 mdm_latent3 mdm_latent4
## HIV-Gagp17           1927        2077         566         534
## HIV-Gagp2p7            51         108          37          12
## HIV-Gagp1Pol           83         129          63          24
## HIV-Polprot          1383        1587         877         250
## HIV-Polp15p31        3589        5077        2425         441
## HIV-Vif               221         317         146          25
colSums(pb2mf)
## mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_bystander4    mdm_latent1 
##       70251518       68922564       26227400       36266616        2511512 
##    mdm_latent2    mdm_latent3    mdm_latent4 
##        3999100        2431331         583834
des2m <- as.data.frame(grepl("latent",colnames(pb2mf)))
colnames(des2m) <- "case"

plot(cmdscale(dist(t(pb2mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb2mf))), labels=colnames(pb2mf) )

dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
#write.table(de2mf,"de2mf.tsv",sep="\t")

nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 22
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (MDM unpaired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-BaLEnv 3477.64934 6.707022 0.2926909 22.91503 0 0
HIV-Gagp17 2763.92685 7.632632 0.4293637 17.77661 0 0
HIV-Polprot 1995.91085 6.251883 0.3562838 17.54748 0 0
HIV-Vif 312.16572 6.024406 0.3630952 16.59181 0 0
HIV-TatEx2Rev 79.07938 7.147004 0.4568174 15.64521 0 0
HIV-Gagp2p7 95.31590 5.996752 0.3878850 15.46013 0 0
HIV-Gagp1Pol 151.88529 5.868568 0.3885120 15.10524 0 0
HIV-TatEx1 246.18840 6.229807 0.4134688 15.06718 0 0
HIV-EGFP 67.19305 5.961189 0.3968480 15.02134 0 0
HIV-Polp15p31 5133.34400 6.163400 0.4142217 14.87947 0 0
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (MDM unpaired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
VIM 24783.45929 -0.6072566 0.1187122 -5.115368 0.0000003 0.0002836
CYP27A1 4943.44251 -0.4003349 0.1026945 -3.898307 0.0000969 0.0548270
CAPG 13118.99520 -0.5313260 0.1412173 -3.762472 0.0001682 0.0846445
SSR2 1539.16153 -0.3854041 0.1029573 -3.743341 0.0001816 0.0850591
TM4SF1 13.50868 -4.2394819 1.1480971 -3.692616 0.0002220 0.0986060
FABP5 41753.11037 -0.5389353 0.1460877 -3.689123 0.0002250 0.0986060
RPL7A 9456.73631 -0.3595509 0.1004892 -3.578005 0.0003462 0.1333837
TUBB 1912.49738 -0.5178978 0.1503773 -3.443988 0.0005732 0.1850191
ALDH3A2 450.11505 -0.4810222 0.1401607 -3.431933 0.0005993 0.1850191
NQO1 354.27138 -0.6356651 0.1877551 -3.385607 0.0007102 0.2097278
des2m$sample <- rep(1:4,2)

dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ sample + case)
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
write.table(de2mf,"de2mf.tsv",sep="\t")

nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 30
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 19
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (MDM paired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM paired)
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-Vif 312.16572 6.060595 0.2096421 28.90925 0 0
HIV-BaLEnv 3477.64934 6.755062 0.2431928 27.77657 0 0
HIV-Polp15p31 5133.34400 6.200422 0.3071491 20.18701 0 0
HIV-Polprot 1995.91085 6.270763 0.3209171 19.54013 0 0
HIV-Gagp17 2763.92685 7.702589 0.4093093 18.81850 0 0
HIV-Gagp2p7 95.31590 6.001675 0.3530808 16.99802 0 0
HIV-EGFP 67.19305 5.954128 0.3657873 16.27757 0 0
HIV-TatEx2Rev 79.07938 7.161458 0.4442420 16.12062 0 0
HIV-Vpu 58.00682 5.569366 0.3571472 15.59404 0 0
HIV-TatEx1 246.18840 6.224838 0.4070984 15.29075 0 0
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (MDM paired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM paired)
baseMean log2FoldChange lfcSE stat pvalue padj
VIM 24783.4593 -0.6055611 0.1156514 -5.236091 2.00e-07 0.0001314
FBP1 8641.3556 -0.7193683 0.1396845 -5.149951 3.00e-07 0.0001971
MGST3 8645.8034 -0.4021467 0.0795970 -5.052285 4.00e-07 0.0002972
TMSB4X 91023.5640 -0.3227649 0.0687588 -4.694158 2.70e-06 0.0017358
CAPG 13118.9952 -0.5307891 0.1142230 -4.646953 3.40e-06 0.0020849
PRDX1 11714.8162 -0.5281115 0.1151080 -4.587966 4.50e-06 0.0026497
CYP27A1 4943.4425 -0.4045165 0.0889949 -4.545390 5.50e-06 0.0031109
TUBB 1912.4974 -0.5203904 0.1154535 -4.507360 6.60e-06 0.0034937
IFI27 121.1303 -2.2691939 0.5090422 -4.457771 8.30e-06 0.0041764
TUBB4B 608.5510 -0.7597242 0.1780907 -4.265940 1.99e-05 0.0087433
m2m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15078
## Note: no. genes in output = 15078
## Note: estimated proportion of input genes in output = 1
mres2m <- mitch_calc(m2m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres2m$enrichment_result

mitchtbl <- mres2m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de2mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_bystander_vs_latent.html") ) {
  mitch_report(mres2m,outfile="MDM_bystander_vs_latent.html")
}

Alv cells.

pb2a <- pbalv[,c(grep("bystander",colnames(pbalv)),grep("latent",colnames(pbalv)))]
head(pb2a)
##               alv_bystander1 alv_bystander2 alv_bystander3 alv_latent1
## HIV-Gagp17               106            162            183        2306
## HIV-Gagp24                 0              0              0           0
## HIV-Gagp2p7               16             26             17          69
## HIV-Gagp1Pol              26             50             42         129
## HIV-Polprot              208            515            534        1465
## HIV-Polp15p31            476           1203           1151        2414
##               alv_latent2 alv_latent3
## HIV-Gagp17           1784        2576
## HIV-Gagp24              0           0
## HIV-Gagp2p7           104         121
## HIV-Gagp1Pol          163         210
## HIV-Polprot          2065        3280
## HIV-Polp15p31        4070        5631
pb2af <- pb2a[which(rowMeans(pb2a)>=10),]
head(pb2af)
##               alv_bystander1 alv_bystander2 alv_bystander3 alv_latent1
## HIV-Gagp17               106            162            183        2306
## HIV-Gagp2p7               16             26             17          69
## HIV-Gagp1Pol              26             50             42         129
## HIV-Polprot              208            515            534        1465
## HIV-Polp15p31            476           1203           1151        2414
## HIV-Vif                   31             78             86         173
##               alv_latent2 alv_latent3
## HIV-Gagp17           1784        2576
## HIV-Gagp2p7           104         121
## HIV-Gagp1Pol          163         210
## HIV-Polprot          2065        3280
## HIV-Polp15p31        4070        5631
## HIV-Vif               322         423
colSums(pb2af)
## alv_bystander1 alv_bystander2 alv_bystander3    alv_latent1    alv_latent2 
##       58217374       65486247       58735478        7238086        4203761 
##    alv_latent3 
##        5271201
des2a <- as.data.frame(grepl("latent",colnames(pb2af)))
colnames(des2a) <- "case"

plot(cmdscale(dist(t(pb2af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb2af))), labels=colnames(pb2af) )

dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
#write.table(de2af,"de2af.tsv",sep="\t")

nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 51
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (Alv unpaired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-Gagp17 3883.4893 7.471314 0.2766310 27.00823 0 0
HIV-BaLEnv 7093.0345 6.832401 0.3359054 20.34025 0 0
HIV-TatEx1 884.5283 6.109715 0.3981002 15.34718 0 0
HIV-Gagp1Pol 309.1866 5.742374 0.3862127 14.86842 0 0
HIV-Gagp2p7 182.9194 5.991491 0.4304164 13.92022 0 0
HIV-TatEx2Rev 174.7315 6.239469 0.4796188 13.00923 0 0
HIV-Nef 4865.1335 5.399573 0.4207960 12.83181 0 0
HIV-EnvStart 143.5971 5.540001 0.4364343 12.69378 0 0
HIV-Vif 580.6007 5.921900 0.4926373 12.02081 0 0
HIV-Vpu 124.9166 5.541504 0.4648747 11.92042 0 0
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (Alv unpaired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
ERP29 2954.7581 -0.5783129 0.1492500 -3.874794 0.0001067 0.0319350
ZNF804A 1304.5405 -0.6206457 0.1684160 -3.685193 0.0002285 0.0584412
ATP8B4 1444.1134 -0.6823246 0.1883236 -3.623150 0.0002910 0.0660231
HIST1H1C 789.4848 -0.8254679 0.2327725 -3.546243 0.0003908 0.0845229
MT-ND6 821.0851 -0.8667233 0.2452224 -3.534438 0.0004086 0.0845229
TNIK 1315.4638 -0.5606913 0.1612905 -3.476281 0.0005084 0.0993181
SFMBT2 638.9676 -0.4728057 0.1387071 -3.408662 0.0006528 0.1175529
PDE3B 3058.4136 -0.4045251 0.1210715 -3.341207 0.0008341 0.1413531
AC011476.3 180.7652 -0.7473785 0.2254841 -3.314550 0.0009179 0.1471293
CBL 1263.2520 -0.4920618 0.1499907 -3.280615 0.0010358 0.1600955
des2a$sample <- rep(1:3,2)

dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
write.table(de2af,"de2af.tsv",sep="\t")

nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 51
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (Alv paired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv paired)
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-Gagp17 3883.4893 7.471314 0.2766310 27.00823 0 0
HIV-BaLEnv 7093.0345 6.832401 0.3359054 20.34025 0 0
HIV-TatEx1 884.5283 6.109715 0.3981002 15.34718 0 0
HIV-Gagp1Pol 309.1866 5.742374 0.3862127 14.86842 0 0
HIV-Gagp2p7 182.9194 5.991491 0.4304164 13.92022 0 0
HIV-TatEx2Rev 174.7315 6.239469 0.4796188 13.00923 0 0
HIV-Nef 4865.1335 5.399573 0.4207960 12.83181 0 0
HIV-EnvStart 143.5971 5.540001 0.4364343 12.69378 0 0
HIV-Vif 580.6007 5.921900 0.4926373 12.02081 0 0
HIV-Vpu 124.9166 5.541504 0.4648747 11.92042 0 0
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (Alv paired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv paired)
baseMean log2FoldChange lfcSE stat pvalue padj
ERP29 2954.7581 -0.5783129 0.1492500 -3.874794 0.0001067 0.0319350
ZNF804A 1304.5405 -0.6206457 0.1684160 -3.685193 0.0002285 0.0584412
ATP8B4 1444.1134 -0.6823246 0.1883236 -3.623150 0.0002910 0.0660231
HIST1H1C 789.4848 -0.8254679 0.2327725 -3.546243 0.0003908 0.0845229
MT-ND6 821.0851 -0.8667233 0.2452224 -3.534438 0.0004086 0.0845229
TNIK 1315.4638 -0.5606913 0.1612905 -3.476281 0.0005084 0.0993181
SFMBT2 638.9676 -0.4728057 0.1387071 -3.408662 0.0006528 0.1175529
PDE3B 3058.4136 -0.4045251 0.1210715 -3.341207 0.0008341 0.1413531
AC011476.3 180.7652 -0.7473785 0.2254841 -3.314550 0.0009179 0.1471293
CBL 1263.2520 -0.4920618 0.1499907 -3.280615 0.0010358 0.1600955
m2a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16291
## Note: no. genes in output = 16291
## Note: estimated proportion of input genes in output = 1
mres2a <- mitch_calc(m2a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres2a$enrichment_result

mitchtbl <- mres2a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de2af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_bystander_vs_latent.html") ) {
  mitch_report(mres2a,outfile="Alv_bystander_vs_latent.html")
}

Combined.

mm2 <- merge(m2a,m2m,by=0)

head(mm2)
##   Row.names        x.x         x.y
## 1      A1BG  1.4516792 -0.31296863
## 2  A1BG-AS1  0.5088094  0.30818540
## 3       A2M -0.2470666 -0.60355175
## 4   A2M-AS1 -0.8526086  0.03503586
## 5 A2ML1-AS1 -0.9980205  0.63891414
## 6      AAAS  0.3081962  0.06893024
rownames(mm2) <- mm2[,1]
mm2[,1]=NULL
colnames(mm2) <- c("Alv","MDM")
plot(mm2)
mylm <- lm(mm2)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9424 -0.6512 -0.0480  0.5877 24.2060 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.049443   0.008741   5.657 1.57e-08 ***
## MDM         0.146280   0.007256  20.160  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.061 on 14736 degrees of freedom
## Multiple R-squared:  0.02684,    Adjusted R-squared:  0.02678 
## F-statistic: 406.4 on 1 and 14736 DF,  p-value: < 2.2e-16
cor.test(mm2$Alv,mm2$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm2$Alv and mm2$MDM
## t = 20.16, df = 14736, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1480793 0.1795026
## sample estimates:
##       cor 
## 0.1638325
mm2r <- as.data.frame(apply(mm2,2,rank))
plot(mm2r,cex=0.3)
mylm <- lm(mm2r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm2r)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7701.3 -3682.6   -14.9  3683.0  7705.6 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.707e+03  7.003e+01 110.058  < 2e-16 ***
## MDM         -4.577e-02  8.229e-03  -5.562 2.71e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4250 on 14736 degrees of freedom
## Multiple R-squared:  0.002095,   Adjusted R-squared:  0.002027 
## F-statistic: 30.94 on 1 and 14736 DF,  p-value: 2.713e-08
cor.test(mm2r$Alv,mm2r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm2r$Alv and mm2r$MDM
## t = -5.562, df = 14736, p-value = 2.713e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06187000 -0.02964783
## sample estimates:
##         cor 
## -0.04577082

Mitch 2D

head(mm2)
##                  Alv         MDM
## A1BG       1.4516792 -0.31296863
## A1BG-AS1   0.5088094  0.30818540
## A2M       -0.2470666 -0.60355175
## A2M-AS1   -0.8526086  0.03503586
## A2ML1-AS1 -0.9980205  0.63891414
## AAAS       0.3081962  0.06893024
mm2res <- mitch_calc(mm2,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
res <- mm2res$enrichment_result
write.table(res,"de2_mm.tsv",row.names = FALSE, quote=FALSE,sep="\t")
res <- subset(mm2res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE2: bystander vs latent (top effect size)")

mm2res <- mitch_calc(mm2,genesets=go,minsetsize=5,cores=4,priority="SD")
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
res <- subset(mm2res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE2: bystander vs latent (top discordant)")

DE3 Active vs mock

MDM group.

pb3m <- pbmdm[,c(grep("active",colnames(pbmdm)),grep("mock",colnames(pbmdm)))]

head(pb3m)
##               mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_mock1
## HIV-Gagp17          38092       23541       40568       15110       117
## HIV-Gagp24              0           0           0           0         0
## HIV-Gagp2p7          1462        1021        2365         414         5
## HIV-Gagp1Pol         2021        1375        3032         785         7
## HIV-Polprot         27388       18583       44857        9126       136
## HIV-Polp15p31       75686       55267      105649       14984       341
##               mdm_mock2 mdm_mock3 mdm_mock4
## HIV-Gagp17          253        37       159
## HIV-Gagp24            0         0         0
## HIV-Gagp2p7           8         1         5
## HIV-Gagp1Pol         14         3        13
## HIV-Polprot         196        47       146
## HIV-Polp15p31       550       101       257
pb3mf <- pb3m[which(rowMeans(pb3m)>=10),]
head(pb3mf)
##               mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_mock1
## HIV-Gagp17          38092       23541       40568       15110       117
## HIV-Gagp2p7          1462        1021        2365         414         5
## HIV-Gagp1Pol         2021        1375        3032         785         7
## HIV-Polprot         27388       18583       44857        9126       136
## HIV-Polp15p31       75686       55267      105649       14984       341
## HIV-Vif              5276        4254        7255        1109        15
##               mdm_mock2 mdm_mock3 mdm_mock4
## HIV-Gagp17          253        37       159
## HIV-Gagp2p7           8         1         5
## HIV-Gagp1Pol         14         3        13
## HIV-Polprot         196        47       146
## HIV-Polp15p31       550       101       257
## HIV-Vif              45         8        17
colSums(pb3mf)
## mdm_active1 mdm_active2 mdm_active3 mdm_active4   mdm_mock1   mdm_mock2 
##    29532716    22439063    25242866    13852866    28545021    20536722 
##   mdm_mock3   mdm_mock4 
##     7022107    20628091
des3m <- as.data.frame(grepl("active",colnames(pb3mf)))
colnames(des3m) <- "case"

plot(cmdscale(dist(t(pb3mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb3mf))), labels=colnames(pb3mf) )

dds <- DESeqDataSetFromMatrix(countData = pb3mf , colData = des3m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3mf <- de
write.table(de3mf,"de3mf.tsv",sep="\t")

nrow(subset(de3mf,padj<0.05 & log2FoldChange>0))
## [1] 125
nrow(subset(de3mf,padj<0.05 & log2FoldChange<0))
## [1] 227
head(subset(de3mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-Gagp17 12498.4369 7.499640 0.3193505 23.48404 0 0
HIV-Polprot 10407.8721 7.302128 0.3384398 21.57585 0 0
HIV-BaLEnv 16603.6070 7.375420 0.3461329 21.30806 0 0
HIV-Polp15p31 25546.9540 7.375321 0.3989100 18.48868 0 0
HIV-Gagp1Pol 760.7351 7.355893 0.4017003 18.31189 0 0
HIV-TatEx2Rev 344.4153 7.178345 0.3928135 18.27418 0 0
HIV-TatEx1 1135.6725 7.108449 0.4151902 17.12095 0 0
HIV-Vpu 279.0444 5.637258 0.3296147 17.10257 0 0
HIV-EGFP 391.0342 7.635327 0.4499573 16.96900 0 0
HIV-EnvStart 232.1886 6.852333 0.4100221 16.71211 0 0
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in active MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
PDE4B 111.80062 -3.208542 0.4331447 -7.407552 0 0.00e+00
TGFBI 281.12990 -3.575563 0.5417002 -6.600630 0 2.00e-07
FCN1 82.63210 -3.079582 0.4665686 -6.600492 0 2.00e-07
VCAN 16.54526 -4.633638 0.7147412 -6.482960 0 3.00e-07
PDE7B 32.06724 -3.870351 0.6058452 -6.388351 0 4.00e-07
MS4A6A 293.46672 -3.013062 0.4889141 -6.162764 0 1.10e-06
SESN3 60.20373 -2.120561 0.3444238 -6.156836 0 1.10e-06
TLR8 31.13299 -2.376698 0.3944600 -6.025196 0 2.20e-06
SSBP2 60.95534 -2.996618 0.5011017 -5.980061 0 2.60e-06
RCBTB2 217.40602 -1.320024 0.2372682 -5.563422 0 2.59e-05
m3m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14556
## Note: no. genes in output = 14556
## Note: estimated proportion of input genes in output = 1
mres3m <- mitch_calc(m3m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres3m$enrichment_result

mitchtbl <- mres3m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de3mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_mock_vs_active.html") ) {
  mitch_report(mres3m,outfile="MDM_mock_vs_active.html")
}
pb3a <- pbalv[,c(grep("active",colnames(pbalv)),grep("mock",colnames(pbalv)))]

head(pb3a)
##               alv_active1 alv_active2 alv_active3 alv_mock1 alv_mock2 alv_mock3
## HIV-Gagp17          32789       27176       17079       106       178      1530
## HIV-Gagp24              0           0           0         0         0         0
## HIV-Gagp2p7          1201        1242         744         2         7        52
## HIV-Gagp1Pol         2100        2334        1592         6        21        94
## HIV-Polprot         23710       30544       21871        95       230      1596
## HIV-Polp15p31       38437       59592       41124       164       360      2804
pb3af <- pb3a[which(rowMeans(pb3a)>=10),]
head(pb3af)
##               alv_active1 alv_active2 alv_active3 alv_mock1 alv_mock2 alv_mock3
## HIV-Gagp17          32789       27176       17079       106       178      1530
## HIV-Gagp2p7          1201        1242         744         2         7        52
## HIV-Gagp1Pol         2100        2334        1592         6        21        94
## HIV-Polprot         23710       30544       21871        95       230      1596
## HIV-Polp15p31       38437       59592       41124       164       360      2804
## HIV-Vif              3140        4489        3034        10        33       162
colSums(pb3af)
## alv_active1 alv_active2 alv_active3   alv_mock1   alv_mock2   alv_mock3 
##    29735667    28374259    23458113    20217498    24567251    33158713
des3a <- as.data.frame(grepl("active",colnames(pb3af)))
colnames(des3a) <- "case"

plot(cmdscale(dist(t(pb3af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb3af))), labels=colnames(pb3af) )

dds <- DESeqDataSetFromMatrix(countData = pb3af , colData = des3a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3af <- de
write.table(de3af,"de3af.tsv",sep="\t")

nrow(subset(de3af,padj<0.05 & log2FoldChange>0))
## [1] 886
nrow(subset(de3af,padj<0.05 & log2FoldChange<0))
## [1] 654
head(subset(de3af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
HIV-EGFP 505.1051 6.228719 0.4412255 14.116863 0 0
AL157912.1 745.8582 2.719126 0.2063421 13.177756 0 0
LAYN 547.0886 2.260197 0.1948557 11.599339 0 0
HES4 377.0144 2.363778 0.2040378 11.585001 0 0
OCIAD2 348.2412 2.479455 0.2229928 11.118993 0 0
CDKN1A 5259.6829 1.542031 0.1400365 11.011636 0 0
IL6R-AS1 450.4087 3.518576 0.3271549 10.755077 0 0
SDS 4663.3040 2.115797 0.1999132 10.583576 0 0
TNFRSF9 169.0443 2.531684 0.2525074 10.026176 0 0
CCL7 1431.5449 1.935602 0.1973803 9.806458 0 0
head(subset(de3af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
NDRG2 315.7674 -1.8213644 0.1665679 -10.934665 0 0
HIST1H1C 782.9297 -1.1710597 0.1098714 -10.658462 0 0
ADA2 1768.1605 -1.0593324 0.1146721 -9.237927 0 0
CEBPD 682.1436 -1.4858672 0.1675104 -8.870299 0 0
CRIM1 379.1844 -1.6814255 0.1950919 -8.618632 0 0
RPL10A 8042.6905 -0.8855144 0.1028289 -8.611534 0 0
LYZ 45445.6900 -1.1847470 0.1418565 -8.351731 0 0
CHST13 457.5548 -1.6409262 0.1985105 -8.266196 0 0
TGFBI 391.0922 -2.3344782 0.2846892 -8.200094 0 0
H1FX 859.4086 -1.1910036 0.1582745 -7.524924 0 0
m3a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15728
## Note: no. genes in output = 15728
## Note: estimated proportion of input genes in output = 1
mres3a <- mitch_calc(m3a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres3a$enrichment_result

mitchtbl <- mres3a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de3af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_mock_vs_active.html") ) {
  mitch_report(mres3a,outfile="Alv_mock_vs_active.html")
}

Combined.

mm3 <- merge(m3a,m3m,by=0)

head(mm3)
##   Row.names        x.x        x.y
## 1      A1BG  1.2205067  0.3721932
## 2  A1BG-AS1 -0.7077775 -1.1211047
## 3       A2M -1.1947198 -0.3506444
## 4 A2ML1-AS1 -0.3894102  0.4327166
## 5    A4GALT  3.0694313  0.1669604
## 6      AAAS  0.7994090 -0.3926734
rownames(mm3) <- mm3[,1]
mm3[,1]=NULL
colnames(mm3) <- c("Alv","MDM")
plot(mm3)
mylm <- lm(mm3)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.2378  -0.8680  -0.0890   0.7795  11.8076 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.07329    0.01208   6.067 1.33e-09 ***
## MDM          0.75654    0.00843  89.742  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.445 on 14306 degrees of freedom
## Multiple R-squared:  0.3602, Adjusted R-squared:  0.3601 
## F-statistic:  8054 on 1 and 14306 DF,  p-value: < 2.2e-16
cor.test(mm3$Alv,mm3$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm3$Alv and mm3$MDM
## t = 89.742, df = 14306, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5895663 0.6105360
## sample estimates:
##       cor 
## 0.6001542
mm3r <- as.data.frame(apply(mm3,2,rank))
plot(mm3r,cex=0.3)
mylm <- lm(mm3r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm3r)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10217.6  -2519.5   -113.6   2436.5  10980.2 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.820e+03  5.495e+01   51.32   <2e-16 ***
## MDM         6.058e-01  6.652e-03   91.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3286 on 14306 degrees of freedom
## Multiple R-squared:  0.367,  Adjusted R-squared:  0.367 
## F-statistic:  8296 on 1 and 14306 DF,  p-value: < 2.2e-16
cor.test(mm3r$Alv,mm3r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm3r$Alv and mm3r$MDM
## t = 91.083, df = 14306, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5953709 0.6161156
## sample estimates:
##       cor 
## 0.6058463

Mitch 2D

head(mm3)
##                  Alv        MDM
## A1BG       1.2205067  0.3721932
## A1BG-AS1  -0.7077775 -1.1211047
## A2M       -1.1947198 -0.3506444
## A2ML1-AS1 -0.3894102  0.4327166
## A4GALT     3.0694313  0.1669604
## AAAS       0.7994090 -0.3926734
mm3res <- mitch_calc(mm3,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
res <- mm3res$enrichment_result
write.table(res,"de3_mm.tsv",row.names = FALSE, quote=FALSE,sep="\t")
res <- subset(mm3res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE3: mock vs productive (top effect size)")

mm3res <- mitch_calc(mm3,genesets=go,minsetsize=5,cores=4,priority="SD")
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
res <- subset(mm3res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE3: mock vs productive (top discordant)")

DE4 Mock vs bystander

MDM group.

pb4m <- pbmdm[,c(grep("mock",colnames(pbmdm)),grep("bystander",colnames(pbmdm)))]

head(pb4m)
##               mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 mdm_bystander1
## HIV-Gagp17          117       253        37       159            255
## HIV-Gagp24            0         0         0         0              0
## HIV-Gagp2p7           5         8         1         5             23
## HIV-Gagp1Pol          7        14         3        13             20
## HIV-Polprot         136       196        47       146            331
## HIV-Polp15p31       341       550       101       257           1033
##               mdm_bystander2 mdm_bystander3 mdm_bystander4
## HIV-Gagp17               254             57             61
## HIV-Gagp24                 0              0              0
## HIV-Gagp2p7               32              3              8
## HIV-Gagp1Pol              61             16             10
## HIV-Polprot              492            181             81
## HIV-Polp15p31           1505            413            181
pb4mf <- pb4m[which(rowMeans(pb4m)>=10),]
head(pb4mf)
##               mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 mdm_bystander1
## HIV-Gagp17          117       253        37       159            255
## HIV-Gagp2p7           5         8         1         5             23
## HIV-Gagp1Pol          7        14         3        13             20
## HIV-Polprot         136       196        47       146            331
## HIV-Polp15p31       341       550       101       257           1033
## HIV-Vif              15        45         8        17             87
##               mdm_bystander2 mdm_bystander3 mdm_bystander4
## HIV-Gagp17               254             57             61
## HIV-Gagp2p7               32              3              8
## HIV-Gagp1Pol              61             16             10
## HIV-Polprot              492            181             81
## HIV-Polp15p31           1505            413            181
## HIV-Vif                   73             29             16
colSums(pb4mf)
##      mdm_mock1      mdm_mock2      mdm_mock3      mdm_mock4 mdm_bystander1 
##       28557312       20547234        7025832       20638609       70265280 
## mdm_bystander2 mdm_bystander3 mdm_bystander4 
##       68938388       26232912       36276740
des4m <- as.data.frame(grepl("bystander",colnames(pb4mf)))
colnames(des4m) <- "case"

plot(cmdscale(dist(t(pb4mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb4mf))), labels=colnames(pb4mf) )

dds <- DESeqDataSetFromMatrix(countData = pb4mf , colData = des4m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4mf <- de
write.table(de4mf,"de4mf.tsv",sep="\t")

nrow(subset(de4mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de4mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de4mf, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in bystander MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in bystander MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
PLAAT3 27.495463 1.3311638 0.3494467 3.809347 0.0001393 0.8552440
CCL8 11.080843 2.8478132 0.7590519 3.751803 0.0001756 0.8552440
ISG15 1232.216705 0.6855917 0.1840010 3.726022 0.0001945 0.8552440
PSME2 4609.357252 0.3430492 0.0978421 3.506149 0.0004546 0.9628740
NCMAP 6.755414 2.3333130 0.7714540 3.024565 0.0024899 0.9999854
RBP7 435.213346 0.7687445 0.2837163 2.709553 0.0067374 0.9999854
AL162497.1 7.481541 1.7865436 0.6664301 2.680767 0.0073454 0.9999854
RBM42 750.000256 0.2819835 0.1086112 2.596266 0.0094243 0.9999854
IL3RA 27.875556 1.4189454 0.5468915 2.594565 0.0094711 0.9999854
SLC9B1 62.647780 0.7284858 0.3033611 2.401381 0.0163333 0.9999854
head(subset(de4mf, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in bystander MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in bystander MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
ATAD2 257.69337 -0.6096526 0.1648714 -3.697746 0.0002175 0.8552440
PBK 22.44710 -2.8534263 0.7911581 -3.606645 0.0003102 0.9628740
CENPF 169.31681 -1.9979286 0.5680603 -3.517106 0.0004363 0.9628740
CENPK 158.41357 -0.9293783 0.2665819 -3.486277 0.0004898 0.9628740
KIF11 51.05594 -1.1804467 0.3592065 -3.286262 0.0010153 0.9999854
HIV-Gagp17 140.04463 -1.3214552 0.4058772 -3.255801 0.0011307 0.9999854
CXCL5 39.93797 -3.6909696 1.1406693 -3.235793 0.0012131 0.9999854
MKI67 202.95580 -2.0462224 0.6500555 -3.147766 0.0016452 0.9999854
NCAPG 41.61228 -1.6289330 0.5178769 -3.145406 0.0016586 0.9999854
CCNA2 55.20340 -1.2180408 0.3897170 -3.125450 0.0017753 0.9999854
m4m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15779
## Note: no. genes in output = 15779
## Note: estimated proportion of input genes in output = 1
mres4m <- mitch_calc(m4m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres4m$enrichment_result

mitchtbl <- mres4m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de4mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_mock_vs_bystander.html") ) {
  mitch_report(mres4m,outfile="MDM_mock_vs_bystander.html")
}

Alv cells.

pb4a <- pbalv[,c(grep("mock",colnames(pbalv)),grep("bystander",colnames(pbalv)))]

head(pb4a)
##               alv_mock1 alv_mock2 alv_mock3 alv_bystander1 alv_bystander2
## HIV-Gagp17          106       178      1530            106            162
## HIV-Gagp24            0         0         0              0              0
## HIV-Gagp2p7           2         7        52             16             26
## HIV-Gagp1Pol          6        21        94             26             50
## HIV-Polprot          95       230      1596            208            515
## HIV-Polp15p31       164       360      2804            476           1203
##               alv_bystander3
## HIV-Gagp17               183
## HIV-Gagp24                 0
## HIV-Gagp2p7               17
## HIV-Gagp1Pol              42
## HIV-Polprot              534
## HIV-Polp15p31           1151
pb4af <- pb4a[which(rowMeans(pb4a)>=10),]
head(pb4af)
##               alv_mock1 alv_mock2 alv_mock3 alv_bystander1 alv_bystander2
## HIV-Gagp17          106       178      1530            106            162
## HIV-Gagp2p7           2         7        52             16             26
## HIV-Gagp1Pol          6        21        94             26             50
## HIV-Polprot          95       230      1596            208            515
## HIV-Polp15p31       164       360      2804            476           1203
## HIV-Vif              10        33       162             31             78
##               alv_bystander3
## HIV-Gagp17               183
## HIV-Gagp2p7               17
## HIV-Gagp1Pol              42
## HIV-Polprot              534
## HIV-Polp15p31           1151
## HIV-Vif                   86
colSums(pb4af)
##      alv_mock1      alv_mock2      alv_mock3 alv_bystander1 alv_bystander2 
##       20228192       24576310       33170699       58232744       65496965 
## alv_bystander3 
##       58745079
des4a <- as.data.frame(grepl("bystander",colnames(pb4af)))
colnames(des4a) <- "case"

plot(cmdscale(dist(t(pb4af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb4af))), labels=colnames(pb4af) )

dds <- DESeqDataSetFromMatrix(countData = pb4af , colData = des4a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4af <- de
write.table(de4af,"de4af.tsv",sep="\t")

nrow(subset(de4af,padj<0.05 & log2FoldChange>0))
## [1] 40
nrow(subset(de4af,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de4af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent Alv cells compared to ") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent Alv cells compared to
baseMean log2FoldChange lfcSE stat pvalue padj
IFI44L 231.6426 4.4691944 0.6280989 7.115431 0 0.00e+00
IFI6 27252.4351 2.5264574 0.3587932 7.041543 0 0.00e+00
PARP14 2649.6649 1.1667034 0.1671472 6.980096 0 0.00e+00
EPSTI1 841.0903 3.1328464 0.4652473 6.733722 0 1.00e-07
IFIT1 903.4800 2.9586186 0.4605872 6.423580 0 5.00e-07
OAS1 1040.3169 1.5921246 0.2611350 6.096940 0 3.10e-06
CMPK2 128.1722 2.9608547 0.5062587 5.848502 0 1.21e-05
IRF7 293.0091 1.6196405 0.2863101 5.656945 0 3.28e-05
IFIT3 2363.8904 1.9388543 0.3467406 5.591657 0 4.26e-05
GMPR 436.2431 0.9806396 0.1779481 5.510819 0 5.95e-05
head(subset(de4af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
SPP1 81304.43380 -0.4228031 0.0889530 -4.753107 0.0000020 0.0013976
FCGBP 49.30761 -3.7410601 1.0353317 -3.613393 0.0003022 0.0989759
HIV-Gagp17 441.21001 -3.0148001 0.9823512 -3.068964 0.0021480 0.5225842
FABP4 9311.07939 -0.5768152 0.1933690 -2.982977 0.0028546 0.6659418
F13A1 69.99588 -4.4385973 1.4958032 -2.967367 0.0030036 0.6912388
CCL7 757.08873 -0.6504921 0.2254710 -2.885036 0.0039137 0.8655857
MACC1 120.37788 -0.6473085 0.2575103 -2.513719 0.0119466 0.9999453
AC092484.1 14.47319 -3.7309046 1.5115979 -2.468186 0.0135800 0.9999453
FAM135B 26.59587 -1.2954410 0.5368231 -2.413162 0.0158148 0.9999453
LINC01534 117.12690 -0.5707210 0.2432961 -2.345788 0.0189869 0.9999453
m4a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 17047
## Note: no. genes in output = 17047
## Note: estimated proportion of input genes in output = 1
mres4a <- mitch_calc(m4a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres4a$enrichment_result

mitchtbl <- mres4a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de4af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_mock_vs_bystander.html") ) {
  mitch_report(mres4a,outfile="Alv_mock_vs_bystander.html")
}

Combined.

mm4 <- merge(m4a,m4m,by=0)

head(mm4)
##   Row.names         x.x         x.y
## 1      A1BG -0.74359044  0.59808019
## 2  A1BG-AS1  0.04118850 -0.81910713
## 3       A2M -1.62410772 -0.08613625
## 4   A2M-AS1  0.04893154  0.41963455
## 5 A2ML1-AS1  0.84025261 -0.16313488
## 6      AAAS  0.10269483 -0.16988155
rownames(mm4) <- mm4[,1]
mm4[,1]=NULL
colnames(mm4) <- c("Alv","MDM")
plot(mm4)
mylm <- lm(mm4)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7985 -0.3483 -0.0228  0.3094  6.9292 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.007081   0.004983   1.421    0.155    
## MDM         0.092294   0.007460  12.372   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6192 on 15437 degrees of freedom
## Multiple R-squared:  0.009818,   Adjusted R-squared:  0.009754 
## F-statistic: 153.1 on 1 and 15437 DF,  p-value: < 2.2e-16
cor.test(mm4$Alv,mm4$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm4$Alv and mm4$MDM
## t = 12.372, df = 15437, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08344104 0.11467959
## sample estimates:
##        cor 
## 0.09908473
mm4r <- as.data.frame(apply(mm4,2,rank))
plot(mm4r,cex=0.3)
mylm <- lm(mm4r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm4r)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7975.6 -3853.2     3.1  3855.5  7927.2 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 7.411e+03  7.169e+01 103.381  < 2e-16 ***
## MDM         4.000e-02  8.042e-03   4.974 6.64e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4454 on 15437 degrees of freedom
## Multiple R-squared:  0.0016, Adjusted R-squared:  0.001535 
## F-statistic: 24.74 on 1 and 15437 DF,  p-value: 6.641e-07
cor.test(mm4r$Alv,mm4r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm4r$Alv and mm4r$MDM
## t = 4.9736, df = 15437, p-value = 6.641e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02423988 0.05573763
## sample estimates:
##        cor 
## 0.03999869

Mitch 2D

head(mm4)
##                   Alv         MDM
## A1BG      -0.74359044  0.59808019
## A1BG-AS1   0.04118850 -0.81910713
## A2M       -1.62410772 -0.08613625
## A2M-AS1    0.04893154  0.41963455
## A2ML1-AS1  0.84025261 -0.16313488
## AAAS       0.10269483 -0.16988155
mm4res <- mitch_calc(mm4,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
res <- mm4res$enrichment_result
write.table(res,"de4_mm.tsv",row.names = FALSE, quote=FALSE,sep="\t")
res <- subset(mm4res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE4: bystander vs latent (top effect size)")

mm4res <- mitch_calc(mm4,genesets=go,minsetsize=5,cores=4,priority="SD")
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
res <- subset(mm4res$enrichment_result,p.adjustMANOVA<0.05)

mx <- res[1:30,c("s.Alv","s.MDM")]
rownames(mx) <- res$set[1:30]
colnames(mx) <- gsub("s.","",colnames(mx))

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.75,cexCol=0.8)
mtext("DE4: bystander vs latent (top discordant)")

Cross comparison pathway enrichment heatmap

l1 <- list("de1a"=de1af,"de1m"=de1mf,"de2a"=de2af,"de2m"=de2mf,
  "de3a"=de3af,"de3m"=de3mf,"de4a"=de4af,"de4m"=de4mf)

lm <- mitch_import(x=l1,DEtype="deseq2",joinType="inner")
## Note: Mean no. genes in input = 15289.5
## Note: no. genes in output = 13075
## Note: estimated proportion of input genes in output = 0.855
lmres <- mitch_calc(x=lm,genesets=go,minsetsize=5,cores=8,priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
top <- head(lmres$enrichment_result,50)

top %>%
  kbl(caption="Top pathways across all contrasts") %>%
  kable_paper("hover", full_width = F)
Top pathways across all contrasts
set setSize pMANOVA s.de1a s.de1m s.de2a s.de2m s.de3a s.de3m s.de4a s.de4m p.de1a p.de1m p.de2a p.de2m p.de3a p.de3m p.de4a p.de4m s.dist SD p.adjustMANOVA
1987 GO:0019773 proteasome core complex, alpha-subunit complex 7 0.0000005 0.6050549 0.9032970 -0.0435961 -0.9440072 0.2302899 0.7698413 -0.3727316 0.9066203 0.0055694 0.0000348 0.8417073 0.0000152 0.2914436 0.0004196 0.0877207 0.0000326 1.918767 0.6712362 0.0000218
827 GO:0005942 phosphatidylinositol 3-kinase complex 6 0.0017551 -0.6327442 -0.8817303 -0.5778305 0.6804142 -0.5754075 -0.7218864 0.5752799 -0.4869284 0.0072746 0.0001835 0.0142455 0.0038983 0.0146575 0.0021967 0.0146795 0.0388890 1.842532 0.6019408 0.0226293
2525 GO:0032395 MHC class II receptor activity 6 0.0000000 -0.5974443 -0.9133828 -0.0693754 0.8306425 -0.3554722 -0.7298442 0.7327518 0.2412579 0.0112695 0.0001066 0.7685748 0.0004254 0.1316272 0.0019613 0.0018812 0.3061856 1.772047 0.6598176 0.0000000
3678 GO:0045987 positive regulation of smooth muscle contraction 5 0.0101335 0.8095486 0.2471920 0.5097475 0.4037643 0.8472226 0.7841775 -0.0862433 0.5271002 0.0017182 0.3385103 0.0484017 0.1179592 0.0010343 0.0023913 0.7384371 0.0412481 1.660522 0.3195083 0.0806529
1418 GO:0008541 proteasome regulatory particle, lid subcomplex 8 0.0000037 0.6424390 0.8028430 -0.2268501 -0.5253119 0.1970422 0.7433420 -0.5400436 0.6378090 0.0016517 0.0000840 0.2666054 0.0100884 0.3345773 0.0002715 0.0081702 0.0017845 1.635399 0.5731949 0.0001109
3599 GO:0045656 negative regulation of monocyte differentiation 5 0.0542237 -0.6803060 -0.5200918 -0.7271920 0.2757460 -0.8680643 -0.6089977 0.3037184 -0.3391890 0.0084284 0.0440216 0.0048622 0.2856664 0.0007745 0.0183632 0.2395988 0.1890685 1.633972 0.4501117 0.2319354
1998 GO:0019864 IgG binding 6 0.0000021 -0.1938429 -0.3811054 -0.8371974 -0.6933966 -0.7660112 -0.7669549 -0.3044099 0.0162216 0.4109920 0.1059974 0.0003829 0.0032677 0.0011560 0.0011399 0.1966609 0.9451488 1.622478 0.3173704 0.0000671
3179 GO:0042555 MCM complex 10 0.0000006 -0.1791198 -0.2798316 -0.6423728 0.0448220 -0.7221737 -0.6785610 -0.6015002 -0.8397091 0.3267760 0.1255133 0.0004355 0.8061596 0.0000766 0.0002025 0.0009889 0.0000042 1.604349 0.3103454 0.0000245
3042 GO:0036402 proteasome-activating activity 6 0.0000215 0.6448338 0.6789859 -0.4255873 -0.4924376 0.1080164 0.8049583 -0.4134466 0.6504961 0.0062323 0.0039741 0.0710529 0.0367320 0.6468607 0.0006383 0.0794925 0.0057921 1.598174 0.5671468 0.0005530
3196 GO:0042613 MHC class II protein complex 12 0.0000000 -0.5941718 -0.8359233 -0.1753681 0.3985430 -0.3978667 -0.7472760 0.6870678 0.3011177 0.0003654 0.0000005 0.2929614 0.0168395 0.0170264 0.0000074 0.0000376 0.0709470 1.587719 0.5717556 0.0000000
4429 GO:0070106 interleukin-27-mediated signaling pathway 8 0.0000000 -0.5012627 -0.5856930 -0.3605648 0.0131055 0.6299648 0.0545267 0.7929900 0.8291689 0.0140895 0.0041230 0.0774302 0.9488285 0.0020321 0.7894552 0.0001026 0.0000487 1.562262 0.5788613 0.0000000
791 GO:0005839 proteasome core complex 18 0.0000000 0.4072741 0.6723682 -0.2010671 -0.8836299 0.1631990 0.4500949 -0.1415928 0.8621004 0.0027806 0.0000008 0.1398262 0.0000000 0.2307788 0.0009477 0.2984810 0.0000000 1.559380 0.5620079 0.0000000
1213 GO:0007221 positive regulation of transcription of Notch receptor target 6 0.0006085 -0.7818757 -0.6360089 -0.2070294 0.6810263 -0.7343842 -0.3454485 -0.3446068 -0.3461117 0.0009104 0.0069788 0.3799011 0.0038663 0.0018376 0.1428670 0.1438433 0.1421013 1.555538 0.4627055 0.0099976
3835 GO:0048245 eosinophil chemotaxis 7 0.0008746 0.7602650 0.6174516 0.4083038 0.1342866 0.6317723 0.7230530 -0.5379553 0.1884866 0.0004949 0.0046705 0.0614105 0.5384495 0.0037969 0.0009232 0.0137162 0.3878887 1.546240 0.4343996 0.0133472
3573 GO:0045569 TRAIL binding 5 0.0002655 0.3120735 0.2467330 0.6861515 0.3189594 0.8409793 0.5596940 0.4449273 0.6551798 0.2269156 0.3394082 0.0078828 0.2168281 0.0011267 0.0302152 0.0849248 0.0111780 1.542248 0.2115620 0.0049454
368 GO:0002503 peptide antigen assembly with MHC class II protein complex 11 0.0000000 -0.5592607 -0.8241524 -0.1902661 0.3513890 -0.3574152 -0.7365279 0.6692507 0.3267689 0.0013196 0.0000022 0.2746326 0.0436235 0.0401406 0.0000233 0.0001212 0.0606139 1.541612 0.5553246 0.0000000
3024 GO:0036150 phosphatidylserine acyl-chain remodeling 5 0.0344068 -0.3952869 -0.2515685 -0.8074981 -0.3382402 -0.7806274 -0.7368018 0.1204285 -0.4610559 0.1258759 0.3300274 0.0017653 0.1903088 0.0025027 0.0043275 0.6410111 0.0742194 1.537963 0.3160976 0.1753394
105 GO:0000727 double-strand break repair via break-induced replication 8 0.0000098 0.0215619 -0.0583531 -0.6544348 -0.2268118 -0.6179115 -0.6446774 -0.6898676 -0.7464605 0.9159083 0.7750648 0.0013487 0.2666861 0.0024744 0.0015908 0.0007274 0.0002558 1.521231 0.3114133 0.0002733
483 GO:0004045 aminoacyl-tRNA hydrolase activity 5 0.1220208 0.6489977 0.7124713 0.2589441 -0.4733282 0.6089059 0.6669013 -0.1271920 0.4660750 0.0119662 0.0057979 0.3160452 0.0668347 0.0183808 0.0098091 0.6223845 0.0711224 1.506264 0.4334920 0.3618095
3077 GO:0038156 interleukin-3-mediated signaling pathway 5 0.0102599 -0.6699617 -0.1914308 -0.7123183 -0.0579648 -0.8381943 -0.6428462 0.2915379 -0.1967253 0.0094773 0.4585702 0.0058084 0.8224207 0.0011702 0.0127990 0.2589717 0.4462368 1.495248 0.3959257 0.0809592
479 GO:0004017 adenylate kinase activity 6 0.0086182 0.4222205 0.4974112 0.7777693 0.0001785 0.7701686 0.6169562 0.1363277 0.4421914 0.0733162 0.0348730 0.0009689 0.9993958 0.0010863 0.0088702 0.5631256 0.0607136 1.489472 0.2780353 0.0740790
47 GO:0000221 vacuolar proton-transporting V-type ATPase, V1 domain 8 0.0018759 0.5662738 0.7380998 0.4783041 -0.6398179 0.5363320 0.2917273 -0.5395844 0.1306536 0.0055464 0.0002999 0.0191547 0.0017257 0.0086199 0.1531055 0.0082247 0.5222915 1.478032 0.5181839 0.0238358
3206 GO:0042719 mitochondrial intermembrane space protein transporter complex 6 0.0207354 0.6511592 0.6053511 0.6990588 -0.2810978 0.7635371 0.4297957 -0.1700972 0.0731757 0.0057424 0.0102353 0.0030230 0.2331665 0.0011995 0.0683046 0.4706427 0.7562888 1.469748 0.4141143 0.1280448
4734 GO:0075525 viral translational termination-reinitiation 5 0.0041057 -0.5564805 0.0458761 -0.3446060 -0.7438409 -0.7290589 -0.5652946 -0.5075440 0.2328386 0.0311765 0.8590164 0.1821004 0.0039703 0.0047539 0.0286005 0.0493806 0.3673038 1.465171 0.3569938 0.0427454
4847 GO:0097250 mitochondrial respirasome assembly 6 0.0169886 0.1711429 0.7093376 0.7775142 -0.5550539 0.4670595 0.4248221 -0.3813860 0.3931186 0.4679190 0.0026210 0.0009726 0.0185537 0.0475834 0.0715622 0.1057405 0.0954334 1.464131 0.4840891 0.1127847
5277 GO:1903238 positive regulation of leukocyte tethering or rolling 5 0.0266902 -0.7802601 -0.5706809 -0.5008110 0.0113236 -0.6506809 -0.6382555 0.2035195 -0.2804591 0.0025145 0.0271183 0.0524751 0.9650290 0.0117468 0.0134535 0.4306903 0.2775116 1.461594 0.3487134 0.1500586
5226 GO:1902254 negative regulation of intrinsic apoptotic signaling pathway by p53 class mediator 6 0.0002445 0.2727829 -0.1421940 0.6560563 0.3803913 0.6468488 0.5118219 0.5242431 0.6994924 0.2472832 0.5464514 0.0053872 0.1066535 0.0060723 0.0299337 0.0261712 0.0030050 1.454040 0.2775954 0.0045812
830 GO:0005955 calcineurin complex 5 0.0485964 -0.0502525 -0.6351033 -0.7677123 0.6596480 -0.5683244 -0.4083244 0.0185769 -0.4272686 0.8457285 0.0139199 0.0029492 0.0106373 0.0277584 0.1138653 0.9426597 0.0980430 1.450181 0.4643966 0.2182931
2351 GO:0031123 RNA 3’-end processing 9 0.0031881 -0.1547868 -0.7065837 -0.5734816 0.4007177 -0.4049866 -0.7035393 0.3024134 -0.5623246 0.4214247 0.0002417 0.0028910 0.0373930 0.0354112 0.0002572 0.1162363 0.0034878 1.441985 0.4404180 0.0355870
4567 GO:0071139 resolution of DNA recombination intermediates 5 0.1086849 -0.5603366 -0.3439021 -0.3644989 0.0632594 -0.7136343 -0.7317215 -0.0139250 -0.6758378 0.0300260 0.1829949 0.1581424 0.8065087 0.0057184 0.0046032 0.9570022 0.0088680 1.439009 0.3107133 0.3399072
2924 GO:0035456 response to interferon-beta 9 0.0000000 -0.5219654 -0.4969301 -0.3039951 0.1702638 0.5690086 -0.1039849 0.9139752 0.5050258 0.0067000 0.0098424 0.1143394 0.3765123 0.0031181 0.5891372 0.0000020 0.0087062 1.437281 0.5343765 0.0000000
790 GO:0005838 proteasome regulatory particle 8 0.0000318 0.5865922 0.6290273 -0.3417961 -0.5342466 0.0794559 0.6062026 -0.4576988 0.5762225 0.0040659 0.0020637 0.0941562 0.0088822 0.6972040 0.0029869 0.0249891 0.0047696 1.434338 0.5201376 0.0007918
2188 GO:0030292 protein tyrosine kinase inhibitor activity 5 0.1028375 -0.7509105 -0.3667942 -0.5198470 -0.2270543 -0.8221270 -0.5949197 0.0584545 0.0006121 0.0036386 0.1555392 0.0441212 0.3793262 0.0014536 0.0212402 0.8209459 0.9981091 1.432982 0.3284827 0.3314841
349 GO:0002260 lymphocyte homeostasis 5 0.0626584 -0.3103290 -0.5384239 -0.4622188 0.3204591 -0.4823871 -0.7143688 -0.1439021 -0.7635807 0.2295234 0.0370800 0.0734922 0.2146748 0.0617807 0.0056688 0.5774087 0.0031067 1.431547 0.3489049 0.2523552
4645 GO:0071541 eukaryotic translation initiation factor 3 complex, eIF3m 7 0.0000253 -0.2624951 0.1643273 -0.5868862 -0.7476497 -0.6509030 -0.3316280 -0.5715379 0.4327693 0.2291699 0.4515841 0.0071700 0.0006133 0.0028615 0.1287058 0.0088315 0.0474092 1.430706 0.4194473 0.0006387
3067 GO:0038094 Fc-gamma receptor signaling pathway 7 0.0000470 -0.2769688 -0.2712187 -0.7018016 -0.6609384 -0.6994622 -0.6669728 -0.1619223 0.0155232 0.2045094 0.2140649 0.0013021 0.0024599 0.0013515 0.0022439 0.4582332 0.9433099 1.428354 0.2865879 0.0011143
4491 GO:0070508 cholesterol import 5 0.0442395 0.7080949 0.4696251 0.5153175 0.2861209 0.7380260 0.5992655 -0.1586840 -0.1457995 0.0061058 0.0689964 0.0459997 0.2679268 0.0042634 0.0203128 0.5389453 0.5723998 1.421127 0.3556906 0.2044928
3504 GO:0045053 protein retention in Golgi apparatus 5 0.0455333 -0.1862586 -0.7629074 -0.5667024 0.6868248 -0.4713389 -0.5541239 -0.0322877 -0.2714308 0.4708008 0.0031330 0.0282066 0.0078220 0.0679889 0.0318980 0.9005126 0.2932735 1.418989 0.4521834 0.2085693
1988 GO:0019774 proteasome core complex, beta-subunit complex 10 0.0000000 0.2475163 0.5213165 -0.3462993 -0.8605587 0.0663146 0.2348718 -0.0471029 0.8541906 0.1753851 0.0043112 0.0579623 0.0000024 0.7165696 0.1984879 0.7965069 0.0000029 1.408880 0.5249202 0.0000024
4367 GO:0061470 T follicular helper cell differentiation 7 0.0013840 0.1096244 -0.7378766 -0.3521142 0.6913726 -0.2457694 -0.5864708 -0.3050855 -0.5609777 0.6155395 0.0007225 0.1067262 0.0015364 0.2602185 0.0072110 0.1622267 0.0101665 1.403775 0.4593341 0.0190812
2069 GO:0022624 proteasome accessory complex 17 0.0000000 0.5994883 0.5815952 -0.3337057 -0.4353608 0.1067635 0.6280937 -0.4655068 0.5856315 0.0000187 0.0000330 0.0172368 0.0018877 0.4461295 0.0000073 0.0008918 0.0000291 1.401463 0.5019160 0.0000000
4054 GO:0051086 chaperone mediated protein folding independent of cofactor 8 0.0000082 0.5296740 0.5470651 -0.4719905 -0.6193273 -0.1963343 0.2590495 -0.6928714 0.4329035 0.0094824 0.0073765 0.0208009 0.0024183 0.3363167 0.2045810 0.0006894 0.0339963 1.399752 0.5282988 0.0002335
1598 GO:0010756 positive regulation of plasminogen activation 5 0.0967624 0.3686305 0.7359143 0.3823412 -0.6272992 0.3277735 0.3854935 -0.6704208 0.1829533 0.1534803 0.0043745 0.1387562 0.0151365 0.2043924 0.1355301 0.0094284 0.4787101 1.398615 0.5083399 0.3194026
3765 GO:0046934 1-phosphatidylinositol-4,5-bisphosphate 3-kinase activity 7 0.0279483 -0.2194893 -0.7774061 -0.3870523 0.5776160 -0.3125847 -0.6209716 0.4124579 -0.4042590 0.3146657 0.0003679 0.0762024 0.0081363 0.1521499 0.0044404 0.0588166 0.0640264 1.397443 0.4747938 0.1550078
1132 GO:0007006 mitochondrial membrane organization 5 0.0405298 0.5904514 0.3232747 0.6109870 0.1513083 0.6778577 0.4729916 -0.6074063 0.2231982 0.0222320 0.2106734 0.0179855 0.5579760 0.0086668 0.0670288 0.0186703 0.3874741 1.396893 0.4149846 0.1944453
1682 GO:0014909 smooth muscle cell migration 5 0.0541765 0.5947972 0.2724713 0.6437643 0.2788982 0.7069931 0.5379954 -0.4066106 0.2595562 0.0212669 0.2914269 0.0126715 0.2801945 0.0061856 0.0372309 0.1153906 0.3149026 1.393636 0.3585188 0.2319354
2110 GO:0030091 protein repair 5 0.0177764 0.5993267 0.6435195 0.2069166 -0.3611324 0.3382708 0.5563275 -0.3120735 0.6837031 0.0203000 0.0127054 0.4230364 0.1620198 0.1902687 0.0312229 0.2269156 0.0081073 1.391153 0.4212462 0.1166250
5383 GO:1905665 positive regulation of calcium ion import across plasma membrane 5 0.1735341 -0.1418516 -0.6852028 -0.7240704 0.3944912 -0.5119204 -0.5903902 0.2369702 -0.3003826 0.5828446 0.0079691 0.0050484 0.1266398 0.0474524 0.0222458 0.3588634 0.2447999 1.387996 0.4229769 0.4359944
2543 GO:0032454 histone H3K9 demethylase activity 11 0.0017110 -0.3739353 -0.6051328 -0.5470829 0.4672939 -0.5398597 -0.4988727 0.4624923 -0.3815899 0.0317793 0.0005104 0.0016796 0.0072882 0.0019337 0.0041731 0.0079120 0.0284419 1.386906 0.4496238 0.0222674
3578 GO:0045588 positive regulation of gamma-delta T cell differentiation 6 0.0217161 -0.5527584 -0.3413676 -0.4473436 -0.0854184 -0.5350830 -0.7044915 0.1993267 -0.6867651 0.0190455 0.1476485 0.0577710 0.7171407 0.0232288 0.0028040 0.3978831 0.0035772 1.386898 0.3117003 0.1313341
colfunc <- colorRampPalette(c("blue", "white", "red"))

mx <- top[,grep("^s\\.",colnames(top))]
mx <- mx[,-ncol(mx)]
rownames(mx) <- top$set

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.6,cexCol=0.8)

DE5 Mock MDM vs Mock AlvMDM

head(pbmdm)
##               mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_bystander1
## HIV-Gagp17          38092       23541       40568       15110            255
## HIV-Gagp24              0           0           0           0              0
## HIV-Gagp2p7          1462        1021        2365         414             23
## HIV-Gagp1Pol         2021        1375        3032         785             20
## HIV-Polprot         27388       18583       44857        9126            331
## HIV-Polp15p31       75686       55267      105649       14984           1033
##               mdm_bystander2 mdm_bystander3 mdm_bystander4 mdm_latent1
## HIV-Gagp17               254             57             61        1927
## HIV-Gagp24                 0              0              0           0
## HIV-Gagp2p7               32              3              8          51
## HIV-Gagp1Pol              61             16             10          83
## HIV-Polprot              492            181             81        1383
## HIV-Polp15p31           1505            413            181        3589
##               mdm_latent2 mdm_latent3 mdm_latent4 mdm_mock1 mdm_mock2 mdm_mock3
## HIV-Gagp17           2077         566         534       117       253        37
## HIV-Gagp24              0           0           0         0         0         0
## HIV-Gagp2p7           108          37          12         5         8         1
## HIV-Gagp1Pol          129          63          24         7        14         3
## HIV-Polprot          1587         877         250       136       196        47
## HIV-Polp15p31        5077        2425         441       341       550       101
##               mdm_mock4
## HIV-Gagp17          159
## HIV-Gagp24            0
## HIV-Gagp2p7           5
## HIV-Gagp1Pol         13
## HIV-Polprot         146
## HIV-Polp15p31       257
head(pbalv)
##               alv_active1 alv_active2 alv_active3 alv_bystander1 alv_bystander2
## HIV-Gagp17          32789       27176       17079            106            162
## HIV-Gagp24              0           0           0              0              0
## HIV-Gagp2p7          1201        1242         744             16             26
## HIV-Gagp1Pol         2100        2334        1592             26             50
## HIV-Polprot         23710       30544       21871            208            515
## HIV-Polp15p31       38437       59592       41124            476           1203
##               alv_bystander3 alv_latent1 alv_latent2 alv_latent3 alv_mock1
## HIV-Gagp17               183        2306        1784        2576       106
## HIV-Gagp24                 0           0           0           0         0
## HIV-Gagp2p7               17          69         104         121         2
## HIV-Gagp1Pol              42         129         163         210         6
## HIV-Polprot              534        1465        2065        3280        95
## HIV-Polp15p31           1151        2414        4070        5631       164
##               alv_mock2 alv_mock3
## HIV-Gagp17          178      1530
## HIV-Gagp24            0         0
## HIV-Gagp2p7           7        52
## HIV-Gagp1Pol         21        94
## HIV-Polprot         230      1596
## HIV-Polp15p31       360      2804
mock <- cbind( pbmdm[,grep("mock",colnames(pbmdm))],
  pbalv[,grep("mock",colnames(pbalv))] )


head(mock)
##               mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 alv_mock1 alv_mock2
## HIV-Gagp17          117       253        37       159       106       178
## HIV-Gagp24            0         0         0         0         0         0
## HIV-Gagp2p7           5         8         1         5         2         7
## HIV-Gagp1Pol          7        14         3        13         6        21
## HIV-Polprot         136       196        47       146        95       230
## HIV-Polp15p31       341       550       101       257       164       360
##               alv_mock3
## HIV-Gagp17         1530
## HIV-Gagp24            0
## HIV-Gagp2p7          52
## HIV-Gagp1Pol         94
## HIV-Polprot        1596
## HIV-Polp15p31      2804
mockf <- mock[which(rowMeans(mock)>=10),]
head(mockf)
##               mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 alv_mock1 alv_mock2
## HIV-Gagp17          117       253        37       159       106       178
## HIV-Gagp2p7           5         8         1         5         2         7
## HIV-Gagp1Pol          7        14         3        13         6        21
## HIV-Polprot         136       196        47       146        95       230
## HIV-Polp15p31       341       550       101       257       164       360
## HIV-Vif              15        45         8        17        10        33
##               alv_mock3
## HIV-Gagp17         1530
## HIV-Gagp2p7          52
## HIV-Gagp1Pol         94
## HIV-Polprot        1596
## HIV-Polp15p31      2804
## HIV-Vif             162
colSums(mockf)
## mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 alv_mock1 alv_mock2 alv_mock3 
##  28548322  20540758   7023535  20633808  20209667  24560365  33149165
desmock <- as.data.frame(grepl("alv",colnames(mockf)))
colnames(desmock) <- "case"

plot(cmdscale(dist(t(mockf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(mockf))), labels=colnames(mockf) )

dds <- DESeqDataSetFromMatrix(countData = mockf , colData = desmock, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
demock <- de
write.table(demock,"demockf.tsv",sep="\t")

nrow(subset(demock,padj<0.05 & log2FoldChange>0))
## [1] 890
nrow(subset(demock,padj<0.05 & log2FoldChange<0))
## [1] 1317
head(subset(demock,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in Alv cells compared to MDM") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in Alv cells compared to MDM
baseMean log2FoldChange lfcSE stat pvalue padj
STAC 411.32660 6.160050 0.4031531 15.279676 0 0
PPP1R16A 301.58230 2.616665 0.1799476 14.541256 0 0
SPRED1 1277.40147 2.756665 0.2789790 9.881264 0 0
FANCE 139.95617 2.513477 0.2613868 9.615929 0 0
PPP1R13B 224.15035 2.295557 0.2433500 9.433149 0 0
PPARD 673.81261 1.768911 0.1914837 9.237919 0 0
RGS16 135.58276 2.850967 0.3157553 9.029037 0 0
TXNRD3 124.06561 1.937977 0.2300826 8.422961 0 0
SMIM1 78.28049 3.015942 0.3611540 8.350848 0 0
RAB40B 175.67465 2.549723 0.3054759 8.346725 0 0
head(subset(demock, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in Alv cells compared to MDM") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in Alv cells compared to MDM
baseMean log2FoldChange lfcSE stat pvalue padj
PLIN2 7536.75693 -1.756264 0.1082373 -16.226051 0 0
OLFML2B 334.87303 -4.478105 0.3001465 -14.919728 0 0
STMN1 2557.73767 -3.563794 0.2633830 -13.530842 0 0
MCUB 378.71322 -1.972595 0.1700001 -11.603491 0 0
FABP4 47047.92410 -3.900944 0.3954489 -9.864598 0 0
C12orf75 70.89386 -3.884104 0.3972731 -9.776913 0 0
SELENOW 4343.79011 -1.808964 0.1933682 -9.355026 0 0
CPA6 131.12764 -2.615639 0.2953175 -8.857042 0 0
CST7 120.81091 -4.333456 0.4897891 -8.847596 0 0
MME 2273.35794 -2.538423 0.3010091 -8.433045 0 0
demockm <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15053
## Note: no. genes in output = 15053
## Note: estimated proportion of input genes in output = 1
mresmock <- mitch_calc(demockm,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mresmock$enrichment_result

mitchtbl <- mresmock$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"demock_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("mock_Mdm_vs_Alv.html") ) {
  mitch_report(mres4a,outfile="mock_Mdm_vs_Alv.html")
}

Session information

For reproducibility.

save.image("scanalysis.Rdata")

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pkgload_1.4.0               GGally_2.2.1               
##  [3] reshape2_1.4.4              gtools_3.9.5               
##  [5] tibble_3.2.1                dplyr_1.1.4                
##  [7] echarts4r_0.4.5             gplots_3.2.0               
##  [9] limma_3.60.6                SingleR_2.6.0              
## [11] celldex_1.14.0              harmony_1.2.1              
## [13] Rcpp_1.0.13-1               mitch_1.19.3               
## [15] DESeq2_1.44.0               muscat_1.18.0              
## [17] beeswarm_0.4.0              stringi_1.8.4              
## [19] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [21] Biobase_2.64.0              GenomicRanges_1.56.2       
## [23] GenomeInfoDb_1.40.1         IRanges_2.38.1             
## [25] S4Vectors_0.42.1            BiocGenerics_0.50.0        
## [27] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [29] hdf5r_1.3.11                Seurat_5.1.0               
## [31] SeuratObject_5.0.2          sp_2.1-4                   
## [33] plyr_1.8.9                  ggplot2_3.5.1              
## [35] kableExtra_1.4.0           
## 
## loaded via a namespace (and not attached):
##   [1] progress_1.2.3            goftest_1.2-3            
##   [3] HDF5Array_1.32.1          Biostrings_2.72.1        
##   [5] vctrs_0.6.5               spatstat.random_3.3-2    
##   [7] digest_0.6.37             png_0.1-8                
##   [9] corpcor_1.6.10            shape_1.4.6.1            
##  [11] gypsum_1.0.1              ggrepel_0.9.6            
##  [13] deldir_2.0-4              parallelly_1.39.0        
##  [15] MASS_7.3-64               httpuv_1.6.15            
##  [17] foreach_1.5.2             withr_3.0.2              
##  [19] xfun_0.49                 survival_3.8-3           
##  [21] memoise_2.0.1.9000        ggbeeswarm_0.7.2         
##  [23] systemfonts_1.1.0         zoo_1.8-12               
##  [25] GlobalOptions_0.1.2       pbapply_1.7-2            
##  [27] prettyunits_1.2.0         KEGGREST_1.44.1          
##  [29] promises_1.3.1            httr_1.4.7               
##  [31] globals_0.16.3            fitdistrplus_1.2-1       
##  [33] rhdf5filters_1.16.0       rhdf5_2.48.0             
##  [35] rstudioapi_0.17.1         UCSC.utils_1.0.0         
##  [37] miniUI_0.1.1.1            generics_0.1.3           
##  [39] curl_6.0.1                zlibbioc_1.50.0          
##  [41] ScaledMatrix_1.12.0       polyclip_1.10-7          
##  [43] GenomeInfoDbData_1.2.12   ExperimentHub_2.12.0     
##  [45] SparseArray_1.4.8         xtable_1.8-4             
##  [47] stringr_1.5.1             doParallel_1.0.17        
##  [49] evaluate_1.0.1            S4Arrays_1.4.1           
##  [51] BiocFileCache_2.12.0      hms_1.1.3                
##  [53] irlba_2.3.5.1             colorspace_2.1-1         
##  [55] filelock_1.0.3            ROCR_1.0-11              
##  [57] reticulate_1.40.0         spatstat.data_3.1-4      
##  [59] magrittr_2.0.3            lmtest_0.9-40            
##  [61] later_1.4.0               viridis_0.6.5            
##  [63] lattice_0.22-6            spatstat.geom_3.3-4      
##  [65] future.apply_1.11.3       scattermore_1.2          
##  [67] scuttle_1.14.0            cowplot_1.1.3            
##  [69] RcppAnnoy_0.0.22          pillar_1.9.0             
##  [71] nlme_3.1-167              iterators_1.0.14         
##  [73] caTools_1.18.3            compiler_4.4.2           
##  [75] beachmat_2.20.0           RSpectra_0.16-2          
##  [77] tensor_1.5                minqa_1.2.8              
##  [79] crayon_1.5.3              abind_1.4-8              
##  [81] scater_1.32.1             blme_1.0-6               
##  [83] locfit_1.5-9.10           bit_4.5.0                
##  [85] codetools_0.2-20          BiocSingular_1.20.0      
##  [87] bslib_0.8.0               alabaster.ranges_1.4.2   
##  [89] GetoptLong_1.0.5          plotly_4.10.4            
##  [91] remaCor_0.0.18            mime_0.12                
##  [93] splines_4.4.2             circlize_0.4.16          
##  [95] fastDummies_1.7.4         dbplyr_2.5.0             
##  [97] sparseMatrixStats_1.16.0  knitr_1.49               
##  [99] blob_1.2.4                utf8_1.2.4               
## [101] clue_0.3-66               BiocVersion_3.19.1       
## [103] lme4_1.1-35.5             listenv_0.9.1            
## [105] DelayedMatrixStats_1.26.0 Rdpack_2.6.2             
## [107] Matrix_1.7-1              statmod_1.5.0            
## [109] svglite_2.1.3             fANCOVA_0.6-1            
## [111] pkgconfig_2.0.3           tools_4.4.2              
## [113] cachem_1.1.0              RhpcBLASctl_0.23-42      
## [115] rbibutils_2.3             RSQLite_2.3.8            
## [117] viridisLite_0.4.2         DBI_1.2.3                
## [119] numDeriv_2016.8-1.1       fastmap_1.2.0            
## [121] rmarkdown_2.29            scales_1.3.0             
## [123] grid_4.4.2                ica_1.0-3                
## [125] broom_1.0.7               AnnotationHub_3.12.0     
## [127] sass_0.4.9                patchwork_1.3.0          
## [129] BiocManager_1.30.25       ggstats_0.7.0            
## [131] dotCall64_1.2             RANN_2.6.2               
## [133] alabaster.schemas_1.4.0   farver_2.1.2             
## [135] reformulas_0.4.0          aod_1.3.3                
## [137] mgcv_1.9-1                yaml_2.3.10              
## [139] cli_3.6.3                 purrr_1.0.2              
## [141] leiden_0.4.3.1            lifecycle_1.0.4          
## [143] uwot_0.2.2                glmmTMB_1.1.10           
## [145] mvtnorm_1.3-2             backports_1.5.0          
## [147] BiocParallel_1.38.0       gtable_0.3.6             
## [149] rjson_0.2.23              ggridges_0.5.6           
## [151] progressr_0.15.1          jsonlite_1.8.9           
## [153] edgeR_4.2.2               RcppHNSW_0.6.0           
## [155] bitops_1.0-9              bit64_4.5.2              
## [157] Rtsne_0.17                alabaster.matrix_1.4.2   
## [159] spatstat.utils_3.1-1      BiocNeighbors_1.22.0     
## [161] alabaster.se_1.4.1        jquerylib_0.1.4          
## [163] spatstat.univar_3.1-1     pbkrtest_0.5.3           
## [165] lazyeval_0.2.2            alabaster.base_1.4.2     
## [167] shiny_1.9.1               htmltools_0.5.8.1        
## [169] sctransform_0.4.1         rappdirs_0.3.3           
## [171] glue_1.8.0                spam_2.11-0              
## [173] httr2_1.0.7               XVector_0.44.0           
## [175] gridExtra_2.3             EnvStats_3.0.0           
## [177] boot_1.3-31               igraph_2.1.1             
## [179] variancePartition_1.34.0  TMB_1.9.15               
## [181] R6_2.5.1                  tidyr_1.3.1              
## [183] labeling_0.4.3            cluster_2.1.8            
## [185] Rhdf5lib_1.26.0           nloptr_2.1.1             
## [187] DelayedArray_0.30.1       tidyselect_1.2.1         
## [189] vipor_0.4.7               xml2_1.3.6               
## [191] AnnotationDbi_1.66.0      future_1.34.0            
## [193] rsvd_1.0.5                munsell_0.5.1            
## [195] KernSmooth_2.23-24        data.table_1.16.2        
## [197] htmlwidgets_1.6.4         ComplexHeatmap_2.20.0    
## [199] RColorBrewer_1.1-3        rlang_1.1.4              
## [201] spatstat.sparse_3.1-0     spatstat.explore_3.3-3   
## [203] lmerTest_3.1-3            fansi_1.0.6