Introduction

Ee are interested in comparing DEGs between the following comparison groups:

  • Latently- vs productively-infected cells (groups 3 vs 4).

  • Latently-infected vs bystander cells (2 vs 3).

  • Productively-infected vs mock (4 vs 1)

  • Mock vs bystander (1 vs 2).

As discussed, I think we will do these comparisons separately for both MDM and AlvMDM samples initially. Then, it would be of interest to compare DEGs for MDM vs AlvMDM for each of the four infection groups.

suppressPackageStartupMessages({
  library("kableExtra")
  library("ggplot2")
  library("plyr")
  library("Seurat")
  library("hdf5r")
  library("SingleCellExperiment")
  library("parallel")
  library("stringi")
  library("beeswarm")
  library("muscat")
  library("DESeq2")
  library("mitch")
  library("harmony")
  library("celldex")
  library("SingleR")
  library("limma")
  library("gplots")
})

Load data

Sample Patient Group
mock0 MDMP236 mock
mock1 MDMV236 mock
mock2 MDMO236 mock
mock3 MDMP239 mock
mock4 AlvP239 mock
mock5 AlvM239 mock
mock6 AlvN239 mock
active0 MDMP236 active
active1 MDMV236 active
active2 MDMO236 active
active3 MDMP239 active
active4 AlvP239 active
active5 AlvM239 active
active6 AlvN239 active
latent0 MDMP236 latent
latent1 MDMV236 latent
latent2 MDMO236 latent
latent3 MDMP239 latent
latent4 AlvP239 latent
latent5 AlvM239 latent
latent6 AlvN239 latent
bystander0 MDMP236 bystander
bystander1 MDMV236 bystander
bystander2 MDMO236 bystander
bystander3 MDMP239 bystander
bystander4 AlvP239 bystander
bystander5 AlvM239 bystander
bystander6 AlvN239 bystander
react6 AlvN239 reactivated

exclude react6

ss <- read.table("samplesheet.tsv",header=TRUE,row.names=1)

ss %>% kbl(caption="sample sheet") %>% kable_paper("hover", full_width = F)
sample sheet
Patient Group
mock0 MDMP236 mock
mock1 MDMV236 mock
mock2 MDMO236 mock
mock3 MDMP239 mock
mock4 AlvP239 mock
mock5 AlvM239 mock
mock6 AlvN239 mock
active0 MDMP236 active
active1 MDMV236 active
active2 MDMO236 active
active3 MDMP239 active
active4 AlvP239 active
active5 AlvM239 active
active6 AlvN239 active
latent0 MDMP236 latent
latent1 MDMV236 latent
latent2 MDMO236 latent
latent3 MDMP239 latent
latent4 AlvP239 latent
latent5 AlvM239 latent
latent6 AlvN239 latent
bystander0 MDMP236 bystander
bystander1 MDMV236 bystander
bystander2 MDMO236 bystander
bystander3 MDMP239 bystander
bystander4 AlvP239 bystander
bystander5 AlvM239 bystander
bystander6 AlvN239 bystander
react6 AlvN239 reactivated
mylist <- readRDS("macrophage_counts.rds")

Make single cell experiment object

comb <- do.call(cbind,mylist)
sce <- SingleCellExperiment(list(counts=comb))
sce
## class: SingleCellExperiment 
## dim: 36603 23788 
## metadata(0):
## assays(1): counts
## rownames(36603): gene-HIV1-1 gene-HIV1-2 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(23788): mdm_mock1|AAACGAATCACATACG mdm_mock1|AAACGCTCATCAGCGC
##   ... react6|TTTGTTGTCTGAACGT react6|TTTGTTGTCTTGGCTC
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Normalise data

cellmetadata <- data.frame(colnames(comb) ,sapply(strsplit(colnames(comb),"\\|"),"[[",1))
colnames(cellmetadata) <- c("cell","sample")
comb <- CreateSeuratObject(comb, project = "mac", assay = "RNA", meta.data = cellmetadata)
comb <- NormalizeData(comb)
## Normalizing layer: counts
comb <- FindVariableFeatures(comb, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb <- ScaleData(comb)
## Centering and scaling data matrix

PCA and Cluster

comb <- RunPCA(comb, features = VariableFeatures(object = comb))
## PC_ 1 
## Positive:  GAPDH, FABP3, TXN, IFI30, S100A10, PRDX6, BLVRB, TUBA1B, OTOA, S100A9 
##     FAH, CYSTM1, GCHFR, C15orf48, CARD16, HAMP, GSTP1, FABP4, CSTA, ACTB 
##     PSMA7, LINC01827, ACTG1, LDHB, CTSB, CFD, H2AFZ, TUBA1A, MMP9, SLAMF9 
## Negative:  ARL15, DOCK3, FTX, NEAT1, EXOC4, MALAT1, DPYD, RASAL2, LRMDA, JMJD1C 
##     TMEM117, PLXDC2, VPS13B, FHIT, ATG7, TPRG1, MAML2, MITF, ZNF438, ZEB2 
##     TRIO, COP1, ZFAND3, ELMO1, DENND4C, TCF12, MED13L, ERC1, JARID2, UBE2E2 
## PC_ 2 
## Positive:  HLA-DRB1, CD74, HLA-DRA, HLA-DPA1, GCLC, HLA-DPB1, SPRED1, RCBTB2, KCNMA1, MRC1 
##     LYZ, AC020656.1, C1QA, FGL2, CYP1B1, SLCO2B1, S100A4, AIF1, PTGDS, LINC02345 
##     CRIP1, HLA-DRB5, VAMP5, CA2, CAMK1, CLEC7A, ALOX5AP, RTN1, HLA-DQB1, MX1 
## Negative:  CYSTM1, CD164, PSAT1, GDF15, FAH, FDX1, ATP6V1D, CCPG1, SAT1, BCAT1 
##     PHGDH, PSMA7, HEBP2, RETREG1, SLAMF9, GARS, TCEA1, TXN, HES2, NUPR1 
##     RHOQ, CSTA, RILPL2, CLGN, B4GALT5, STMN1, SNHG5, SPTBN1, SUPV3L1, PTER 
## PC_ 3 
## Positive:  NCAPH, CRABP2, RGCC, TM4SF19, CHI3L1, GAL, DUSP2, CCL22, AC015660.2, ACAT2 
##     DCSTAMP, TMEM114, RGS20, MGST1, LINC01010, TRIM54, MREG, LINC02244, NUMB, GPC4 
##     TCTEX1D2, CCND1, SYNGR1, POLE4, SLC20A1, PLEK, SERTAD2, IL1RN, AC005280.2, CLU 
## Negative:  MARCKS, CD14, BTG1, MS4A6A, TGFBI, CTSC, FPR3, HLA-DQA1, AIF1, MPEG1 
##     MEF2C, CD163, HLA-DPB1, IFI30, SELENOP, TIMP1, ALDH2, HLA-DQB1, NAMPT, C1QC 
##     NUPR1, MS4A7, FUCA1, HIF1A, EPB41L3, HLA-DQA2, HLA-DRA, TCF4, RNASE1, ARL4C 
## PC_ 4 
## Positive:  ACTB, ACTG1, TPM4, TUBA1B, CCL3, CTSB, CSF1, SMS, TUBB, DHCR24 
##     LGMN, GAPDH, CYTOR, INSIG1, HAMP, CD36, TYMS, C1QA, PCLAF, CCL7 
##     CTSL, AIF1, HSP90B1, CLSPN, TK1, LIMA1, MGLL, TNFSF13, CENPM, CAMK1 
## Negative:  PTGDS, LINC02244, CLU, SYNGR1, MGST1, CSTA, NCF1, CCPG1, LY86-AS1, EPHB1 
##     LINC01010, GAS5, ALDH2, AC015660.2, PDE4D, C1QTNF4, VAMP5, AP000331.1, SNHG5, APOD 
##     CLEC12A, XIST, BX664727.3, ARHGAP15, S100P, ZBTB20, AOAH, TMEM91, RCBTB2, ANKRD44 
## PC_ 5 
## Positive:  CTSB, TPM4, CSF1, CCL3, LGMN, ACTB, ACTG1, DHCR24, SMS, CD36 
##     C15orf48, SLCO2B1, INSIG1, IL18BP, C1QA, MREG, BCAT1, MGLL, SLC11A1, MADD 
##     TM4SF19, LIMA1, OTOA, AIF1, CAMK1, PHLDA1, MRC1, UGCG, TCTEX1D2, PPARG 
## Negative:  TYMS, PCLAF, TK1, MKI67, MYBL2, CENPM, BIRC5, RRM2, CDK1, DIAPH3 
##     CEP55, CLSPN, SHCBP1, NUSAP1, PRC1, CENPF, ESCO2, KIF11, TOP2A, CENPK 
##     NCAPG, ANLN, TPX2, CCNA2, ASPM, MAD2L1, GTSE1, FAM111B, HMMR, HELLS
comb <- RunHarmony(comb,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
DimHeatmap(comb, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb)

comb <- JackStraw(comb, num.replicate = 100)
comb <- FindNeighbors(comb, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb <- FindClusters(comb, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 23788
## Number of edges: 724381
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8833
## Number of communities: 13
## Elapsed time: 2 seconds

UMAP

comb <- RunUMAP(comb, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:56:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:56:01 Read 23788 rows and found 10 numeric columns
## 16:56:01 Using Annoy for neighbor search, n_neighbors = 30
## 16:56:01 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:56:03 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e311797371f
## 16:56:03 Searching Annoy index using 1 thread, search_k = 3000
## 16:56:09 Annoy recall = 100%
## 16:56:10 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:56:11 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:56:12 Commencing optimization for 200 epochs, with 958784 positive edges
## 16:56:18 Optimization finished
DimPlot(comb, reduction = "umap")

Assign names to clusters

ADGRE1, CCR2, CD169, CX3CR1, CD206, CD163, LYVE1, CD9, TREM2

HLA-DP, HLA-DM, HLA-DOA, HLA-DOB, HLA-DQ, and HLA-DR.

message("macrophage markers")
## macrophage markers
FeaturePlot(comb, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

That’s pretty useless. Let’s use celldex pkg to annotate cells and get the macs.

ref <- celldex::MonacoImmuneData()

DefaultAssay(comb) <- "RNA"
comb2 <- as.SingleCellExperiment(comb)

lc <- logcounts(comb2)

pred_imm_broad <- SingleR(test=comb2, ref=ref,
                          labels=ref$label.main)

head(pred_imm_broad)
## DataFrame with 6 rows and 4 columns
##                                                    scores      labels
##                                                  <matrix> <character>
## mdm_mock1|AAACGAATCACATACG 0.307826:0.325650:0.169064:...   Monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.311008:0.281528:0.189016:...   Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.318344:0.277003:0.170895:...   Monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.290416:0.291088:0.155001:...   Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.292140:0.281397:0.184535:...   Monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.240858:0.241299:0.117359:...   Monocytes
##                            delta.next pruned.labels
##                             <numeric>   <character>
## mdm_mock1|AAACGAATCACATACG  0.1786319     Monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.0338547     Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.1301308     Monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1794308     Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.0952702     Monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1508090     Monocytes
table(pred_imm_broad$pruned.labels)
## 
##       Basophils Dendritic cells       Monocytes 
##               1              87           22944
cellmetadata$label <- pred_imm_broad$pruned.labels

pred_imm_fine <- SingleR(test=comb2, ref=ref,
                          labels=ref$label.fine)
head(pred_imm_fine)
## DataFrame with 6 rows and 4 columns
##                                                    scores              labels
##                                                  <matrix>         <character>
## mdm_mock1|AAACGAATCACATACG 0.183641:0.485683:0.206442:... Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.196123:0.430705:0.226843:... Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.171861:0.442610:0.188544:... Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.156398:0.415082:0.167965:... Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.185762:0.432131:0.207144:... Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.124993:0.382124:0.145601:... Classical monocytes
##                            delta.next       pruned.labels
##                             <numeric>         <character>
## mdm_mock1|AAACGAATCACATACG  0.0626269 Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.1150706 Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.0651352 Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1073274 Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.1521533 Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1282183 Classical monocytes
table(pred_imm_fine$pruned.labels)
## 
##          Classical monocytes       Intermediate monocytes 
##                        20387                         2604 
##      Low-density neutrophils      Myeloid dendritic cells 
##                            1                           85 
##      Non classical monocytes Plasmacytoid dendritic cells 
##                            9                            6
cellmetadata$finelabel <- pred_imm_fine$pruned.labels

col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")

annot_df <- data.frame(
  barcodes = rownames(pred_imm_broad),
  monaco_broad_annotation = pred_imm_broad$labels,
  monaco_broad_pruned_labels = pred_imm_broad$pruned.labels,
  monaco_fine_annotation = pred_imm_fine$labels,
  monaco_fine_pruned_labels = pred_imm_fine$pruned.labels
)

meta_inf <- comb@meta.data
meta_inf$cell_barcode <- colnames(comb)

meta_inf <- meta_inf %>% dplyr::left_join(y = annot_df,
                                          by = c("cell_barcode" = "barcodes"))
rownames(meta_inf) <- colnames(lc)

comb@meta.data <- meta_inf

DimPlot(comb, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

Make MDM object

mdmlist <- mylist[grep("mdm",names(mylist))]
comb1 <- do.call(cbind,mdmlist)
sce1 <- SingleCellExperiment(list(counts=comb1))
sce1
## class: SingleCellExperiment 
## dim: 36603 9994 
## metadata(0):
## assays(1): counts
## rownames(36603): gene-HIV1-1 gene-HIV1-2 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(9994): mdm_mock1|AAACGAATCACATACG mdm_mock1|AAACGCTCATCAGCGC
##   ... mdm_bystander3|TTTCACAAGAGTCGAC mdm_bystander3|TTTGACTGTATGTGTC
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
cellmetadata1 <- data.frame(colnames(comb1) ,sapply(strsplit(colnames(comb1),"\\|"),"[[",1))
colnames(cellmetadata1) <- c("cell","sample")
comb1 <- CreateSeuratObject(comb1, project = "mac", assay = "RNA", meta.data = cellmetadata1)
comb1 <- NormalizeData(comb1)
## Normalizing layer: counts
comb1 <- FindVariableFeatures(comb1, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb1 <- ScaleData(comb1)
## Centering and scaling data matrix
comb1 <- RunPCA(comb1, features = VariableFeatures(object = comb1))
## PC_ 1 
## Positive:  ARL15, DPYD, FTX, EXOC4, NEAT1, ZEB2, ATG7, LRMDA, PLXDC2, RASAL2 
##     JMJD1C, VPS13B, DOCK3, ELMO1, TRIO, TCF12, MAML2, COP1, DOCK4, MALAT1 
##     LPP, ZFAND3, ZSWIM6, SPIDR, ATP9B, TMEM117, MED13L, FHIT, ARHGAP15, JARID2 
## Negative:  S100A10, TXN, GAPDH, CYSTM1, ACP5, FABP3, PRDX6, FAH, COX5B, LDHB 
##     PFN1, BCL2A1, BLVRB, C15orf48, SPP1, S100A9, PSME2, FABP4, CSTA, GDF15 
##     STMN1, MGST1, HSP90AA1, SLAMF9, CTSL, IFI30, ANXA1, LILRA1, CD164, NMB 
## PC_ 2 
## Positive:  TM4SF19, ANO5, ATP6V1D, GPC4, FNIP2, TXNRD1, CYSTM1, CD164, FAH, PSD3 
##     BCL2A1, SPP1, RGS20, RETREG1, TCTEX1D2, EPB41L1, MGST1, TXN, ACAT2, MREG 
##     C15orf48, SCD, FABP3, SLC28A3, CCDC26, TGM2, LINC01135, BCAT1, S100A10, CCL22 
## Negative:  HLA-DRA, HLA-DPB1, TGFBI, HLA-DPA1, CD74, AIF1, CEBPD, MS4A6A, HLA-DQB1, C1QA 
##     HLA-DQA1, MS4A7, HLA-DRB1, MPEG1, FPR3, C1QC, HLA-DRB5, CD14, LYZ, CD163 
##     LILRB2, ST8SIA4, FOS, RNASE1, EPB41L3, TCF4, ARL4C, SELENOP, TIMP1, CTSC 
## PC_ 3 
## Positive:  SAT1, SNHG5, NUPR1, CCPG1, BTG1, MARCKS, CSTA, TCEA1, PSAT1, STMN1 
##     G0S2, PLEKHA5, GDF15, XIST, NAMPT, SUPV3L1, CLGN, PHGDH, ADAMDEC1, PLIN2 
##     BCAT1, HES2, CXCR4, SLAMF9, REV3L, CYSTM1, CARD16, S100P, BLVRB, CLEC4A 
## Negative:  GSN, GCLC, CYP1B1, LPL, S100A4, DUSP2, NCAPH, CALR, TIMP3, LINC02345 
##     OCSTAMP, CRIP1, ACTB, ID2, CAMK1, FAIM2, LHFPL2, RGCC, STAC, IGSF6 
##     SPRED1, PLCL1, RCBTB2, CCND2, CA2, PPARGC1B, LINC02408, HIVEP3, MRC1, RNF128 
## PC_ 4 
## Positive:  PTGDS, CLU, LINC02244, BX664727.3, LINC01010, AL136317.2, RCBTB2, SYNGR1, AC015660.2, NCAPH 
##     AC008591.1, LY86-AS1, MT1G, EPHB1, MT1X, CPE, KCNJ1, SPON2, MEIKIN, ACP5 
##     FAIM2, RAB6B, PKD1L1, MT1H, MT2A, PEBP4, TDRD3, CSTA, MT1E, CFD 
## Negative:  ACTB, ACTG1, TPM4, PFN1, TUBA1B, CSF1, LCP1, SLC35F1, CD36, TUBB 
##     CCL3, MSMO1, GAPDH, INSIG1, GLIPR1, MGLL, CCL7, PHLDA1, SQLE, LDHA 
##     PLEK, CALR, MARCKS, LGMN, DUSP6, AC078850.1, DHCR24, C3, LIMA1, MMP19 
## PC_ 5 
## Positive:  TYMS, PCLAF, TK1, MKI67, MYBL2, CEP55, BIRC5, DIAPH3, RRM2, KIF11 
##     CENPK, CENPM, SHCBP1, CLSPN, PRC1, CDK1, CENPF, ESCO2, ANLN, NUSAP1 
##     KIF4A, CENPU, CIT, TPX2, KNL1, TOP2A, ASPM, HELLS, CCNA2, RAD51AP1 
## Negative:  ACTB, TM4SF19, TCTEX1D2, CSF1, gene-HIV1-2, MSMO1, gene-HIV1-1, ACTG1, TPM4, SPOCD1 
##     SPRED1, CD36, PPARG, CBLB, CCL3, UGCG, MADD, HSPA5, TM4SF19-AS1, LCP1 
##     ATP13A3, PHLDA1, ANK2, C15orf48, PCSK6, TIMP3, B4GALT5, GSN, CCND1, PLEK
comb1 <- RunHarmony(comb1,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony converged after 5 iterations
DimHeatmap(comb1, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb1)

comb1 <- JackStraw(comb1, num.replicate = 100)
comb1 <- FindNeighbors(comb1, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb1 <- FindClusters(comb1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9994
## Number of edges: 312701
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8801
## Number of communities: 12
## Elapsed time: 0 seconds
comb1 <- RunUMAP(comb1, dims = 1:10)
## 17:00:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:00:14 Read 9994 rows and found 10 numeric columns
## 17:00:14 Using Annoy for neighbor search, n_neighbors = 30
## 17:00:14 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:00:14 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e31174c8fd74
## 17:00:14 Searching Annoy index using 1 thread, search_k = 3000
## 17:00:17 Annoy recall = 100%
## 17:00:18 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:00:19 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:00:19 Commencing optimization for 500 epochs, with 400918 positive edges
## 17:00:26 Optimization finished
DimPlot(comb1, reduction = "umap")

message("macrophage markers")
## macrophage markers
FeaturePlot(comb1, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb1, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

ref <- celldex::MonacoImmuneData()
DefaultAssay(comb1) <- "RNA"
comb21 <- as.SingleCellExperiment(comb1)
lc1 <- logcounts(comb21)
pred_imm_broad1 <- SingleR(test=comb21, ref=ref,labels=ref$label.main)
head(pred_imm_broad1)
## DataFrame with 6 rows and 4 columns
##                                                    scores      labels
##                                                  <matrix> <character>
## mdm_mock1|AAACGAATCACATACG 0.307826:0.325650:0.169064:...   Monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.311008:0.281528:0.189016:...   Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.318344:0.277003:0.170895:...   Monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.290416:0.291088:0.155001:...   Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.292140:0.281397:0.184535:...   Monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.240858:0.241299:0.117359:...   Monocytes
##                            delta.next pruned.labels
##                             <numeric>   <character>
## mdm_mock1|AAACGAATCACATACG  0.1786319     Monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.0338547     Monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.1301308     Monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1794308     Monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.0952702     Monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1508090     Monocytes
table(pred_imm_broad1$pruned.labels)
## 
##       Basophils Dendritic cells       Monocytes 
##               1              71            9372
cellmetadata1$label <- pred_imm_broad1$pruned.labels
pred_imm_fine1 <- SingleR(test=comb21, ref=ref, labels=ref$label.fine)
head(pred_imm_fine1)
## DataFrame with 6 rows and 4 columns
##                                                    scores              labels
##                                                  <matrix>         <character>
## mdm_mock1|AAACGAATCACATACG 0.183641:0.485683:0.206442:... Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC 0.196123:0.430705:0.226843:... Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA 0.171861:0.442610:0.188544:... Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC 0.156398:0.415082:0.167965:... Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG 0.185762:0.432131:0.207144:... Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC 0.124993:0.382124:0.145601:... Classical monocytes
##                            delta.next       pruned.labels
##                             <numeric>         <character>
## mdm_mock1|AAACGAATCACATACG  0.0626269 Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC  0.1150706 Classical monocytes
## mdm_mock1|AAACGCTGTCGAGTGA  0.0651352 Classical monocytes
## mdm_mock1|AAAGGTAAGCCATATC  0.1073274 Classical monocytes
## mdm_mock1|AAAGGTAGTTTCCAAG  0.1521533 Classical monocytes
## mdm_mock1|AAATGGAAGATCGCCC  0.1282183 Classical monocytes
table(pred_imm_fine1$pruned.labels)
## 
##          Classical monocytes       Intermediate monocytes 
##                         8592                          816 
##      Low-density neutrophils      Myeloid dendritic cells 
##                            1                           48 
##      Non classical monocytes Plasmacytoid dendritic cells 
##                            8                            6
cellmetadata1$finelabel <- pred_imm_fine1$pruned.labels
col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")
annot_df1 <- data.frame(
  barcodes = rownames(pred_imm_broad1),
  monaco_broad_annotation = pred_imm_broad1$labels,
  monaco_broad_pruned_labels = pred_imm_broad1$pruned.labels,
  monaco_fine_annotation = pred_imm_fine1$labels,
  monaco_fine_pruned_labels = pred_imm_fine1$pruned.labels)

meta_inf1 <- comb1@meta.data
meta_inf1$cell_barcode <- colnames(comb1)
meta_inf1 <- meta_inf1 %>% dplyr::left_join(y = annot_df1, by = c("cell_barcode" = "barcodes"))
rownames(meta_inf1) <- colnames(lc1)
comb1@meta.data <- meta_inf1
DimPlot(comb1, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb1, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

message("extract mono")
## extract mono
mono <- comb1[,which(meta_inf1$monaco_broad_annotation == "Monocytes")]
mono_metainf1 <- meta_inf1[which(meta_inf1$monaco_broad_annotation == "Monocytes"),]
mono_metainf1 <- mono_metainf1[grep("monocytes",mono_metainf1$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf1))]
mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: AL358779.1, DBI, CALM3, RALA,
## AL669970.1, BCL11A, PLXNC1, FLNB, KCNJ5, ENG, FBXW11, KLHDC8B, FDX1, SLC1A3,
## SCLT1, SLC16A1-AS1, ARHGEF3, RAB7B, SPIRE1, SCAPER.
## PC_ 1 
## Positive:  DPYD, ARL15, FTX, EXOC4, ZEB2, VPS13B, LRMDA, ATG7, NEAT1, ELMO1 
##     RASAL2, JMJD1C, DOCK4, SPIDR, TCF12, MALAT1, MAML2, ZSWIM6, ATP9B, DOCK3 
##     PLXDC2, TRIO, COP1, MED13L, TMEM117, ARHGAP15, ZFAND3, FHIT, MBD5, TBC1D5 
## Negative:  S100A10, GAPDH, TXN, CYSTM1, PRDX6, FABP3, ACP5, FAH, PFN1, COX5B 
##     LDHB, BCL2A1, BLVRB, C15orf48, PSME2, HSP90AA1, CSTA, GDF15, S100A9, MGST1 
##     SPP1, CTSL, SLAMF9, ATP6V1D, ANXA1, STMN1, IFI30, CD164, FABP4, TUBA1B 
## PC_ 2 
## Positive:  HLA-DRA, HLA-DPB1, HLA-DPA1, CD74, TGFBI, AIF1, CEBPD, HLA-DQB1, MS4A6A, C1QA 
##     HLA-DRB1, MS4A7, HLA-DQA1, C1QC, FPR3, MPEG1, CD14, HLA-DRB5, LYZ, LILRB2 
##     CD163, FOS, CTSC, TIMP1, ST8SIA4, SELENOP, RNASE1, TCF4, ARL4C, MAFB 
## Negative:  TM4SF19, ANO5, GPC4, FNIP2, ATP6V1D, TXNRD1, PSD3, CD164, CYSTM1, SPP1 
##     FAH, RGS20, TCTEX1D2, BCL2A1, RETREG1, EPB41L1, MGST1, SLC28A3, MREG, LINC01135 
##     ACAT2, CCDC26, TXN, CCL22, NUMB, SCD, NIBAN1, STX18-AS1, BCAT1, B4GALT5 
## PC_ 3 
## Positive:  GSN, GCLC, CYP1B1, LPL, S100A4, DUSP2, NCAPH, TIMP3, CALR, LINC02345 
##     OCSTAMP, CRIP1, CAMK1, ACTB, ID2, FAIM2, LHFPL2, STAC, RGCC, SPRED1 
##     IGSF6, PLCL1, RCBTB2, PPARGC1B, CA2, LINC02408, CCND2, HIVEP3, MRC1, RNF128 
## Negative:  SAT1, SNHG5, NUPR1, CCPG1, BTG1, MARCKS, CSTA, TCEA1, PSAT1, STMN1 
##     G0S2, PLEKHA5, GDF15, XIST, NAMPT, SUPV3L1, PHGDH, CLGN, ADAMDEC1, PLIN2 
##     HES2, BCAT1, CXCR4, CYSTM1, REV3L, SLAMF9, CARD16, BLVRB, S100P, SDS 
## PC_ 4 
## Positive:  ACTG1, TPM4, ACTB, CSF1, SLC35F1, TUBA1B, MSMO1, LCP1, CCL3, INSIG1 
##     PFN1, CCL7, CD36, SQLE, TUBB, PHLDA1, MGLL, GLIPR1, C3, GAPDH 
##     AC078850.1, DHCR24, LIMA1, DUSP6, LDHA, PLEK, LINC01091, MMP19, ATP13A3, MARCKS 
## Negative:  PTGDS, CLU, LINC02244, BX664727.3, LINC01010, SYNGR1, RCBTB2, CFD, AL136317.2, ACP5 
##     NCAPH, CSTA, AC015660.2, HLA-C, CPE, MT-ND5, RAB6B, AC008591.1, MEIKIN, LY86-AS1 
##     EPHB1, MT1X, SPON2, KCNJ1, PEBP4, FAIM2, HES2, MT1G, CCPG1, COX5B 
## PC_ 5 
## Positive:  TYMS, PCLAF, TK1, MYBL2, MKI67, CEP55, BIRC5, DIAPH3, RRM2, KIF11 
##     CENPK, SHCBP1, CENPM, CLSPN, CDK1, PRC1, CENPF, ESCO2, ANLN, NUSAP1 
##     KIF4A, CENPU, CIT, TPX2, KNL1, TOP2A, ASPM, HELLS, CCNA2, NCAPG 
## Negative:  ACTB, CSF1, TM4SF19, TCTEX1D2, MSMO1, ACTG1, C15orf48, SPOCD1, TPM4, HSPA5 
##     gene-HIV1-2, CD36, SPRED1, GSN, LCP1, CBLB, UGCG, CCL3, PLEK, gene-HIV1-1 
##     PPARG, CCND1, RGCC, MADD, PHLDA1, TIMP3, ATP13A3, MMP19, MGLL, TM4SF19-AS1
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono <- RunUMAP(mono, dims = 1:4)
## 17:01:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:01:07 Read 9413 rows and found 4 numeric columns
## 17:01:07 Using Annoy for neighbor search, n_neighbors = 30
## 17:01:07 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:01:07 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e311324f0623
## 17:01:07 Searching Annoy index using 1 thread, search_k = 3000
## 17:01:10 Annoy recall = 100%
## 17:01:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:01:12 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:01:12 Commencing optimization for 500 epochs, with 325236 positive edges
## 17:01:19 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Make Alv object

alvlist <- mylist[grep("alv",names(mylist))]

comb1 <- do.call(cbind,alvlist)
sce1 <- SingleCellExperiment(list(counts=comb1))
sce1
## class: SingleCellExperiment 
## dim: 36603 10974 
## metadata(0):
## assays(1): counts
## rownames(36603): gene-HIV1-1 gene-HIV1-2 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(10974): alv_mock1|AAACCCAGTGCTGCAC alv_mock1|AAAGGATAGCATGAAT
##   ... alv_bystander4|TTTCGATGTGAGCAGT alv_bystander4|TTTGATCTCGGCTTGG
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
cellmetadata1 <- data.frame(colnames(comb1) ,sapply(strsplit(colnames(comb1),"\\|"),"[[",1))
colnames(cellmetadata1) <- c("cell","sample")
comb1 <- CreateSeuratObject(comb1, project = "mac", assay = "RNA", meta.data = cellmetadata1)
comb1 <- NormalizeData(comb1)
## Normalizing layer: counts
comb1 <- FindVariableFeatures(comb1, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb1 <- ScaleData(comb1)
## Centering and scaling data matrix
comb1 <- RunPCA(comb1, features = VariableFeatures(object = comb1))
## PC_ 1 
## Positive:  GAPDH, LGALS1, S100A6, MIF, CSTB, PRDX6, LGALS3, TXN, BLVRB, IFI30 
##     TUBA1B, ACTB, CYSTM1, ELOC, MMP9, FAH, CRABP2, TMEM176A, FABP4, TUBA1A 
##     EMP3, SAT1, C15orf48, CD74, S100A9, CALM3, H2AFZ, TIMP1, CFD, HLA-DRB1 
## Negative:  DOCK3, MALAT1, ARL15, RASAL2, PLXDC2, TMEM117, DPYD, EXOC4, LRMDA, NEAT1 
##     FTX, ASAP1, ATG7, MITF, TPRG1, JMJD1C, FHIT, MAML2, ZNF438, VPS13B 
##     ELMO1, FMNL2, LPP, COP1, CHD9, FRMD4B, UBE2E2, TRIO, DENND4C, ZFAND3 
## PC_ 2 
## Positive:  HLA-DPA1, HLA-DRA, CD74, HLA-DPB1, AIF1, LYZ, HLA-DRB1, MARCH1, CTSC, C1QA 
##     TGFBI, SAMSN1, FOS, C1QC, VAMP5, CD14, HMOX1, MRC1, MS4A6A, SLC8A1 
##     FPR3, CLEC4A, SELENOP, SLCO2B1, FGL2, CLEC7A, TRPS1, SLC9A9, TNFSF13B, MARCKS 
## Negative:  TM4SF19, GAL, CYSTM1, CCL22, ATP6V1D, GM2A, FDX1, TGM2, ACAT2, CSTB 
##     CD164, SCD, NCAPH, RHOF, S100A6, CIR1, DCSTAMP, SH3BP5, IARS, CSF1 
##     TCTEX1D2, FCMR, TFRC, BCAT1, EPB41L1, GOLGA7B, MREG, SLC20A1, GAPLINC, SNHG32 
## PC_ 3 
## Positive:  PTGDS, LINC02244, CLU, LINC01010, NCAPH, CRABP2, CRIP1, AC015660.2, SYNGR1, GCLC 
##     MGST1, LINC01800, KCNMA1, RCBTB2, DUSP2, C1QTNF4, AC067751.1, SERTAD2, MX1, FGL2 
##     IFIT3, S100A6, KCNJ1, GPC4, TRIM54, NCF1, RNF128, SPON2, LGALS3, RGS20 
## Negative:  CTSZ, CTSL, TPM4, LGMN, MARCKS, CD36, HAMP, CTSB, AIF1, SMS 
##     ACTG1, FPR3, BLVRB, HLA-DQA1, IFI30, IL18BP, FMN1, CCL3, GYPC, MARCO 
##     HLA-DQB1, CD163, STMN1, S100A9, FUCA1, C1QC, BCAT1, DOCK2, CSTB, FABP4 
## PC_ 4 
## Positive:  JAML, HLA-DRB5, GCHFR, XIST, AC020656.1, QPCT, ALDH1A1, GPX3, PAX8-AS1, SLC11A1 
##     S100A9, RARRES1, MS4A7, GPRIN3, NMB, TMTC1, MSR1, AL136317.2, CLEC7A, NIPAL2 
##     ST6GAL1, TDRD3, BX664727.3, MARCO, CFD, FDX1, SASH1, AL035446.2, SERINC2, OSBP2 
## Negative:  PLEK, TUBA1B, MIF, ACTB, TMEM176A, LGALS3, CYTOR, LINC01091, ACTG1, TYMS 
##     TUBA1A, PCLAF, TUBB, PRDX6, IL1RN, CLSPN, CENPM, DIAPH3, LINC00278, MYL9 
##     TEX14, MYBL2, TK1, RRM2, SHCBP1, BIRC5, HELLS, CEP55, CDK1, NCAPG 
## PC_ 5 
## Positive:  TYMS, PCLAF, TK1, MKI67, CLSPN, RRM2, DIAPH3, CENPM, CDK1, MYBL2 
##     ESCO2, CENPK, NCAPG, BIRC5, SHCBP1, CEP55, FAM111B, TOP2A, NUSAP1, CCNA2 
##     CENPF, ATAD2, RAD51AP1, PRC1, ASPM, TPX2, HMMR, KIF11, ANLN, H2AFZ 
## Negative:  PLEK, TMEM176A, MGLL, LINC01091, MIF, MMP19, ACTB, PHLDA1, ACTG1, CYTOR 
##     CTSB, MYL9, RABGEF1, ADAMDEC1, IL1RN, AC006001.2, TUBA1A, SDS, LINC00278, PLA2G7 
##     MARCKS, DPYSL3, LCP1, GLIPR1, CSF1, TTTY14, PPM1H, LGALS3, PRDX6, TPM4
comb1 <- RunHarmony(comb1,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
DimHeatmap(comb1, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb1)

comb1 <- JackStraw(comb1, num.replicate = 100)
comb1 <- FindNeighbors(comb1, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb1 <- FindClusters(comb1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10974
## Number of edges: 338158
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8589
## Number of communities: 11
## Elapsed time: 1 seconds
comb1 <- RunUMAP(comb1, dims = 1:10)
## 17:04:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:04:27 Read 10974 rows and found 10 numeric columns
## 17:04:27 Using Annoy for neighbor search, n_neighbors = 30
## 17:04:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:04:28 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e3113e6a7d5
## 17:04:28 Searching Annoy index using 1 thread, search_k = 3000
## 17:04:31 Annoy recall = 100%
## 17:04:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:04:33 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:04:33 Commencing optimization for 200 epochs, with 441842 positive edges
## 17:04:37 Optimization finished
DimPlot(comb1, reduction = "umap")

message("macrophage markers")
## macrophage markers
FeaturePlot(comb1, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb1, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

ref <- celldex::MonacoImmuneData()
DefaultAssay(comb1) <- "RNA"
comb21 <- as.SingleCellExperiment(comb1)
lc1 <- logcounts(comb21)
pred_imm_broad1 <- SingleR(test=comb21, ref=ref,labels=ref$label.main)
head(pred_imm_broad1)
## DataFrame with 6 rows and 4 columns
##                                                    scores      labels
##                                                  <matrix> <character>
## alv_mock1|AAACCCAGTGCTGCAC 0.273681:0.272395:0.161267:...   Monocytes
## alv_mock1|AAAGGATAGCATGAAT 0.318637:0.307464:0.191912:...   Monocytes
## alv_mock1|AAAGGATAGTCAGGGT 0.294499:0.275663:0.182915:...   Monocytes
## alv_mock1|AAAGGATAGTTCCGGC 0.293979:0.294156:0.182685:...   Monocytes
## alv_mock1|AAAGGATTCACCATCC 0.278964:0.268607:0.190625:...   Monocytes
## alv_mock1|AAAGGGCCATGACGTT 0.287339:0.296804:0.173249:...   Monocytes
##                            delta.next pruned.labels
##                             <numeric>   <character>
## alv_mock1|AAACCCAGTGCTGCAC   0.134488     Monocytes
## alv_mock1|AAAGGATAGCATGAAT   0.104668     Monocytes
## alv_mock1|AAAGGATAGTCAGGGT   0.139855     Monocytes
## alv_mock1|AAAGGATAGTTCCGGC   0.128407     Monocytes
## alv_mock1|AAAGGATTCACCATCC   0.147810     Monocytes
## alv_mock1|AAAGGGCCATGACGTT   0.166792     Monocytes
table(pred_imm_broad1$pruned.labels)
## 
## Dendritic cells       Monocytes 
##              13           10807
cellmetadata1$label <- pred_imm_broad1$pruned.labels
pred_imm_fine1 <- SingleR(test=comb21, ref=ref, labels=ref$label.fine)
head(pred_imm_fine1)
## DataFrame with 6 rows and 4 columns
##                                                    scores              labels
##                                                  <matrix>         <character>
## alv_mock1|AAACCCAGTGCTGCAC 0.163029:0.398305:0.176713:... Classical monocytes
## alv_mock1|AAAGGATAGCATGAAT 0.180372:0.435098:0.208853:... Classical monocytes
## alv_mock1|AAAGGATAGTCAGGGT 0.170366:0.388786:0.186892:... Classical monocytes
## alv_mock1|AAAGGATAGTTCCGGC 0.166489:0.422481:0.189454:... Classical monocytes
## alv_mock1|AAAGGATTCACCATCC 0.184520:0.383706:0.203877:... Classical monocytes
## alv_mock1|AAAGGGCCATGACGTT 0.177067:0.441301:0.201631:... Classical monocytes
##                            delta.next       pruned.labels
##                             <numeric>         <character>
## alv_mock1|AAACCCAGTGCTGCAC  0.1433756 Classical monocytes
## alv_mock1|AAAGGATAGCATGAAT  0.1212572 Classical monocytes
## alv_mock1|AAAGGATAGTCAGGGT  0.0502055 Classical monocytes
## alv_mock1|AAAGGATAGTTCCGGC  0.0994518 Classical monocytes
## alv_mock1|AAAGGATTCACCATCC  0.0283404 Classical monocytes
## alv_mock1|AAAGGGCCATGACGTT  0.0654704 Classical monocytes
table(pred_imm_fine1$pruned.labels)
## 
##     Classical monocytes  Intermediate monocytes Myeloid dendritic cells 
##                    9514                    1299                      23 
## Non classical monocytes 
##                       1
cellmetadata1$finelabel <- pred_imm_fine1$pruned.labels
col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")
annot_df1 <- data.frame(
  barcodes = rownames(pred_imm_broad1),
  monaco_broad_annotation = pred_imm_broad1$labels,
  monaco_broad_pruned_labels = pred_imm_broad1$pruned.labels,
  monaco_fine_annotation = pred_imm_fine1$labels,
  monaco_fine_pruned_labels = pred_imm_fine1$pruned.labels)

meta_inf1 <- comb1@meta.data
meta_inf1$cell_barcode <- colnames(comb1)
meta_inf1 <- meta_inf1 %>% dplyr::left_join(y = annot_df1, by = c("cell_barcode" = "barcodes"))
rownames(meta_inf1) <- colnames(lc1)
comb1@meta.data <- meta_inf1
DimPlot(comb1, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb1, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

message("extract mono")
## extract mono
mono <- comb1[,which(meta_inf1$monaco_broad_annotation == "Monocytes")]
mono_metainf1 <- meta_inf1[which(meta_inf1$monaco_broad_annotation == "Monocytes"),]
mono_metainf1 <- mono_metainf1[grep("monocytes",mono_metainf1$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf1))]
mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: AC008443.9, SPOCK1, SHANK2, GPR143,
## PAN3, TNS3, USP10, PSME2, CAMK2D, LMNB1-DT, HNF4G, GSTP1, SLC1A3, LINC02741,
## NETO1, AC006974.2, LINC00355, AC068051.1, KCNIP1, DENND1A, CCDC88A, IFITM10.
## PC_ 1 
## Positive:  GAPDH, LGALS1, S100A6, MIF, PRDX6, CSTB, LGALS3, BLVRB, TXN, IFI30 
##     TUBA1B, MMP9, ACTB, CYSTM1, ELOC, FAH, TMEM176A, CRABP2, TUBA1A, EMP3 
##     FABP4, CD74, SAT1, C15orf48, H2AFZ, S100A9, CFD, TIMP1, HLA-DRB1, CALM3 
## Negative:  DOCK3, MALAT1, ARL15, RASAL2, TMEM117, DPYD, PLXDC2, EXOC4, LRMDA, FTX 
##     NEAT1, ASAP1, ATG7, TPRG1, MITF, JMJD1C, MAML2, FHIT, ZNF438, VPS13B 
##     ELMO1, FMNL2, COP1, LPP, TRIO, FRMD4B, CHD9, DENND4C, UBE2E2, MED13L 
## PC_ 2 
## Positive:  HLA-DPA1, HLA-DRA, CD74, HLA-DPB1, AIF1, HLA-DRB1, LYZ, MARCH1, CTSC, C1QA 
##     SAMSN1, TGFBI, FOS, VAMP5, C1QC, CD14, HMOX1, MRC1, SLC8A1, MS4A6A 
##     CLEC4A, FPR3, SELENOP, SLCO2B1, FGL2, CLEC7A, TRPS1, SLC9A9, TNFSF13B, PDGFC 
## Negative:  TM4SF19, GAL, CYSTM1, CCL22, ATP6V1D, GM2A, FDX1, TGM2, CSTB, ACAT2 
##     CD164, SCD, NCAPH, RHOF, CIR1, IARS, SH3BP5, CSF1, S100A6, DCSTAMP 
##     TCTEX1D2, FCMR, BCAT1, TFRC, EPB41L1, GOLGA7B, SLC20A1, MREG, GAPLINC, GPC4 
## PC_ 3 
## Positive:  PTGDS, LINC02244, CLU, LINC01010, NCAPH, CRABP2, CRIP1, AC015660.2, SYNGR1, GCLC 
##     MGST1, LINC01800, KCNMA1, RCBTB2, DUSP2, C1QTNF4, MX1, AC067751.1, FGL2, SERTAD2 
##     S100A6, IFIT3, GPC4, TRIM54, NCF1, KCNJ1, RNF128, SPON2, LGALS3, IFI6 
## Negative:  CTSZ, CTSL, TPM4, LGMN, MARCKS, HAMP, CD36, AIF1, CTSB, SMS 
##     ACTG1, FPR3, HLA-DQA1, BLVRB, IL18BP, FMN1, IFI30, GYPC, CCL3, MARCO 
##     HLA-DQB1, STMN1, CD163, S100A9, BCAT1, C1QC, FUCA1, DOCK2, FABP4, CSTB 
## PC_ 4 
## Positive:  JAML, HLA-DRB5, GCHFR, XIST, AC020656.1, QPCT, ALDH1A1, GPX3, PAX8-AS1, SLC11A1 
##     MS4A7, GPRIN3, S100A9, RARRES1, NMB, TMTC1, MSR1, AL136317.2, CLEC7A, NIPAL2 
##     ST6GAL1, TDRD3, CFD, BX664727.3, MARCO, FDX1, SERINC2, SASH1, AL035446.2, OSBP2 
## Negative:  PLEK, TUBA1B, ACTB, MIF, TMEM176A, TYMS, PCLAF, LGALS3, LINC01091, CYTOR 
##     TUBB, ACTG1, CLSPN, TUBA1A, CENPM, DIAPH3, TK1, MYBL2, PRDX6, RRM2 
##     IL1RN, SHCBP1, BIRC5, LINC00278, CEP55, MYL9, TEX14, HELLS, CDK1, NCAPG 
## PC_ 5 
## Positive:  TYMS, PCLAF, MKI67, TK1, CLSPN, RRM2, DIAPH3, CENPM, CDK1, MYBL2 
##     ESCO2, CENPK, NCAPG, BIRC5, SHCBP1, CEP55, TOP2A, FAM111B, NUSAP1, CCNA2 
##     CENPF, ATAD2, RAD51AP1, PRC1, ASPM, TPX2, H2AFZ, ANLN, HMMR, KIF11 
## Negative:  PLEK, TMEM176A, MGLL, LINC01091, MIF, ACTB, MMP19, ACTG1, PHLDA1, CYTOR 
##     MYL9, RABGEF1, CTSB, ADAMDEC1, IL1RN, AC006001.2, TUBA1A, LINC00278, SDS, PLA2G7 
##     LCP1, DPYSL3, MARCKS, CSF1, TTTY14, GLIPR1, LGALS3, PPM1H, PRDX6, TPM4
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono <- RunUMAP(mono, dims = 1:4)
## 17:05:21 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:05:21 Read 10813 rows and found 4 numeric columns
## 17:05:21 Using Annoy for neighbor search, n_neighbors = 30
## 17:05:21 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:05:22 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e311492a0043
## 17:05:22 Searching Annoy index using 1 thread, search_k = 3000
## 17:05:25 Annoy recall = 100%
## 17:05:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:05:27 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:05:27 Commencing optimization for 200 epochs, with 357872 positive edges
## 17:05:31 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Extract monocytes MDM only

mono <- comb[,which(meta_inf$monaco_broad_annotation == "Monocytes")]
mono_metainf <- meta_inf[which(meta_inf$monaco_broad_annotation == "Monocytes"),]
mono_metainf1 <- mono_metainf[grep("monocytes",mono_metainf$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf))]
mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: ADGRF1, OSBPL6, AC105940.1, TEK, GSTO1,
## LY9, KIAA1217, PRH1, MX2, AL050349.1.
## PC_ 1 
## Positive:  GAPDH, FABP3, TXN, IFI30, S100A10, PRDX6, TUBA1B, BLVRB, OTOA, S100A9 
##     FAH, CYSTM1, C15orf48, GCHFR, CARD16, HAMP, GSTP1, FABP4, ACTB, CSTA 
##     PSMA7, ACTG1, CTSB, LINC01827, LDHB, H2AFZ, CFD, TUBA1A, MMP9, SLAMF9 
## Negative:  ARL15, DOCK3, FTX, NEAT1, EXOC4, MALAT1, DPYD, RASAL2, LRMDA, JMJD1C 
##     TMEM117, PLXDC2, VPS13B, FHIT, TPRG1, ATG7, MAML2, MITF, ZNF438, ZEB2 
##     TRIO, COP1, ZFAND3, ELMO1, MED13L, DENND4C, TCF12, ERC1, JARID2, FMNL2 
## PC_ 2 
## Positive:  HLA-DRB1, CD74, HLA-DRA, HLA-DPA1, GCLC, HLA-DPB1, SPRED1, RCBTB2, MRC1, KCNMA1 
##     LYZ, C1QA, AC020656.1, FGL2, CYP1B1, SLCO2B1, S100A4, AIF1, PTGDS, LINC02345 
##     HLA-DRB5, CRIP1, VAMP5, CA2, CAMK1, CLEC7A, ALOX5AP, RTN1, HLA-DQB1, STAC 
## Negative:  CYSTM1, CD164, PSAT1, FAH, GDF15, FDX1, ATP6V1D, CCPG1, SAT1, BCAT1 
##     PHGDH, PSMA7, HEBP2, RETREG1, SLAMF9, GARS, TCEA1, HES2, TXN, NUPR1 
##     CSTA, RHOQ, RILPL2, CLGN, B4GALT5, STMN1, SNHG5, SPTBN1, SUPV3L1, PTER 
## PC_ 3 
## Positive:  MARCKS, CD14, BTG1, MS4A6A, TGFBI, CTSC, FPR3, HLA-DQA1, AIF1, MPEG1 
##     MEF2C, CD163, HLA-DPB1, IFI30, SELENOP, TIMP1, ALDH2, HLA-DQB1, NAMPT, C1QC 
##     MS4A7, NUPR1, FUCA1, HIF1A, EPB41L3, HLA-DQA2, HLA-DRA, TCF4, RNASE1, ARL4C 
## Negative:  NCAPH, CRABP2, RGCC, TM4SF19, CHI3L1, GAL, DUSP2, CCL22, AC015660.2, ACAT2 
##     DCSTAMP, RGS20, TMEM114, MGST1, LINC01010, TRIM54, MREG, LINC02244, NUMB, GPC4 
##     TCTEX1D2, CCND1, SYNGR1, POLE4, SLC20A1, PLEK, SERTAD2, IL1RN, CLU, AC005280.2 
## PC_ 4 
## Positive:  ACTG1, ACTB, TPM4, TUBA1B, CCL3, CTSB, CSF1, TUBB, DHCR24, LGMN 
##     CYTOR, GAPDH, TYMS, INSIG1, PCLAF, HAMP, CD36, TK1, CLSPN, C1QA 
##     CCL7, CENPM, MYBL2, CTSL, AIF1, HSP90B1, SHCBP1, CENPK, MKI67, CEP55 
## Negative:  PTGDS, LINC02244, CLU, SYNGR1, MGST1, NCF1, CSTA, CCPG1, LY86-AS1, LINC01010 
##     EPHB1, GAS5, ALDH2, AC015660.2, C1QTNF4, VAMP5, PDE4D, SNHG5, AP000331.1, APOD 
##     CLEC12A, XIST, BX664727.3, ARHGAP15, AOAH, RCBTB2, ZBTB20, S100P, TMEM91, ANKRD44 
## PC_ 5 
## Positive:  CTSB, TPM4, CSF1, CCL3, ACTB, LGMN, ACTG1, DHCR24, CD36, C15orf48 
##     INSIG1, SLCO2B1, IL18BP, C1QA, BCAT1, MREG, MGLL, LIMA1, SLC11A1, MADD 
##     TM4SF19, AIF1, OTOA, CAMK1, PHLDA1, MRC1, UGCG, TCTEX1D2, CCL7, C1QB 
## Negative:  TYMS, PCLAF, TK1, MKI67, MYBL2, CENPM, BIRC5, RRM2, CDK1, DIAPH3 
##     CEP55, CLSPN, SHCBP1, NUSAP1, PRC1, CENPF, ESCO2, KIF11, TOP2A, NCAPG 
##     CENPK, ANLN, TPX2, CCNA2, ASPM, GTSE1, MAD2L1, FAM111B, HMMR, KIF4A
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono <- RunUMAP(mono, dims = 1:4)
## 17:06:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:06:14 Read 23700 rows and found 4 numeric columns
## 17:06:14 Using Annoy for neighbor search, n_neighbors = 30
## 17:06:14 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:06:16 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e311337e52d8
## 17:06:16 Searching Annoy index using 1 thread, search_k = 3000
## 17:06:25 Annoy recall = 100%
## 17:06:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:06:28 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:06:28 Commencing optimization for 200 epochs, with 776440 positive edges
## 17:06:34 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Cell counts

Most cells are classified as monocytes.

cc <- table(meta_inf[,c("sample","monaco_broad_annotation")])

cc %>% kbl(caption="cell counts") %>% kable_paper("hover", full_width = F)
cell counts
Basophils Dendritic cells Monocytes
alv_active1 0 0 767
alv_active2 0 3 540
alv_active3 0 1 525
alv_bystander1 0 1 872
alv_bystander2 0 1 985
alv_bystander3 0 0 417
alv_bystander4 0 0 435
alv_latent1 0 0 640
alv_latent2 0 1 1748
alv_latent3 0 0 1587
alv_latent4 0 2 1563
alv_mock1 0 1 869
alv_mock2 0 0 632
alv_mock3 0 3 968
mdm_active1 0 3 590
mdm_active2 0 0 410
mdm_active3 0 2 300
mdm_active4 0 0 401
mdm_bystander1 0 19 1148
mdm_bystander2 0 13 1062
mdm_bystander3 0 21 334
mdm_latent1 1 4 826
mdm_latent2 0 3 965
mdm_latent3 0 2 201
mdm_mock1 0 2 688
mdm_mock2 0 0 516
mdm_mock3 0 2 122
mdm_mock4 0 0 772
react6 0 3 2817
tcc <- t(cc)

pctcc <- apply(tcc,2,function(x) { x/sum(x)*100} )

pctcc %>% kbl(caption="cell proportions") %>% kable_paper("hover", full_width = F)
cell proportions
alv_active1 alv_active2 alv_active3 alv_bystander1 alv_bystander2 alv_bystander3 alv_bystander4 alv_latent1 alv_latent2 alv_latent3 alv_latent4 alv_mock1 alv_mock2 alv_mock3 mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_latent1 mdm_latent2 mdm_latent3 mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 react6
Basophils 0 0.0000000 0.0000000 0.0000000 0.0000000 0 0 0 0.0000000 0 0.0000000 0.0000000 0 0.0000000 0.0000000 0 0.0000000 0 0.000000 0.000000 0.000000 0.1203369 0.0000000 0.0000000 0.0000000 0 0.000000 0 0.000000
Dendritic cells 0 0.5524862 0.1901141 0.1145475 0.1014199 0 0 0 0.0571755 0 0.1277955 0.1149425 0 0.3089598 0.5059022 0 0.6622517 0 1.628106 1.209302 5.915493 0.4813478 0.3099174 0.9852217 0.2898551 0 1.612903 0 0.106383
Monocytes 100 99.4475138 99.8098859 99.8854525 99.8985801 100 100 100 99.9428245 100 99.8722045 99.8850575 100 99.6910402 99.4940978 100 99.3377483 100 98.371894 98.790698 94.084507 99.3983153 99.6900826 99.0147783 99.7101449 100 98.387097 100 99.893617

Focus on MOCK vs ACTIVE - exclude latent and bystander

focus <- meta_inf[grep("latent",rownames(meta_inf),invert=TRUE),]
focus <- focus[grep("bystander",rownames(focus),invert=TRUE),]
focus_mdm <- focus[grep("mdm",rownames(focus)),]
focus_alv <- focus[grep("alv",rownames(focus)),]

mono_focus_mdm <- mono[,which(colnames(mono) %in% rownames(focus_mdm))]
mono_focus_alv <- mono[,which(colnames(mono) %in% rownames(focus_alv))]

# mono_focus_mdm
mono_focus_mdm <- FindVariableFeatures(mono_focus_mdm, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono_focus_mdm <- RunPCA(mono_focus_mdm, features = VariableFeatures(object = mono_focus_mdm))
## Warning: The following 41 features requested have zero variance; running
## reduction without them: NMB, LYZ, HDAC9, TMPRSS15, SNTB1, IFIT3, ASAP1, KIF11,
## HES2, ALPK3, MT1A, SLC51A, AC108134.2, PREX2, HLA-C, AC009093.2, PSME2,
## ATP6V0A2, WDPCP, NAMPT, SERTAD2, AMZ1, HELLS, AC007389.1, ANK3, AQP2, E2F7,
## KEL, SOD2, ALDH1A2, CATSPERE, KIAA0825, SCFD2, BIN2, ADIRF, AL137076.1, SAA4,
## BAIAP3, AC006001.2, AXDND1, LINC01572
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: HIST1H2BJ, OASL, ENG, SSBP3, CENPA,
## HIST1H3J, MT-ND5, NCALD, CCND2, PCBP3, LINC00885, AL355388.1, AC013287.1,
## AL009177.1, AL358944.1, SCAI, KLF12, SETD2, IFT80, TVP23A, C16orf89, IPCEF1,
## PPP6R2, TTK, HCAR2, SLC2A14, C5AR2, FANCM, RPS4Y1, TNRC6C, MT-ATP6, AC106793.1,
## ZZEF1, STXBP1, HIF1A-AS3, PNPLA7, AC064805.2, RASIP1, AL357873.1, ANKRD34B,
## AL353759.1, KLRG1, AL021917.1, WDR49, GPR155, MARCH1, EIF2B3, RAP1GDS1, ZBTB41,
## LINC02585, AGPAT5, NUP93, ESR2, CLPX, LINC02853, LRRK2, LPL, ASTL, SH3GL1,
## AC007364.1, CWC22, TMEM212-AS1, PIWIL2, OBI1-AS1, PKIA-AS1, CHRNB4, DUSP16,
## PCA3, LYPD1, P2RY12, CREM, AC025262.1, RAB10, BACE2, MT-ND2, NR2F1-AS1, IFT140,
## AC100849.1, STAT5B, CCNB2, WWP1, AL157911.1, DGKI, AURKC, TMEM220-AS1, PAWR,
## SF3B3, GYG2, MT-CO3, MT-CYB, LDHA, IFI6, AC034213.1, BTBD2, SIGLEC10,
## ZFPM2-AS1, AP000462.1, AC072022.2, SRGN, ZNF276, FAM184A, AC117383.1, ACTR3C,
## DOK5, MAP4K1, PACS1, SRRM2-AS1, PDIA4, CU638689.5, AC108879.1, CFAP52, CDK19,
## AC105402.3, CLIC5, RGS18, RASSF4, SF3B1, COX5B, GRM7, LINC01054, DAPK2, LRRC9,
## MT-ND1, SNAP25-AS1, CLEC4C, AP000446.1, AC133550.2, NYX, TTC21B-AS1, BCL2A1,
## AC105001.1, AL031658.1, SETBP1, UBASH3B, AL589740.1, FHOD3, NLRP4, AC099552.1,
## AL592295.3, CCDC7, ZNF385D-AS1, IGSF6, AC007091.1, AC092490.1, AC078923.1,
## AC240274.1, MRC2, SLC44A5, MAP3K15, P2RY13, ZDHHC17, AC089983.1, AC100849.2,
## JAKMIP3, PARD3, TSPAN8, DPEP3, CARD11, AHR, OR3A2, FDXACB1, LINC01098,
## LINC02015, MASP2, CACNA2D3, TFG, GFRA2, NELL2, APCDD1L, DARS, AL360015.1,
## MMP28, BMPR1B, CLCA4-AS1, TPTE2, KIF21A, MIR155HG, TRIM37, MANF, CTHRC1, RORA,
## VIPR1, AC104459.1, ARC, CPLX1, SAMD11, GAS1RR, TNS3, ADRA2B, TUBB3, CEP112,
## AC114977.1, AC019117.1, DHX8, AC093766.1, GTSF1, CEP135, AC005264.1, ATP6V0D2,
## GSN, AC117473.1, AL078602.1, AL132775.1, AC016074.2, ALDH1A1, CPE, MAPK8,
## STPG2, AL049828.1, TEX15, AC008555.2, OR8D1, MCF2L-AS1, AC005393.1, TACC3,
## AC084809.2, AKR7A2, CFAP69, U62317.4, ATF3, PAX8-AS1, TMEM45A, CHRM3,
## AL391807.1, AC007128.2, AC073569.1, ZIC2, NR2F2-AS1, SIGLEC7, SNHG12, SCAPER,
## SESN3, HIST1H2AC, CD93, LINC00649, LINC01600, AC026333.3, GAL3ST4, Z99758.1,
## GLIPR1, MNAT1, AC011365.1, OSBPL6, P3H2, TASP1, GAK, LNX1, JAKMIP2-AS1, RTKN2,
## TFRC, AC137770.1, HIST1H2BG, RUFY2, TUBB4B, SULF2, LY9, LINC01353, PDE11A,
## CRISPLD2, MAPK13, AC096570.1, RRAS2, HIST2H2BE, ACMSD, LINC00237, MCCC2,
## AC021231.1, FKBP1B, AP000829.1, RALGAPA2, TMEM71, PRTG, AL136418.1, PPP1R12B,
## MKX, LINC01855, TRIM71, LINC01917, CORIN, KCNMB4, ZFP36L1, Z94721.1, MAFB,
## GLRX, PODN, RAP2C-AS1, LGALS3, OXSR1, AC087683.3, C3orf79, CREG2, LINC02112,
## CYP4F22, AC092134.3, DPF1, AC079062.1, RPS6KA6, COL8A2, ZBTB46, LGALSL, SRGAP3,
## C11orf45, HERPUD1, ABCA13, LINC01191, MAP1A, SOX15, PCLO, LY96, MGAT5, DNAH3,
## KDM6A, MLLT3, SPAG7, CALM3, VGLL3, STAG1, AC021086.1, TUBB4A, IGF2R, GNG4,
## ADAMTS10, RNF24, SLC26A7, BACH1, AC009315.1, RNF180, PPFIA2, LINC01891,
## AL645929.2, AC015849.1, AC124017.1, CALR, TMEM131L, AC011476.3, KIAA1841, SPEG,
## PTPRG-AS1, LYPLAL1-DT, SEMA6D, CLDN4, AL096794.1, MDH1, AC008115.2, MARCH3,
## NWD1, NPRL3, AL031710.2, MRVI1, SOX21-AS1, CKMT1A, PROCR, HCAR3, AC090630.1,
## PKP2, CD72, CHM, PLBD1, AC010834.2, RFX3, LINC00958, PCP4L1, AC098588.3,
## FILIP1L, PAFAH1B1, ENPEP, AC114763.1, ADORA2B, ZHX2, TNIK, AL356379.2, PRKG2,
## NETO2, PIP4K2C, SNX10, MCM9, AL136298.1, SPTLC3, AFF1, EPHA6, UBE2T, NFKB1,
## SOCS3, PLXNC1, CASP1, SOS1, BFSP2, AF131216.1, AC103726.2, EFCAB6-AS1,
## ARHGAP29-AS1, AC019330.1, AC063923.2, ECE2, SOAT2, AC008655.2, EXD2, KCNJ1,
## SPIRE1, CDT1, TRAF2, PKN2, NEGR1, AC096992.3, COL28A1, ANGPTL4, LINC00571,
## SGO1, AHCYL2, AC092811.1, AC097654.1, AL121772.1, LILRB2, FAM135A, BUB1, PTPRB,
## KIF20B, ITPR2, AC020743.2, LINC01605, LINC00407, PLA2G5, NKAIN1, LINC02851,
## DYM, GMDS, TXNIP, E2F1, AC024901.1, AC073475.1, AL365295.2, SPOCK3, SPIB,
## POF1B, LPP, GNG7, AC246817.1, GLUL, XKR9, QKI, ERO1A, PRORP, ITM2A, COL27A1,
## ELANE, AL139246.1, AL158839.1, AC114810.1, AC009229.3, AC012358.1, AC063944.3,
## AC092902.6, AC128709.2, AC021220.2, ADAMTS3, AC093801.1, AC116351.1, LINC02149,
## AC113386.1, AC011374.3, LINC02571, Z84484.1, MRAP2, AL357992.1, AL078582.1,
## AC002480.2, ITPRID1, DEFB136, AC084026.1, CALB1, AF178030.1, AF235103.1,
## AL353764.1, TMEM246-AS1, PPP3R2, AL353803.1, AL731559.1, AL121748.1, LINC02625,
## AC016816.1, SYCE1, OR10A4, LINC02751, OR8J1, AP002884.1, AC024940.1,
## AC012464.3, AC073863.1, AC063943.1, C1QTNF9-AS1, SMAD9-IT1, LINC00563,
## CLYBL-AS2, AL442125.2, KLHL33, CMA1, AC008056.1, LINC02280, AC012170.2,
## AC048382.1, AC091167.5, AL133297.1, NPIPB8, AC012186.2, AC092378.1, AC129507.2,
## RAI1-AS1, AC007952.6, AC004448.3, AC243773.2, MIR4527HG, AC105094.2, OR7E24,
## ANGPTL8, AL021396.1, NUDT11, TCEAL7, LINC00266-4P, CAMK2D, MYADM, ERBB4,
## PFKFB3, ARRDC3-AS1, BDNF-AS, LRRIQ4, AP002793.1, LINC00987, FAIM, CNIH3, DMXL2,
## ITGA9, SLC41A2, LINC02798, PRH1, MARF1, AL162171.2, PCNX2, LINC00511, SCLT1,
## AC021546.1, LUZP4, CACNA2D2, AC084816.1, ST3GAL3, SLC7A8, ZC4H2, PYGL, STUM,
## PDE2A, SUCLG2-AS1, SIRPD, MMD2, ANTXR2, COBL, PLAA, PRAG1, SH3D19, AC006525.1,
## TIMP4, EDA, SPATA5, EFL1, LINC02398, STAU2, ABCC1, DUSP5, RAB7B, WDFY3, MYO10,
## DECR1, AC024084.1, CD200R1, SH2D7, EXPH5, AL158068.2, UFL1-AS1, MCTP2, NCOA1,
## ENTPD1-AS1, KIAA1109, DUSP27, NCAPD3, YBX1, HIVEP1, INTU, EMP1, GPAT3, NABP1,
## MTUS1, LINC01762, LINC02457, FAM110B, GOLGA4, TMEM72-AS1, KPNA2, CASC2, PEAK1,
## AXL, AC006115.2, TMEM92, PMAIP1, FAAH2, LGALS3BP, ABCA1, PTK2.
## PC_ 1 
## Positive:  PRDX6, TUBA1B, ACTB, C15orf48, FABP3, S100A10, HSP90AA1, TXN, TUBA1A, S100A9 
##     CHI3L1, CRABP2, SLC35F1, ACAT2, RGCC, ACTG1, LILRA1, TUBA1C, FBP1, MMP9 
##     BLVRB, MGST1, GAL, LDHB, HAMP, MYL9, HPGDS, CYSTM1, UCHL1, SPP1 
## Negative:  NEAT1, RAD51B, ARL15, FHIT, FMN1, MALAT1, FTX, AL035446.2, EXOC4, MBD5 
##     SLC22A15, ZFAND3, FNDC3B, COP1, GMDS-DT, JMJD1C, VTI1A, DENND1A, DOCK4, PDE4D 
##     VPS13B, TRIO, SPIDR, SBF2, DOCK3, FRMD4B, SMYD3, TBC1D8, GAB2, REV3L 
## PC_ 2 
## Positive:  GDF15, PSAT1, CYSTM1, B4GALT5, TCEA1, PHGDH, BEX2, SNCA, ATP6V1D, SLAMF9 
##     OCIAD2, UGCG, GARS, SUPV3L1, gene-HIV1-2, TXN, FDX1, S100A10, RAB38, G0S2 
##     RETREG1, CCPG1, TM4SF19, SAT1, NMRK2, PHLDA1, CLGN, TPM4, CLMP, DRAXIN 
## Negative:  HLA-DRB1, CD74, HLA-DRA, HLA-DPA1, HLA-DPB1, PTGDS, CEBPD, KCNMA1, CFD, TGFBI 
##     CYP1B1, RNASE6, RCBTB2, GCLC, HLA-DQB1, HLA-DRB5, C1QA, MS4A6A, SIPA1L1, FOS 
##     CRIP1, MS4A7, ALOX5AP, VAMP5, S100A4, EPB41L3, MNDA, NCAPH, MX1, MRC1 
## PC_ 3 
## Positive:  RGCC, NCAPH, CRABP2, CHI3L1, LINC01010, LINC02244, AC015660.2, MREG, TM4SF19, ACTB 
##     GCLC, NUMB, TMEM114, RGS20, LRCH1, GPC4, ACAT2, PLEK, DUSP2, TCTEX1D2 
##     ST3GAL6, PSD3, FNIP2, CYP1B1, AC005280.2, S100A4, AC092353.2, DOCK3, SLC28A3, CCND1 
## Negative:  BTG1, MARCKS, G0S2, CD14, SAT1, ARL4C, CLEC4E, FUCA1, CXCR4, TGFBI 
##     SEMA4A, HIF1A, MS4A6A, TIMP1, VMO1, MEF2C, SDS, MS4A7, CEBPD, P2RY8 
##     PDK4, TCF4, STMN1, RNASE1, CTSC, MPEG1, FPR3, NUPR1, HLA-DQA2, LGMN 
## PC_ 4 
## Positive:  CCPG1, LINC02244, LINC01010, PTGDS, CLU, CPD, CLGN, S100P, BEX2, CARD16 
##     BX664727.3, QPCT, SYNGR1, NIBAN1, SNHG32, TDRD3, CLEC12A, PSAT1, RAB6B, NUPR1 
##     RETREG1, PDE4D, GAS5, PHGDH, NCAPH, TCEA1, MGST1, MT1X, AC015660.2, MEIKIN 
## Negative:  AC078850.1, ACTB, SLC35F1, FABP4, ACTG1, CD36, INSIG1, TUBA1B, CCL7, TPM4 
##     MMP19, PHLDA1, C3, LINC01091, CADM1, MGLL, CD300LB, TUBB, SDS, CSF1 
##     TIMP3, LGMN, TMEM176A, IL18BP, ALCAM, LIMA1, TBC1D8, HSP90B1, TNFRSF9, TPM2 
## PC_ 5 
## Positive:  TYMS, MKI67, PCLAF, CENPF, CEP55, BIRC5, KIF4A, CDKN3, PRC1, NUSAP1 
##     DLGAP5, HMMR, TPX2, CDK1, ASPM, PTTG1, CENPM, ANLN, CENPK, CCNA2 
##     SHCBP1, NCAPG, DIAPH3, TK1, MAD2L1, CIT, RRM2, BUB1B, MYBL2, KNL1 
## Negative:  gene-HIV1-2, gene-HIV1-1, TCTEX1D2, MMP19, PHLDA1, RHOF, CSF1, IL1RN, TM4SF19, SPOCD1 
##     PLEK, RGCC, TNFRSF9, C15orf48, CBLB, DPYSL3, TM4SF19-AS1, CD36, PDGFB, MADD 
##     MYL9, TIMP3, NUMB, ACTB, C3, LIMA1, PCYT1A, CCL7, B4GALT5, CPA6
DimHeatmap(mono_focus_mdm, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono_focus_mdm, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono_focus_mdm)

mono_focus_mdm <- FindNeighbors(mono_focus_mdm, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono_focus_mdm <- FindClusters(mono_focus_mdm, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono_focus_mdm <- RunUMAP(mono_focus_mdm, dims = 1:4)
## 17:06:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:06:46 Read 3799 rows and found 4 numeric columns
## 17:06:46 Using Annoy for neighbor search, n_neighbors = 30
## 17:06:46 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:06:47 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e3112ec926a0
## 17:06:47 Searching Annoy index using 1 thread, search_k = 3000
## 17:06:48 Annoy recall = 100%
## 17:06:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:06:51 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:06:51 Commencing optimization for 500 epochs, with 133482 positive edges
## 17:06:54 Optimization finished
DimPlot(mono_focus_mdm, reduction = "umap", label=TRUE)

DimPlot(mono_focus_mdm, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono_focus_mdm, group.by="sample" , reduction = "umap", label=TRUE)

# mono_focus_alv
mono_focus_alv <- FindVariableFeatures(mono_focus_alv, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono_focus_alv <- RunPCA(mono_focus_alv, features = VariableFeatures(object = mono_focus_alv))
## Warning: The following 33 features requested have zero variance; running
## reduction without them: TMEM163, MRC1, MARCKS, C1QC, MT2A, PDE4B, ADCY1,
## CASC20, BX664727.3, CCND1, CD28, NYAP2, SYN3, NPTX2, TMEM176B, CNTNAP2,
## AF117829.1, SSTR2, ZNF157, HIP1, AC097512.1, BIRC3, CRIM1, LINGO2, NCAM2,
## ACOXL, TRIO, CNIH3, AP000424.1, AC130650.2, MYO16-AS1, BTBD11, KDM2A
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: STEAP1B, NUP210L, CPNE8, IFI6,
## AC083837.1, AP002075.1, HCAR2, SLC6A16, EMP3, AL096794.1, MIF, PPP1R1C, LYPD1,
## ACVR2A, IQCA1, EFCAB7, LINC02068, COL25A1, SPINK6, TENM4, BTNL8, HIST1H2BG,
## C11orf49, AC092821.3, DSG2, DNAH12, MRPS6, IL6, SLC7A14-AS1, AC135050.3, ERAP2,
## DNAJC9, ACTN2, ADAM22, LINC02269, AC076968.2, INO80, AC207130.1, GNLY,
## LINC01605, MIR4300HG, AC096666.1, MS4A5, KIF6, EXO1, BANK1, LINC02073, ARMC8,
## MSRB3, SMCHD1, AC013391.2, TEK, LINC00350, PLAAT3, LGALS1, CD226, AC084740.1,
## GCH1, CTSW, NWD1, COL27A1, SCFD1, PMAIP1, TRIM2, SVEP1, AHCTF1, CCDC85A,
## AC010834.2, STK32B, RFX3-AS1, IGF2R, TUBB4A, GIGYF2, MARCKSL1, CH25H, RAD51C,
## BRMS1L, LPCAT1, RLF, AL353596.1, CTSZ, LINC00299, NEURL3, ADCY3, AC019131.1,
## KIAA1755, SHANK2, GPRC5D, CR1, LINC02789, AC079313.2, OR10G3, TNFAIP3,
## LINC01344, PTPN2, USP10, YJEFN3, ASAP3, AC104596.1, XDH, ZC3H8, DYNC1H1, RFTN2,
## CCDC57, CSTB, SPOCK1, AC007785.1, MECP2, ABCB1, RPH3AL, PSME2, NEK4, LINC01862,
## NDRG1, AC005264.1, AC084809.2, SOAT2, CCNC, SH3TC2, AC009292.2, LINC00511,
## URB1, MFSD11, STS, SPACA3, CFAP74, ZPLD1, SLC2A5, AC110992.1, HSF2BP, XPO5,
## PPEF1, PTX3, EML2-AS1, LDHA, LGALS3, FCRLB, SOX6, AC009435.1, TIMELESS,
## PAX8-AS1, PHACTR1, FBXO4, RAB7B, GAPLINC, MBOAT4, AKAP6, CKAP5, POU6F2, RGS8,
## LINC00842, GLIPR1, SLC44A5, LINC02466, SMAD1-AS2, DZIP1, AC092718.1,
## AC125421.1, COL4A5, ANOS1, INKA2-AS1, WDR90, TNC, TEPP, CNR2, HNRNPM,
## AC011346.1, WIPF3, SYT12, TMEM37, MID2, SRL, DHRS9, ZNF385D-AS1, LINC01999,
## GRID2, GOLGA7B, NRIP3, NRG1, HRH2, AC073569.2, FSD1, LMNB1-DT, PLXDC1, CD5L,
## C2orf72, ELOC, AL135926.2, LCP1, CFAP57, KLB, AC007100.1, NBAT1, HMOX1, SVOP,
## MTFMT, RFX8, ERI2, PTPRB, FO393415.3, IGSF23, AC008415.1, PMPCA, NCMAP,
## LINC01414, AC010997.3, EAF2, IPCEF1, SLC23A2, COBL, STXBP6, HCAR3, SERINC2,
## GTF2IRD2, GNRHR, GATA2, CLDN18, AOC1, SLC6A7, AL513166.1, FAF1, AC041005.1,
## AC107081.2, HLTF-AS1, NNMT, AC004949.1, WDFY4, SLC4A8, GLUL, ANGPT1, NDRG2,
## AL592295.3, RGS7, MOSPD1, IL17RB, DACH1, C22orf34, KIF21A, STPG2, AL662884.3,
## NEIL3, AC022035.1, AC006441.4, LINC01358, ZSCAN5A, GPAT3, ADM, CYP4F22,
## SPRY4-AS1, ELOVL5, MRC2, YLPM1, RELN, MIR2052HG, ABLIM1, TDRD9, SIK2, ULBP2,
## VIPR1, PLCH1, NCR3, MIR3142HG, GALNT14, KIAA0825, SPSB1, HSPA1B, MSH4, PDE1C,
## PARD3, AC068228.3, UST, AC116563.1, Z99758.1, AC006525.1, CLSTN2, RXFP1, GM2A,
## RASSF4, CNTNAP5, PTP4A2, CRIPT, AC109454.3, SEMA6D, PAWR, MARCH1, SLA, SCIN,
## DENND2A, AC097518.2, TNR, IGSF6, AC005753.2, PARN, NIPAL4, AL136456.1, TNIK,
## LINC00378, SHROOM4, TPSAB1, AL353595.1, AC025430.1, BACE2, NRCAM, NES, ARHGAP6,
## CHD9, POU2F2, LINC00571, RBL1, FOXM1, KDR, AC008609.1, PRDM14, AL161646.1,
## AL354811.2, KRTAP10-4, CTXND1, CLPB, PRKCG, MEI4, AC006460.1, OASL, AL354733.2,
## AHCYL2, FUT2, BRCA1, FCMR, S100A6, AL049828.1, BPIFC, ABCG2, NSG2, APOM, FRRS1,
## KDM7A, AC016831.7, DPYS, MS4A4A, SH3BP5, RBM47, GINS2, RBPJ, MEP1A, EMP1,
## PCDH15, DNAJC1, CDH12, SLC4A1, AC002454.1, TROAP, LINC00973, AC110491.2,
## COL8A2, ANKH, FBXW7, AC008443.9, SEPTIN4-AS1, SRGAP3, ATP1B1, SNX10, PDE2A,
## MX2, ITGA2, TBC1D24, SNAI3, CHDH, RALGAPA2, KCP, CLCA4-AS1, AC246817.1,
## SH3TC2-DT, TMEM132C, AC073091.4, CHODL, INPP5J, AC116616.1, AL360015.1, GRHL2,
## SDC2, LINC01643, JAML, TMEM45A, TGFB3, ARL9, CD1E, ITSN2, SCFD2, AP000812.1,
## FCER1G, AC024901.1, AC104041.1, AC093307.1, IAPP, ATP1B2, LINC01900,
## AC007381.1, AC006333.1, AC015908.2, AL033530.1, SFTPD-AS1, ZAR1L, CENPU, CHAC1,
## AC117453.1, PDLIM4, IKZF3, MAS1, ING3, LIPG, AC099489.1, C11orf45, TNFRSF11A,
## GNAI1, CDHR3, ASMT, AC011893.1, AIM2, LINC01924, TNFRSF12A, AC079742.1, MCAM,
## PCLO, SOX5, HIST1H2AC, RHOD, LINC02777, SLC22A2, EXOSC10, KIAA1217, ASPH,
## ITPR2, KCNJ1, LINC01800, FHAD1, AC093898.1, DGKI, AMPD3, CEP126, SAMD12,
## AC092957.1, FAXC, LINC00519, AF274853.1, AC084816.1, LINC01572, DEGS2, HTRA4,
## PLTP, XKR9, LPP, SLC23A1, AL109930.1, GRIK5, AL359220.1, STXBP5L, MB21D2,
## LINC02742, PRRX2, CNGA4, EOGT, LINC02698, AC019197.1, AMPD1, GLCCI1, SPOPL,
## ZNF431, CDC5L, UPK1A-AS1, AC093010.2, SLC35F3, AC007402.1, CD96, AL591845.1,
## BCL2A1, AC007529.2, DNAH2, BX004807.1, NR1H3, RASL10A, MAP1A, BICD1, LINC00894,
## FAM13B, SSH2, GALNTL6, LINC01933, CSMD2, ERBB4, IARS, FCHO2, AC010343.3,
## SH3PXD2B, AC108066.1, AL713998.1, RNF212, LIN28B-AS1, CHD5, AC130456.2, ABCA1,
## LMO4, NABP1, IL3RA, LINC02752, RNF144A, CPEB2-DT, STX4, HIST1H1C, TFRC,
## AC099560.1, LPAR1, AC026391.1, RBPJL, SLC25A23, AL645465.1, ASTN2, NR6A1,
## GFOD1, STUM, ELF5, AL355981.1, TMEM213, TEX49, OPRD1, KCNN2, FBLN1, LIMCH1,
## ZNF609, AC011476.3, C1orf143, SKA3, AC055855.2, NANOS1, SERPINA1, TRMT5,
## LDLRAD4, CTNNA3, SLC9A2, FA2H, ZNF385B, GLT1D1, SYNE1, LINC02805, AC124852.1,
## HIST2H2BE, QKI, AC087280.2, AP001496.1, PPP1R14C, FRMD6-AS1, TYW1B, AP001636.3,
## DOCK2, MIR155HG, SDSL, AC068587.4, CFI, PROSER2-AS1, AC009107.1, TEKT1,
## LINC02109, ERLEC1, CORO1A, SLAMF7, GRAMD2A, GALNT18, LINC01169, FBXO43,
## TMEM131L, SPOCK3, CLDN4, IGF1R, AC005280.1, BARD1, PRAG1, SPIRE1, FAM107B,
## PLAT, AGPAT4, MARCH3, HECW1, ATP6V1H, CLSTN3, SLC1A3, AC239860.4, LHCGR, ZBED9,
## AL162411.1, AL157702.2, AC087897.2, LINC01198, AC090515.6, AC018618.1,
## AL805961.1, SCG2, AC021134.1, AC137810.1, AC145146.1, AL591519.1, AC108860.2,
## ADD2, EGFL7, DENND5B, GNGT1, SLC30A8, FAM92B, AL078604.4, ATP6V0D2, BACH1,
## CDT1, ZNF543, AC079793.1, PDE3A, RBM11, SLC30A10, AC006511.6, WDR54, VRTN,
## SHCBP1L, AC005808.1, TSGA13, MYADM, CIDEC, STMND1, SSPO, NLRP7, LINC02224,
## RNF24, AC096531.2, LINC01276, NUDT10, LMCD1-AS1, PLA2G7, NPC1, PTPRJ, LAMA3,
## RNF217-AS1, AC079163.2, MAFB, ATF3, CDC42EP3.
## PC_ 1 
## Positive:  GAPDH, BLVRB, H2AFZ, TXN, GSTP1, NUPR1, SAT1, CYSTM1, FAH, FABP4 
##     MMP9, CARD16, STMN1, PRDX6, ALDH2, CFD, GDF15, PSAT1, IFITM3, PHGDH 
##     BTG1, DDIT3, CD74, TUBA1B, S100P, PTGDS, ADAMDEC1, HLA-DPA1, SELENOP, GYPC 
## Negative:  RASAL2, DOCK3, TMEM117, DPYD, AC092353.2, CPEB2, LRMDA, LINC01374, PLXDC2, ASAP1 
##     TPRG1, FMNL2, ATG7, ARL15, NEAT1, LRCH1, MALAT1, MITF, MAML2, EXOC4 
##     ELMO1, ATXN1, RAPGEF1, VWA8, DENND1B, ZNF438, TTC28, ARHGAP10, NUMB, UBE2E2 
## PC_ 2 
## Positive:  LYZ, SLC8A1, CTSC, FCGR3A, HLA-DPA1, AIF1, SAMSN1, RCBTB2, HLA-DRA, CLEC7A 
##     ME1, HLA-DPB1, NRP1, CFD, AL356124.1, PDGFC, TRPS1, CD74, SELENOP, SLCO2B1 
##     C1QA, RARRES1, CAMK1D, ALDH2, FCHSD2, SPRED1, FOS, XYLT1, VAMP5, ATP8B4 
## Negative:  gene-HIV1-1, CCL22, gene-HIV1-2, IL6R-AS1, GAL, AL157912.1, SLC20A1, DUSP13, CYTOR, DCSTAMP 
##     TRIM54, MIR4435-2HG, TCTEX1D2, IL1RN, TM4SF19, CSF1, MYL9, RHOF, CIR1, NMRK2 
##     BIN2, POLE4, ATP6V1D, ACAT2, GPC3, SCD, LAYN, NCAPH, C3, UPP1 
## PC_ 3 
## Positive:  CTSL, MARCO, BCAT1, FMN1, HAMP, SAT1, LGMN, TPM4, SLC11A1, FDX1 
##     SNCA, S100A9, B4GALT5, UGCG, STMN1, MSR1, BLVRB, NUPR1, FABP4, TXN 
##     CCL3, SMS, CD164, FRMD4A, GYPC, FAH, PLAU, CLMP, DNASE2B, SNTB1 
## Negative:  CRIP1, CLU, MMP7, PTGDS, DUSP2, GCLC, CRABP2, KCNMA1, RNF128, NCAPH 
##     CDKN2A, TRIM54, AC067751.1, SERTAD2, C1QTNF4, IL1RN, ALOX5AP, TIMP3, RGCC, LINC02408 
##     S100A4, LINC00910, FGL2, TMEM176A, CYP1B1, RCBTB2, SLC35F1, RAMP1, VAMP5, SYNGR1 
## PC_ 4 
## Positive:  QPCT, AL136317.2, AC012668.3, NIPAL2, XIST, AC008591.1, LINC02320, AC020656.1, FDX1, OSBP2 
##     GCHFR, ANO5, MIR646HG, CCDC26, TDRD3, LINC01340, AC015660.2, GPX3, LINC02244, PKD1L1 
##     AP000331.1, CFD, CTSK, RARRES1, NMB, HES2, LIX1-AS1, LINC01010, SKAP1, MGST1 
## Negative:  CLSPN, SHCBP1, TYMS, IL1RN, DIAPH3, ACTG1, HELLS, RRM2, TK1, ESCO2 
##     TUBA1B, PCLAF, FAM111B, CENPK, MYBL2, ACTB, CENPM, TUBB, CIT, CCNA2 
##     MKI67, CDK1, NCAPG, ATAD2, TOP2A, WDR76, CYTOR, KIF11, CEP55, HMMR 
## PC_ 5 
## Positive:  gene-HIV1-1, gene-HIV1-2, CTSB, IL6R-AS1, AIF1, AL157912.1, SPRED1, IL1RN, CCL2, STAC 
##     CCL7, FPR3, C1QA, CDK6, ALCAM, ID3, LINC02345, SLCO2B1, SDS, IL7R 
##     C1QB, ISG15, CCL3, STON2, MMP19, LGMN, CAMK1, ALOX5, IL4I1, TNFSF13B 
## Negative:  CLSPN, TK1, MKI67, TYMS, PCLAF, DIAPH3, SHCBP1, RRM2, CENPK, FAM111B 
##     MYBL2, CDCA2, ESCO2, CDK1, NCAPG, HELLS, ATAD2, CCNA2, BIRC5, CENPM 
##     TOP2A, RAD51AP1, NUSAP1, KIF11, POLQ, HMMR, KNL1, CEP55, CENPF, MELK
DimHeatmap(mono_focus_alv, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono_focus_alv, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono_focus_alv)

mono_focus_alv <- FindNeighbors(mono_focus_alv, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono_focus_alv <- FindClusters(mono_focus_alv, algorithm = 3, resolution = 0.3, verbose = FALSE)
mono_focus_alv <- RunUMAP(mono_focus_alv, dims = 1:4)
## 17:07:02 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:07:02 Read 4301 rows and found 4 numeric columns
## 17:07:02 Using Annoy for neighbor search, n_neighbors = 30
## 17:07:02 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:07:03 Writing NN index file to temp file /tmp/Rtmp4EQbK0/file29e3116e4a0dbf
## 17:07:03 Searching Annoy index using 1 thread, search_k = 3000
## 17:07:04 Annoy recall = 100%
## 17:07:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:07:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:07:06 Commencing optimization for 500 epochs, with 150438 positive edges
## 17:07:09 Optimization finished
DimPlot(mono_focus_alv, reduction = "umap", label=TRUE)

DimPlot(mono_focus_alv, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono_focus_alv, group.by="sample" , reduction = "umap", label=TRUE)

Differential expression

We are going to use muscat for pseudobulk analysis. First need to convert seurat obj to singlecellexperiment object. Then summarise counts to pseudobulk level.

sce <- Seurat::as.SingleCellExperiment(comb, assay = "RNA")

head(colData(sce),2)
## DataFrame with 2 rows and 13 columns
##                            orig.ident nCount_RNA nFeature_RNA
##                              <factor>  <numeric>    <integer>
## mdm_mock1|AAACGAATCACATACG        mac      72710         7097
## mdm_mock1|AAACGCTCATCAGCGC        mac      49092         6276
##                                              cell      sample RNA_snn_res.0.5
##                                       <character> <character>        <factor>
## mdm_mock1|AAACGAATCACATACG mdm_mock1|AAACGAATCA..   mdm_mock1              4 
## mdm_mock1|AAACGCTCATCAGCGC mdm_mock1|AAACGCTCAT..   mdm_mock1              11
##                            seurat_clusters           cell_barcode
##                                   <factor>            <character>
## mdm_mock1|AAACGAATCACATACG              4  mdm_mock1|AAACGAATCA..
## mdm_mock1|AAACGCTCATCAGCGC              11 mdm_mock1|AAACGCTCAT..
##                            monaco_broad_annotation monaco_broad_pruned_labels
##                                        <character>                <character>
## mdm_mock1|AAACGAATCACATACG               Monocytes                  Monocytes
## mdm_mock1|AAACGCTCATCAGCGC               Monocytes                  Monocytes
##                            monaco_fine_annotation monaco_fine_pruned_labels
##                                       <character>               <character>
## mdm_mock1|AAACGAATCACATACG    Classical monocytes       Classical monocytes
## mdm_mock1|AAACGCTCATCAGCGC    Classical monocytes       Classical monocytes
##                               ident
##                            <factor>
## mdm_mock1|AAACGAATCACATACG       4 
## mdm_mock1|AAACGCTCATCAGCGC       11
colnames(colData(sce))
##  [1] "orig.ident"                 "nCount_RNA"                
##  [3] "nFeature_RNA"               "cell"                      
##  [5] "sample"                     "RNA_snn_res.0.5"           
##  [7] "seurat_clusters"            "cell_barcode"              
##  [9] "monaco_broad_annotation"    "monaco_broad_pruned_labels"
## [11] "monaco_fine_annotation"     "monaco_fine_pruned_labels" 
## [13] "ident"
#muscat library
pb <- aggregateData(sce,
    assay = "counts", fun = "sum",
    by = c("monaco_broad_annotation", "sample"))

# one sheet per subpopulation
assayNames(pb)
## [1] "Basophils"       "Dendritic cells" "Monocytes"
t(head(assay(pb)))
##                gene-HIV1-1 gene-HIV1-2 MIR1302-2HG FAM138A OR4F5 AL627309.1
## alv_active1              0           0           0       0     0          0
## alv_active2              0           0           0       0     0          0
## alv_active3              0           0           0       0     0          0
## alv_bystander1           0           0           0       0     0          0
## alv_bystander2           0           0           0       0     0          0
## alv_bystander3           0           0           0       0     0          0
## alv_bystander4           0           0           0       0     0          0
## alv_latent1              0           0           0       0     0          0
## alv_latent2              0           0           0       0     0          0
## alv_latent3              0           0           0       0     0          0
## alv_latent4              0           0           0       0     0          0
## alv_mock1                0           0           0       0     0          0
## alv_mock2                0           0           0       0     0          0
## alv_mock3                0           0           0       0     0          0
## mdm_active1              0           0           0       0     0          0
## mdm_active2              0           0           0       0     0          0
## mdm_active3              0           0           0       0     0          0
## mdm_active4              0           0           0       0     0          0
## mdm_bystander1           0           0           0       0     0          0
## mdm_bystander2           0           0           0       0     0          0
## mdm_bystander3           0           0           0       0     0          0
## mdm_latent1              0           2           0       0     0          0
## mdm_latent2              0           0           0       0     0          0
## mdm_latent3              0           0           0       0     0          0
## mdm_mock1                0           0           0       0     0          0
## mdm_mock2                0           0           0       0     0          0
## mdm_mock3                0           0           0       0     0          0
## mdm_mock4                0           0           0       0     0          0
## react6                   0           0           0       0     0          0
plotMDS(assays(pb)[[3]], main="Monocytes" )

par(mfrow=c(2,3))

dump <- lapply(1:length(assays(pb)) , function(i) {
  cellname=names(assays(pb))[i]
  plotMDS(assays(pb)[[i]],cex=1,pch=19,main=paste(cellname),labels=colnames(assays(pb)[[1]]))
})
## Warning in plotMDS.default(assays(pb)[[i]], cex = 1, pch = 19, main =
## paste(cellname), : dimension 2 is degenerate or all zero
par(mfrow=c(1,1))

DE Prep

pbmono <- assays(pb)[["Monocytes"]]

head(pbmono)
##             alv_active1 alv_active2 alv_active3 alv_bystander1 alv_bystander2
## gene-HIV1-1       28838       20513       13724              0              0
## gene-HIV1-2      111930      102597       94165              0              0
## MIR1302-2HG           0           0           0              0              0
## FAM138A               0           0           0              0              0
## OR4F5                 0           0           0              0              0
## AL627309.1           15          11          26             19             38
##             alv_bystander3 alv_bystander4 alv_latent1 alv_latent2 alv_latent3
## gene-HIV1-1              0              0         367        1745        1615
## gene-HIV1-2              0              0        1898       11401       12766
## MIR1302-2HG              0              0           0           1           0
## FAM138A                  0              0           0           0           0
## OR4F5                    0              0           0           0           0
## AL627309.1               7             13           6          65          56
##             alv_latent4 alv_mock1 alv_mock2 alv_mock3 mdm_active1 mdm_active2
## gene-HIV1-1        2124        90       111      1008       10808        7633
## gene-HIV1-2       22066       708      1514      6265       60823       40691
## MIR1302-2HG           0         0         0         0           0           0
## FAM138A               0         0         0         0           0           0
## OR4F5                 0         0         0         0           0           0
## AL627309.1           68        45        24        45          48          35
##             mdm_active3 mdm_active4 mdm_bystander1 mdm_bystander2
## gene-HIV1-1        4822       11786              0              0
## gene-HIV1-2       68090       34792              0              0
## MIR1302-2HG           0           0              0              0
## FAM138A               0           0              0              0
## OR4F5                 0           0              0              0
## AL627309.1           28           5             59             49
##             mdm_bystander3 mdm_latent1 mdm_latent2 mdm_latent3 mdm_mock1
## gene-HIV1-1              0         602         764         131        25
## gene-HIV1-2              6        4970        5987        1990       443
## MIR1302-2HG              0           0           0           0         0
## FAM138A                  0           0           0           0         0
## OR4F5                    0           0           0           0         0
## AL627309.1              18          67          73          24        61
##             mdm_mock2 mdm_mock3 mdm_mock4 react6
## gene-HIV1-1        64         1       141    890
## gene-HIV1-2       460        80      1068   7290
## MIR1302-2HG         0         0         0      0
## FAM138A             0         0         0      0
## OR4F5               0         0         0      0
## AL627309.1         23         5        15     37
dim(pbmono)
## [1] 36603    29
hiv <- pbmono[1:2,]
pbmono <- pbmono[3:nrow(pbmono),]

Gene sets

Gene ontology.

#go <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")
#names(go) <- gsub("_"," ",names(go))

#wget https://ziemann-lab.net/public/tmp/go_2024-11.gmt
go <- gmt_import("go_2024-11.gmt")

DE1 Latently- vs productively-infected cells (groups 3 vs 4).

MDM group.

pbmdm <- pbmono[,grep("mdm",colnames(pbmono))]

pb1m <- pbmdm[,c(grep("active",colnames(pbmdm)),grep("latent",colnames(pbmdm)))]

head(pb1m)
##             mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_latent1
## MIR1302-2HG           0           0           0           0           0
## FAM138A               0           0           0           0           0
## OR4F5                 0           0           0           0           0
## AL627309.1           48          35          28           5          67
## AL627309.3            0           2           0           0           0
## AL627309.2            0           0           0           0           0
##             mdm_latent2 mdm_latent3
## MIR1302-2HG           0           0
## FAM138A               0           0
## OR4F5                 0           0
## AL627309.1           73          24
## AL627309.3            0           2
## AL627309.2            0           0
pb1mf <- pb1m[which(rowMeans(pb1m)>=10),]
head(pb1mf)
##            mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_latent1
## AL627309.1          48          35          28           5          67
## AL627309.5          13          14          13           6          25
## LINC01409          121         113         103          27         145
## LINC01128          259         159         165          94         203
## LINC00115           22           6           7           4          23
## FAM41C              49          39          59          68          62
##            mdm_latent2 mdm_latent3
## AL627309.1          73          24
## AL627309.5          43           6
## LINC01409          195          56
## LINC01128          184          87
## LINC00115           10           4
## FAM41C              49          21
colSums(pb1mf)
## mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_latent1 mdm_latent2 
##    29207487    22179088    24817660    13781693    33350019    35746225 
## mdm_latent3 
##    13042298
des1m <- as.data.frame(grepl("active",colnames(pb1mf)))
colnames(des1m) <- "case"

plot(cmdscale(dist(t(pb1mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb1mf))), labels=colnames(pb1mf) )

dds <- DESeqDataSetFromMatrix(countData = pb1mf , colData = des1m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1mf <- de
write.table(de1mf,"de1mf.tsv",sep="\t")

nrow(subset(de1mf,padj<0.05 & log2FoldChange>0))
## [1] 89
nrow(subset(de1mf,padj<0.05 & log2FoldChange<0))
## [1] 220
head(subset(de1mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active MDM cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells
baseMean log2FoldChange lfcSE stat pvalue padj
LRRC25 376.06550 1.2103732 0.2157952 5.608898 0.0e+00 0.0000188
DIRC3 341.57339 1.3734214 0.2511003 5.469613 0.0e+00 0.0000352
NMRK2 1177.72772 1.7648566 0.3437406 5.134268 3.0e-07 0.0001400
PHACTR1 346.19868 1.4751322 0.2893149 5.098708 3.0e-07 0.0001626
IL7R 18.00880 4.2794300 0.8649131 4.947815 8.0e-07 0.0002782
HBEGF 392.47689 0.8652375 0.1780231 4.860253 1.2e-06 0.0003779
IL6R-AS1 75.84533 2.5623385 0.5471857 4.682758 2.8e-06 0.0007631
DRAXIN 463.78427 0.9414512 0.2029507 4.638817 3.5e-06 0.0008920
NDFIP2 259.80791 0.8854957 0.1943690 4.555745 5.2e-06 0.0011909
UBE2J1 2719.20226 0.4593819 0.1018937 4.508441 6.5e-06 0.0014673
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in active MDM cells") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells
baseMean log2FoldChange lfcSE stat pvalue padj
TGFBI 427.57748 -2.5264116 0.3051821 -8.278375 0 0.0e+00
RCBTB2 750.28583 -1.4606374 0.1956971 -7.463768 0 0.0e+00
AL031123.1 486.37075 -0.9808341 0.1378591 -7.114757 0 0.0e+00
HIST1H1C 215.54194 -2.1622778 0.3303384 -6.545645 0 2.0e-07
PTGDS 27902.53523 -2.3314964 0.3653605 -6.381359 0 5.0e-07
EVI2A 537.77927 -1.1520692 0.1808678 -6.369675 0 5.0e-07
MAP1A 91.84906 -2.0493382 0.3350264 -6.116945 0 2.0e-06
FLRT2 269.01384 -1.8412264 0.3060105 -6.016873 0 3.2e-06
CFD 2922.32979 -1.5877729 0.2645314 -6.002209 0 3.2e-06
LY6E 2918.79121 -0.9011246 0.1535698 -5.867850 0 6.5e-06
m1m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14859
## Note: no. genes in output = 14859
## Note: estimated proportion of input genes in output = 1
mres1m <- mitch_calc(m1m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres1m$enrichment_result
mitchtbl <- mres1m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de1mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_latent_vs_active.html") ) {
  mitch_report(mres1m,outfile="MDM_latent_vs_active.html")
}

Alv cells.

pbalv <- pbmono[,grep("alv",colnames(pbmono))]

pb1a <- pbalv[,c(grep("active",colnames(pbalv)),grep("latent",colnames(pbalv)))]
head(pb1a)
##             alv_active1 alv_active2 alv_active3 alv_latent1 alv_latent2
## MIR1302-2HG           0           0           0           0           1
## FAM138A               0           0           0           0           0
## OR4F5                 0           0           0           0           0
## AL627309.1           15          11          26           6          65
## AL627309.3            0           0           0           0           1
## AL627309.2            0           0           0           0           0
##             alv_latent3 alv_latent4
## MIR1302-2HG           0           0
## FAM138A               0           0
## OR4F5                 0           0
## AL627309.1           56          68
## AL627309.3            1           0
## AL627309.2            0           1
pb1af <- pb1a[which(rowMeans(pb1a)>=10),]
head(pb1af)
##            alv_active1 alv_active2 alv_active3 alv_latent1 alv_latent2
## AL627309.1          15          11          26           6          65
## AL627309.5          25          23          14          18         107
## LINC01409           87         150          87          34         133
## LINC01128          289         284         211         101         298
## LINC00115           10           7          10           5          14
## FAM41C             135         117          82          80         123
##            alv_latent3 alv_latent4
## AL627309.1          56          68
## AL627309.5          85          59
## LINC01409          297         182
## LINC01128          407         331
## LINC00115           27          21
## FAM41C             129         147
colSums(pb1af)
## alv_active1 alv_active2 alv_active3 alv_latent1 alv_latent2 alv_latent3 
##    29430670    28094912    22989978    16380939    43027634    54998948 
## alv_latent4 
##    49705299
des1a <- as.data.frame(grepl("active",colnames(pb1af)))
colnames(des1a) <- "case"

plot(cmdscale(dist(t(pb1af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb1af))), labels=colnames(pb1af) )

dds <- DESeqDataSetFromMatrix(countData = pb1af , colData = des1a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1af <- de
write.table(de1af,"de1af.tsv",sep="\t")

nrow(subset(de1af,padj<0.05 & log2FoldChange>0))
## [1] 350
nrow(subset(de1af,padj<0.05 & log2FoldChange<0))
## [1] 257
head(subset(de1af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
IL6R-AS1 491.32925 3.344759 0.3087344 10.833775 0 0
AL157912.1 865.11530 2.268564 0.2143187 10.585006 0 0
LAYN 639.72954 1.952185 0.1859796 10.496773 0 0
DNAJA4 333.37407 1.595802 0.1668874 9.562148 0 0
AC108749.1 68.12540 3.498804 0.3740091 9.354863 0 0
CDKN1A 6259.89208 1.349454 0.1442885 9.352476 0 0
TNFSF15 499.63337 2.273759 0.2548087 8.923393 0 0
TCTEX1D2 5734.73805 1.302629 0.1532160 8.501910 0 0
IGFL2 134.40594 4.192072 0.4956321 8.458032 0 0
CLIC6 75.46614 2.646968 0.3163931 8.366072 0 0
head(subset(de1af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
CEBPD 925.87893 -1.5642990 0.1616091 -9.679522 0 0e+00
LYZ 57017.41162 -1.1376232 0.1239515 -9.177970 0 0e+00
H1FX 1134.47886 -1.2308466 0.1577790 -7.801080 0 0e+00
JAKMIP2 74.14276 -3.9817329 0.5563480 -7.156911 0 0e+00
P2RY11 196.59218 -1.2443256 0.1757505 -7.080068 0 0e+00
IFNGR2 2528.34676 -0.6638474 0.0970826 -6.837965 0 0e+00
CSF1R 636.85186 -1.7077317 0.2544453 -6.711587 0 0e+00
SLCO3A1 239.52416 -1.7426455 0.2747200 -6.343351 0 1e-07
GCA 1459.56587 -0.9266230 0.1486595 -6.233192 0 2e-07
CD44 5307.08067 -0.8194619 0.1321098 -6.202884 0 2e-07
m1a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16425
## Note: no. genes in output = 16425
## Note: estimated proportion of input genes in output = 1
mres1a <- mitch_calc(m1a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres1a$enrichment_result

mitchtbl <- mres1a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de1af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_latent_vs_active.html") ) {
  mitch_report(mres1a,outfile="Alv_latent_vs_active.html")
}

Combined.

mm1 <- merge(m1a,m1m,by=0)

head(mm1)
##   Row.names        x.x         x.y
## 1      A1BG  1.0715379  0.01769167
## 2  A1BG-AS1 -0.6640405 -0.71513470
## 3       A2M  0.5885570 -0.44983749
## 4   A2M-AS1 -0.3249521 -1.19424997
## 5 A2ML1-AS1 -1.6623791  0.58481298
## 6    A4GALT  3.8039819 -0.78661598
rownames(mm1) <- mm1[,1]
mm1[,1]=NULL
colnames(mm1) <- c("Alv","MDM")
plot(mm1)
mylm <- lm(mm1)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9174 -0.8111 -0.0748  0.7329 10.0974 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.031800   0.010979  -2.896  0.00378 ** 
## MDM          0.453210   0.008187  55.356  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.327 on 14615 degrees of freedom
## Multiple R-squared:  0.1733, Adjusted R-squared:  0.1733 
## F-statistic:  3064 on 1 and 14615 DF,  p-value: < 2.2e-16
cor.test(mm1$Alv,mm1$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm1$Alv and mm1$MDM
## t = 55.356, df = 14615, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4028310 0.4296357
## sample estimates:
##       cor 
## 0.4163238
mm1r <- as.data.frame(apply(mm1,2,rank))
plot(mm1r,cex=0.3)
mylm <- lm(mm1r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm1r)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9848.5 -3186.0   -50.8  3162.3  9861.1 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.413e+03  6.410e+01   68.85   <2e-16 ***
## MDM         3.963e-01  7.595e-03   52.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3874 on 14615 degrees of freedom
## Multiple R-squared:  0.157,  Adjusted R-squared:  0.157 
## F-statistic:  2722 on 1 and 14615 DF,  p-value: < 2.2e-16
cor.test(mm1r$Alv,mm1r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm1r$Alv and mm1r$MDM
## t = 52.177, df = 14615, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3825092 0.4098423
## sample estimates:
##       cor 
## 0.3962636

DE2 Latently-infected vs bystander cells

MDM group.

pb2m <- pbmdm[,c(grep("bystander",colnames(pbmdm)),grep("latent",colnames(pbmdm)))]
head(pb2m)
##             mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_latent1
## MIR1302-2HG              0              0              0           0
## FAM138A                  0              0              0           0
## OR4F5                    0              0              0           0
## AL627309.1              59             49             18          67
## AL627309.3               2              4              0           0
## AL627309.2               0              0              0           0
##             mdm_latent2 mdm_latent3
## MIR1302-2HG           0           0
## FAM138A               0           0
## OR4F5                 0           0
## AL627309.1           73          24
## AL627309.3            0           2
## AL627309.2            0           0
pb2mf <- pb2m[which(rowMeans(pb2m)>=10),]
head(pb2mf)
##            mdm_bystander1 mdm_bystander2 mdm_bystander3 mdm_latent1 mdm_latent2
## AL627309.1             59             49             18          67          73
## AL627309.5             29             28              8          25          43
## LINC01409             131            225             56         145         195
## LINC01128             210            192            104         203         184
## LINC00115              18             19              7          23          10
## FAM41C                 42             64             19          62          49
##            mdm_latent3
## AL627309.1          24
## AL627309.5           6
## LINC01409           56
## LINC01128           87
## LINC00115            4
## FAM41C              21
colSums(pb2mf)
## mdm_bystander1 mdm_bystander2 mdm_bystander3    mdm_latent1    mdm_latent2 
##       38884796       34027074       14640993       33353223       35750602 
##    mdm_latent3 
##       13043966
des2m <- as.data.frame(grepl("latent",colnames(pb2mf)))
colnames(des2m) <- "case"

plot(cmdscale(dist(t(pb2mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb2mf))), labels=colnames(pb2mf) )

dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
#write.table(de2mf,"de2mf.tsv",sep="\t")

nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (MDM unpaired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
PAEP 8.142811 3.5046021 1.2046316 2.909273 0.0036227 0.9999225
AC015660.1 12.234620 1.9102870 0.7055835 2.707386 0.0067815 0.9999225
TEX9 32.173310 1.1304885 0.4408795 2.564166 0.0103424 0.9999225
ZFPM2 714.918493 0.9688775 0.3860336 2.509827 0.0120790 0.9999225
MAP1A 151.056128 0.5811599 0.2388178 2.433487 0.0149542 0.9999225
ZFPM2-AS1 11.061917 1.8221405 0.7624686 2.389791 0.0168580 0.9999225
GRIP1 17.564590 1.3076447 0.5736850 2.279377 0.0226446 0.9999225
AL603840.1 59.855630 0.6644743 0.2942069 2.258528 0.0239128 0.9999225
NEO1 22.047207 1.2171485 0.5540317 2.196893 0.0280281 0.9999225
ADNP-AS1 33.148424 0.8628219 0.3965298 2.175932 0.0295603 0.9999225
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (MDM unpaired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
PLCB1 84.972431 -2.3446731 0.9129682 -2.568187 0.0102232 0.9999225
HSPA5 3017.869830 -0.5715170 0.2235749 -2.556266 0.0105802 0.9999225
ZNF821 211.115855 -0.3987169 0.1774786 -2.246563 0.0246680 0.9999225
ZNF93 218.660442 -0.4681573 0.2107928 -2.220935 0.0263553 0.9999225
AKR1C3 47.731379 -0.7565375 0.3411221 -2.217791 0.0265691 0.9999225
HAUS7 16.611553 -1.1304070 0.5396434 -2.094730 0.0361950 0.9999225
SHF 9.172784 -1.8387071 0.9124940 -2.015035 0.0439010 0.9999225
SYN2 12.140393 -1.3633075 0.6941905 -1.963881 0.0495439 0.9999225
AC090510.1 17.147784 -1.0975340 0.5672766 -1.934742 0.0530219 0.9999225
C4orf36 22.911472 -0.8492655 0.4404656 -1.928109 0.0538416 0.9999225
des2m$sample <- rep(1:3,2)

dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ sample + case)
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
write.table(de2mf,"de2mf.tsv",sep="\t")

nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (MDM paired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM paired)
baseMean log2FoldChange lfcSE stat pvalue padj
IL1RN 245.661464 0.7006479 0.2011075 3.483948 0.0004941 0.9998781
PAEP 8.142811 3.6482166 1.0524389 3.466440 0.0005274 0.9998781
ZFPM2 714.918493 0.9963654 0.3278453 3.039133 0.0023726 0.9998781
MAP1A 151.056128 0.5725643 0.2010013 2.848561 0.0043917 0.9998781
IGFBP6 146.951685 0.5889533 0.2142868 2.748435 0.0059881 0.9998781
AC015660.1 12.234620 1.8678656 0.7222682 2.586111 0.0097066 0.9998781
NEO1 22.047207 1.2595548 0.4935045 2.552266 0.0107025 0.9998781
TEX9 32.173310 1.1370826 0.4640502 2.450344 0.0142720 0.9998781
CCSER1 157.257924 0.5490120 0.2245204 2.445266 0.0144745 0.9998781
FAM229B 35.971998 0.8916187 0.3800095 2.346306 0.0189605 0.9998781
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (MDM paired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM paired)
baseMean log2FoldChange lfcSE stat pvalue padj
HSPA5 3017.86983 -0.5467787 0.2013490 -2.715578 0.0066160 0.9998781
ZNF93 218.66044 -0.4662445 0.2020375 -2.307713 0.0210151 0.9998781
ZNF821 211.11586 -0.3975298 0.1764294 -2.253195 0.0242468 0.9998781
VSIG4 125.95653 -0.5046837 0.2352123 -2.145652 0.0319008 0.9998781
SOWAHD 231.48704 -0.3932856 0.1833638 -2.144838 0.0319658 0.9998781
AKR1C3 47.73138 -0.7523278 0.3575641 -2.104036 0.0353753 0.9998781
ACTG1 16412.87343 -0.2656931 0.1321463 -2.010598 0.0443679 0.9998781
CYTL1 692.79779 -0.4923319 0.2469275 -1.993832 0.0461704 0.9998781
HAUS7 16.61155 -1.1216075 0.5651186 -1.984729 0.0471746 0.9998781
SYN2 12.14039 -1.4392474 0.7369530 -1.952970 0.0508231 0.9998781
m2m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15098
## Note: no. genes in output = 15098
## Note: estimated proportion of input genes in output = 1
mres2m <- mitch_calc(m2m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres2m$enrichment_result

mitchtbl <- mres2m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de2mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_bystander_vs_latent.html") ) {
  mitch_report(mres2m,outfile="MDM_bystander_vs_latent.html")
}

Alv cells.

pb2a <- pbalv[,c(grep("bystander",colnames(pbalv)),grep("latent",colnames(pbalv)))]
head(pb2a)
##             alv_bystander1 alv_bystander2 alv_bystander3 alv_bystander4
## MIR1302-2HG              0              0              0              0
## FAM138A                  0              0              0              0
## OR4F5                    0              0              0              0
## AL627309.1              19             38              7             13
## AL627309.3               0              1              0              0
## AL627309.2               0              0              0              0
##             alv_latent1 alv_latent2 alv_latent3 alv_latent4
## MIR1302-2HG           0           1           0           0
## FAM138A               0           0           0           0
## OR4F5                 0           0           0           0
## AL627309.1            6          65          56          68
## AL627309.3            0           1           1           0
## AL627309.2            0           0           0           1
pb2af <- pb2a[which(rowMeans(pb2a)>=10),]
head(pb2af)
##            alv_bystander1 alv_bystander2 alv_bystander3 alv_bystander4
## AL627309.1             19             38              7             13
## AL627309.5             27             51             21              9
## LINC01409              57             83             72             67
## LINC01128             141            161            108             97
## LINC00115               1              7              5              2
## FAM41C                 81             59             27             33
##            alv_latent1 alv_latent2 alv_latent3 alv_latent4
## AL627309.1           6          65          56          68
## AL627309.5          18         107          85          59
## LINC01409           34         133         297         182
## LINC01128          101         298         407         331
## LINC00115            5          14          27          21
## FAM41C              80         123         129         147
colSums(pb2af)
## alv_bystander1 alv_bystander2 alv_bystander3 alv_bystander4    alv_latent1 
##       20415741       22067093       13619223       12645752       16378672 
##    alv_latent2    alv_latent3    alv_latent4 
##       43018570       54991310       49697837
des2a <- as.data.frame(grepl("latent",colnames(pb2af)))
colnames(des2a) <- "case"

plot(cmdscale(dist(t(pb2af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb2af))), labels=colnames(pb2af) )

dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
#write.table(de2af,"de2af.tsv",sep="\t")

nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (Alv unpaired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
IGFL2 6.632043 4.1056754 1.1478266 3.576913 0.0003477 0.9999718
IL6R-AS1 58.748677 0.9916797 0.3091276 3.207995 0.0013366 0.9999718
GUCY1A2 56.302051 1.1278567 0.3890713 2.898843 0.0037454 0.9999718
CLIC6 12.035415 1.4253463 0.5440537 2.619863 0.0087965 0.9999718
IL7R 44.955704 1.0508460 0.4643669 2.262965 0.0236379 0.9999718
AC131254.1 20.344522 1.5108743 0.6849120 2.205939 0.0273882 0.9999718
CCL17 34.450952 1.5282159 0.6990779 2.186045 0.0288123 0.9999718
CXCL10 28.716464 4.3734495 2.0056611 2.180553 0.0292165 0.9999718
AC113404.1 13.458177 1.8403490 0.8516291 2.160975 0.0306973 0.9999718
TAFA4 17.737913 1.3604113 0.6469488 2.102811 0.0354823 0.9999718
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (Alv unpaired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
PBK 15.634622 -1.5504792 0.5202383 -2.980325 0.0028794 0.9999718
AL162586.1 22.745031 -0.8260107 0.3491440 -2.365817 0.0179903 0.9999718
CENPF 109.660073 -1.0278008 0.4516407 -2.275705 0.0228637 0.9999718
TOP2A 99.850934 -0.9894668 0.4414743 -2.241278 0.0250080 0.9999718
GTSE1 39.800778 -0.9582828 0.4581620 -2.091581 0.0364760 0.9999718
ASPM 52.260165 -1.1192018 0.5399715 -2.072706 0.0381997 0.9999718
CCNA2 54.235046 -0.7795141 0.3782414 -2.060891 0.0393135 0.9999718
HJURP 12.728176 -1.4717877 0.7421919 -1.983028 0.0473643 0.9999718
CEP55 66.592260 -0.6920150 0.3555615 -1.946260 0.0516236 0.9999718
AC016590.1 9.953336 -1.0063285 0.5255072 -1.914966 0.0554968 0.9999718
des2a$sample <- rep(1:4,2)

dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
write.table(de2af,"de2af.tsv",sep="\t")

nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (Alv paired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv paired)
baseMean log2FoldChange lfcSE stat pvalue padj
IGFL2 6.632043 4.1056754 1.1478266 3.576913 0.0003477 0.9999718
IL6R-AS1 58.748677 0.9916797 0.3091276 3.207995 0.0013366 0.9999718
GUCY1A2 56.302051 1.1278567 0.3890713 2.898843 0.0037454 0.9999718
CLIC6 12.035415 1.4253463 0.5440537 2.619863 0.0087965 0.9999718
IL7R 44.955704 1.0508460 0.4643669 2.262965 0.0236379 0.9999718
AC131254.1 20.344522 1.5108743 0.6849120 2.205939 0.0273882 0.9999718
CCL17 34.450952 1.5282159 0.6990779 2.186045 0.0288123 0.9999718
CXCL10 28.716464 4.3734495 2.0056611 2.180553 0.0292165 0.9999718
AC113404.1 13.458177 1.8403490 0.8516291 2.160975 0.0306973 0.9999718
TAFA4 17.737913 1.3604113 0.6469488 2.102811 0.0354823 0.9999718
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (Alv paired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv paired)
baseMean log2FoldChange lfcSE stat pvalue padj
PBK 15.634622 -1.5504792 0.5202383 -2.980325 0.0028794 0.9999718
AL162586.1 22.745031 -0.8260107 0.3491440 -2.365817 0.0179903 0.9999718
CENPF 109.660073 -1.0278008 0.4516407 -2.275705 0.0228637 0.9999718
TOP2A 99.850934 -0.9894668 0.4414743 -2.241278 0.0250080 0.9999718
GTSE1 39.800778 -0.9582828 0.4581620 -2.091581 0.0364760 0.9999718
ASPM 52.260165 -1.1192018 0.5399715 -2.072706 0.0381997 0.9999718
CCNA2 54.235046 -0.7795141 0.3782414 -2.060891 0.0393135 0.9999718
HJURP 12.728176 -1.4717877 0.7421919 -1.983028 0.0473643 0.9999718
CEP55 66.592260 -0.6920150 0.3555615 -1.946260 0.0516236 0.9999718
AC016590.1 9.953336 -1.0063285 0.5255072 -1.914966 0.0554968 0.9999718
m2a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15873
## Note: no. genes in output = 15873
## Note: estimated proportion of input genes in output = 1
mres2a <- mitch_calc(m2a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres2a$enrichment_result

mitchtbl <- mres2a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de2af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_bystander_vs_latent.html") ) {
  mitch_report(mres2a,outfile="Alv_bystander_vs_latent.html")
}

Combined.

mm2 <- merge(m2a,m2m,by=0)

head(mm2)
##   Row.names         x.x         x.y
## 1      A1BG  0.12895812 -0.27310166
## 2  A1BG-AS1  0.52559940  0.28170815
## 3       A2M -0.29067901  0.56977979
## 4   A2M-AS1  0.21293797 -0.14017269
## 5 A2ML1-AS1 -0.68017727 -0.02237059
## 6      AAAS -0.08413218 -0.36476533
rownames(mm2) <- mm2[,1]
mm2[,1]=NULL
colnames(mm2) <- c("Alv","MDM")
plot(mm2)
mylm <- lm(mm2)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3772 -0.2711 -0.0083  0.2687  3.5575 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.007530   0.003892  -1.935    0.053 .  
## MDM          0.096613   0.006503  14.856   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4706 on 14627 degrees of freedom
## Multiple R-squared:  0.01486,    Adjusted R-squared:  0.0148 
## F-statistic: 220.7 on 1 and 14627 DF,  p-value: < 2.2e-16
cor.test(mm2$Alv,mm2$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm2$Alv and mm2$MDM
## t = 14.856, df = 14627, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1059234 0.1378517
## sample estimates:
##       cor 
## 0.1219191
mm2r <- as.data.frame(apply(mm2,2,rank))
plot(mm2r,cex=0.3)
mylm <- lm(mm2r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm2r)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8204.4 -3583.2    39.8  3588.0  8168.1 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6374.1918    69.2591   92.03   <2e-16 ***
## MDM            0.1286     0.0082   15.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4188 on 14627 degrees of freedom
## Multiple R-squared:  0.01654,    Adjusted R-squared:  0.01647 
## F-statistic:   246 on 1 and 14627 DF,  p-value: < 2.2e-16
cor.test(mm2r$Alv,mm2r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm2r$Alv and mm2r$MDM
## t = 15.685, df = 14627, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1126434 0.1445173
## sample estimates:
##       cor 
## 0.1286136

DE3 Active vs mock

MDM group.

pb3m <- pbmdm[,c(grep("active",colnames(pbmdm)),grep("mock",colnames(pbmdm)))]

head(pb3m)
##             mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_mock1 mdm_mock2
## MIR1302-2HG           0           0           0           0         0         0
## FAM138A               0           0           0           0         0         0
## OR4F5                 0           0           0           0         0         0
## AL627309.1           48          35          28           5        61        23
## AL627309.3            0           2           0           0         0         0
## AL627309.2            0           0           0           0         0         0
##             mdm_mock3 mdm_mock4
## MIR1302-2HG         0         0
## FAM138A             0         0
## OR4F5               0         0
## AL627309.1          5        15
## AL627309.3          0         0
## AL627309.2          0         0
pb3mf <- pb3m[which(rowMeans(pb3m)>=10),]
head(pb3mf)
##            mdm_active1 mdm_active2 mdm_active3 mdm_active4 mdm_mock1 mdm_mock2
## AL627309.1          48          35          28           5        61        23
## AL627309.5          13          14          13           6        17        23
## LINC01409          121         113         103          27       116        97
## LINC01128          259         159         165          94       171       106
## FAM41C              49          39          59          68        61        25
## NOC2L              176         124         237         155       182       140
##            mdm_mock3 mdm_mock4
## AL627309.1         5        15
## AL627309.5         0        16
## LINC01409         25        57
## LINC01128         50       159
## FAM41C            14        84
## NOC2L             68       218
colSums(pb3mf)
## mdm_active1 mdm_active2 mdm_active3 mdm_active4   mdm_mock1   mdm_mock2 
##    29202422    22174759    24813970    13779958    28410039    19626111 
##   mdm_mock3   mdm_mock4 
##     6592887    20575471
des3m <- as.data.frame(grepl("active",colnames(pb3mf)))
colnames(des3m) <- "case"

plot(cmdscale(dist(t(pb3mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb3mf))), labels=colnames(pb3mf) )

dds <- DESeqDataSetFromMatrix(countData = pb3mf , colData = des3m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3mf <- de
write.table(de3mf,"de3mf.tsv",sep="\t")

nrow(subset(de3mf,padj<0.05 & log2FoldChange>0))
## [1] 96
nrow(subset(de3mf,padj<0.05 & log2FoldChange<0))
## [1] 224
head(subset(de3mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
PLAAT3 34.45790 2.5809846 0.4369180 5.907252 0.0e+00 0.0000040
LYRM1 202.20832 0.8989158 0.1544503 5.820098 0.0e+00 0.0000063
CDKN1A 2523.16617 1.0940198 0.1980369 5.524322 0.0e+00 0.0000220
KCNMB2-AS1 201.09253 1.5269176 0.2803037 5.447369 1.0e-07 0.0000301
GRIP1 32.27753 2.1193454 0.3915092 5.413271 1.0e-07 0.0000344
FAS 171.68202 1.0034999 0.1929009 5.202153 2.0e-07 0.0000944
DDB2 451.12209 0.6768164 0.1304074 5.190015 2.0e-07 0.0000974
MDM2 2838.19083 1.0523548 0.2124303 4.953882 7.0e-07 0.0002465
FAM241B 95.44460 1.4332204 0.2908356 4.927941 8.0e-07 0.0002685
EEF1E1 735.51247 0.7374655 0.1553766 4.746311 2.1e-06 0.0005643
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in active MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
TGFBI 427.57748 -2.5264116 0.3051821 -8.278375 0 0.0e+00
RCBTB2 750.28583 -1.4606374 0.1956971 -7.463768 0 0.0e+00
AL031123.1 486.37075 -0.9808341 0.1378591 -7.114757 0 0.0e+00
HIST1H1C 215.54194 -2.1622778 0.3303384 -6.545645 0 2.0e-07
PTGDS 27902.53523 -2.3314964 0.3653605 -6.381359 0 5.0e-07
EVI2A 537.77927 -1.1520692 0.1808678 -6.369675 0 5.0e-07
MAP1A 91.84906 -2.0493382 0.3350264 -6.116945 0 2.0e-06
FLRT2 269.01384 -1.8412264 0.3060105 -6.016873 0 3.2e-06
CFD 2922.32979 -1.5877729 0.2645314 -6.002209 0 3.2e-06
LY6E 2918.79121 -0.9011246 0.1535698 -5.867850 0 6.5e-06
m3m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14499
## Note: no. genes in output = 14499
## Note: estimated proportion of input genes in output = 1
mres3m <- mitch_calc(m3m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres3m$enrichment_result

mitchtbl <- mres3m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de3mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_mock_vs_active.html") ) {
  mitch_report(mres3m,outfile="MDM_mock_vs_active.html")
}
pb3a <- pbalv[,c(grep("active",colnames(pbalv)),grep("mock",colnames(pbalv)))]

head(pb3a)
##             alv_active1 alv_active2 alv_active3 alv_mock1 alv_mock2 alv_mock3
## MIR1302-2HG           0           0           0         0         0         0
## FAM138A               0           0           0         0         0         0
## OR4F5                 0           0           0         0         0         0
## AL627309.1           15          11          26        45        24        45
## AL627309.3            0           0           0         0         0         1
## AL627309.2            0           0           0         0         0         0
pb3af <- pb3a[which(rowMeans(pb3a)>=10),]
head(pb3af)
##            alv_active1 alv_active2 alv_active3 alv_mock1 alv_mock2 alv_mock3
## AL627309.1          15          11          26        45        24        45
## AL627309.5          25          23          14        62        27        42
## LINC01409           87         150          87        59       103       134
## LINC01128          289         284         211       173       193       234
## LINC00115           10           7          10        10        11        14
## FAM41C             135         117          82        72        49        91
colSums(pb3af)
## alv_active1 alv_active2 alv_active3   alv_mock1   alv_mock2   alv_mock3 
##    29422044    28089020    22984945    20040381    24104137    31819751
des3a <- as.data.frame(grepl("active",colnames(pb3af)))
colnames(des3a) <- "case"

plot(cmdscale(dist(t(pb3af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb3af))), labels=colnames(pb3af) )

dds <- DESeqDataSetFromMatrix(countData = pb3af , colData = des3a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3af <- de
write.table(de3af,"de3af.tsv",sep="\t")

nrow(subset(de3af,padj<0.05 & log2FoldChange>0))
## [1] 859
nrow(subset(de3af,padj<0.05 & log2FoldChange<0))
## [1] 639
head(subset(de3af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
AL157912.1 737.16124 2.712744 0.2033799 13.338311 0 0
HES4 373.42203 2.357157 0.2049170 11.502981 0 0
LAYN 542.66388 2.244575 0.1979330 11.340071 0 0
OCIAD2 345.32925 2.465190 0.2268308 10.867971 0 0
IL6R-AS1 446.10274 3.493167 0.3283356 10.639014 0 0
SDS 4593.69204 2.114977 0.1991786 10.618494 0 0
CDKN1A 5211.31545 1.538069 0.1454253 10.576344 0 0
TNFRSF9 166.67521 2.542923 0.2518094 10.098601 0 0
CCL7 1420.32515 1.931993 0.1932949 9.995052 0 0
CLIC6 62.56654 4.049949 0.4414408 9.174388 0 0
head(subset(de3af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
HIST1H1C 774.7090 -1.1801306 0.1107571 -10.655120 0 0
NDRG2 308.8194 -1.8303366 0.1740395 -10.516789 0 0
ADA2 1740.2985 -1.0574528 0.1143291 -9.249198 0 0
CEBPD 672.1239 -1.4911151 0.1694941 -8.797445 0 0
RPL10A 7954.6715 -0.8835448 0.1026393 -8.608253 0 0
CRIM1 372.3420 -1.6759023 0.1956833 -8.564362 0 0
LYZ 44438.4426 -1.1703227 0.1411348 -8.292232 0 0
TGFBI 385.9754 -2.3296299 0.2812928 -8.281869 0 0
CHST13 449.4602 -1.6366874 0.2065248 -7.924895 0 0
H1FX 850.3073 -1.1913259 0.1552444 -7.673875 0 0
m3a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15644
## Note: no. genes in output = 15644
## Note: estimated proportion of input genes in output = 1
mres3a <- mitch_calc(m3a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres3a$enrichment_result

mitchtbl <- mres3a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de3af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_mock_vs_active.html") ) {
  mitch_report(mres3a,outfile="Alv_mock_vs_active.html")
}

Combined.

mm3 <- merge(m3a,m3m,by=0)

head(mm3)
##   Row.names        x.x         x.y
## 1      A1BG  1.2332385  0.35513074
## 2  A1BG-AS1 -0.7257115 -1.19099078
## 3       A2M -1.1388941 -0.37744176
## 4 A2ML1-AS1 -0.5230821  0.35501285
## 5    A4GALT  3.1488900  0.07724861
## 6      AAAS  0.8163738 -0.38369064
rownames(mm3) <- mm3[,1]
mm3[,1]=NULL
colnames(mm3) <- c("Alv","MDM")
plot(mm3)
mylm <- lm(mm3)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1071 -0.8756 -0.1002  0.7711 11.8659 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.078801   0.011877   6.635 3.37e-11 ***
## MDM         0.821568   0.009078  90.504  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.418 on 14248 degrees of freedom
## Multiple R-squared:  0.365,  Adjusted R-squared:  0.365 
## F-statistic:  8191 on 1 and 14248 DF,  p-value: < 2.2e-16
cor.test(mm3$Alv,mm3$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm3$Alv and mm3$MDM
## t = 90.504, df = 14248, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5936486 0.6145018
## sample estimates:
##       cor 
## 0.6041786
mm3r <- as.data.frame(apply(mm3,2,rank))
plot(mm3r,cex=0.3)
mylm <- lm(mm3r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm3r)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9985.8 -2516.6  -109.9  2447.5 10921.5 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.845e+03  5.510e+01   51.62   <2e-16 ***
## MDM         6.008e-01  6.697e-03   89.70   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3289 on 14248 degrees of freedom
## Multiple R-squared:  0.3609, Adjusted R-squared:  0.3609 
## F-statistic:  8047 on 1 and 14248 DF,  p-value: < 2.2e-16
cor.test(mm3r$Alv,mm3r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm3r$Alv and mm3r$MDM
## t = 89.704, df = 14248, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5901737 0.6111617
## sample estimates:
##       cor 
## 0.6007712

DE4 Mock vs bystander

MDM group.

pb4m <- pbmdm[,c(grep("mock",colnames(pbmdm)),grep("bystander",colnames(pbmdm)))]

head(pb4m)
##             mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 mdm_bystander1
## MIR1302-2HG         0         0         0         0              0
## FAM138A             0         0         0         0              0
## OR4F5               0         0         0         0              0
## AL627309.1         61        23         5        15             59
## AL627309.3          0         0         0         0              2
## AL627309.2          0         0         0         0              0
##             mdm_bystander2 mdm_bystander3
## MIR1302-2HG              0              0
## FAM138A                  0              0
## OR4F5                    0              0
## AL627309.1              49             18
## AL627309.3               4              0
## AL627309.2               0              0
pb4mf <- pb4m[which(rowMeans(pb4m)>=10),]
head(pb4mf)
##            mdm_mock1 mdm_mock2 mdm_mock3 mdm_mock4 mdm_bystander1
## AL627309.1        61        23         5        15             59
## AL627309.5        17        23         0        16             29
## LINC01409        116        97        25        57            131
## LINC01128        171       106        50       159            210
## LINC00115         19         6         3         1             18
## FAM41C            61        25        14        84             42
##            mdm_bystander2 mdm_bystander3
## AL627309.1             49             18
## AL627309.5             28              8
## LINC01409             225             56
## LINC01128             192            104
## LINC00115              19              7
## FAM41C                 64             19
colSums(pb4mf)
##      mdm_mock1      mdm_mock2      mdm_mock3      mdm_mock4 mdm_bystander1 
##       28413100       19628713        6593943       20576828       38878445 
## mdm_bystander2 mdm_bystander3 
##       34021279       14638519
des4m <- as.data.frame(grepl("bystander",colnames(pb4mf)))
colnames(des4m) <- "case"

plot(cmdscale(dist(t(pb4mf))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb4mf))), labels=colnames(pb4mf) )

dds <- DESeqDataSetFromMatrix(countData = pb4mf , colData = des4m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4mf <- de
write.table(de4mf,"de4mf.tsv",sep="\t")

nrow(subset(de4mf,padj<0.05 & log2FoldChange>0))
## [1] 1
nrow(subset(de4mf,padj<0.05 & log2FoldChange<0))
## [1] 20
head(subset(de4mf, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in bystander MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in bystander MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
RBP7 326.028127 0.9067136 0.2121128 4.274677 0.0000191 0.0190573
CCL8 9.162784 3.2527549 0.8571023 3.795060 0.0001476 0.0721210
IFI6 11879.819069 1.0527511 0.2789926 3.773401 0.0001610 0.0737650
ISG15 863.534570 0.7056347 0.2016368 3.499533 0.0004661 0.1797817
TNFAIP6 114.936927 1.4033943 0.4097313 3.425158 0.0006144 0.2196708
PSME2 3367.282738 0.3934963 0.1172348 3.356480 0.0007894 0.2515486
PLAAT3 19.265684 1.5118027 0.4542672 3.328003 0.0008747 0.2649566
IFIT1 156.579593 1.0931583 0.3521626 3.104129 0.0019084 0.4907599
LINC01705 128.523675 1.1757211 0.3821648 3.076476 0.0020946 0.5154403
PSMC4 1021.037328 0.4332448 0.1415805 3.060060 0.0022129 0.5231780
head(subset(de4mf, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in bystander MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in bystander MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
CDK1 60.21042 -3.339535 0.6068380 -5.503174 0.0e+00 0.0005468
CENPF 124.33004 -2.947245 0.5538184 -5.321680 1.0e-07 0.0007535
CIT 18.26338 -2.928893 0.5638682 -5.194287 2.0e-07 0.0010041
CEP55 45.79740 -2.188058 0.4266846 -5.128045 3.0e-07 0.0010728
CCL4L2 36.96500 -3.956992 0.7930729 -4.989443 6.0e-07 0.0017752
UBE2C 73.18878 -3.727130 0.7732970 -4.819791 1.4e-06 0.0035108
TYMS 129.32211 -2.183946 0.4640121 -4.706657 2.5e-06 0.0052730
TOP2A 78.89420 -2.965697 0.6392146 -4.639596 3.5e-06 0.0063962
MKI67 150.61137 -3.079465 0.6743358 -4.566664 5.0e-06 0.0080708
DIAPH3 25.97459 -2.784506 0.6173007 -4.510778 6.5e-06 0.0094676
m4m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14684
## Note: no. genes in output = 14684
## Note: estimated proportion of input genes in output = 1
mres4m <- mitch_calc(m4m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres4m$enrichment_result

mitchtbl <- mres4m$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de4mf_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_mock_vs_bystander.html") ) {
  mitch_report(mres4m,outfile="MDM_mock_vs_bystander.html")
}

Alv cells.

pb4a <- pbalv[,c(grep("mock",colnames(pbalv)),grep("bystander",colnames(pbalv)))]

head(pb4a)
##             alv_mock1 alv_mock2 alv_mock3 alv_bystander1 alv_bystander2
## MIR1302-2HG         0         0         0              0              0
## FAM138A             0         0         0              0              0
## OR4F5               0         0         0              0              0
## AL627309.1         45        24        45             19             38
## AL627309.3          0         0         1              0              1
## AL627309.2          0         0         0              0              0
##             alv_bystander3 alv_bystander4
## MIR1302-2HG              0              0
## FAM138A                  0              0
## OR4F5                    0              0
## AL627309.1               7             13
## AL627309.3               0              0
## AL627309.2               0              0
pb4af <- pb4a[which(rowMeans(pb4a)>=10),]
head(pb4af)
##            alv_mock1 alv_mock2 alv_mock3 alv_bystander1 alv_bystander2
## AL627309.1        45        24        45             19             38
## AL627309.5        62        27        42             27             51
## LINC01409         59       103       134             57             83
## LINC01128        173       193       234            141            161
## FAM41C            72        49        91             81             59
## SAMD11            19        44        35              1             29
##            alv_bystander3 alv_bystander4
## AL627309.1              7             13
## AL627309.5             21              9
## LINC01409              72             67
## LINC01128             108             97
## FAM41C                 27             33
## SAMD11                 15             23
colSums(pb4af)
##      alv_mock1      alv_mock2      alv_mock3 alv_bystander1 alv_bystander2 
##       20034681       24098623       31812193       20409149       22057204 
## alv_bystander3 alv_bystander4 
##       13615188       12641737
des4a <- as.data.frame(grepl("bystander",colnames(pb4af)))
colnames(des4a) <- "case"

plot(cmdscale(dist(t(pb4af))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",pch=19,col="gray",cex=2)

text(cmdscale(dist(t(pb4af))), labels=colnames(pb4af) )

dds <- DESeqDataSetFromMatrix(countData = pb4af , colData = des4a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4af <- de
write.table(de4af,"de4af.tsv",sep="\t")

nrow(subset(de4af,padj<0.05 & log2FoldChange>0))
## [1] 4
nrow(subset(de4af,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de4af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent Alv cells compared to ") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent Alv cells compared to
baseMean log2FoldChange lfcSE stat pvalue padj
IFITM3 387.3558 1.8110887 0.3184991 5.686322 0.00e+00 0.0001715
EPSTI1 404.5284 2.9377475 0.5256928 5.588335 0.00e+00 0.0001715
IFI6 12742.0935 2.2785557 0.4417746 5.157734 2.00e-07 0.0012469
STAT1 2257.6674 0.8787825 0.2031447 4.325895 1.52e-05 0.0454678
head(subset(de4af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
PPBP 20.66716 -7.0431822 1.5419667 -4.567662 0.0000049 0.0184517
SPP1 39565.15627 -0.4223416 0.1014427 -4.163350 0.0000314 0.0782199
F13A1 26.19006 -5.1114766 1.2747629 -4.009747 0.0000608 0.1206994
FCGBP 19.82321 -4.2590412 1.2046241 -3.535577 0.0004069 0.3044522
MTSS1 185.34414 -0.6833321 0.2221431 -3.076090 0.0020973 0.9053521
SCN9A 18.08201 -1.2410189 0.4545526 -2.730199 0.0063296 0.9999985
CTSV 116.64990 -0.9133736 0.3440446 -2.654812 0.0079353 0.9999985
SLC9A3R2 31.22342 -1.0363782 0.4079946 -2.540176 0.0110797 0.9999985
TDO2 14.96632 -1.7020978 0.6706046 -2.538154 0.0111439 0.9999985
PLD4 52.77475 -0.8285980 0.3306464 -2.505994 0.0122108 0.9999985
m4a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14995
## Note: no. genes in output = 14995
## Note: estimated proportion of input genes in output = 1
mres4a <- mitch_calc(m4a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres4a$enrichment_result

mitchtbl <- mres4a$enrichment_result
goid <- sapply(strsplit(mitchtbl$set," "),"[[",1)
mysplit <- strsplit(mitchtbl$set," ")
mysplit <- lapply(mysplit, function(x) { x[2:length(x)] } )
godescription <- unlist(lapply(mysplit, function(x) { paste(x,collapse=" ") } ))
em <- data.frame(goid,godescription,mitchtbl$pANOVA,mitchtbl$p.adjustANOVA,sign(mitchtbl$s.dist),mitchtbl$s.dist)
colnames(em) <- c("GO.ID","Description","p.Val","FDR","Phenotype","ES")
write.table(em,"de4af_em.tsv",row.names = FALSE, quote=FALSE,sep="\t")

res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_mock_vs_bystander.html") ) {
  mitch_report(mres4a,outfile="Alv_mock_vs_bystander.html")
}

Combined.

mm4 <- merge(m4a,m4m,by=0)

head(mm4)
##   Row.names        x.x         x.y
## 1      A1BG -0.1137238  0.35454526
## 2  A1BG-AS1 -0.5694129 -0.65760285
## 3       A2M -1.4529828 -0.30537399
## 4 A2ML1-AS1  1.5613621 -0.14649971
## 5      AAAS  0.6246779 -0.06997099
## 6      AACS  0.2968281 -0.46679878
rownames(mm4) <- mm4[,1]
mm4[,1]=NULL
colnames(mm4) <- c("Alv","MDM")
plot(mm4)
mylm <- lm(mm4)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2958 -0.4644 -0.0258  0.4481  5.5250 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.092827   0.005778  16.066  < 2e-16 ***
## MDM         0.047919   0.006226   7.696 1.49e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6856 on 14157 degrees of freedom
## Multiple R-squared:  0.004167,   Adjusted R-squared:  0.004096 
## F-statistic: 59.24 on 1 and 14157 DF,  p-value: 1.491e-14
cor.test(mm4$Alv,mm4$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm4$Alv and mm4$MDM
## t = 7.6965, df = 14157, p-value = 1.491e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04812995 0.08093613
## sample estimates:
##        cor 
## 0.06455048
mm4r <- as.data.frame(apply(mm4,2,rank))
plot(mm4r,cex=0.3)
mylm <- lm(mm4r)
abline(mylm,col="red",lty=2,lwd=3)

summary(mylm)
## 
## Call:
## lm(formula = mm4r)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -7320  -3534     19   3518   7346 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.773e+03  6.864e+01  98.667  < 2e-16 ***
## MDM         4.338e-02  8.397e-03   5.166 2.42e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4084 on 14157 degrees of freedom
## Multiple R-squared:  0.001882,   Adjusted R-squared:  0.001811 
## F-statistic: 26.69 on 1 and 14157 DF,  p-value: 2.418e-07
cor.test(mm4r$Alv,mm4r$MDM)
## 
##  Pearson's product-moment correlation
## 
## data:  mm4r$Alv and mm4r$MDM
## t = 5.1665, df = 14157, p-value = 2.418e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02692858 0.05981001
## sample estimates:
##        cor 
## 0.04338104

Cross comparison pathway enrichment heatmap

l1 <- list("de1a"=de1af,"de1m"=de1mf,"de2a"=de2af,"de2m"=de2mf,
  "de3a"=de3af,"de3m"=de3mf,"de4a"=de4af,"de4m"=de4mf)

lm <- mitch_import(x=l1,DEtype="deseq2",joinType="inner")
## Note: Mean no. genes in input = 15259.625
## Note: no. genes in output = 13884
## Note: estimated proportion of input genes in output = 0.91
lmres <- mitch_calc(x=lm,genesets=go,minsetsize=5,cores=8,priority="effect")
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
top <- head(lmres$enrichment_result,50)

top %>%
  kbl(caption="Top pathways across all contrasts") %>%
  kable_paper("hover", full_width = F)
Top pathways across all contrasts
set setSize pMANOVA s.de1a s.de1m s.de2a s.de2m s.de3a s.de3m s.de4a s.de4m p.de1a p.de1m p.de2a p.de2m p.de3a p.de3m p.de4a p.de4m s.dist SD p.adjustMANOVA
2048 GO:0019864 IgG binding 6 0.0003787 -0.8219004 -0.6157467 -0.6088774 -0.4316424 -0.7655762 -0.7649277 0.5542345 0.1167796 0.0004890 0.0090037 0.0098011 0.0671224 0.0011635 0.0011748 0.0187265 0.6203854 1.761745 0.4944335 0.0066996
4599 GO:0070508 cholesterol import 5 0.0028657 0.7407594 0.8305065 -0.3238994 0.2842136 0.7580517 0.6274083 0.5512933 -0.5329635 0.0041229 0.0012987 0.2097861 0.2711226 0.0033292 0.0151178 0.0327816 0.0390405 1.726387 0.5214488 0.0330798
2036 GO:0019773 proteasome core complex, alpha-subunit complex 7 0.0000001 -0.0773222 0.2213014 -0.4428602 -0.7769794 0.2541616 0.7774941 0.8304286 0.8351023 0.7231783 0.3106771 0.0424688 0.0003706 0.2442844 0.0003673 0.0001417 0.0001299 1.706130 0.6073703 0.0000057
2583 GO:0032395 MHC class II receptor activity 6 0.0000000 -0.8664313 -0.4027958 0.3350147 -0.2672815 -0.3367200 -0.7275784 0.9626747 -0.4250132 0.0002372 0.0875465 0.1553274 0.2569377 0.1532351 0.0020257 0.0000442 0.0714303 1.687232 0.5944321 0.0000005
3266 GO:0042613 MHC class II protein complex 12 0.0000000 -0.8045343 -0.5595324 0.1815648 -0.4901841 -0.3870507 -0.7483059 0.8601620 -0.1655133 0.0000014 0.0007904 0.2762231 0.0032818 0.0202698 0.0000072 0.0000002 0.3209154 1.646403 0.5545058 0.0000000
3923 GO:0048245 eosinophil chemotaxis 7 0.0000413 0.6432741 0.6972586 0.6025077 0.4645405 0.6579747 0.7346483 -0.1296184 -0.4227241 0.0032056 0.0013996 0.0057718 0.0333174 0.0025725 0.0007623 0.5526549 0.0527902 1.626936 0.4356185 0.0010526
373 GO:0002503 peptide antigen assembly with MHC class II protein complex 11 0.0000000 -0.7899517 -0.5704409 0.1564255 -0.4921070 -0.3450325 -0.7381048 0.8516019 -0.1189950 0.0000057 0.0010530 0.3690938 0.0047137 0.0475647 0.0000224 0.0000010 0.4944502 1.618424 0.5471645 0.0000000
3676 GO:0045656 negative regulation of monocyte differentiation 5 0.0144439 -0.7492615 -0.3667267 0.4180272 0.4569926 -0.8633043 -0.5712659 -0.5119821 -0.4297860 0.0037134 0.1556093 0.1055222 0.0768027 0.0008278 0.0269597 0.0474230 0.0960750 1.611816 0.4988077 0.1025248
3090 GO:0036150 phosphatidylserine acyl-chain remodeling 6 0.0052677 -0.7084354 -0.6011433 0.2439833 0.1701974 -0.7578421 -0.7745352 -0.4609694 -0.4776865 0.0026540 0.0107737 0.3007453 0.4703730 0.0013051 0.0010173 0.0505520 0.0427462 1.602079 0.4053303 0.0506731
3330 GO:0043009 chordate embryonic development 5 0.0035632 -0.4052886 -0.5159594 -0.3157432 0.8383169 -0.6394553 -0.6779307 -0.4325816 -0.5059010 0.1165730 0.0457269 0.2214918 0.0011682 0.0132788 0.0086590 0.0939316 0.0501184 1.595148 0.4875116 0.0387074
2240 GO:0030292 protein tyrosine kinase inhibitor activity 5 0.0094313 -0.8765617 -0.6996902 -0.2298869 -0.3413358 -0.8182578 -0.5910080 0.1765689 0.1003963 0.0006870 0.0067382 0.3734013 0.1862772 0.0015306 0.0221048 0.4941839 0.6974765 1.577142 0.4040441 0.0777644
3134 GO:0038094 Fc-gamma receptor signaling pathway 8 0.0011538 -0.8042123 -0.6126946 -0.2978704 -0.2659988 -0.6958958 -0.6921303 0.3618838 0.1527097 0.0000817 0.0026916 0.1446297 0.1926875 0.0006530 0.0006985 0.0763462 0.4545528 1.516321 0.4277710 0.0165656
920 GO:0006271 DNA strand elongation involved in DNA replication 8 0.0000000 -0.5793817 -0.1169465 -0.7978704 0.5044141 -0.6514485 -0.4363289 -0.1430528 -0.5725894 0.0045437 0.5668442 0.0000929 0.0134943 0.0014187 0.0326034 0.4835816 0.0050404 1.484413 0.4188816 0.0000022
1454 GO:0008541 proteasome regulatory particle, lid subcomplex 8 0.0001151 0.0824986 0.5074769 -0.6005693 -0.5306464 0.2211192 0.7549366 0.6046591 0.5528250 0.6862075 0.0129386 0.0032660 0.0093507 0.2788654 0.0002174 0.0030608 0.0067769 1.482113 0.5182018 0.0025384
2615 GO:0032489 regulation of Cdc42 protein signal transduction 5 0.0686644 -0.7225160 -0.5601700 0.2506088 -0.0119461 -0.7387996 -0.7235824 -0.4601340 -0.0515455 0.0051431 0.0300730 0.3318680 0.9631073 0.0042230 0.0050777 0.0747962 0.8418082 1.477344 0.3861790 0.2761989
5409 GO:1903238 positive regulation of leukocyte tethering or rolling 6 0.0009097 -0.8003555 -0.6623433 0.3001393 -0.1045540 -0.6880915 -0.6716866 0.0398472 -0.2211654 0.0006856 0.0049599 0.2030072 0.6574398 0.0035129 0.0043820 0.8657920 0.3482172 1.468134 0.4087721 0.0137149
3649 GO:0045569 TRAIL binding 5 0.0006107 0.5621875 0.2699762 0.4788674 0.1126162 0.8548887 0.5740327 0.1848980 0.6527992 0.0294850 0.2958605 0.0637021 0.6628046 0.0009305 0.0262282 0.4740406 0.0114751 1.466840 0.2533667 0.0097472
2504 GO:0031726 CCR1 chemokine receptor binding 6 0.0000038 0.5553634 0.5337465 0.6017918 0.4442763 0.5968439 0.4735312 0.1866744 -0.6037133 0.0184868 0.0235735 0.0106890 0.0595044 0.0113507 0.0445845 0.4285014 0.0104414 1.460349 0.4071931 0.0001378
508 GO:0004185 serine-type carboxypeptidase activity 5 0.0019432 -0.4486923 -0.7447078 -0.1082066 -0.5438864 -0.2728871 -0.2903235 0.5346062 0.7891203 0.0823161 0.0039279 0.6752376 0.0351989 0.2906837 0.2609536 0.0384410 0.0022436 1.459694 0.5323208 0.0245645
4535 GO:0070106 interleukin-27-mediated signaling pathway 8 0.0000000 -0.4471029 -0.4429410 0.2358208 -0.3486055 0.6397557 0.0799042 0.8894134 0.5754540 0.0285454 0.0300582 0.2481427 0.0877747 0.0017272 0.6955741 0.0000132 0.0048252 1.452966 0.5259765 0.0000000
4200 GO:0051383 kinetochore organization 5 0.0067211 -0.4115426 -0.3729231 -0.5365084 0.4661575 -0.4454932 -0.7320844 0.3773327 -0.6379278 0.1110391 0.1487420 0.0377565 0.0710687 0.0845249 0.0045826 0.1439977 0.0135004 1.448215 0.4535734 0.0603843
4676 GO:0071139 resolution of DNA recombination intermediates 5 0.0650925 -0.7541610 -0.2514158 -0.2345270 -0.0855825 -0.7110743 -0.7440738 -0.0394985 -0.5706031 0.0034946 0.3303134 0.3638321 0.7403644 0.0058942 0.0039587 0.8784489 0.0271376 1.442469 0.3031762 0.2671971
3276 GO:0042719 mitochondrial intermembrane space protein transporter complex 6 0.0065828 0.5839939 0.6298938 0.0623289 0.3612192 0.7772974 0.4470865 0.5043234 -0.3746697 0.0132421 0.0075414 0.7915038 0.1254931 0.0009757 0.0579112 0.0324207 0.1120209 1.439089 0.3688519 0.0595161
4944 GO:0097100 supercoiled DNA binding 6 0.0055547 -0.7983139 -0.4631071 -0.3823077 -0.1509103 -0.7920209 -0.4810011 0.3852380 0.1713023 0.0007076 0.0494916 0.1048945 0.5221291 0.0007797 0.0413261 0.1022578 0.4674964 1.434266 0.4257602 0.0526362
3108 GO:0036402 proteasome-activating activity 6 0.0009906 0.0486622 0.4388481 -0.5440025 -0.4938752 0.1275400 0.8123889 0.5689100 0.5423212 0.8364840 0.0626848 0.0210257 0.0361835 0.5885465 0.0005682 0.0158134 0.0214262 1.424367 0.4996103 0.0146634
3248 GO:0042555 MCM complex 11 0.0000007 -0.5336527 -0.1262164 -0.2497264 0.1744985 -0.6810941 -0.6701375 -0.4830115 -0.6979876 0.0021794 0.4686286 0.1515938 0.3163685 0.0000916 0.0001187 0.0055419 0.0000610 1.423724 0.3145428 0.0000276
809 GO:0005839 proteasome core complex 18 0.0000000 -0.1652804 0.0253698 -0.4041380 -0.6609372 0.1862269 0.4569851 0.7622161 0.7523519 0.2248649 0.8522160 0.0029967 0.0000012 0.1714629 0.0007898 0.0000000 0.0000000 1.420792 0.5216967 0.0000000
2616 GO:0032494 response to peptidoglycan 5 0.0558945 -0.6605807 -0.2431731 -0.5802579 -0.0111391 -0.6137186 -0.6343252 0.2707544 -0.5567692 0.0105267 0.3464148 0.0246447 0.9655979 0.0174767 0.0140362 0.2944706 0.0310871 1.412318 0.3479796 0.2439506
5356 GO:1902254 negative regulation of intrinsic apoptotic signaling pathway by p53 class mediator 6 0.0019279 0.5827689 0.1055387 0.5283422 0.1473795 0.6645290 0.5532017 0.0387424 0.7361772 0.0134362 0.6544220 0.0250207 0.5319110 0.0048189 0.0189481 0.8694794 0.0017906 1.393812 0.2762764 0.0244264
5159 GO:0140367 antibacterial innate immune response 5 0.0032445 -0.2230276 -0.3246776 0.6700339 0.5828518 -0.0618344 -0.6751063 -0.1400821 -0.7183082 0.3878294 0.2086932 0.0094689 0.0240098 0.8107805 0.0089413 0.5875484 0.0054086 1.392480 0.5126896 0.0360687
1723 GO:0014909 smooth muscle cell migration 5 0.0740263 0.6645291 0.4753513 0.0518913 -0.3267815 0.7390014 0.5658765 0.3402407 0.4192089 0.0100727 0.0656740 0.8407610 0.2057589 0.0042126 0.0284353 0.1876940 0.1045408 1.390998 0.3509707 0.2890518
2382 GO:0031048 regulatory ncRNA-mediated heterochromatin formation 7 0.0030493 -0.5898249 -0.2851275 -0.3151463 0.5208310 -0.6497390 -0.5274401 0.1519163 -0.6122052 0.0068852 0.1914807 0.1488116 0.0170246 0.0029115 0.0156722 0.4864689 0.0050330 1.377679 0.4196720 0.0343668
2329 GO:0030836 positive regulation of actin filament depolymerization 5 0.0633843 0.2546725 0.8388068 -0.3125730 -0.1274299 0.3940486 0.6523957 0.5188414 -0.3444773 0.3240873 0.0011605 0.2261658 0.6217280 0.1270609 0.0115263 0.0445300 0.1822572 1.360570 0.4491313 0.2622576
3265 GO:0042612 MHC class I protein complex 9 0.0000000 -0.2034915 -0.5553473 -0.2324164 -0.4210531 0.3184064 -0.1946186 0.8283083 0.6580420 0.2905316 0.0039153 0.2273541 0.0287324 0.0981510 0.3120797 0.0000168 0.0006295 1.356280 0.5119436 0.0000001
901 GO:0006172 ADP biosynthetic process 5 0.0334160 0.5974062 0.3352835 0.5195619 0.5493912 0.6598890 0.5846387 -0.0431299 -0.1331652 0.0207038 0.1942062 0.0442349 0.0333884 0.0106081 0.0235807 0.8673729 0.6061269 1.355704 0.3070404 0.1774526
5564 GO:1990414 replication-born double-strand break repair via sister chromatid exchange 6 0.0061788 -0.3740933 -0.4674545 -0.6125522 0.4832589 -0.6210309 -0.5358121 -0.2521497 -0.3431570 0.1125739 0.0473926 0.0093670 0.0403819 0.0084303 0.0230398 0.2848557 0.1455304 1.349566 0.3574701 0.0571211
3144 GO:0038156 interleukin-3-mediated signaling pathway 6 0.0132067 -0.8704905 -0.2839266 0.0156603 -0.2488591 -0.8484172 -0.4169189 0.1310467 -0.0688620 0.0002217 0.2284886 0.9470430 0.2911886 0.0003191 0.0769949 0.5783359 0.7702349 1.347625 0.3736085 0.0962539
4679 GO:0071162 CMG complex 10 0.0000000 -0.4144010 0.0994090 -0.3590601 0.1046994 -0.5184229 -0.6709240 -0.3774686 -0.7916246 0.0232710 0.5862742 0.0493096 0.5665041 0.0045304 0.0002387 0.0387616 0.0000145 1.345195 0.3247094 0.0000001
5226 GO:0140948 histone H3K9 monomethyltransferase activity 5 0.0663427 -0.6396282 -0.6457670 -0.4026659 -0.0566756 -0.7108149 -0.5457598 -0.0811730 -0.0058938 0.0132539 0.0123965 0.1189572 0.8263039 0.0059123 0.0345736 0.7532952 0.9817935 1.342098 0.2949495 0.2708928
655 GO:0005216 monoatomic ion channel activity 5 0.0043684 0.4628431 0.7200951 -0.0682326 -0.2980186 0.7217955 0.5756755 0.2027956 -0.1991354 0.0731004 0.0052944 0.7916315 0.2485260 0.0051877 0.0258020 0.4323243 0.4406785 1.326425 0.4138252 0.0437909
1156 GO:0007006 mitochondrial membrane organization 5 0.1084297 0.7434397 0.4853808 0.4419194 0.1038259 0.6909864 0.4696160 -0.1327905 -0.1743497 0.0039896 0.0601785 0.0870489 0.6876777 0.0074551 0.0689982 0.6071407 0.4996270 1.319243 0.3539746 0.3525665
493 GO:0004045 aminoacyl-tRNA hydrolase activity 5 0.0329381 0.4010808 0.4328410 -0.2385330 -0.3984581 0.6292816 0.6728295 0.5216946 0.2319620 0.1204167 0.0937347 0.3556951 0.1228621 0.0148179 0.0091748 0.0433712 0.3691028 1.318541 0.3971720 0.1763932
2914 GO:0035005 1-phosphatidylinositol-4-phosphate 3-kinase activity 6 0.0227589 -0.0530096 -0.6953932 0.2884421 0.6564106 -0.2366335 -0.6094778 -0.5163088 -0.1855695 0.8221101 0.0031792 0.2211731 0.0053618 0.3155416 0.0097290 0.0285220 0.4312395 1.314886 0.4630015 0.1401343
5363 GO:1902425 positive regulation of attachment of mitotic spindle microtubules to kinetochore 8 0.0000498 -0.2922312 -0.0833814 -0.6435032 0.2333165 -0.3263008 -0.4789745 0.5359073 -0.7360010 0.1523884 0.6830311 0.0016222 0.2532016 0.1100408 0.0189847 0.0086717 0.0003120 1.313647 0.4350093 0.0012255
4748 GO:0071492 cellular response to UV-A 6 0.0000066 -0.5269251 -0.7190277 0.2304126 -0.3491377 -0.2108133 -0.5622328 0.6207186 0.0716242 0.0254127 0.0022872 0.3284327 0.1386410 0.3712448 0.0170854 0.0084632 0.7612932 1.311719 0.4566128 0.0002239
2037 GO:0019774 proteasome core complex, beta-subunit complex 10 0.0000005 -0.2988323 -0.0743405 -0.3448897 -0.6754361 0.0891884 0.2335592 0.7064581 0.6990486 0.1018157 0.6840098 0.0589844 0.0002167 0.6253464 0.2009964 0.0001094 0.0001291 1.311582 0.4937088 0.0000225
4436 GO:0060992 response to fungicide 6 0.0600153 -0.6614546 -0.6811981 -0.0740501 0.1968343 -0.5853629 -0.6453620 -0.1078926 0.0548830 0.0050184 0.0038570 0.7534662 0.4038010 0.0130281 0.0061895 0.6472319 0.8159339 1.311326 0.3656836 0.2530828
489 GO:0004017 adenylate kinase activity 7 0.0052968 0.6204202 0.2605648 0.5173308 0.3771400 0.7185888 0.5574383 0.1212386 0.0540154 0.0044752 0.2326032 0.0177815 0.0840287 0.0009930 0.0106515 0.5786223 0.8045637 1.306677 0.2408099 0.0508672
5504 GO:1904948 midbrain dopaminergic neuron differentiation 5 0.0849398 -0.6747892 -0.2430002 -0.5709489 -0.0629008 -0.5508322 -0.5538295 0.4258952 -0.2552489 0.0089735 0.3467579 0.0270446 0.8075808 0.0329278 0.0319872 0.0991230 0.3229934 1.304228 0.3642411 0.3113186
846 GO:0005942 phosphatidylinositol 3-kinase complex 6 0.0488771 -0.6246337 -0.5123457 0.0710477 0.3407792 -0.5613441 -0.7080271 0.0892780 -0.2841668 0.0080582 0.0297639 0.7631567 0.1483412 0.0172612 0.0026692 0.7049407 0.2280952 1.295753 0.3927528 0.2265076
colfunc <- colorRampPalette(c("blue", "white", "red"))

mx <- top[,grep("^s\\.",colnames(top))]
mx <- mx[,-ncol(mx)]
rownames(mx) <- top$set

heatmap.2(as.matrix(mx),scale="none",trace="none",margins=c(6,25),
  col=colfunc(25),cexRow=0.6,cexCol=0.8)

Session information

For reproducibility.

save.image("scanalysis.Rdata")

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] gplots_3.2.0                limma_3.60.6               
##  [3] SingleR_2.6.0               celldex_1.14.0             
##  [5] harmony_1.2.1               Rcpp_1.0.13-1              
##  [7] mitch_1.19.3                DESeq2_1.44.0              
##  [9] muscat_1.18.0               beeswarm_0.4.0             
## [11] stringi_1.8.4               SingleCellExperiment_1.26.0
## [13] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [15] GenomicRanges_1.56.2        GenomeInfoDb_1.40.1        
## [17] IRanges_2.38.1              S4Vectors_0.42.1           
## [19] BiocGenerics_0.50.0         MatrixGenerics_1.16.0      
## [21] matrixStats_1.4.1           hdf5r_1.3.11               
## [23] Seurat_5.1.0                SeuratObject_5.0.2         
## [25] sp_2.1-4                    plyr_1.8.9                 
## [27] ggplot2_3.5.1               kableExtra_1.4.0           
## 
## loaded via a namespace (and not attached):
##   [1] progress_1.2.3            goftest_1.2-3            
##   [3] HDF5Array_1.32.1          Biostrings_2.72.1        
##   [5] vctrs_0.6.5               spatstat.random_3.3-2    
##   [7] digest_0.6.37             png_0.1-8                
##   [9] corpcor_1.6.10            shape_1.4.6.1            
##  [11] gypsum_1.0.1              ggrepel_0.9.6            
##  [13] echarts4r_0.4.5           deldir_2.0-4             
##  [15] parallelly_1.39.0         MASS_7.3-61              
##  [17] reshape2_1.4.4            httpuv_1.6.15            
##  [19] foreach_1.5.2             withr_3.0.2              
##  [21] xfun_0.49                 survival_3.7-0           
##  [23] memoise_2.0.1.9000        ggbeeswarm_0.7.2         
##  [25] systemfonts_1.1.0         zoo_1.8-12               
##  [27] GlobalOptions_0.1.2       gtools_3.9.5             
##  [29] pbapply_1.7-2             prettyunits_1.2.0        
##  [31] GGally_2.2.1              KEGGREST_1.44.1          
##  [33] promises_1.3.1            httr_1.4.7               
##  [35] globals_0.16.3            fitdistrplus_1.2-1       
##  [37] rhdf5filters_1.16.0       rhdf5_2.48.0             
##  [39] rstudioapi_0.17.1         UCSC.utils_1.0.0         
##  [41] miniUI_0.1.1.1            generics_0.1.3           
##  [43] curl_6.0.1                zlibbioc_1.50.0          
##  [45] ScaledMatrix_1.12.0       polyclip_1.10-7          
##  [47] GenomeInfoDbData_1.2.12   ExperimentHub_2.12.0     
##  [49] SparseArray_1.4.8         xtable_1.8-4             
##  [51] stringr_1.5.1             doParallel_1.0.17        
##  [53] evaluate_1.0.1            S4Arrays_1.4.1           
##  [55] BiocFileCache_2.12.0      hms_1.1.3                
##  [57] irlba_2.3.5.1             colorspace_2.1-1         
##  [59] filelock_1.0.3            ROCR_1.0-11              
##  [61] reticulate_1.40.0         spatstat.data_3.1-4      
##  [63] magrittr_2.0.3            lmtest_0.9-40            
##  [65] later_1.4.0               viridis_0.6.5            
##  [67] lattice_0.22-6            spatstat.geom_3.3-4      
##  [69] future.apply_1.11.3       scattermore_1.2          
##  [71] scuttle_1.14.0            cowplot_1.1.3            
##  [73] RcppAnnoy_0.0.22          pillar_1.9.0             
##  [75] nlme_3.1-166              iterators_1.0.14         
##  [77] caTools_1.18.3            compiler_4.4.1           
##  [79] beachmat_2.20.0           RSpectra_0.16-2          
##  [81] tensor_1.5                minqa_1.2.8              
##  [83] crayon_1.5.3              abind_1.4-8              
##  [85] scater_1.32.1             blme_1.0-6               
##  [87] locfit_1.5-9.10           bit_4.5.0                
##  [89] dplyr_1.1.4               codetools_0.2-20         
##  [91] BiocSingular_1.20.0       bslib_0.8.0              
##  [93] alabaster.ranges_1.4.2    GetoptLong_1.0.5         
##  [95] plotly_4.10.4             remaCor_0.0.18           
##  [97] mime_0.12                 splines_4.4.1            
##  [99] circlize_0.4.16           fastDummies_1.7.4        
## [101] dbplyr_2.5.0              sparseMatrixStats_1.16.0 
## [103] knitr_1.49                blob_1.2.4               
## [105] utf8_1.2.4                clue_0.3-66              
## [107] BiocVersion_3.19.1        lme4_1.1-35.5            
## [109] listenv_0.9.1             DelayedMatrixStats_1.26.0
## [111] Rdpack_2.6.2              tibble_3.2.1             
## [113] Matrix_1.7-1              statmod_1.5.0            
## [115] svglite_2.1.3             fANCOVA_0.6-1            
## [117] pkgconfig_2.0.3           tools_4.4.1              
## [119] cachem_1.1.0              RhpcBLASctl_0.23-42      
## [121] rbibutils_2.3             RSQLite_2.3.8            
## [123] viridisLite_0.4.2         DBI_1.2.3                
## [125] numDeriv_2016.8-1.1       fastmap_1.2.0            
## [127] rmarkdown_2.29            scales_1.3.0             
## [129] grid_4.4.1                ica_1.0-3                
## [131] broom_1.0.7               AnnotationHub_3.12.0     
## [133] sass_0.4.9                patchwork_1.3.0          
## [135] BiocManager_1.30.25       ggstats_0.7.0            
## [137] dotCall64_1.2             RANN_2.6.2               
## [139] alabaster.schemas_1.4.0   farver_2.1.2             
## [141] reformulas_0.4.0          aod_1.3.3                
## [143] mgcv_1.9-1                yaml_2.3.10              
## [145] cli_3.6.3                 purrr_1.0.2              
## [147] leiden_0.4.3.1            lifecycle_1.0.4          
## [149] uwot_0.2.2                glmmTMB_1.1.10           
## [151] mvtnorm_1.3-2             backports_1.5.0          
## [153] BiocParallel_1.38.0       gtable_0.3.6             
## [155] rjson_0.2.23              ggridges_0.5.6           
## [157] progressr_0.15.1          jsonlite_1.8.9           
## [159] edgeR_4.2.2               RcppHNSW_0.6.0           
## [161] bitops_1.0-9              bit64_4.5.2              
## [163] Rtsne_0.17                alabaster.matrix_1.4.2   
## [165] spatstat.utils_3.1-1      BiocNeighbors_1.22.0     
## [167] alabaster.se_1.4.1        jquerylib_0.1.4          
## [169] spatstat.univar_3.1-1     pbkrtest_0.5.3           
## [171] lazyeval_0.2.2            alabaster.base_1.4.2     
## [173] shiny_1.9.1               htmltools_0.5.8.1        
## [175] sctransform_0.4.1         rappdirs_0.3.3           
## [177] glue_1.8.0                spam_2.11-0              
## [179] httr2_1.0.7               XVector_0.44.0           
## [181] gridExtra_2.3             EnvStats_3.0.0           
## [183] boot_1.3-31               igraph_2.1.1             
## [185] variancePartition_1.34.0  TMB_1.9.15               
## [187] R6_2.5.1                  tidyr_1.3.1              
## [189] labeling_0.4.3            cluster_2.1.6            
## [191] Rhdf5lib_1.26.0           nloptr_2.1.1             
## [193] DelayedArray_0.30.1       tidyselect_1.2.1         
## [195] vipor_0.4.7               xml2_1.3.6               
## [197] AnnotationDbi_1.66.0      future_1.34.0            
## [199] rsvd_1.0.5                munsell_0.5.1            
## [201] KernSmooth_2.23-24        data.table_1.16.2        
## [203] htmlwidgets_1.6.4         ComplexHeatmap_2.20.0    
## [205] RColorBrewer_1.1-3        rlang_1.1.4              
## [207] spatstat.sparse_3.1-0     spatstat.explore_3.3-3   
## [209] lmerTest_3.1-3            fansi_1.0.6