Introduction

Ee are interested in comparing DEGs between the following comparison groups:

  • Latently- vs productively-infected cells (groups 3 vs 4).

  • Latently-infected vs bystander cells (2 vs 3).

  • Productively-infected vs mock (4 vs 1)

  • Mock vs bystander (1 vs 2).

As discussed, I think we will do these comparisons separately for both MDM and AlvMDM samples initially. Then, it would be of interest to compare DEGs for MDM vs AlvMDM for each of the four infection groups.

suppressPackageStartupMessages({
  library("kableExtra")
  library("ggplot2")
  library("plyr")
  library("Seurat")
  library("hdf5r")
  library("SingleCellExperiment")
  library("parallel")
  library("stringi")
  library("beeswarm")
  library("muscat")
  library("DESeq2")
  library("mitch")
  library("harmony")
  library("celldex")
  library("SingleR")
  library("limma")
})

Load data

Sample Patient Group
mock0 MDMP236 mock
mock1 MDMV236 mock
mock2 MDMO236 mock
mock3 MDMP239 mock
mock4 AlvP239 mock
mock5 AlvM239 mock
mock6 AlvN239 mock
active0 MDMP236 active
active1 MDMV236 active
active2 MDMO236 active
active3 MDMP239 active
active4 AlvP239 active
active5 AlvM239 active
active6 AlvN239 active
latent0 MDMP236 latent
latent1 MDMV236 latent
latent2 MDMO236 latent
latent3 MDMP239 latent
latent4 AlvP239 latent
latent5 AlvM239 latent
latent6 AlvN239 latent
bystander0 MDMP236 bystander
bystander1 MDMV236 bystander
bystander2 MDMO236 bystander
bystander3 MDMP239 bystander
bystander4 AlvP239 bystander
bystander5 AlvM239 bystander
bystander6 AlvN239 bystander
react6 AlvN239 reactivated
ss <- read.table("samplesheet.tsv",header=TRUE,row.names=1)

ss %>% kbl(caption="sample sheet") %>% kable_paper("hover", full_width = F)
sample sheet
Patient Group
mock0 MDMP236 mock
mock1 MDMV236 mock
mock2 MDMO236 mock
mock3 MDMP239 mock
mock4 AlvP239 mock
mock5 AlvM239 mock
mock6 AlvN239 mock
active0 MDMP236 active
active1 MDMV236 active
active2 MDMO236 active
active3 MDMP239 active
active4 AlvP239 active
active5 AlvM239 active
active6 AlvN239 active
latent0 MDMP236 latent
latent1 MDMV236 latent
latent2 MDMO236 latent
latent3 MDMP239 latent
latent4 AlvP239 latent
latent5 AlvM239 latent
latent6 AlvN239 latent
bystander0 MDMP236 bystander
bystander1 MDMV236 bystander
bystander2 MDMO236 bystander
bystander3 MDMP239 bystander
bystander4 AlvP239 bystander
bystander5 AlvM239 bystander
bystander6 AlvN239 bystander
react6 AlvN239 reactivated
mylist <- readRDS("macrophage_counts.rds")

Make single cell experiment object

comb <- do.call(cbind,mylist)

sce <- SingleCellExperiment(list(counts=comb))

sce
## class: SingleCellExperiment 
## dim: 36603 28971 
## metadata(0):
## assays(1): counts
## rownames(36603): gene-HIV1-1 gene-HIV1-2 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(28971): mock0|AAACGAATCACATACG mock0|AAACGCTCATCAGCGC ...
##   react6|TTTGTTGTCTGAACGT react6|TTTGTTGTCTTGGCTC
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Normalise data

cellmetadata <- data.frame(colnames(comb) ,sapply(strsplit(colnames(comb),"\\|"),"[[",1))
colnames(cellmetadata) <- c("cell","sample")
comb <- CreateSeuratObject(comb, project = "mac", assay = "RNA", meta.data = cellmetadata)
comb <- NormalizeData(comb)
## Normalizing layer: counts
comb <- FindVariableFeatures(comb, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb <- ScaleData(comb)
## Centering and scaling data matrix

PCA and Cluster

comb <- RunPCA(comb, features = VariableFeatures(object = comb))
## PC_ 1 
## Positive:  MT-ATP8, MTRNR2L1, FABP4, AC105402.3, FABP3, gene-HIV1-2, HAMP, C1orf56, PVALB, AL136097.2 
##     GAPDH, BIRC7, UTS2, S100A9, C1QC, MT1G, MT1H, AC025154.2, CARD16, COX7A1 
##     BEX3, MT1A, AL450311.1, PPP1R1A, NUPR1, LILRA1, MT2A, CCL23, FXYD2, CCL15 
## Negative:  ARL15, NEAT1, DOCK3, EXOC4, MALAT1, RASAL2, FTX, LRMDA, DPYD, PLXDC2 
##     JMJD1C, TMEM117, FHIT, ZEB2, MITF, DENND4C, ATG7, FRMD4B, VPS13B, ZNF438 
##     MAML2, COP1, TPRG1, FMNL2, ATXN1, JARID2, ASAP1, ELMO1, FNDC3B, TCF12 
## PC_ 2 
## Positive:  CYSTM1, FAH, CD164, FDX1, GDF15, PSAT1, ATP6V1D, SAT1, TXN, BCAT1 
##     CCPG1, SLAMF9, CSTA, CTSL, HES2, HEBP2, S100A10, NUPR1, PHGDH, STMN1 
##     TCEA1, GARS, MARCO, SCD, RILPL2, SNHG5, RHOQ, CD48, PLIN2, SNHG32 
## Negative:  SPRED1, RCBTB2, KCNMA1, GCLC, FGL2, HLA-DRA, MRC1, HLA-DRB1, RTN1, SLCO2B1 
##     HLA-DPA1, AC020656.1, CD74, HLA-DPB1, PDGFC, LINC02345, DPYD, MERTK, C1QA, CCDC102B 
##     STAC, ATP8B4, NFATC3, RNF128, VAMP5, MX1, ATG7, TGFBI, XYLT1, CRIP1 
## PC_ 3 
## Positive:  NCAPH, CRABP2, TM4SF19, GAL, DUSP2, RGCC, CHI3L1, CCL22, TMEM114, ACAT2 
##     RGS20, TRIM54, LINC01010, LINC02244, TCTEX1D2, MGST1, GPC4, NUMB, CCND1, SYNGR1 
##     CLU, POLE4, SERTAD2, AC092353.2, SLC20A1, IL1RN, ZNF366, AC005280.2, OCSTAMP, GCLC 
## Negative:  MARCKS, CD14, TGFBI, BTG1, MS4A6A, HLA-DQA1, FPR3, CTSC, CD163, MPEG1 
##     MEF2C, AIF1, TIMP1, HLA-DPB1, C1QC, HLA-DQB1, SELENOP, RNASE1, NAMPT, HLA-DQA2 
##     TCF4, EPB41L3, ARL4C, NUPR1, ALDH2, SAMSN1, ZNF331, HIF1A, MS4A7, C1QA 
## PC_ 4 
## Positive:  MT-ATP8, MTRNR2L1, XIST, PDE4D, AL035446.2, AC073359.2, CCDC26, AL365295.1, CLVS1, PRKCE 
##     EPHB1, RAD51B, C5orf17, LINC01135, PLEKHA5, LINC02320, LY86-AS1, STX18-AS1, FHIT, LINC01473 
##     DMGDH, FTX, AC079142.1, PKD1L1, SLC22A15, AF117829.1, GUCY2F, SH3RF3, STOX2, MIR646HG 
## Negative:  HLA-DRB1, CD74, CTSB, HSP90B1, TUBA1B, GAPDH, HLA-DPA1, HLA-DRA, ACTG1, IFI30 
##     RGCC, AIF1, HSPA5, CYP1B1, HLA-DPB1, C1QA, C15orf48, LYZ, CA2, S100A4 
##     LGMN, TUBB, FBP1, TUBA1A, MMP9, RNASE6, HLA-DQB1, CHI3L1, CCL3, TXN 
## PC_ 5 
## Positive:  PTGDS, LINC02244, MMP9, MGST1, LYZ, RGCC, IFI30, CHI3L1, CLU, GCLC 
##     GCHFR, CRABP2, FABP3, VAMP5, TMEM176B, HLA-DRB1, SYNGR1, CTSB, NCAPH, ISG15 
##     CD74, NCF1, SOD2, SAT1, FGL2, HSPA5, CDKN2A, MX1, CLEC12A, S100A10 
## Negative:  TYMS, PCLAF, TK1, MKI67, MYBL2, CENPM, CLSPN, RRM2, BIRC5, SHCBP1 
##     CEP55, DIAPH3, CDK1, CENPK, PRC1, CENPF, ESCO2, HELLS, NUSAP1, TOP2A 
##     NCAPG, FAM111B, ANLN, TPX2, CCNA2, KIF11, ASPM, CENPU, MAD2L1, HMMR
comb <- RunHarmony(comb,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
DimHeatmap(comb, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(comb)

comb <- JackStraw(comb, num.replicate = 100)
comb <- FindNeighbors(comb, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb <- FindClusters(comb, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 28971
## Number of edges: 896422
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8952
## Number of communities: 12
## Elapsed time: 4 seconds

UMAP

comb <- RunUMAP(comb, dims = 1:10)
## 17:02:49 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:02:49 Read 28971 rows and found 10 numeric columns
## 17:02:49 Using Annoy for neighbor search, n_neighbors = 30
## 17:02:49 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:02:51 Writing NN index file to temp file /tmp/Rtmp9zv4vx/file25988e230dbf5f
## 17:02:51 Searching Annoy index using 1 thread, search_k = 3000
## 17:02:59 Annoy recall = 100%
## 17:03:00 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:03:01 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:03:02 Commencing optimization for 200 epochs, with 1186670 positive edges
## 17:03:10 Optimization finished
DimPlot(comb, reduction = "umap")

Assign names to clusters

ADGRE1, CCR2, CD169, CX3CR1, CD206, CD163, LYVE1, CD9, TREM2

HLA-DP, HLA-DM, HLA-DOA, HLA-DOB, HLA-DQ, and HLA-DR.

message("macrophage markers")
## macrophage markers
FeaturePlot(comb, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))

DimPlot(comb, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

That’s pretty useless. Let’s use celldex pkg to annotate cells and get the macs.

ref <- celldex::MonacoImmuneData()

DefaultAssay(comb) <- "RNA"
comb2 <- as.SingleCellExperiment(comb)

lc <- logcounts(comb2)

pred_imm_broad <- SingleR(test=comb2, ref=ref,
                          labels=ref$label.main)

head(pred_imm_broad)
## DataFrame with 6 rows and 4 columns
##                                                scores      labels delta.next
##                                              <matrix> <character>  <numeric>
## mock0|AAACGAATCACATACG 0.307826:0.325650:0.169064:...   Monocytes  0.1786319
## mock0|AAACGCTCATCAGCGC 0.311008:0.281528:0.189016:...   Monocytes  0.0338547
## mock0|AAACGCTGTCGAGTGA 0.318344:0.277003:0.170895:...   Monocytes  0.1301308
## mock0|AAAGAACGTGCAACAG 0.214292:0.190767:0.121117:...   Monocytes  0.1116322
## mock0|AAAGGTAAGCCATATC 0.290416:0.291088:0.155001:...   Monocytes  0.1794308
## mock0|AAAGGTAGTTTCCAAG 0.292140:0.281397:0.184535:...   Monocytes  0.0952702
##                        pruned.labels
##                          <character>
## mock0|AAACGAATCACATACG     Monocytes
## mock0|AAACGCTCATCAGCGC     Monocytes
## mock0|AAACGCTGTCGAGTGA     Monocytes
## mock0|AAAGAACGTGCAACAG            NA
## mock0|AAAGGTAAGCCATATC     Monocytes
## mock0|AAAGGTAGTTTCCAAG     Monocytes
table(pred_imm_broad$pruned.labels)
## 
##       Basophils Dendritic cells       Monocytes     Neutrophils        NK cells 
##               4             657           24154              14               4 
##     Progenitors 
##               2
cellmetadata$label <- pred_imm_broad$pruned.labels

pred_imm_fine <- SingleR(test=comb2, ref=ref,
                          labels=ref$label.fine)
head(pred_imm_fine)
## DataFrame with 6 rows and 4 columns
##                                                 scores              labels
##                                               <matrix>         <character>
## mock0|AAACGAATCACATACG 0.1836411:0.485683:0.206442:... Classical monocytes
## mock0|AAACGCTCATCAGCGC 0.1961234:0.430705:0.226843:... Classical monocytes
## mock0|AAACGCTGTCGAGTGA 0.1718613:0.442610:0.188544:... Classical monocytes
## mock0|AAAGAACGTGCAACAG 0.0943609:0.264157:0.120656:... Classical monocytes
## mock0|AAAGGTAAGCCATATC 0.1563980:0.415082:0.167965:... Classical monocytes
## mock0|AAAGGTAGTTTCCAAG 0.1857623:0.432131:0.207144:... Classical monocytes
##                        delta.next       pruned.labels
##                         <numeric>         <character>
## mock0|AAACGAATCACATACG  0.0626269 Classical monocytes
## mock0|AAACGCTCATCAGCGC  0.1150706 Classical monocytes
## mock0|AAACGCTGTCGAGTGA  0.0651352 Classical monocytes
## mock0|AAAGAACGTGCAACAG  0.0648711 Classical monocytes
## mock0|AAAGGTAAGCCATATC  0.1073274 Classical monocytes
## mock0|AAAGGTAGTTTCCAAG  0.1521533 Classical monocytes
table(pred_imm_fine$pruned.labels)
## 
##           Classical monocytes        Intermediate monocytes 
##                         21583                          3569 
##         Low-density basophils       Low-density neutrophils 
##                             2                            21 
##       Myeloid dendritic cells                 Naive B cells 
##                           461                             1 
##          Natural killer cells       Non classical monocytes 
##                             3                            64 
##            Non-Vd2 gd T cells  Plasmacytoid dendritic cells 
##                             1                            35 
##              Progenitor cells Terminal effector CD4 T cells 
##                             4                             1
cellmetadata$finelabel <- pred_imm_fine$pruned.labels

col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
             '#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
             "#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
             "#ffcc66", "#003399")

annot_df <- data.frame(
  barcodes = rownames(pred_imm_broad),
  monaco_broad_annotation = pred_imm_broad$labels,
  monaco_broad_pruned_labels = pred_imm_broad$pruned.labels,
  monaco_fine_annotation = pred_imm_fine$labels,
  monaco_fine_pruned_labels = pred_imm_fine$pruned.labels
)

meta_inf <- comb@meta.data
meta_inf$cell_barcode <- colnames(comb)

meta_inf <- meta_inf %>% dplyr::left_join(y = annot_df,
                                          by = c("cell_barcode" = "barcodes"))
rownames(meta_inf) <- colnames(lc)

comb@meta.data <- meta_inf

DimPlot(comb, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

DimPlot(comb, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
  cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")

Extract monocytes

mono <- comb[,which(meta_inf$monaco_broad_annotation == "Monocytes")]

mono_metainf <- meta_inf[which(meta_inf$monaco_broad_annotation == "Monocytes"),]

# remove non monos
mono_metainf1 <- mono_metainf[grep("monocytes",mono_metainf$monaco_fine_pruned_labels),]
mono <- mono[,which(colnames(mono) %in% rownames(mono_metainf))]

mono <- FindVariableFeatures(mono, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
mono <- RunPCA(mono, features = VariableFeatures(object = mono))
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: LINC02739, ARHGAP10, AL157702.2, DUSP1,
## RPS6KA2, CEP85L, CAMK1, OTOA, ANKRD44, RSPO3, TMCC1.
## PC_ 1 
## Positive:  MT-ATP8, MTRNR2L1, FABP4, FABP3, GAPDH, HAMP, S100A9, AC105402.3, C1orf56, PVALB 
##     AL136097.2, TUBA1B, CARD16, FAH, BIRC7, C1QC, UTS2, gene-HIV1-2, MT2A, NUPR1 
##     BLVRB, BEX3, LILRA1, GCHFR, AC025154.2, MT1G, TXN, GSTP1, PRDX6, FXYD2 
## Negative:  ARL15, DOCK3, NEAT1, EXOC4, FTX, MALAT1, RASAL2, LRMDA, DPYD, JMJD1C 
##     TMEM117, FHIT, PLXDC2, VPS13B, MITF, ZEB2, DENND4C, MAML2, ATG7, ZNF438 
##     TPRG1, COP1, FRMD4B, ELMO1, JARID2, ATXN1, FMNL2, TCF12, ERC1, ARID1B 
## PC_ 2 
## Positive:  CYSTM1, FAH, GDF15, PSAT1, CD164, FDX1, ATP6V1D, CCPG1, BCAT1, SAT1 
##     SLAMF9, TXN, PHGDH, HES2, CSTA, HEBP2, NUPR1, STMN1, TCEA1, CTSL 
##     GARS, MARCO, RETREG1, S100A10, RILPL2, CLGN, SNHG5, SCD, RHOQ, SNHG32 
## Negative:  SPRED1, RCBTB2, KCNMA1, GCLC, HLA-DRB1, HLA-DRA, FGL2, CD74, HLA-DPA1, MRC1 
##     SLCO2B1, AC020656.1, HLA-DPB1, C1QA, RTN1, LINC02345, STAC, MERTK, PDGFC, VAMP5 
##     CCDC102B, DPYD, RNF128, MX1, ATP8B4, TGFBI, CRIP1, NFATC3, HLA-DRB5, ATG7 
## PC_ 3 
## Positive:  NCAPH, CRABP2, TM4SF19, GAL, DUSP2, RGCC, CHI3L1, CCL22, TMEM114, ACAT2 
##     TRIM54, RGS20, LINC01010, LINC02244, MGST1, TCTEX1D2, GPC4, NUMB, CCND1, SYNGR1 
##     CLU, SERTAD2, AC092353.2, POLE4, SLC20A1, IL1RN, ZNF366, AC005280.2, OCSTAMP, GCLC 
## Negative:  MARCKS, CD14, TGFBI, BTG1, MS4A6A, HLA-DQA1, FPR3, CTSC, CD163, MPEG1 
##     MEF2C, AIF1, TIMP1, HLA-DPB1, C1QC, HLA-DQB1, SELENOP, RNASE1, HLA-DQA2, NAMPT 
##     TCF4, EPB41L3, ARL4C, NUPR1, ALDH2, SAMSN1, ZNF331, HIF1A, MS4A7, SEMA4A 
## PC_ 4 
## Positive:  MT-ATP8, MTRNR2L1, XIST, PDE4D, AL035446.2, AC073359.2, AL365295.1, CCDC26, EPHB1, LY86-AS1 
##     CLVS1, RAD51B, PRKCE, C5orf17, LINC02320, FTX, DMGDH, LINC01473, LINC01135, FHIT 
##     AC008591.1, PLEKHA5, MIR646HG, BCAS3, PKD1L1, AF117829.1, LINC02649, ZBTB20, STX18-AS1, AC079142.1 
## Negative:  HLA-DRB1, CTSB, TUBA1B, HSP90B1, CD74, GAPDH, ACTG1, HLA-DPA1, HLA-DRA, IFI30 
##     RGCC, HSPA5, AIF1, C15orf48, C1QA, LGMN, TUBB, CYP1B1, HLA-DPB1, FBP1 
##     CA2, CCL3, TUBA1A, HLA-DQB1, S100A4, RNASE6, LYZ, TXN, MMP9, TNFSF13 
## PC_ 5 
## Positive:  RGCC, MMP9, IFI30, CTSB, PTGDS, LINC02244, MGST1, HLA-DRB1, CHI3L1, LYZ 
##     CD74, C15orf48, HSPA5, FABP3, GCLC, GCHFR, CRABP2, HLA-DPA1, TXN, HLA-DRA 
##     ISG15, S100A10, NCAPH, TMEM176B, CDKN2A, RALA, HSP90B1, SAT1, CLU, VAMP5 
## Negative:  TYMS, PCLAF, TK1, MKI67, MYBL2, CENPM, RRM2, BIRC5, CLSPN, CEP55 
##     SHCBP1, DIAPH3, CDK1, CENPK, PRC1, CENPF, ESCO2, NUSAP1, TOP2A, HELLS 
##     NCAPG, ANLN, FAM111B, TPX2, CCNA2, KIF11, ASPM, CENPU, MAD2L1, HMMR
DimHeatmap(mono, dims = 1:2, cells = 500, balanced = TRUE)

DimHeatmap(mono, dims = 3:4, cells = 500, balanced = TRUE)

ElbowPlot(mono)

#comb <- JackStraw(comb, num.replicate = 100)
mono <- FindNeighbors(mono, dims = 1:4)
## Computing nearest neighbor graph
## Computing SNN
mono <- FindClusters(mono, algorithm = 3, resolution = 0.3, verbose = FALSE)

mono <- RunUMAP(mono, dims = 1:4)
## 17:04:52 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:04:52 Read 28289 rows and found 4 numeric columns
## 17:04:52 Using Annoy for neighbor search, n_neighbors = 30
## 17:04:52 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:04:54 Writing NN index file to temp file /tmp/Rtmp9zv4vx/file25988e35fac4ab
## 17:04:54 Searching Annoy index using 1 thread, search_k = 3000
## 17:05:02 Annoy recall = 100%
## 17:05:03 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:05:05 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:05:06 Commencing optimization for 200 epochs, with 936166 positive edges
## 17:05:13 Optimization finished
DimPlot(mono, reduction = "umap", label=TRUE)

DimPlot(mono, group.by="monaco_fine_annotation" , reduction = "umap", label=TRUE)

DimPlot(mono, group.by="sample" , reduction = "umap", label=TRUE)

Cell counts

Most cells are classified as monocytes.

cc <- table(meta_inf[,c("sample","monaco_broad_annotation")])

cc %>% kbl(caption="cell counts") %>% kable_paper("hover", full_width = F)
cell counts
Basophils Dendritic cells Monocytes Neutrophils NK cells Progenitors
active0 0 17 863 0 0 0
active1 0 3 476 0 0 0
active2 0 13 408 0 0 0
active3 0 4 432 0 0 0
active4 1 34 908 2 0 0
active5 0 8 613 0 0 0
active6 1 28 684 0 0 0
bystander0 0 48 1446 0 0 0
bystander1 0 31 1303 0 0 0
bystander2 0 47 490 3 0 0
bystander3 0 29 1028 2 0 0
bystander4 0 20 1100 0 1 1
bystander5 0 12 491 1 0 0
bystander6 0 20 558 3 0 0
latent0 1 17 1154 0 0 0
latent1 0 11 1240 0 0 0
latent2 0 11 337 0 0 0
latent3 0 12 740 0 0 0
latent4 0 30 1905 1 0 0
latent5 1 26 1804 0 0 0
latent6 0 52 1823 0 0 0
mock0 0 6 790 0 0 0
mock1 0 7 652 0 0 0
mock2 0 4 173 0 0 0
mock3 0 4 811 0 0 0
mock4 0 18 956 0 0 0
mock5 0 19 785 0 0 0
mock6 0 47 1304 1 2 0
react6 1 79 3015 1 1 1
tcc <- t(cc)

pctcc <- apply(tcc,2,function(x) { x/sum(x)*100} )

pctcc %>% kbl(caption="cell proportions") %>% kable_paper("hover", full_width = F)
cell proportions
active0 active1 active2 active3 active4 active5 active6 bystander0 bystander1 bystander2 bystander3 bystander4 bystander5 bystander6 latent0 latent1 latent2 latent3 latent4 latent5 latent6 mock0 mock1 mock2 mock3 mock4 mock5 mock6 react6
Basophils 0.000000 0.0000000 0.000000 0.0000000 0.1058201 0.000000 0.1402525 0.000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0853242 0.0000000 0.000000 0.000000 0.0000000 0.054615 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.0322789
Dendritic cells 1.931818 0.6263048 3.087886 0.9174312 3.5978836 1.288245 3.9270687 3.212851 2.323838 8.7037037 2.7384325 1.7825312 2.3809524 3.4423408 1.4505119 0.8792966 3.160919 1.595745 1.5495868 1.419989 2.773333 0.7537688 1.062215 2.259887 0.4907975 1.848049 2.363184 3.4711965 2.5500323
Monocytes 98.068182 99.3736952 96.912114 99.0825688 96.0846561 98.711755 95.9326788 96.787149 97.676162 90.7407407 97.0727101 98.0392157 97.4206349 96.0413081 98.4641638 99.1207034 96.839080 98.404255 98.3987603 98.525396 97.226667 99.2462312 98.937785 97.740113 99.5092025 98.151951 97.636816 96.3072378 97.3208522
Neutrophils 0.000000 0.0000000 0.000000 0.0000000 0.2116402 0.000000 0.0000000 0.000000 0.000000 0.5555556 0.1888574 0.0000000 0.1984127 0.5163511 0.0000000 0.0000000 0.000000 0.000000 0.0516529 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0738552 0.0322789
NK cells 0.000000 0.0000000 0.000000 0.0000000 0.0000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0891266 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.1477105 0.0322789
Progenitors 0.000000 0.0000000 0.000000 0.0000000 0.0000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0891266 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.000000 0.000000 0.0000000 0.0322789

Differential expression

We are going to use muscat for pseudobulk analysis. First need to convert seurat obj to singlecellexperiment object. Then summarise counts to pseudobulk level.

sce <- Seurat::as.SingleCellExperiment(comb, assay = "RNA")

head(colData(sce),2)
## DataFrame with 2 rows and 13 columns
##                        orig.ident nCount_RNA nFeature_RNA
##                          <factor>  <numeric>    <integer>
## mock0|AAACGAATCACATACG        mac      72710         7097
## mock0|AAACGCTCATCAGCGC        mac      49092         6276
##                                          cell      sample RNA_snn_res.0.5
##                                   <character> <character>        <factor>
## mock0|AAACGAATCACATACG mock0|AAACGAATCACATACG       mock0              3 
## mock0|AAACGCTCATCAGCGC mock0|AAACGCTCATCAGCGC       mock0              11
##                        seurat_clusters           cell_barcode
##                               <factor>            <character>
## mock0|AAACGAATCACATACG              3  mock0|AAACGAATCACATACG
## mock0|AAACGCTCATCAGCGC              11 mock0|AAACGCTCATCAGCGC
##                        monaco_broad_annotation monaco_broad_pruned_labels
##                                    <character>                <character>
## mock0|AAACGAATCACATACG               Monocytes                  Monocytes
## mock0|AAACGCTCATCAGCGC               Monocytes                  Monocytes
##                        monaco_fine_annotation monaco_fine_pruned_labels
##                                   <character>               <character>
## mock0|AAACGAATCACATACG    Classical monocytes       Classical monocytes
## mock0|AAACGCTCATCAGCGC    Classical monocytes       Classical monocytes
##                           ident
##                        <factor>
## mock0|AAACGAATCACATACG       3 
## mock0|AAACGCTCATCAGCGC       11
colnames(colData(sce))
##  [1] "orig.ident"                 "nCount_RNA"                
##  [3] "nFeature_RNA"               "cell"                      
##  [5] "sample"                     "RNA_snn_res.0.5"           
##  [7] "seurat_clusters"            "cell_barcode"              
##  [9] "monaco_broad_annotation"    "monaco_broad_pruned_labels"
## [11] "monaco_fine_annotation"     "monaco_fine_pruned_labels" 
## [13] "ident"
#muscat library
pb <- aggregateData(sce,
    assay = "counts", fun = "sum",
    by = c("monaco_broad_annotation", "sample"))

# one sheet per subpopulation
assayNames(pb)
## [1] "Basophils"       "Dendritic cells" "Monocytes"       "Neutrophils"    
## [5] "NK cells"        "Progenitors"
t(head(assay(pb)))
##            gene-HIV1-1 gene-HIV1-2 MIR1302-2HG FAM138A OR4F5 AL627309.1
## active0              0           0           0       0     0          0
## active1              0           0           0       0     0          0
## active2              0           0           0       0     0          0
## active3              0           0           0       0     0          0
## active4              2          29           0       0     0          0
## active5              0           0           0       0     0          0
## active6              0           2           0       0     0          0
## bystander0           0           0           0       0     0          0
## bystander1           0           0           0       0     0          0
## bystander2           0           0           0       0     0          0
## bystander3           0           0           0       0     0          0
## bystander4           0           0           0       0     0          0
## bystander5           0           0           0       0     0          0
## bystander6           0           0           0       0     0          0
## latent0              0           2           0       0     0          0
## latent1              0           0           0       0     0          0
## latent2              0           0           0       0     0          0
## latent3              0           0           0       0     0          0
## latent4              0           0           0       0     0          0
## latent5              0          15           0       0     0          0
## latent6              0           0           0       0     0          0
## mock0                0           0           0       0     0          0
## mock1                0           0           0       0     0          0
## mock2                0           0           0       0     0          0
## mock3                0           0           0       0     0          0
## mock4                0           0           0       0     0          0
## mock5                0           0           0       0     0          0
## mock6                0           0           0       0     0          0
## react6               0           0           0       0     0          0
plotMDS(assays(pb)[[3]], main="Monocytes" )

par(mfrow=c(2,3))

dump <- lapply(1:length(assays(pb)) , function(i) {
  cellname=names(assays(pb))[i]
  plotMDS(assays(pb)[[i]],cex=1,pch=19,main=paste(cellname),labels=colnames(assays(pb)[[1]]))
})

par(mfrow=c(1,1))

DE Prep

pbmono <- assays(pb)[["Monocytes"]]

head(pbmono)
##             active0 active1 active2 active3 active4 active5 active6 bystander0
## gene-HIV1-1   11089    7725    4907   11852   29173   20670   14188          0
## gene-HIV1-2   65635   42374   73030   35142  120841  105818  103519          0
## MIR1302-2HG       0       0       0       0       0       0       0          0
## FAM138A           0       0       0       0       0       0       0          0
## OR4F5             0       0       0       0       0       0       0          0
## AL627309.1       54      37      30       5      16      11      27         65
##             bystander1 bystander2 bystander3 bystander4 bystander5 bystander6
## gene-HIV1-1          0          0          0          0          0          0
## gene-HIV1-2          0          6          0          0          0          0
## MIR1302-2HG          0          0          0          0          0          0
## FAM138A              0          0          0          0          0          0
## OR4F5                0          0          0          0          0          0
## AL627309.1          51         19         20         38          9         14
##             latent0 latent1 latent2 latent3 latent4 latent5 latent6 mock0 mock1
## gene-HIV1-1     649     799     156     380    1777    1654    2215    37    67
## gene-HIV1-2    6540    7091    3290    2126   12238   13801   26684   542   517
## MIR1302-2HG       0       0       0       0       1       0       0     0     0
## FAM138A           0       0       0       0       0       0       0     0     0
## OR4F5             0       0       0       0       0       0       0     0     0
## AL627309.1       72      78      28       6      65      59      76    61    30
##             mock2 mock3 mock4 mock5 mock6 react6
## gene-HIV1-1     3   141    92   115  1038    895
## gene-HIV1-2   100  1078   727  1653  6685   7528
## MIR1302-2HG     0     0     0     0     0      0
## FAM138A         0     0     0     0     0      0
## OR4F5           0     0     0     0     0      0
## AL627309.1      6    15    46    26    52     37
dim(pbmono)
## [1] 36603    29
hiv <- pbmono[1:2,]
pbmono <- pbmono[3:nrow(pbmono),]

Gene sets

Gene ontology.

go <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")
names(go) <- gsub("_"," ",names(go))

DE1 Latently- vs productively-infected cells (groups 3 vs 4).

MDM group.

pb1m <- pbmono[,c(grep("active",colnames(pbmono))[1:4],grep("latent",colnames(pbmono))[1:4])]
head(pb1m)
##             active0 active1 active2 active3 latent0 latent1 latent2 latent3
## MIR1302-2HG       0       0       0       0       0       0       0       0
## FAM138A           0       0       0       0       0       0       0       0
## OR4F5             0       0       0       0       0       0       0       0
## AL627309.1       54      37      30       5      72      78      28       6
## AL627309.3        0       2       0       0       0       2       2       0
## AL627309.2        0       0       0       0       0       0       0       0
pb1mf <- pb1m[which(rowMeans(pb1m)>=10),]
head(pb1mf)
##            active0 active1 active2 active3 latent0 latent1 latent2 latent3
## AL627309.1      54      37      30       5      72      78      28       6
## AL627309.5      15      14      15       6      35      49       8      18
## LINC01409      129     114     104      27     153     221      60      34
## LINC01128      262     164     167      94     215     202      87     102
## LINC00115       22       6       7       4      23      10       4       5
## FAM41C          49      39      63      68      62      54      21      80
colSums(pb1mf)
##  active0  active1  active2  active3  latent0  latent1  latent2  latent3 
## 30842467 22854684 26195864 13879630 35237534 39383109 15058058 16636428
des1m <- as.data.frame(grepl("active",colnames(pb1mf)))
colnames(des1m) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb1mf , colData = des1m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1mf <- de
write.table(de1mf,"de1mf.tsv",sep="\t")

nrow(subset(de1mf,padj<0.05 & log2FoldChange>0))
## [1] 63
nrow(subset(de1mf,padj<0.05 & log2FoldChange<0))
## [1] 129
head(subset(de1mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active MDM cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells
baseMean log2FoldChange lfcSE stat pvalue padj
NMRK2 1074.67323 1.8875954 0.3119420 6.051109 0.00e+00 0.0000021
DRAXIN 437.34530 1.0201166 0.1785128 5.714530 0.00e+00 0.0000136
SNN 1654.20307 0.4610305 0.0929074 4.962256 7.00e-07 0.0004148
LINC01283 57.96925 2.2989590 0.4974259 4.621711 3.80e-06 0.0016184
SPHK1 657.69565 0.6715581 0.1477030 4.546679 5.40e-06 0.0021113
NDFIP2 253.88188 0.8328987 0.1844892 4.514620 6.30e-06 0.0023601
GLTP 867.99599 0.5206143 0.1161963 4.480475 7.40e-06 0.0026392
UBE2J1 2682.56714 0.4338202 0.0976798 4.441249 8.90e-06 0.0029800
MIR155HG 303.86139 0.9212520 0.2138280 4.308378 1.64e-05 0.0047069
DIRC3 353.61101 1.0195426 0.2368761 4.304117 1.68e-05 0.0047079
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in active MDM cells") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells
baseMean log2FoldChange lfcSE stat pvalue padj
AL031123.1 522.7177 -0.9662151 0.1225730 -7.882770 0 0.0e+00
HIST1H1C 231.6309 -2.1253675 0.2958066 -7.184990 0 0.0e+00
EVI2A 562.4969 -1.1141044 0.1595318 -6.983587 0 0.0e+00
TGFBI 459.4321 -2.3312427 0.3338677 -6.982533 0 0.0e+00
IGF1R 387.8538 -1.0023515 0.1441265 -6.954663 0 0.0e+00
RCBTB2 798.4883 -1.3656222 0.2027045 -6.737011 0 0.0e+00
IGFBP7 201.0449 -1.0455119 0.1618281 -6.460632 0 2.0e-07
CFD 3142.4873 -1.5714825 0.2440764 -6.438487 0 2.0e-07
HVCN1 883.5545 -0.8124014 0.1316376 -6.171499 0 1.1e-06
MLLT3 126.9514 -1.2348104 0.2115674 -5.836487 0 7.2e-06
m1m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14930
## Note: no. genes in output = 14930
## Note: estimated proportion of input genes in output = 1
mres1m <- mitch_calc(m1m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres1m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_latent_vs_active.html") ) {
  mitch_report(mres1m,outfile="MDM_latent_vs_active.html")
}

Alv cells.

pb1a <- pbmono[,c(grep("active",colnames(pbmono))[5:7],grep("latent",colnames(pbmono))[5:7])]
head(pb1a)
##             active4 active5 active6 latent4 latent5 latent6
## MIR1302-2HG       0       0       0       1       0       0
## FAM138A           0       0       0       0       0       0
## OR4F5             0       0       0       0       0       0
## AL627309.1       16      11      27      65      59      76
## AL627309.3        0       0       0       1       1       0
## AL627309.2        0       0       0       0       0       1
pb1af <- pb1a[which(rowMeans(pb1a)>=10),]
head(pb1af)
##            active4 active5 active6 latent4 latent5 latent6
## AL627309.1      16      11      27      65      59      76
## AL627309.5      26      23      15     108      90      67
## LINC01409       87     151      92     133     306     198
## LINC01128      293     284     219     299     413     348
## LINC00115       10       7      10      14      27      21
## FAM41C         136     117      84     123     134     150
colSums(pb1af)
##  active4  active5  active6  latent4  latent5  latent6 
## 30062063 28512290 24034851 43690232 56861881 52274352
des1a <- as.data.frame(grepl("active",colnames(pb1af)))
colnames(des1a) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb1af , colData = des1a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1af <- de
write.table(de1af,"de1af.tsv",sep="\t")

nrow(subset(de1af,padj<0.05 & log2FoldChange>0))
## [1] 728
nrow(subset(de1af,padj<0.05 & log2FoldChange<0))
## [1] 516
head(subset(de1af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
TNFRSF9 234.9777 2.554378 0.2373229 10.763298 0 0
LAYN 796.6365 1.993289 0.1872469 10.645244 0 0
GPR183 728.9516 1.698811 0.1598683 10.626316 0 0
IL6R-AS1 639.3675 3.344442 0.3299866 10.135084 0 0
AC025154.2 540.3679 1.680591 0.1671525 10.054239 0 0
AL157912.1 1103.5795 2.244131 0.2252164 9.964333 0 0
DNAJA4 409.1289 1.640952 0.1670522 9.822992 0 0
CDKN1A 7744.9647 1.296803 0.1358093 9.548707 0 0
TNFSF15 643.0691 2.166424 0.2336359 9.272648 0 0
SDS 6980.4298 1.742274 0.1889640 9.220139 0 0
head(subset(de1af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
HIST1H1C 1334.8700 -1.545697 0.1413173 -10.937778 0 0
NDRG2 454.9626 -1.861218 0.1797278 -10.355764 0 0
CRIM1 562.9900 -1.757921 0.1816972 -9.675004 0 0
CEBPD 977.2921 -1.521105 0.1633979 -9.309211 0 0
ANPEP 1719.1661 -1.419452 0.1524820 -9.308984 0 0
CDA 3857.9549 -1.401971 0.1529377 -9.166946 0 0
LYZ 62727.8698 -1.134071 0.1312488 -8.640614 0 0
ADA2 2554.1200 -1.094289 0.1283573 -8.525333 0 0
PIK3CD 865.4022 -1.176155 0.1379745 -8.524439 0 0
CHST13 650.0852 -1.654109 0.1981835 -8.346353 0 0
m1a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16735
## Note: no. genes in output = 16735
## Note: estimated proportion of input genes in output = 1
mres1a <- mitch_calc(m1a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres1a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_latent_vs_active.html") ) {
  mitch_report(mres1a,outfile="Alv_latent_vs_active.html")
}

DE2 Latently-infected vs bystander cells

MDM group.

pb2m <- pbmono[,c(grep("bystander",colnames(pbmono))[1:4],grep("latent",colnames(pbmono))[1:4])]
head(pb2m)
##             bystander0 bystander1 bystander2 bystander3 latent0 latent1 latent2
## MIR1302-2HG          0          0          0          0       0       0       0
## FAM138A              0          0          0          0       0       0       0
## OR4F5                0          0          0          0       0       0       0
## AL627309.1          65         51         19         20      72      78      28
## AL627309.3           2          4          0          0       0       2       2
## AL627309.2           0          0          0          0       0       0       0
##             latent3
## MIR1302-2HG       0
## FAM138A           0
## OR4F5             0
## AL627309.1        6
## AL627309.3        0
## AL627309.2        0
pb2mf <- pb2m[which(rowMeans(pb2m)>=10),]
head(pb2mf)
##            bystander0 bystander1 bystander2 bystander3 latent0 latent1 latent2
## AL627309.1         65         51         19         20      72      78      28
## AL627309.5         33         35          8         27      35      49       8
## LINC01409         136        246         63         58     153     221      60
## LINC01128         216        208        111        141     215     202      87
## LINC00115          18         24          7          1      23      10       4
## FAM41C             43         69         19         82      62      54      21
##            latent3
## AL627309.1       6
## AL627309.5      18
## LINC01409       34
## LINC01128      102
## LINC00115        5
## FAM41C          80
colSums(pb2mf)
## bystander0 bystander1 bystander2 bystander3    latent0    latent1    latent2 
##   40546820   36893233   16234146   20814781   35240150   39387253   15059728 
##    latent3 
##   16638493
des2m <- as.data.frame(grepl("latent",colnames(pb2mf)))
colnames(des2m) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
#write.table(de2mf,"de2mf.tsv",sep="\t")

nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (MDM unpaired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
AK5 15.05395 1.1816719 0.4658904 2.536373 0.0112007 0.9999921
ALDH8A1 14.71811 1.0528431 0.4617860 2.279937 0.0226114 0.9999921
GRIP1 16.81663 1.3228992 0.5834082 2.267536 0.0233575 0.9999921
AL603840.1 57.88118 0.5323230 0.2444172 2.177928 0.0294114 0.9999921
ANKRD55 27.96977 0.7305876 0.3382814 2.159704 0.0307956 0.9999921
PPFIA3 16.22125 0.9211957 0.4280414 2.152118 0.0313881 0.9999921
KIAA1549 11.25948 0.9927866 0.4624909 2.146608 0.0318245 0.9999921
DNAH10 11.24340 1.2264628 0.5846425 2.097800 0.0359228 0.9999921
TCAF1 321.32727 0.3064457 0.1497142 2.046871 0.0406708 0.9999921
CD163L1 47.78763 1.1462332 0.5642158 2.031551 0.0421991 0.9999921
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (MDM unpaired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
AC092142.1 8.874593 -1.4639853 0.5814365 -2.517877 0.0118065 0.9999921
AKR1C3 44.639234 -0.6594009 0.2705113 -2.437609 0.0147848 0.9999921
PASK 13.557613 -0.9541852 0.4436364 -2.150827 0.0314899 0.9999921
SNHG29 6178.071590 -0.1433975 0.0699609 -2.049681 0.0403956 0.9999921
SOWAHD 231.034620 -0.3853261 0.1890656 -2.038055 0.0415444 0.9999921
SYN2 11.648390 -0.9851932 0.4958940 -1.986701 0.0469555 0.9999921
AC123768.3 12.316020 -1.1100340 0.5617697 -1.975959 0.0481594 0.9999921
CDKL4 36.291936 -0.7281966 0.3905537 -1.864524 0.0622482 0.9999921
AC058791.1 37.697607 -0.6314356 0.3411188 -1.851072 0.0641591 0.9999921
DCAF15 110.261130 -0.4264170 0.2343414 -1.819640 0.0688138 0.9999921
des2m$sample <- rep(1:4,2)

dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ sample + case)
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
write.table(de2mf,"de2mf.tsv",sep="\t")

nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (MDM paired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM paired)
baseMean log2FoldChange lfcSE stat pvalue padj
AC015660.1 15.23099 1.3254698 0.4974453 2.664554 0.0077090 0.9999971
AK5 15.05395 1.1808827 0.4829616 2.445086 0.0144818 0.9999971
GRIP1 16.81663 1.3291608 0.5725859 2.321330 0.0202690 0.9999971
EGR3 12.13846 1.3882821 0.6065704 2.288740 0.0220944 0.9999971
CCSER1 166.89681 0.4356867 0.1935627 2.250881 0.0243931 0.9999971
ALDH8A1 14.71811 1.0517501 0.4730069 2.223541 0.0261794 0.9999971
NBPF26 63.46919 0.5246505 0.2401057 2.185081 0.0288829 0.9999971
TCAF1 321.32727 0.3053698 0.1415228 2.157742 0.0309479 0.9999971
PPFIA3 16.22125 0.9052509 0.4201300 2.154692 0.0311859 0.9999971
AL603840.1 57.88118 0.5236175 0.2460469 2.128121 0.0333271 0.9999971
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (MDM paired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM paired)
baseMean log2FoldChange lfcSE stat pvalue padj
AC092142.1 8.874593 -1.4827955 0.5974655 -2.481810 0.0130717 0.9999971
SNHG29 6178.071590 -0.1439740 0.0590698 -2.437354 0.0147952 0.9999971
AKR1C3 44.639234 -0.6556986 0.2756886 -2.378403 0.0173878 0.9999971
DCAF15 110.261130 -0.4116646 0.1738338 -2.368150 0.0178773 0.9999971
AP002761.1 26.247297 -0.8978419 0.4173554 -2.151264 0.0314553 0.9999971
PASK 13.557613 -0.9561974 0.4559365 -2.097216 0.0359744 0.9999971
NEGR1 16.942101 -0.9204080 0.4470928 -2.058651 0.0395277 0.9999971
TNFAIP8L2 390.333065 -0.2942246 0.1430726 -2.056470 0.0397372 0.9999971
PLCB1 67.430581 -1.6576782 0.8123472 -2.040603 0.0412903 0.9999971
EIF4A1 18848.808593 -0.1341236 0.0659899 -2.032485 0.0421046 0.9999971
m2m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15152
## Note: no. genes in output = 15152
## Note: estimated proportion of input genes in output = 1
mres2m <- mitch_calc(m2m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres2m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_bystander_vs_latent.html") ) {
  mitch_report(mres2m,outfile="MDM_bystander_vs_latent.html")
}

Alv cells.

pb2a <- pbmono[,c(grep("bystander",colnames(pbmono))[5:7],grep("latent",colnames(pbmono))[5:7])]
head(pb2a)
##             bystander4 bystander5 bystander6 latent4 latent5 latent6
## MIR1302-2HG          0          0          0       1       0       0
## FAM138A              0          0          0       0       0       0
## OR4F5                0          0          0       0       0       0
## AL627309.1          38          9         14      65      59      76
## AL627309.3           1          0          0       1       1       0
## AL627309.2           0          0          0       0       0       1
pb2af <- pb2a[which(rowMeans(pb2a)>=10),]
head(pb2af)
##            bystander4 bystander5 bystander6 latent4 latent5 latent6
## AL627309.1         38          9         14      65      59      76
## AL627309.5         51         24         12     108      90      67
## LINC01409          83         75         73     133     306     198
## LINC01128         162        109        105     299     413     348
## LINC00115           7          5          2      14      27      21
## FAM41C             60         27         37     123     134     150
colSums(pb2af)
## bystander4 bystander5 bystander6    latent4    latent5    latent6 
##   22499172   14125923   13511893   43683486   56856783   52269186
des2a <- as.data.frame(grepl("latent",colnames(pb2af)))
colnames(des2a) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
#write.table(de2af,"de2af.tsv",sep="\t")

nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (Alv unpaired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
CXCL10 44.012170 4.2741860 1.1047839 3.868798 0.0001094 0.5928750
IGFL2 7.886268 3.7012817 1.3089618 2.827647 0.0046892 0.9999928
IL6R-AS1 67.015522 1.1151355 0.3973662 2.806317 0.0050111 0.9999928
AC008074.2 499.422599 0.3448462 0.1275850 2.702875 0.0068743 0.9999928
TSPAN33 1034.763894 0.3459507 0.1288327 2.685270 0.0072471 0.9999928
PGM2L1 536.284970 0.3759213 0.1403729 2.678020 0.0074059 0.9999928
CCL17 48.843825 1.7304610 0.6601862 2.621171 0.0087628 0.9999928
NDRG2 497.756042 0.4322300 0.1707487 2.531380 0.0113615 0.9999928
AC113404.1 17.559930 2.4633517 0.9805551 2.512201 0.0119981 0.9999928
AC131254.1 28.499803 1.7491837 0.7045568 2.482673 0.0130401 0.9999928
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (Alv unpaired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv unpaired)
baseMean log2FoldChange lfcSE stat pvalue padj
STMN1 566.46714 -0.6392585 0.1450765 -4.406355 0.0000105 0.1709534
TOP2A 116.84436 -1.5152874 0.3805821 -3.981499 0.0000685 0.5568270
CENPF 114.89641 -1.5106116 0.4150031 -3.640001 0.0002726 0.9999928
CDKN3 98.04444 -0.9608618 0.3247637 -2.958649 0.0030899 0.9999928
S100A8 1122.32389 -0.4171230 0.1479561 -2.819235 0.0048138 0.9999928
GTSE1 43.18318 -1.3775450 0.4948834 -2.783575 0.0053763 0.9999928
PBK 18.32348 -1.7734490 0.6531597 -2.715184 0.0066239 0.9999928
ASPM 61.36350 -1.5226501 0.6052329 -2.515808 0.0118760 0.9999928
DLGAP5 45.23869 -1.5881515 0.6341380 -2.504425 0.0122650 0.9999928
CDK1 85.60023 -1.4296657 0.5767159 -2.478977 0.0131760 0.9999928
des2a$sample <- rep(1:3,2)

dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
write.table(de2af,"de2af.tsv",sep="\t")

nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent compared to bystander (Alv paired)") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv paired)
baseMean log2FoldChange lfcSE stat pvalue padj
CXCL10 44.012170 4.2741860 1.1047839 3.868798 0.0001094 0.5928750
IGFL2 7.886268 3.7012817 1.3089618 2.827647 0.0046892 0.9999928
IL6R-AS1 67.015522 1.1151355 0.3973662 2.806317 0.0050111 0.9999928
AC008074.2 499.422599 0.3448462 0.1275850 2.702875 0.0068743 0.9999928
TSPAN33 1034.763894 0.3459507 0.1288327 2.685270 0.0072471 0.9999928
PGM2L1 536.284970 0.3759213 0.1403729 2.678020 0.0074059 0.9999928
CCL17 48.843825 1.7304610 0.6601862 2.621171 0.0087628 0.9999928
NDRG2 497.756042 0.4322300 0.1707487 2.531380 0.0113615 0.9999928
AC113404.1 17.559930 2.4633517 0.9805551 2.512201 0.0119981 0.9999928
AC131254.1 28.499803 1.7491837 0.7045568 2.482673 0.0130401 0.9999928
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in latent compared to bystander (Alv paired)") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv paired)
baseMean log2FoldChange lfcSE stat pvalue padj
STMN1 566.46714 -0.6392585 0.1450765 -4.406355 0.0000105 0.1709534
TOP2A 116.84436 -1.5152874 0.3805821 -3.981499 0.0000685 0.5568270
CENPF 114.89641 -1.5106116 0.4150031 -3.640001 0.0002726 0.9999928
CDKN3 98.04444 -0.9608618 0.3247637 -2.958649 0.0030899 0.9999928
S100A8 1122.32389 -0.4171230 0.1479561 -2.819235 0.0048138 0.9999928
GTSE1 43.18318 -1.3775450 0.4948834 -2.783575 0.0053763 0.9999928
PBK 18.32348 -1.7734490 0.6531597 -2.715184 0.0066239 0.9999928
ASPM 61.36350 -1.5226501 0.6052329 -2.515808 0.0118760 0.9999928
DLGAP5 45.23869 -1.5881515 0.6341380 -2.504425 0.0122650 0.9999928
CDK1 85.60023 -1.4296657 0.5767159 -2.478977 0.0131760 0.9999928
m2a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16279
## Note: no. genes in output = 16279
## Note: estimated proportion of input genes in output = 1
mres2a <- mitch_calc(m2a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres2a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_bystander_vs_latent.html") ) {
  mitch_report(mres2a,outfile="Alv_bystander_vs_latent.html")
}

DE3 Active vs mock

MDM group.

pb3m <- pbmono[,c(grep("active",colnames(pbmono))[1:4],grep("mock",colnames(pbmono))[1:4])]
head(pb3m)
##             active0 active1 active2 active3 mock0 mock1 mock2 mock3
## MIR1302-2HG       0       0       0       0     0     0     0     0
## FAM138A           0       0       0       0     0     0     0     0
## OR4F5             0       0       0       0     0     0     0     0
## AL627309.1       54      37      30       5    61    30     6    15
## AL627309.3        0       2       0       0     0     0     0     0
## AL627309.2        0       0       0       0     0     0     0     0
pb3mf <- pb3m[which(rowMeans(pb3m)>=10),]
head(pb3mf)
##            active0 active1 active2 active3 mock0 mock1 mock2 mock3
## AL627309.1      54      37      30       5    61    30     6    15
## AL627309.5      15      14      15       6    17    27     1    16
## LINC01409      129     114     104      27   119   116    33    57
## LINC01128      262     164     167      94   173   118    57   159
## FAM41C          49      39      63      68    61    25    14    85
## NOC2L          183     126     246     155   185   153    75   221
colSums(pb3mf)
##  active0  active1  active2  active3    mock0    mock1    mock2    mock3 
## 30838055 22850851 26192488 13877828 29112212 21615267  7549870 20729914
des3m <- as.data.frame(grepl("active",colnames(pb3mf)))
colnames(des3m) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb3mf , colData = des3m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3mf <- de
write.table(de3mf,"de3mf.tsv",sep="\t")

nrow(subset(de3mf,padj<0.05 & log2FoldChange>0))
## [1] 102
nrow(subset(de3mf,padj<0.05 & log2FoldChange<0))
## [1] 220
head(subset(de3mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
PLAAT3 35.54458 2.6018024 0.4350913 5.979899 0.0e+00 0.0000031
LYRM1 207.51046 0.9010836 0.1550569 5.811308 0.0e+00 0.0000060
FAM241B 98.31075 1.4123422 0.2609457 5.412399 1.0e-07 0.0000431
CDKN1A 2611.64337 1.1118551 0.2071428 5.367578 1.0e-07 0.0000484
GRIP1 33.91692 2.0231034 0.3825670 5.288233 1.0e-07 0.0000641
DDB2 470.91041 0.6805458 0.1292762 5.264279 1.0e-07 0.0000706
KCNMB2-AS1 215.01389 1.5174273 0.2887412 5.255319 1.0e-07 0.0000716
FAS 177.02499 0.9989147 0.1966111 5.080661 4.0e-07 0.0001505
MDM2 3009.65137 1.0736852 0.2190317 4.901962 9.0e-07 0.0003209
SFXN1 174.71064 0.8474226 0.1751359 4.838657 1.3e-06 0.0004321
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in active MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
AL031123.1 522.7177 -0.9662151 0.1225730 -7.882770 0 0.0e+00
HIST1H1C 231.6309 -2.1253675 0.2958066 -7.184990 0 0.0e+00
EVI2A 562.4969 -1.1141044 0.1595318 -6.983587 0 0.0e+00
TGFBI 459.4321 -2.3312427 0.3338677 -6.982533 0 0.0e+00
IGF1R 387.8538 -1.0023515 0.1441265 -6.954663 0 0.0e+00
RCBTB2 798.4883 -1.3656222 0.2027045 -6.737011 0 0.0e+00
IGFBP7 201.0449 -1.0455119 0.1618281 -6.460632 0 2.0e-07
CFD 3142.4873 -1.5714825 0.2440764 -6.438487 0 2.0e-07
HVCN1 883.5545 -0.8124014 0.1316376 -6.171499 0 1.1e-06
MLLT3 126.9514 -1.2348104 0.2115674 -5.836487 0 7.2e-06
m3m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14592
## Note: no. genes in output = 14592
## Note: estimated proportion of input genes in output = 1
mres3m <- mitch_calc(m3m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres3m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_mock_vs_active.html") ) {
  mitch_report(mres3m,outfile="MDM_mock_vs_active.html")
}
pb3a <- pbmono[,c(grep("active",colnames(pbmono))[5:7],grep("mock",colnames(pbmono))[5:7])]
head(pb3a)
##             active4 active5 active6 mock4 mock5 mock6
## MIR1302-2HG       0       0       0     0     0     0
## FAM138A           0       0       0     0     0     0
## OR4F5             0       0       0     0     0     0
## AL627309.1       16      11      27    46    26    52
## AL627309.3        0       0       0     0     0     1
## AL627309.2        0       0       0     0     0     0
pb3af <- pb3a[which(rowMeans(pb3a)>=10),]
head(pb3af)
##            active4 active5 active6 mock4 mock5 mock6
## AL627309.1      16      11      27    46    26    52
## AL627309.5      26      23      15    63    29    50
## LINC01409       87     151      92    59   103   140
## LINC01128      293     284     219   176   200   250
## LINC00115       10       7      10    10    12    15
## FAM41C         136     117      84    74    53    98
colSums(pb3af)
##  active4  active5  active6    mock4    mock5    mock6 
## 30051410 28505235 24028409 20446755 25545005 34873898
des3a <- as.data.frame(grepl("active",colnames(pb3af)))
colnames(des3a) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb3af , colData = des3a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3af <- de
write.table(de3af,"de3af.tsv",sep="\t")

nrow(subset(de3af,padj<0.05 & log2FoldChange>0))
## [1] 891
nrow(subset(de3af,padj<0.05 & log2FoldChange<0))
## [1] 653
head(subset(de3af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
AL157912.1 754.26670 2.731970 0.2091846 13.060089 0 0
LAYN 553.09496 2.261020 0.1871843 12.079108 0 0
HES4 382.20667 2.359190 0.2058619 11.460064 0 0
CDKN1A 5303.58884 1.555197 0.1361511 11.422583 0 0
OCIAD2 353.43173 2.483107 0.2219754 11.186406 0 0
SDS 4763.87880 2.107153 0.1953024 10.789184 0 0
IL6R-AS1 455.12951 3.520018 0.3307596 10.642225 0 0
CCL7 1444.11412 1.947067 0.1938562 10.043870 0 0
TNFRSF9 169.65948 2.509770 0.2514923 9.979509 0 0
CLIC6 64.19776 4.054497 0.4350349 9.319935 0 0
head(subset(de3af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
NDRG2 317.0287 -1.8033036 0.1656273 -10.887716 0 0
HIST1H1C 791.4543 -1.1579180 0.1105796 -10.471351 0 0
CEBPD 686.4608 -1.4769083 0.1637149 -9.021221 0 0
ADA2 1791.4889 -1.0408494 0.1158943 -8.981020 0 0
CRIM1 388.2713 -1.6778340 0.1948415 -8.611277 0 0
LYZ 46011.7884 -1.1761399 0.1385676 -8.487841 0 0
RPL10A 8121.1434 -0.8692328 0.1046647 -8.304927 0 0
CHST13 459.7071 -1.6239334 0.1970679 -8.240475 0 0
TGFBI 395.6418 -2.3302926 0.2874197 -8.107631 0 0
PIK3CD 583.8467 -1.0404514 0.1382467 -7.526050 0 0
m3a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15748
## Note: no. genes in output = 15748
## Note: estimated proportion of input genes in output = 1
mres3a <- mitch_calc(m3a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres3a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_mock_vs_active.html") ) {
  mitch_report(mres3a,outfile="Alv_mock_vs_active.html")
}

DE4 Mock vs bystander

MDM group.

pb4m <- pbmono[,c(grep("mock",colnames(pbmono))[1:4],grep("bystander",colnames(pbmono))[1:4])]
head(pb4m)
##             mock0 mock1 mock2 mock3 bystander0 bystander1 bystander2 bystander3
## MIR1302-2HG     0     0     0     0          0          0          0          0
## FAM138A         0     0     0     0          0          0          0          0
## OR4F5           0     0     0     0          0          0          0          0
## AL627309.1     61    30     6    15         65         51         19         20
## AL627309.3      0     0     0     0          2          4          0          0
## AL627309.2      0     0     0     0          0          0          0          0
pb4mf <- pb4m[which(rowMeans(pb4m)>=10),]
head(pb4mf)
##            mock0 mock1 mock2 mock3 bystander0 bystander1 bystander2 bystander3
## AL627309.1    61    30     6    15         65         51         19         20
## AL627309.5    17    27     1    16         33         35          8         27
## LINC01409    119   116    33    57        136        246         63         58
## LINC01128    173   118    57   159        216        208        111        141
## FAM41C        61    25    14    85         43         69         19         82
## NOC2L        185   153    75   221        261        253        105        221
colSums(pb4mf)
##      mock0      mock1      mock2      mock3 bystander0 bystander1 bystander2 
##   29115390   21618583    7551101   20732742   40541968   36888193   16232018 
## bystander3 
##   20812550
des4m <- as.data.frame(grepl("bystander",colnames(pb4mf)))
colnames(des4m) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb4mf , colData = des4m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4mf <- de
write.table(de4mf,"de4mf.tsv",sep="\t")

nrow(subset(de4mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de4mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de4mf, log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in bystander MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in bystander MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
ISG15 9.581964e+02 0.7481468 0.1770234 4.226260 0.0000238 0.3512317
PLAAT3 2.158600e+01 1.5238610 0.3771661 4.040292 0.0000534 0.3945669
CCL8 9.105935e+00 3.0204487 0.8337410 3.622766 0.0002915 0.8616994
PSME2 3.488322e+03 0.3628331 0.1041101 3.485090 0.0004920 0.9999825
RBP7 3.255822e+02 0.7310547 0.2625760 2.784164 0.0053666 0.9999825
IL3RA 2.090766e+01 1.3943247 0.5148105 2.708423 0.0067604 0.9999825
AL022724.3 9.883659e+00 1.5278869 0.5704803 2.678247 0.0074009 0.9999825
IFI6 1.175490e+04 0.8461631 0.3376461 2.506065 0.0122083 0.9999825
APOE 2.866074e+05 0.2505801 0.1007699 2.486658 0.0128949 0.9999825
SLC9B1 4.880829e+01 0.7097613 0.2861211 2.480632 0.0131150 0.9999825
head(subset(de4mf, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top downregulated genes in bystander MDM cells compared to mock") %>%
  kable_paper("hover", full_width = F)
Top downregulated genes in bystander MDM cells compared to mock
baseMean log2FoldChange lfcSE stat pvalue padj
MKI67 142.22093 -2.4532927 0.6692499 -3.665735 0.0002466 0.8616994
ANLN 34.25600 -1.8308757 0.5006972 -3.656652 0.0002555 0.8616994
TOP2A 74.55230 -2.2512332 0.6515213 -3.455349 0.0005496 0.9999825
CENPF 123.39692 -2.1330043 0.6199216 -3.440765 0.0005801 0.9999825
TPX2 71.13683 -1.3763558 0.4157971 -3.310162 0.0009324 0.9999825
TYMS 129.47857 -1.6430364 0.5091321 -3.227132 0.0012504 0.9999825
CDK1 59.07470 -2.2656029 0.7147499 -3.169784 0.0015255 0.9999825
CCNB1 141.60884 -0.8643030 0.2739723 -3.154709 0.0016066 0.9999825
UBE2C 68.40282 -2.5544715 0.8122793 -3.144819 0.0016619 0.9999825
ATAD2 196.22099 -0.6122296 0.1958850 -3.125455 0.0017753 0.9999825
m4m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14829
## Note: no. genes in output = 14829
## Note: estimated proportion of input genes in output = 1
mres4m <- mitch_calc(m4m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres4m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("MDM_mock_vs_bystander.html") ) {
  mitch_report(mres4m,outfile="MDM_mock_vs_bystander.html")
}

Alv cells.

pb4a <- pbmono[,c(grep("mock",colnames(pbmono))[5:7],grep("bystander",colnames(pbmono))[5:7])]
head(pb4a)
##             mock4 mock5 mock6 bystander4 bystander5 bystander6
## MIR1302-2HG     0     0     0          0          0          0
## FAM138A         0     0     0          0          0          0
## OR4F5           0     0     0          0          0          0
## AL627309.1     46    26    52         38          9         14
## AL627309.3      0     0     1          1          0          0
## AL627309.2      0     0     0          0          0          0
pb4af <- pb4a[which(rowMeans(pb4a)>=10),]
head(pb4af)
##            mock4 mock5 mock6 bystander4 bystander5 bystander6
## AL627309.1    46    26    52         38          9         14
## AL627309.5    63    29    50         51         24         12
## LINC01409     59   103   140         83         75         73
## LINC01128    176   200   250        162        109        105
## FAM41C        74    53    98         60         27         37
## SAMD11        19    44    37         29         16         24
colSums(pb4af)
##      mock4      mock5      mock6 bystander4 bystander5 bystander6 
##   20441633   25539944   34866574   22487395   14121087   13506744
des4a <- as.data.frame(grepl("bystander",colnames(pb4af)))
colnames(des4a) <- "case"

dds <- DESeqDataSetFromMatrix(countData = pb4af , colData = des4a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4af <- de
write.table(de4af,"de4af.tsv",sep="\t")

nrow(subset(de4af,padj<0.05 & log2FoldChange>0))
## [1] 34
nrow(subset(de4af,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de4af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in latent Alv cells compared to ") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in latent Alv cells compared to
baseMean log2FoldChange lfcSE stat pvalue padj
IFI6 14056.2140 2.519812 0.3516368 7.165952 0e+00 0.0000000
PARP14 1373.6146 1.158578 0.1699413 6.817516 0e+00 0.0000001
EPSTI1 451.5036 3.158925 0.4698560 6.723178 0e+00 0.0000001
IFI44L 110.5209 4.268213 0.7249179 5.887858 0e+00 0.0000148
LY6E 7066.7899 1.224629 0.2212063 5.536140 0e+00 0.0000935
OAS1 515.0690 1.508904 0.2764750 5.457649 0e+00 0.0001216
OAS2 433.7604 2.170933 0.4130633 5.255691 1e-07 0.0002860
PLSCR1 626.7749 1.356400 0.2583158 5.250936 2e-07 0.0002860
IRF7 151.9566 1.638111 0.3201803 5.116214 3e-07 0.0004805
IFITM3 363.0933 1.758384 0.3439334 5.112572 3e-07 0.0004805
head(subset(de4af, log2FoldChange<0),10)[,1:6] %>%
  kbl(caption="Top upregulated genes in active Alv cells") %>%
  kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
baseMean log2FoldChange lfcSE stat pvalue padj
SPP1 42572.64456 -0.3791828 0.0816333 -4.644950 0.0000034 0.0030256
PPBP 25.43916 -6.1176830 1.6189686 -3.778753 0.0001576 0.0595826
MTSS1 204.32602 -0.7552875 0.2151692 -3.510203 0.0004478 0.1538782
FCGBP 24.74282 -4.0993571 1.2982279 -3.157656 0.0015904 0.4537531
CCL7 377.23383 -0.7745713 0.2736480 -2.830539 0.0046470 0.9999996
C3orf14 248.32783 -0.4720003 0.1708214 -2.763122 0.0057251 0.9999996
F13A1 36.21284 -5.1869923 1.8823361 -2.755614 0.0058582 0.9999996
CLEC5A 29.78097 -2.0391123 0.7478480 -2.726640 0.0063983 0.9999996
MT1H 898.86105 -0.4389781 0.1650434 -2.659773 0.0078193 0.9999996
STEAP1B 36.95284 -1.1214690 0.4341759 -2.582983 0.0097950 0.9999996
m4a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15135
## Note: no. genes in output = 15135
## Note: estimated proportion of input genes in output = 1
mres4a <- mitch_calc(m4a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res <- mres4a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")

if (! file.exists("Alv_mock_vs_bystander.html") ) {
  mitch_report(mres4a,outfile="Alv_mock_vs_bystander.html")
}

Session information

For reproducibility.

save.image("scanalysis.Rdata")

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pkgload_1.4.0               GGally_2.2.1               
##  [3] reshape2_1.4.4              gplots_3.1.3.1             
##  [5] gtools_3.9.5                tibble_3.2.1               
##  [7] dplyr_1.1.4                 echarts4r_0.4.5            
##  [9] limma_3.60.0                SingleR_2.6.0              
## [11] celldex_1.14.0              harmony_1.2.0              
## [13] Rcpp_1.0.12                 mitch_1.16.0               
## [15] DESeq2_1.44.0               muscat_1.18.0              
## [17] beeswarm_0.4.0              stringi_1.8.4              
## [19] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [21] Biobase_2.64.0              GenomicRanges_1.56.0       
## [23] GenomeInfoDb_1.40.0         IRanges_2.38.0             
## [25] S4Vectors_0.42.0            BiocGenerics_0.50.0        
## [27] MatrixGenerics_1.16.0       matrixStats_1.3.0          
## [29] hdf5r_1.3.11                Seurat_5.1.0               
## [31] SeuratObject_5.0.2          sp_2.1-4                   
## [33] plyr_1.8.9                  ggplot2_3.5.1              
## [35] kableExtra_1.4.0           
## 
## loaded via a namespace (and not attached):
##   [1] progress_1.2.3            goftest_1.2-3            
##   [3] Biostrings_2.72.0         HDF5Array_1.32.0         
##   [5] vctrs_0.6.5               spatstat.random_3.2-3    
##   [7] digest_0.6.36             png_0.1-8                
##   [9] corpcor_1.6.10            shape_1.4.6.1            
##  [11] gypsum_1.0.1              ggrepel_0.9.5            
##  [13] deldir_2.0-4              parallelly_1.37.1        
##  [15] MASS_7.3-61               httpuv_1.6.15            
##  [17] foreach_1.5.2             withr_3.0.0              
##  [19] xfun_0.45                 survival_3.7-0           
##  [21] memoise_2.0.1.9000        ggbeeswarm_0.7.2         
##  [23] systemfonts_1.1.0         zoo_1.8-12               
##  [25] GlobalOptions_0.1.2       pbapply_1.7-2            
##  [27] prettyunits_1.2.0         KEGGREST_1.44.0          
##  [29] promises_1.3.0            httr_1.4.7               
##  [31] globals_0.16.3            fitdistrplus_1.1-11      
##  [33] rhdf5filters_1.16.0       rhdf5_2.48.0             
##  [35] rstudioapi_0.16.0         UCSC.utils_1.0.0         
##  [37] miniUI_0.1.1.1            generics_0.1.3           
##  [39] curl_5.2.1                zlibbioc_1.50.0          
##  [41] ScaledMatrix_1.12.0       polyclip_1.10-6          
##  [43] GenomeInfoDbData_1.2.12   ExperimentHub_2.12.0     
##  [45] SparseArray_1.4.3         xtable_1.8-4             
##  [47] stringr_1.5.1             doParallel_1.0.17        
##  [49] evaluate_0.24.0           S4Arrays_1.4.0           
##  [51] BiocFileCache_2.12.0      hms_1.1.3                
##  [53] irlba_2.3.5.1             colorspace_2.1-0         
##  [55] filelock_1.0.3            ROCR_1.0-11              
##  [57] reticulate_1.38.0         spatstat.data_3.1-2      
##  [59] magrittr_2.0.3            lmtest_0.9-40            
##  [61] later_1.3.2               viridis_0.6.5            
##  [63] lattice_0.22-6            spatstat.geom_3.2-9      
##  [65] future.apply_1.11.2       scattermore_1.2          
##  [67] scuttle_1.14.0            cowplot_1.1.3            
##  [69] RcppAnnoy_0.0.22          pillar_1.9.0             
##  [71] nlme_3.1-165              iterators_1.0.14         
##  [73] caTools_1.18.2            compiler_4.4.1           
##  [75] beachmat_2.20.0           RSpectra_0.16-1          
##  [77] tensor_1.5                minqa_1.2.7              
##  [79] crayon_1.5.3              abind_1.4-5              
##  [81] scater_1.32.0             blme_1.0-5               
##  [83] locfit_1.5-9.10           bit_4.0.5                
##  [85] codetools_0.2-20          BiocSingular_1.20.0      
##  [87] bslib_0.7.0               alabaster.ranges_1.4.0   
##  [89] GetoptLong_1.0.5          plotly_4.10.4            
##  [91] remaCor_0.0.18            mime_0.12                
##  [93] splines_4.4.1             circlize_0.4.16          
##  [95] fastDummies_1.7.3         dbplyr_2.5.0             
##  [97] sparseMatrixStats_1.16.0  knitr_1.48               
##  [99] blob_1.2.4                utf8_1.2.4               
## [101] clue_0.3-65               BiocVersion_3.19.1       
## [103] lme4_1.1-35.3             listenv_0.9.1            
## [105] DelayedMatrixStats_1.26.0 Rdpack_2.6               
## [107] Matrix_1.7-0              statmod_1.5.0            
## [109] svglite_2.1.3             fANCOVA_0.6-1            
## [111] pkgconfig_2.0.3           tools_4.4.1              
## [113] cachem_1.1.0              RhpcBLASctl_0.23-42      
## [115] rbibutils_2.2.16          RSQLite_2.3.7            
## [117] viridisLite_0.4.2         DBI_1.2.3                
## [119] numDeriv_2016.8-1.1       fastmap_1.2.0            
## [121] rmarkdown_2.27            scales_1.3.0             
## [123] grid_4.4.1                ica_1.0-3                
## [125] broom_1.0.6               AnnotationHub_3.12.0     
## [127] sass_0.4.9                patchwork_1.2.0          
## [129] BiocManager_1.30.23       ggstats_0.6.0            
## [131] dotCall64_1.1-1           RANN_2.6.1               
## [133] alabaster.schemas_1.4.0   farver_2.1.2             
## [135] aod_1.3.3                 mgcv_1.9-1               
## [137] yaml_2.3.9                cli_3.6.3                
## [139] purrr_1.0.2               leiden_0.4.3.1           
## [141] lifecycle_1.0.4           uwot_0.2.2               
## [143] glmmTMB_1.1.9             mvtnorm_1.2-5            
## [145] backports_1.5.0           BiocParallel_1.38.0      
## [147] gtable_0.3.5              rjson_0.2.21             
## [149] ggridges_0.5.6            progressr_0.14.0         
## [151] jsonlite_1.8.8            edgeR_4.2.0              
## [153] RcppHNSW_0.6.0            bitops_1.0-7             
## [155] bit64_4.0.5               Rtsne_0.17               
## [157] alabaster.matrix_1.4.0    spatstat.utils_3.0-5     
## [159] BiocNeighbors_1.22.0      highr_0.11               
## [161] jquerylib_0.1.4           alabaster.se_1.4.0       
## [163] pbkrtest_0.5.3            lazyeval_0.2.2           
## [165] alabaster.base_1.4.1      shiny_1.8.1.1            
## [167] htmltools_0.5.8.1         sctransform_0.4.1        
## [169] rappdirs_0.3.3            glue_1.7.0               
## [171] spam_2.10-0               httr2_1.0.1              
## [173] XVector_0.44.0            gridExtra_2.3            
## [175] EnvStats_2.8.1            boot_1.3-30              
## [177] igraph_2.0.3              variancePartition_1.34.0 
## [179] TMB_1.9.11                R6_2.5.1                 
## [181] tidyr_1.3.1               labeling_0.4.3           
## [183] cluster_2.1.6             Rhdf5lib_1.26.0          
## [185] nloptr_2.1.1              DelayedArray_0.30.1      
## [187] tidyselect_1.2.1          vipor_0.4.7              
## [189] xml2_1.3.6                AnnotationDbi_1.66.0     
## [191] future_1.33.2             rsvd_1.0.5               
## [193] munsell_0.5.1             KernSmooth_2.23-24       
## [195] data.table_1.15.4         htmlwidgets_1.6.4        
## [197] ComplexHeatmap_2.20.0     RColorBrewer_1.1-3       
## [199] rlang_1.1.4               spatstat.sparse_3.1-0    
## [201] spatstat.explore_3.2-7    lmerTest_3.1-3           
## [203] fansi_1.0.6