Introduction
Ee are interested in comparing DEGs between the following comparison groups:
Latently- vs productively-infected cells (groups 3 vs 4).
Latently-infected vs bystander cells (2 vs 3).
Productively-infected vs mock (4 vs 1)
Mock vs bystander (1 vs 2).
As discussed, I think we will do these comparisons separately for both MDM and AlvMDM samples initially. Then, it would be of interest to compare DEGs for MDM vs AlvMDM for each of the four infection groups.
suppressPackageStartupMessages({
library("kableExtra")
library("ggplot2")
library("plyr")
library("Seurat")
library("hdf5r")
library("SingleCellExperiment")
library("parallel")
library("stringi")
library("beeswarm")
library("muscat")
library("DESeq2")
library("mitch")
library("harmony")
library("celldex")
library("SingleR")
library("limma")
})
Load data
mock0 |
MDMP236 |
mock |
mock1 |
MDMV236 |
mock |
mock2 |
MDMO236 |
mock |
mock3 |
MDMP239 |
mock |
mock4 |
AlvP239 |
mock |
mock5 |
AlvM239 |
mock |
mock6 |
AlvN239 |
mock |
active0 |
MDMP236 |
active |
active1 |
MDMV236 |
active |
active2 |
MDMO236 |
active |
active3 |
MDMP239 |
active |
active4 |
AlvP239 |
active |
active5 |
AlvM239 |
active |
active6 |
AlvN239 |
active |
latent0 |
MDMP236 |
latent |
latent1 |
MDMV236 |
latent |
latent2 |
MDMO236 |
latent |
latent3 |
MDMP239 |
latent |
latent4 |
AlvP239 |
latent |
latent5 |
AlvM239 |
latent |
latent6 |
AlvN239 |
latent |
bystander0 |
MDMP236 |
bystander |
bystander1 |
MDMV236 |
bystander |
bystander2 |
MDMO236 |
bystander |
bystander3 |
MDMP239 |
bystander |
bystander4 |
AlvP239 |
bystander |
bystander5 |
AlvM239 |
bystander |
bystander6 |
AlvN239 |
bystander |
react6 |
AlvN239 |
reactivated |
ss <- read.table("samplesheet.tsv",header=TRUE,row.names=1)
ss %>% kbl(caption="sample sheet") %>% kable_paper("hover", full_width = F)
sample sheet
|
Patient
|
Group
|
mock0
|
MDMP236
|
mock
|
mock1
|
MDMV236
|
mock
|
mock2
|
MDMO236
|
mock
|
mock3
|
MDMP239
|
mock
|
mock4
|
AlvP239
|
mock
|
mock5
|
AlvM239
|
mock
|
mock6
|
AlvN239
|
mock
|
active0
|
MDMP236
|
active
|
active1
|
MDMV236
|
active
|
active2
|
MDMO236
|
active
|
active3
|
MDMP239
|
active
|
active4
|
AlvP239
|
active
|
active5
|
AlvM239
|
active
|
active6
|
AlvN239
|
active
|
latent0
|
MDMP236
|
latent
|
latent1
|
MDMV236
|
latent
|
latent2
|
MDMO236
|
latent
|
latent3
|
MDMP239
|
latent
|
latent4
|
AlvP239
|
latent
|
latent5
|
AlvM239
|
latent
|
latent6
|
AlvN239
|
latent
|
bystander0
|
MDMP236
|
bystander
|
bystander1
|
MDMV236
|
bystander
|
bystander2
|
MDMO236
|
bystander
|
bystander3
|
MDMP239
|
bystander
|
bystander4
|
AlvP239
|
bystander
|
bystander5
|
AlvM239
|
bystander
|
bystander6
|
AlvN239
|
bystander
|
react6
|
AlvN239
|
reactivated
|
mylist <- readRDS("macrophage_counts.rds")
Make single cell experiment object
comb <- do.call(cbind,mylist)
sce <- SingleCellExperiment(list(counts=comb))
sce
## class: SingleCellExperiment
## dim: 36603 28971
## metadata(0):
## assays(1): counts
## rownames(36603): gene-HIV1-1 gene-HIV1-2 ... AC007325.4 AC007325.2
## rowData names(0):
## colnames(28971): mock0|AAACGAATCACATACG mock0|AAACGCTCATCAGCGC ...
## react6|TTTGTTGTCTGAACGT react6|TTTGTTGTCTTGGCTC
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
Normalise data
cellmetadata <- data.frame(colnames(comb) ,sapply(strsplit(colnames(comb),"\\|"),"[[",1))
colnames(cellmetadata) <- c("cell","sample")
comb <- CreateSeuratObject(comb, project = "mac", assay = "RNA", meta.data = cellmetadata)
comb <- NormalizeData(comb)
## Normalizing layer: counts
comb <- FindVariableFeatures(comb, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
comb <- ScaleData(comb)
## Centering and scaling data matrix
PCA and Cluster
comb <- RunPCA(comb, features = VariableFeatures(object = comb))
## PC_ 1
## Positive: MT-ATP8, MTRNR2L1, FABP4, AC105402.3, FABP3, gene-HIV1-2, HAMP, C1orf56, PVALB, AL136097.2
## GAPDH, BIRC7, UTS2, S100A9, C1QC, MT1G, MT1H, AC025154.2, CARD16, COX7A1
## BEX3, MT1A, AL450311.1, PPP1R1A, NUPR1, LILRA1, MT2A, CCL23, FXYD2, CCL15
## Negative: ARL15, NEAT1, DOCK3, EXOC4, MALAT1, RASAL2, FTX, LRMDA, DPYD, PLXDC2
## JMJD1C, TMEM117, FHIT, ZEB2, MITF, DENND4C, ATG7, FRMD4B, VPS13B, ZNF438
## MAML2, COP1, TPRG1, FMNL2, ATXN1, JARID2, ASAP1, ELMO1, FNDC3B, TCF12
## PC_ 2
## Positive: CYSTM1, FAH, CD164, FDX1, GDF15, PSAT1, ATP6V1D, SAT1, TXN, BCAT1
## CCPG1, SLAMF9, CSTA, CTSL, HES2, HEBP2, S100A10, NUPR1, PHGDH, STMN1
## TCEA1, GARS, MARCO, SCD, RILPL2, SNHG5, RHOQ, CD48, PLIN2, SNHG32
## Negative: SPRED1, RCBTB2, KCNMA1, GCLC, FGL2, HLA-DRA, MRC1, HLA-DRB1, RTN1, SLCO2B1
## HLA-DPA1, AC020656.1, CD74, HLA-DPB1, PDGFC, LINC02345, DPYD, MERTK, C1QA, CCDC102B
## STAC, ATP8B4, NFATC3, RNF128, VAMP5, MX1, ATG7, TGFBI, XYLT1, CRIP1
## PC_ 3
## Positive: NCAPH, CRABP2, TM4SF19, GAL, DUSP2, RGCC, CHI3L1, CCL22, TMEM114, ACAT2
## RGS20, TRIM54, LINC01010, LINC02244, TCTEX1D2, MGST1, GPC4, NUMB, CCND1, SYNGR1
## CLU, POLE4, SERTAD2, AC092353.2, SLC20A1, IL1RN, ZNF366, AC005280.2, OCSTAMP, GCLC
## Negative: MARCKS, CD14, TGFBI, BTG1, MS4A6A, HLA-DQA1, FPR3, CTSC, CD163, MPEG1
## MEF2C, AIF1, TIMP1, HLA-DPB1, C1QC, HLA-DQB1, SELENOP, RNASE1, NAMPT, HLA-DQA2
## TCF4, EPB41L3, ARL4C, NUPR1, ALDH2, SAMSN1, ZNF331, HIF1A, MS4A7, C1QA
## PC_ 4
## Positive: MT-ATP8, MTRNR2L1, XIST, PDE4D, AL035446.2, AC073359.2, CCDC26, AL365295.1, CLVS1, PRKCE
## EPHB1, RAD51B, C5orf17, LINC01135, PLEKHA5, LINC02320, LY86-AS1, STX18-AS1, FHIT, LINC01473
## DMGDH, FTX, AC079142.1, PKD1L1, SLC22A15, AF117829.1, GUCY2F, SH3RF3, STOX2, MIR646HG
## Negative: HLA-DRB1, CD74, CTSB, HSP90B1, TUBA1B, GAPDH, HLA-DPA1, HLA-DRA, ACTG1, IFI30
## RGCC, AIF1, HSPA5, CYP1B1, HLA-DPB1, C1QA, C15orf48, LYZ, CA2, S100A4
## LGMN, TUBB, FBP1, TUBA1A, MMP9, RNASE6, HLA-DQB1, CHI3L1, CCL3, TXN
## PC_ 5
## Positive: PTGDS, LINC02244, MMP9, MGST1, LYZ, RGCC, IFI30, CHI3L1, CLU, GCLC
## GCHFR, CRABP2, FABP3, VAMP5, TMEM176B, HLA-DRB1, SYNGR1, CTSB, NCAPH, ISG15
## CD74, NCF1, SOD2, SAT1, FGL2, HSPA5, CDKN2A, MX1, CLEC12A, S100A10
## Negative: TYMS, PCLAF, TK1, MKI67, MYBL2, CENPM, CLSPN, RRM2, BIRC5, SHCBP1
## CEP55, DIAPH3, CDK1, CENPK, PRC1, CENPF, ESCO2, HELLS, NUSAP1, TOP2A
## NCAPG, FAM111B, ANLN, TPX2, CCNA2, KIF11, ASPM, CENPU, MAD2L1, HMMR
comb <- RunHarmony(comb,"sample")
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
DimHeatmap(comb, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(comb)
comb <- JackStraw(comb, num.replicate = 100)
comb <- FindNeighbors(comb, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
comb <- FindClusters(comb, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 28971
## Number of edges: 896422
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8952
## Number of communities: 12
## Elapsed time: 4 seconds
UMAP
comb <- RunUMAP(comb, dims = 1:10)
## 17:02:49 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:02:49 Read 28971 rows and found 10 numeric columns
## 17:02:49 Using Annoy for neighbor search, n_neighbors = 30
## 17:02:49 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:02:51 Writing NN index file to temp file /tmp/Rtmp9zv4vx/file25988e230dbf5f
## 17:02:51 Searching Annoy index using 1 thread, search_k = 3000
## 17:02:59 Annoy recall = 100%
## 17:03:00 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:03:01 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:03:02 Commencing optimization for 200 epochs, with 1186670 positive edges
## 17:03:10 Optimization finished
DimPlot(comb, reduction = "umap")
Assign names to clusters
ADGRE1, CCR2, CD169, CX3CR1, CD206, CD163, LYVE1, CD9, TREM2
HLA-DP, HLA-DM, HLA-DOA, HLA-DOB, HLA-DQ, and HLA-DR.
message("macrophage markers")
## macrophage markers
FeaturePlot(comb, features = c("ADGRE1", "CCR2", "SIGLEC1", "CX3CR1", "MRC1", "CD163", "LYVE1", "CD9", "TREM2"))
DimPlot(comb, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
That’s pretty useless. Let’s use celldex pkg to annotate cells and get the macs.
ref <- celldex::MonacoImmuneData()
DefaultAssay(comb) <- "RNA"
comb2 <- as.SingleCellExperiment(comb)
lc <- logcounts(comb2)
pred_imm_broad <- SingleR(test=comb2, ref=ref,
labels=ref$label.main)
head(pred_imm_broad)
## DataFrame with 6 rows and 4 columns
## scores labels delta.next
## <matrix> <character> <numeric>
## mock0|AAACGAATCACATACG 0.307826:0.325650:0.169064:... Monocytes 0.1786319
## mock0|AAACGCTCATCAGCGC 0.311008:0.281528:0.189016:... Monocytes 0.0338547
## mock0|AAACGCTGTCGAGTGA 0.318344:0.277003:0.170895:... Monocytes 0.1301308
## mock0|AAAGAACGTGCAACAG 0.214292:0.190767:0.121117:... Monocytes 0.1116322
## mock0|AAAGGTAAGCCATATC 0.290416:0.291088:0.155001:... Monocytes 0.1794308
## mock0|AAAGGTAGTTTCCAAG 0.292140:0.281397:0.184535:... Monocytes 0.0952702
## pruned.labels
## <character>
## mock0|AAACGAATCACATACG Monocytes
## mock0|AAACGCTCATCAGCGC Monocytes
## mock0|AAACGCTGTCGAGTGA Monocytes
## mock0|AAAGAACGTGCAACAG NA
## mock0|AAAGGTAAGCCATATC Monocytes
## mock0|AAAGGTAGTTTCCAAG Monocytes
table(pred_imm_broad$pruned.labels)
##
## Basophils Dendritic cells Monocytes Neutrophils NK cells
## 4 657 24154 14 4
## Progenitors
## 2
cellmetadata$label <- pred_imm_broad$pruned.labels
pred_imm_fine <- SingleR(test=comb2, ref=ref,
labels=ref$label.fine)
head(pred_imm_fine)
## DataFrame with 6 rows and 4 columns
## scores labels
## <matrix> <character>
## mock0|AAACGAATCACATACG 0.1836411:0.485683:0.206442:... Classical monocytes
## mock0|AAACGCTCATCAGCGC 0.1961234:0.430705:0.226843:... Classical monocytes
## mock0|AAACGCTGTCGAGTGA 0.1718613:0.442610:0.188544:... Classical monocytes
## mock0|AAAGAACGTGCAACAG 0.0943609:0.264157:0.120656:... Classical monocytes
## mock0|AAAGGTAAGCCATATC 0.1563980:0.415082:0.167965:... Classical monocytes
## mock0|AAAGGTAGTTTCCAAG 0.1857623:0.432131:0.207144:... Classical monocytes
## delta.next pruned.labels
## <numeric> <character>
## mock0|AAACGAATCACATACG 0.0626269 Classical monocytes
## mock0|AAACGCTCATCAGCGC 0.1150706 Classical monocytes
## mock0|AAACGCTGTCGAGTGA 0.0651352 Classical monocytes
## mock0|AAAGAACGTGCAACAG 0.0648711 Classical monocytes
## mock0|AAAGGTAAGCCATATC 0.1073274 Classical monocytes
## mock0|AAAGGTAGTTTCCAAG 0.1521533 Classical monocytes
table(pred_imm_fine$pruned.labels)
##
## Classical monocytes Intermediate monocytes
## 21583 3569
## Low-density basophils Low-density neutrophils
## 2 21
## Myeloid dendritic cells Naive B cells
## 461 1
## Natural killer cells Non classical monocytes
## 3 64
## Non-Vd2 gd T cells Plasmacytoid dendritic cells
## 1 35
## Progenitor cells Terminal effector CD4 T cells
## 4 1
cellmetadata$finelabel <- pred_imm_fine$pruned.labels
col_pal <- c('#e31a1c', '#ff7f00', "#999900", '#cc00ff', '#1f78b4', '#fdbf6f',
'#33a02c', '#fb9a99', "#a6cee3", "#cc6699", "#b2df8a", "#99004d", "#66ff99",
"#669999", "#006600", "#9966ff", "#cc9900", "#e6ccff", "#3399ff", "#ff66cc",
"#ffcc66", "#003399")
annot_df <- data.frame(
barcodes = rownames(pred_imm_broad),
monaco_broad_annotation = pred_imm_broad$labels,
monaco_broad_pruned_labels = pred_imm_broad$pruned.labels,
monaco_fine_annotation = pred_imm_fine$labels,
monaco_fine_pruned_labels = pred_imm_fine$pruned.labels
)
meta_inf <- comb@meta.data
meta_inf$cell_barcode <- colnames(comb)
meta_inf <- meta_inf %>% dplyr::left_join(y = annot_df,
by = c("cell_barcode" = "barcodes"))
rownames(meta_inf) <- colnames(lc)
comb@meta.data <- meta_inf
DimPlot(comb, label=TRUE, group.by = "monaco_broad_annotation", reduction = "umap",
cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")
DimPlot(comb, label=TRUE, group.by = "monaco_fine_annotation", reduction = "umap",
cols = col_pal, pt.size = 0.5) + ggtitle("Annotation With the Monaco Reference Database")
Cell counts
Most cells are classified as monocytes.
cc <- table(meta_inf[,c("sample","monaco_broad_annotation")])
cc %>% kbl(caption="cell counts") %>% kable_paper("hover", full_width = F)
cell counts
|
Basophils
|
Dendritic cells
|
Monocytes
|
Neutrophils
|
NK cells
|
Progenitors
|
active0
|
0
|
17
|
863
|
0
|
0
|
0
|
active1
|
0
|
3
|
476
|
0
|
0
|
0
|
active2
|
0
|
13
|
408
|
0
|
0
|
0
|
active3
|
0
|
4
|
432
|
0
|
0
|
0
|
active4
|
1
|
34
|
908
|
2
|
0
|
0
|
active5
|
0
|
8
|
613
|
0
|
0
|
0
|
active6
|
1
|
28
|
684
|
0
|
0
|
0
|
bystander0
|
0
|
48
|
1446
|
0
|
0
|
0
|
bystander1
|
0
|
31
|
1303
|
0
|
0
|
0
|
bystander2
|
0
|
47
|
490
|
3
|
0
|
0
|
bystander3
|
0
|
29
|
1028
|
2
|
0
|
0
|
bystander4
|
0
|
20
|
1100
|
0
|
1
|
1
|
bystander5
|
0
|
12
|
491
|
1
|
0
|
0
|
bystander6
|
0
|
20
|
558
|
3
|
0
|
0
|
latent0
|
1
|
17
|
1154
|
0
|
0
|
0
|
latent1
|
0
|
11
|
1240
|
0
|
0
|
0
|
latent2
|
0
|
11
|
337
|
0
|
0
|
0
|
latent3
|
0
|
12
|
740
|
0
|
0
|
0
|
latent4
|
0
|
30
|
1905
|
1
|
0
|
0
|
latent5
|
1
|
26
|
1804
|
0
|
0
|
0
|
latent6
|
0
|
52
|
1823
|
0
|
0
|
0
|
mock0
|
0
|
6
|
790
|
0
|
0
|
0
|
mock1
|
0
|
7
|
652
|
0
|
0
|
0
|
mock2
|
0
|
4
|
173
|
0
|
0
|
0
|
mock3
|
0
|
4
|
811
|
0
|
0
|
0
|
mock4
|
0
|
18
|
956
|
0
|
0
|
0
|
mock5
|
0
|
19
|
785
|
0
|
0
|
0
|
mock6
|
0
|
47
|
1304
|
1
|
2
|
0
|
react6
|
1
|
79
|
3015
|
1
|
1
|
1
|
tcc <- t(cc)
pctcc <- apply(tcc,2,function(x) { x/sum(x)*100} )
pctcc %>% kbl(caption="cell proportions") %>% kable_paper("hover", full_width = F)
cell proportions
|
active0
|
active1
|
active2
|
active3
|
active4
|
active5
|
active6
|
bystander0
|
bystander1
|
bystander2
|
bystander3
|
bystander4
|
bystander5
|
bystander6
|
latent0
|
latent1
|
latent2
|
latent3
|
latent4
|
latent5
|
latent6
|
mock0
|
mock1
|
mock2
|
mock3
|
mock4
|
mock5
|
mock6
|
react6
|
Basophils
|
0.000000
|
0.0000000
|
0.000000
|
0.0000000
|
0.1058201
|
0.000000
|
0.1402525
|
0.000000
|
0.000000
|
0.0000000
|
0.0000000
|
0.0000000
|
0.0000000
|
0.0000000
|
0.0853242
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.054615
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.0322789
|
Dendritic cells
|
1.931818
|
0.6263048
|
3.087886
|
0.9174312
|
3.5978836
|
1.288245
|
3.9270687
|
3.212851
|
2.323838
|
8.7037037
|
2.7384325
|
1.7825312
|
2.3809524
|
3.4423408
|
1.4505119
|
0.8792966
|
3.160919
|
1.595745
|
1.5495868
|
1.419989
|
2.773333
|
0.7537688
|
1.062215
|
2.259887
|
0.4907975
|
1.848049
|
2.363184
|
3.4711965
|
2.5500323
|
Monocytes
|
98.068182
|
99.3736952
|
96.912114
|
99.0825688
|
96.0846561
|
98.711755
|
95.9326788
|
96.787149
|
97.676162
|
90.7407407
|
97.0727101
|
98.0392157
|
97.4206349
|
96.0413081
|
98.4641638
|
99.1207034
|
96.839080
|
98.404255
|
98.3987603
|
98.525396
|
97.226667
|
99.2462312
|
98.937785
|
97.740113
|
99.5092025
|
98.151951
|
97.636816
|
96.3072378
|
97.3208522
|
Neutrophils
|
0.000000
|
0.0000000
|
0.000000
|
0.0000000
|
0.2116402
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.5555556
|
0.1888574
|
0.0000000
|
0.1984127
|
0.5163511
|
0.0000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0516529
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0738552
|
0.0322789
|
NK cells
|
0.000000
|
0.0000000
|
0.000000
|
0.0000000
|
0.0000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.0000000
|
0.0891266
|
0.0000000
|
0.0000000
|
0.0000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.1477105
|
0.0322789
|
Progenitors
|
0.000000
|
0.0000000
|
0.000000
|
0.0000000
|
0.0000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.0000000
|
0.0891266
|
0.0000000
|
0.0000000
|
0.0000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.000000
|
0.000000
|
0.0000000
|
0.0322789
|
Differential expression
We are going to use muscat for pseudobulk analysis. First need to convert seurat obj to singlecellexperiment object. Then summarise counts to pseudobulk level.
sce <- Seurat::as.SingleCellExperiment(comb, assay = "RNA")
head(colData(sce),2)
## DataFrame with 2 rows and 13 columns
## orig.ident nCount_RNA nFeature_RNA
## <factor> <numeric> <integer>
## mock0|AAACGAATCACATACG mac 72710 7097
## mock0|AAACGCTCATCAGCGC mac 49092 6276
## cell sample RNA_snn_res.0.5
## <character> <character> <factor>
## mock0|AAACGAATCACATACG mock0|AAACGAATCACATACG mock0 3
## mock0|AAACGCTCATCAGCGC mock0|AAACGCTCATCAGCGC mock0 11
## seurat_clusters cell_barcode
## <factor> <character>
## mock0|AAACGAATCACATACG 3 mock0|AAACGAATCACATACG
## mock0|AAACGCTCATCAGCGC 11 mock0|AAACGCTCATCAGCGC
## monaco_broad_annotation monaco_broad_pruned_labels
## <character> <character>
## mock0|AAACGAATCACATACG Monocytes Monocytes
## mock0|AAACGCTCATCAGCGC Monocytes Monocytes
## monaco_fine_annotation monaco_fine_pruned_labels
## <character> <character>
## mock0|AAACGAATCACATACG Classical monocytes Classical monocytes
## mock0|AAACGCTCATCAGCGC Classical monocytes Classical monocytes
## ident
## <factor>
## mock0|AAACGAATCACATACG 3
## mock0|AAACGCTCATCAGCGC 11
colnames(colData(sce))
## [1] "orig.ident" "nCount_RNA"
## [3] "nFeature_RNA" "cell"
## [5] "sample" "RNA_snn_res.0.5"
## [7] "seurat_clusters" "cell_barcode"
## [9] "monaco_broad_annotation" "monaco_broad_pruned_labels"
## [11] "monaco_fine_annotation" "monaco_fine_pruned_labels"
## [13] "ident"
#muscat library
pb <- aggregateData(sce,
assay = "counts", fun = "sum",
by = c("monaco_broad_annotation", "sample"))
# one sheet per subpopulation
assayNames(pb)
## [1] "Basophils" "Dendritic cells" "Monocytes" "Neutrophils"
## [5] "NK cells" "Progenitors"
t(head(assay(pb)))
## gene-HIV1-1 gene-HIV1-2 MIR1302-2HG FAM138A OR4F5 AL627309.1
## active0 0 0 0 0 0 0
## active1 0 0 0 0 0 0
## active2 0 0 0 0 0 0
## active3 0 0 0 0 0 0
## active4 2 29 0 0 0 0
## active5 0 0 0 0 0 0
## active6 0 2 0 0 0 0
## bystander0 0 0 0 0 0 0
## bystander1 0 0 0 0 0 0
## bystander2 0 0 0 0 0 0
## bystander3 0 0 0 0 0 0
## bystander4 0 0 0 0 0 0
## bystander5 0 0 0 0 0 0
## bystander6 0 0 0 0 0 0
## latent0 0 2 0 0 0 0
## latent1 0 0 0 0 0 0
## latent2 0 0 0 0 0 0
## latent3 0 0 0 0 0 0
## latent4 0 0 0 0 0 0
## latent5 0 15 0 0 0 0
## latent6 0 0 0 0 0 0
## mock0 0 0 0 0 0 0
## mock1 0 0 0 0 0 0
## mock2 0 0 0 0 0 0
## mock3 0 0 0 0 0 0
## mock4 0 0 0 0 0 0
## mock5 0 0 0 0 0 0
## mock6 0 0 0 0 0 0
## react6 0 0 0 0 0 0
plotMDS(assays(pb)[[3]], main="Monocytes" )
par(mfrow=c(2,3))
dump <- lapply(1:length(assays(pb)) , function(i) {
cellname=names(assays(pb))[i]
plotMDS(assays(pb)[[i]],cex=1,pch=19,main=paste(cellname),labels=colnames(assays(pb)[[1]]))
})
par(mfrow=c(1,1))
DE Prep
pbmono <- assays(pb)[["Monocytes"]]
head(pbmono)
## active0 active1 active2 active3 active4 active5 active6 bystander0
## gene-HIV1-1 11089 7725 4907 11852 29173 20670 14188 0
## gene-HIV1-2 65635 42374 73030 35142 120841 105818 103519 0
## MIR1302-2HG 0 0 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0 0 0
## AL627309.1 54 37 30 5 16 11 27 65
## bystander1 bystander2 bystander3 bystander4 bystander5 bystander6
## gene-HIV1-1 0 0 0 0 0 0
## gene-HIV1-2 0 6 0 0 0 0
## MIR1302-2HG 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0
## AL627309.1 51 19 20 38 9 14
## latent0 latent1 latent2 latent3 latent4 latent5 latent6 mock0 mock1
## gene-HIV1-1 649 799 156 380 1777 1654 2215 37 67
## gene-HIV1-2 6540 7091 3290 2126 12238 13801 26684 542 517
## MIR1302-2HG 0 0 0 0 1 0 0 0 0
## FAM138A 0 0 0 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0 0 0 0
## AL627309.1 72 78 28 6 65 59 76 61 30
## mock2 mock3 mock4 mock5 mock6 react6
## gene-HIV1-1 3 141 92 115 1038 895
## gene-HIV1-2 100 1078 727 1653 6685 7528
## MIR1302-2HG 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0
## AL627309.1 6 15 46 26 52 37
dim(pbmono)
## [1] 36603 29
hiv <- pbmono[1:2,]
pbmono <- pbmono[3:nrow(pbmono),]
Gene sets
Gene ontology.
go <- gmt_import("c5.go.v2023.2.Hs.symbols.gmt")
names(go) <- gsub("_"," ",names(go))
DE1 Latently- vs productively-infected cells (groups 3 vs 4).
MDM group.
pb1m <- pbmono[,c(grep("active",colnames(pbmono))[1:4],grep("latent",colnames(pbmono))[1:4])]
head(pb1m)
## active0 active1 active2 active3 latent0 latent1 latent2 latent3
## MIR1302-2HG 0 0 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0 0 0
## AL627309.1 54 37 30 5 72 78 28 6
## AL627309.3 0 2 0 0 0 2 2 0
## AL627309.2 0 0 0 0 0 0 0 0
pb1mf <- pb1m[which(rowMeans(pb1m)>=10),]
head(pb1mf)
## active0 active1 active2 active3 latent0 latent1 latent2 latent3
## AL627309.1 54 37 30 5 72 78 28 6
## AL627309.5 15 14 15 6 35 49 8 18
## LINC01409 129 114 104 27 153 221 60 34
## LINC01128 262 164 167 94 215 202 87 102
## LINC00115 22 6 7 4 23 10 4 5
## FAM41C 49 39 63 68 62 54 21 80
colSums(pb1mf)
## active0 active1 active2 active3 latent0 latent1 latent2 latent3
## 30842467 22854684 26195864 13879630 35237534 39383109 15058058 16636428
des1m <- as.data.frame(grepl("active",colnames(pb1mf)))
colnames(des1m) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb1mf , colData = des1m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1mf <- de
write.table(de1mf,"de1mf.tsv",sep="\t")
nrow(subset(de1mf,padj<0.05 & log2FoldChange>0))
## [1] 63
nrow(subset(de1mf,padj<0.05 & log2FoldChange<0))
## [1] 129
head(subset(de1mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active MDM cells") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
NMRK2
|
1074.67323
|
1.8875954
|
0.3119420
|
6.051109
|
0.00e+00
|
0.0000021
|
DRAXIN
|
437.34530
|
1.0201166
|
0.1785128
|
5.714530
|
0.00e+00
|
0.0000136
|
SNN
|
1654.20307
|
0.4610305
|
0.0929074
|
4.962256
|
7.00e-07
|
0.0004148
|
LINC01283
|
57.96925
|
2.2989590
|
0.4974259
|
4.621711
|
3.80e-06
|
0.0016184
|
SPHK1
|
657.69565
|
0.6715581
|
0.1477030
|
4.546679
|
5.40e-06
|
0.0021113
|
NDFIP2
|
253.88188
|
0.8328987
|
0.1844892
|
4.514620
|
6.30e-06
|
0.0023601
|
GLTP
|
867.99599
|
0.5206143
|
0.1161963
|
4.480475
|
7.40e-06
|
0.0026392
|
UBE2J1
|
2682.56714
|
0.4338202
|
0.0976798
|
4.441249
|
8.90e-06
|
0.0029800
|
MIR155HG
|
303.86139
|
0.9212520
|
0.2138280
|
4.308378
|
1.64e-05
|
0.0047069
|
DIRC3
|
353.61101
|
1.0195426
|
0.2368761
|
4.304117
|
1.68e-05
|
0.0047079
|
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in active MDM cells") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AL031123.1
|
522.7177
|
-0.9662151
|
0.1225730
|
-7.882770
|
0
|
0.0e+00
|
HIST1H1C
|
231.6309
|
-2.1253675
|
0.2958066
|
-7.184990
|
0
|
0.0e+00
|
EVI2A
|
562.4969
|
-1.1141044
|
0.1595318
|
-6.983587
|
0
|
0.0e+00
|
TGFBI
|
459.4321
|
-2.3312427
|
0.3338677
|
-6.982533
|
0
|
0.0e+00
|
IGF1R
|
387.8538
|
-1.0023515
|
0.1441265
|
-6.954663
|
0
|
0.0e+00
|
RCBTB2
|
798.4883
|
-1.3656222
|
0.2027045
|
-6.737011
|
0
|
0.0e+00
|
IGFBP7
|
201.0449
|
-1.0455119
|
0.1618281
|
-6.460632
|
0
|
2.0e-07
|
CFD
|
3142.4873
|
-1.5714825
|
0.2440764
|
-6.438487
|
0
|
2.0e-07
|
HVCN1
|
883.5545
|
-0.8124014
|
0.1316376
|
-6.171499
|
0
|
1.1e-06
|
MLLT3
|
126.9514
|
-1.2348104
|
0.2115674
|
-5.836487
|
0
|
7.2e-06
|
m1m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 14930
## Note: no. genes in output = 14930
## Note: estimated proportion of input genes in output = 1
mres1m <- mitch_calc(m1m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres1m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("MDM_latent_vs_active.html") ) {
mitch_report(mres1m,outfile="MDM_latent_vs_active.html")
}
Alv cells.
pb1a <- pbmono[,c(grep("active",colnames(pbmono))[5:7],grep("latent",colnames(pbmono))[5:7])]
head(pb1a)
## active4 active5 active6 latent4 latent5 latent6
## MIR1302-2HG 0 0 0 1 0 0
## FAM138A 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0
## AL627309.1 16 11 27 65 59 76
## AL627309.3 0 0 0 1 1 0
## AL627309.2 0 0 0 0 0 1
pb1af <- pb1a[which(rowMeans(pb1a)>=10),]
head(pb1af)
## active4 active5 active6 latent4 latent5 latent6
## AL627309.1 16 11 27 65 59 76
## AL627309.5 26 23 15 108 90 67
## LINC01409 87 151 92 133 306 198
## LINC01128 293 284 219 299 413 348
## LINC00115 10 7 10 14 27 21
## FAM41C 136 117 84 123 134 150
colSums(pb1af)
## active4 active5 active6 latent4 latent5 latent6
## 30062063 28512290 24034851 43690232 56861881 52274352
des1a <- as.data.frame(grepl("active",colnames(pb1af)))
colnames(des1a) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb1af , colData = des1a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de1af <- de
write.table(de1af,"de1af.tsv",sep="\t")
nrow(subset(de1af,padj<0.05 & log2FoldChange>0))
## [1] 728
nrow(subset(de1af,padj<0.05 & log2FoldChange<0))
## [1] 516
head(subset(de1af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active Alv cells") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
TNFRSF9
|
234.9777
|
2.554378
|
0.2373229
|
10.763298
|
0
|
0
|
LAYN
|
796.6365
|
1.993289
|
0.1872469
|
10.645244
|
0
|
0
|
GPR183
|
728.9516
|
1.698811
|
0.1598683
|
10.626316
|
0
|
0
|
IL6R-AS1
|
639.3675
|
3.344442
|
0.3299866
|
10.135084
|
0
|
0
|
AC025154.2
|
540.3679
|
1.680591
|
0.1671525
|
10.054239
|
0
|
0
|
AL157912.1
|
1103.5795
|
2.244131
|
0.2252164
|
9.964333
|
0
|
0
|
DNAJA4
|
409.1289
|
1.640952
|
0.1670522
|
9.822992
|
0
|
0
|
CDKN1A
|
7744.9647
|
1.296803
|
0.1358093
|
9.548707
|
0
|
0
|
TNFSF15
|
643.0691
|
2.166424
|
0.2336359
|
9.272648
|
0
|
0
|
SDS
|
6980.4298
|
1.742274
|
0.1889640
|
9.220139
|
0
|
0
|
head(subset(de1af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active Alv cells") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
HIST1H1C
|
1334.8700
|
-1.545697
|
0.1413173
|
-10.937778
|
0
|
0
|
NDRG2
|
454.9626
|
-1.861218
|
0.1797278
|
-10.355764
|
0
|
0
|
CRIM1
|
562.9900
|
-1.757921
|
0.1816972
|
-9.675004
|
0
|
0
|
CEBPD
|
977.2921
|
-1.521105
|
0.1633979
|
-9.309211
|
0
|
0
|
ANPEP
|
1719.1661
|
-1.419452
|
0.1524820
|
-9.308984
|
0
|
0
|
CDA
|
3857.9549
|
-1.401971
|
0.1529377
|
-9.166946
|
0
|
0
|
LYZ
|
62727.8698
|
-1.134071
|
0.1312488
|
-8.640614
|
0
|
0
|
ADA2
|
2554.1200
|
-1.094289
|
0.1283573
|
-8.525333
|
0
|
0
|
PIK3CD
|
865.4022
|
-1.176155
|
0.1379745
|
-8.524439
|
0
|
0
|
CHST13
|
650.0852
|
-1.654109
|
0.1981835
|
-8.346353
|
0
|
0
|
m1a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 16735
## Note: no. genes in output = 16735
## Note: estimated proportion of input genes in output = 1
mres1a <- mitch_calc(m1a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres1a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("Alv_latent_vs_active.html") ) {
mitch_report(mres1a,outfile="Alv_latent_vs_active.html")
}
DE2 Latently-infected vs bystander cells
MDM group.
pb2m <- pbmono[,c(grep("bystander",colnames(pbmono))[1:4],grep("latent",colnames(pbmono))[1:4])]
head(pb2m)
## bystander0 bystander1 bystander2 bystander3 latent0 latent1 latent2
## MIR1302-2HG 0 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0 0
## AL627309.1 65 51 19 20 72 78 28
## AL627309.3 2 4 0 0 0 2 2
## AL627309.2 0 0 0 0 0 0 0
## latent3
## MIR1302-2HG 0
## FAM138A 0
## OR4F5 0
## AL627309.1 6
## AL627309.3 0
## AL627309.2 0
pb2mf <- pb2m[which(rowMeans(pb2m)>=10),]
head(pb2mf)
## bystander0 bystander1 bystander2 bystander3 latent0 latent1 latent2
## AL627309.1 65 51 19 20 72 78 28
## AL627309.5 33 35 8 27 35 49 8
## LINC01409 136 246 63 58 153 221 60
## LINC01128 216 208 111 141 215 202 87
## LINC00115 18 24 7 1 23 10 4
## FAM41C 43 69 19 82 62 54 21
## latent3
## AL627309.1 6
## AL627309.5 18
## LINC01409 34
## LINC01128 102
## LINC00115 5
## FAM41C 80
colSums(pb2mf)
## bystander0 bystander1 bystander2 bystander3 latent0 latent1 latent2
## 40546820 36893233 16234146 20814781 35240150 39387253 15059728
## latent3
## 16638493
des2m <- as.data.frame(grepl("latent",colnames(pb2mf)))
colnames(des2m) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
#write.table(de2mf,"de2mf.tsv",sep="\t")
nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in latent compared to bystander (MDM unpaired)") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM unpaired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AK5
|
15.05395
|
1.1816719
|
0.4658904
|
2.536373
|
0.0112007
|
0.9999921
|
ALDH8A1
|
14.71811
|
1.0528431
|
0.4617860
|
2.279937
|
0.0226114
|
0.9999921
|
GRIP1
|
16.81663
|
1.3228992
|
0.5834082
|
2.267536
|
0.0233575
|
0.9999921
|
AL603840.1
|
57.88118
|
0.5323230
|
0.2444172
|
2.177928
|
0.0294114
|
0.9999921
|
ANKRD55
|
27.96977
|
0.7305876
|
0.3382814
|
2.159704
|
0.0307956
|
0.9999921
|
PPFIA3
|
16.22125
|
0.9211957
|
0.4280414
|
2.152118
|
0.0313881
|
0.9999921
|
KIAA1549
|
11.25948
|
0.9927866
|
0.4624909
|
2.146608
|
0.0318245
|
0.9999921
|
DNAH10
|
11.24340
|
1.2264628
|
0.5846425
|
2.097800
|
0.0359228
|
0.9999921
|
TCAF1
|
321.32727
|
0.3064457
|
0.1497142
|
2.046871
|
0.0406708
|
0.9999921
|
CD163L1
|
47.78763
|
1.1462332
|
0.5642158
|
2.031551
|
0.0421991
|
0.9999921
|
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in latent compared to bystander (MDM unpaired)") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM unpaired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AC092142.1
|
8.874593
|
-1.4639853
|
0.5814365
|
-2.517877
|
0.0118065
|
0.9999921
|
AKR1C3
|
44.639234
|
-0.6594009
|
0.2705113
|
-2.437609
|
0.0147848
|
0.9999921
|
PASK
|
13.557613
|
-0.9541852
|
0.4436364
|
-2.150827
|
0.0314899
|
0.9999921
|
SNHG29
|
6178.071590
|
-0.1433975
|
0.0699609
|
-2.049681
|
0.0403956
|
0.9999921
|
SOWAHD
|
231.034620
|
-0.3853261
|
0.1890656
|
-2.038055
|
0.0415444
|
0.9999921
|
SYN2
|
11.648390
|
-0.9851932
|
0.4958940
|
-1.986701
|
0.0469555
|
0.9999921
|
AC123768.3
|
12.316020
|
-1.1100340
|
0.5617697
|
-1.975959
|
0.0481594
|
0.9999921
|
CDKL4
|
36.291936
|
-0.7281966
|
0.3905537
|
-1.864524
|
0.0622482
|
0.9999921
|
AC058791.1
|
37.697607
|
-0.6314356
|
0.3411188
|
-1.851072
|
0.0641591
|
0.9999921
|
DCAF15
|
110.261130
|
-0.4264170
|
0.2343414
|
-1.819640
|
0.0688138
|
0.9999921
|
des2m$sample <- rep(1:4,2)
dds <- DESeqDataSetFromMatrix(countData = pb2mf , colData = des2m, design = ~ sample + case)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2mf <- de
write.table(de2mf,"de2mf.tsv",sep="\t")
nrow(subset(de2mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2mf,log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in latent compared to bystander (MDM paired)") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (MDM paired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AC015660.1
|
15.23099
|
1.3254698
|
0.4974453
|
2.664554
|
0.0077090
|
0.9999971
|
AK5
|
15.05395
|
1.1808827
|
0.4829616
|
2.445086
|
0.0144818
|
0.9999971
|
GRIP1
|
16.81663
|
1.3291608
|
0.5725859
|
2.321330
|
0.0202690
|
0.9999971
|
EGR3
|
12.13846
|
1.3882821
|
0.6065704
|
2.288740
|
0.0220944
|
0.9999971
|
CCSER1
|
166.89681
|
0.4356867
|
0.1935627
|
2.250881
|
0.0243931
|
0.9999971
|
ALDH8A1
|
14.71811
|
1.0517501
|
0.4730069
|
2.223541
|
0.0261794
|
0.9999971
|
NBPF26
|
63.46919
|
0.5246505
|
0.2401057
|
2.185081
|
0.0288829
|
0.9999971
|
TCAF1
|
321.32727
|
0.3053698
|
0.1415228
|
2.157742
|
0.0309479
|
0.9999971
|
PPFIA3
|
16.22125
|
0.9052509
|
0.4201300
|
2.154692
|
0.0311859
|
0.9999971
|
AL603840.1
|
57.88118
|
0.5236175
|
0.2460469
|
2.128121
|
0.0333271
|
0.9999971
|
head(subset(de2mf,log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in latent compared to bystander (MDM paired)") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (MDM paired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AC092142.1
|
8.874593
|
-1.4827955
|
0.5974655
|
-2.481810
|
0.0130717
|
0.9999971
|
SNHG29
|
6178.071590
|
-0.1439740
|
0.0590698
|
-2.437354
|
0.0147952
|
0.9999971
|
AKR1C3
|
44.639234
|
-0.6556986
|
0.2756886
|
-2.378403
|
0.0173878
|
0.9999971
|
DCAF15
|
110.261130
|
-0.4116646
|
0.1738338
|
-2.368150
|
0.0178773
|
0.9999971
|
AP002761.1
|
26.247297
|
-0.8978419
|
0.4173554
|
-2.151264
|
0.0314553
|
0.9999971
|
PASK
|
13.557613
|
-0.9561974
|
0.4559365
|
-2.097216
|
0.0359744
|
0.9999971
|
NEGR1
|
16.942101
|
-0.9204080
|
0.4470928
|
-2.058651
|
0.0395277
|
0.9999971
|
TNFAIP8L2
|
390.333065
|
-0.2942246
|
0.1430726
|
-2.056470
|
0.0397372
|
0.9999971
|
PLCB1
|
67.430581
|
-1.6576782
|
0.8123472
|
-2.040603
|
0.0412903
|
0.9999971
|
EIF4A1
|
18848.808593
|
-0.1341236
|
0.0659899
|
-2.032485
|
0.0421046
|
0.9999971
|
m2m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 15152
## Note: no. genes in output = 15152
## Note: estimated proportion of input genes in output = 1
mres2m <- mitch_calc(m2m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres2m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("MDM_bystander_vs_latent.html") ) {
mitch_report(mres2m,outfile="MDM_bystander_vs_latent.html")
}
Alv cells.
pb2a <- pbmono[,c(grep("bystander",colnames(pbmono))[5:7],grep("latent",colnames(pbmono))[5:7])]
head(pb2a)
## bystander4 bystander5 bystander6 latent4 latent5 latent6
## MIR1302-2HG 0 0 0 1 0 0
## FAM138A 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0
## AL627309.1 38 9 14 65 59 76
## AL627309.3 1 0 0 1 1 0
## AL627309.2 0 0 0 0 0 1
pb2af <- pb2a[which(rowMeans(pb2a)>=10),]
head(pb2af)
## bystander4 bystander5 bystander6 latent4 latent5 latent6
## AL627309.1 38 9 14 65 59 76
## AL627309.5 51 24 12 108 90 67
## LINC01409 83 75 73 133 306 198
## LINC01128 162 109 105 299 413 348
## LINC00115 7 5 2 14 27 21
## FAM41C 60 27 37 123 134 150
colSums(pb2af)
## bystander4 bystander5 bystander6 latent4 latent5 latent6
## 22499172 14125923 13511893 43683486 56856783 52269186
des2a <- as.data.frame(grepl("latent",colnames(pb2af)))
colnames(des2a) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
#write.table(de2af,"de2af.tsv",sep="\t")
nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in latent compared to bystander (Alv unpaired)") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv unpaired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
CXCL10
|
44.012170
|
4.2741860
|
1.1047839
|
3.868798
|
0.0001094
|
0.5928750
|
IGFL2
|
7.886268
|
3.7012817
|
1.3089618
|
2.827647
|
0.0046892
|
0.9999928
|
IL6R-AS1
|
67.015522
|
1.1151355
|
0.3973662
|
2.806317
|
0.0050111
|
0.9999928
|
AC008074.2
|
499.422599
|
0.3448462
|
0.1275850
|
2.702875
|
0.0068743
|
0.9999928
|
TSPAN33
|
1034.763894
|
0.3459507
|
0.1288327
|
2.685270
|
0.0072471
|
0.9999928
|
PGM2L1
|
536.284970
|
0.3759213
|
0.1403729
|
2.678020
|
0.0074059
|
0.9999928
|
CCL17
|
48.843825
|
1.7304610
|
0.6601862
|
2.621171
|
0.0087628
|
0.9999928
|
NDRG2
|
497.756042
|
0.4322300
|
0.1707487
|
2.531380
|
0.0113615
|
0.9999928
|
AC113404.1
|
17.559930
|
2.4633517
|
0.9805551
|
2.512201
|
0.0119981
|
0.9999928
|
AC131254.1
|
28.499803
|
1.7491837
|
0.7045568
|
2.482673
|
0.0130401
|
0.9999928
|
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in latent compared to bystander (Alv unpaired)") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv unpaired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
STMN1
|
566.46714
|
-0.6392585
|
0.1450765
|
-4.406355
|
0.0000105
|
0.1709534
|
TOP2A
|
116.84436
|
-1.5152874
|
0.3805821
|
-3.981499
|
0.0000685
|
0.5568270
|
CENPF
|
114.89641
|
-1.5106116
|
0.4150031
|
-3.640001
|
0.0002726
|
0.9999928
|
CDKN3
|
98.04444
|
-0.9608618
|
0.3247637
|
-2.958649
|
0.0030899
|
0.9999928
|
S100A8
|
1122.32389
|
-0.4171230
|
0.1479561
|
-2.819235
|
0.0048138
|
0.9999928
|
GTSE1
|
43.18318
|
-1.3775450
|
0.4948834
|
-2.783575
|
0.0053763
|
0.9999928
|
PBK
|
18.32348
|
-1.7734490
|
0.6531597
|
-2.715184
|
0.0066239
|
0.9999928
|
ASPM
|
61.36350
|
-1.5226501
|
0.6052329
|
-2.515808
|
0.0118760
|
0.9999928
|
DLGAP5
|
45.23869
|
-1.5881515
|
0.6341380
|
-2.504425
|
0.0122650
|
0.9999928
|
CDK1
|
85.60023
|
-1.4296657
|
0.5767159
|
-2.478977
|
0.0131760
|
0.9999928
|
des2a$sample <- rep(1:3,2)
dds <- DESeqDataSetFromMatrix(countData = pb2af , colData = des2a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de2af <- de
write.table(de2af,"de2af.tsv",sep="\t")
nrow(subset(de2af,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de2af,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de2af, log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in latent compared to bystander (Alv paired)") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in latent compared to bystander (Alv paired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
CXCL10
|
44.012170
|
4.2741860
|
1.1047839
|
3.868798
|
0.0001094
|
0.5928750
|
IGFL2
|
7.886268
|
3.7012817
|
1.3089618
|
2.827647
|
0.0046892
|
0.9999928
|
IL6R-AS1
|
67.015522
|
1.1151355
|
0.3973662
|
2.806317
|
0.0050111
|
0.9999928
|
AC008074.2
|
499.422599
|
0.3448462
|
0.1275850
|
2.702875
|
0.0068743
|
0.9999928
|
TSPAN33
|
1034.763894
|
0.3459507
|
0.1288327
|
2.685270
|
0.0072471
|
0.9999928
|
PGM2L1
|
536.284970
|
0.3759213
|
0.1403729
|
2.678020
|
0.0074059
|
0.9999928
|
CCL17
|
48.843825
|
1.7304610
|
0.6601862
|
2.621171
|
0.0087628
|
0.9999928
|
NDRG2
|
497.756042
|
0.4322300
|
0.1707487
|
2.531380
|
0.0113615
|
0.9999928
|
AC113404.1
|
17.559930
|
2.4633517
|
0.9805551
|
2.512201
|
0.0119981
|
0.9999928
|
AC131254.1
|
28.499803
|
1.7491837
|
0.7045568
|
2.482673
|
0.0130401
|
0.9999928
|
head(subset(de2af, log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in latent compared to bystander (Alv paired)") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in latent compared to bystander (Alv paired)
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
STMN1
|
566.46714
|
-0.6392585
|
0.1450765
|
-4.406355
|
0.0000105
|
0.1709534
|
TOP2A
|
116.84436
|
-1.5152874
|
0.3805821
|
-3.981499
|
0.0000685
|
0.5568270
|
CENPF
|
114.89641
|
-1.5106116
|
0.4150031
|
-3.640001
|
0.0002726
|
0.9999928
|
CDKN3
|
98.04444
|
-0.9608618
|
0.3247637
|
-2.958649
|
0.0030899
|
0.9999928
|
S100A8
|
1122.32389
|
-0.4171230
|
0.1479561
|
-2.819235
|
0.0048138
|
0.9999928
|
GTSE1
|
43.18318
|
-1.3775450
|
0.4948834
|
-2.783575
|
0.0053763
|
0.9999928
|
PBK
|
18.32348
|
-1.7734490
|
0.6531597
|
-2.715184
|
0.0066239
|
0.9999928
|
ASPM
|
61.36350
|
-1.5226501
|
0.6052329
|
-2.515808
|
0.0118760
|
0.9999928
|
DLGAP5
|
45.23869
|
-1.5881515
|
0.6341380
|
-2.504425
|
0.0122650
|
0.9999928
|
CDK1
|
85.60023
|
-1.4296657
|
0.5767159
|
-2.478977
|
0.0131760
|
0.9999928
|
m2a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 16279
## Note: no. genes in output = 16279
## Note: estimated proportion of input genes in output = 1
mres2a <- mitch_calc(m2a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres2a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("Alv_bystander_vs_latent.html") ) {
mitch_report(mres2a,outfile="Alv_bystander_vs_latent.html")
}
DE3 Active vs mock
MDM group.
pb3m <- pbmono[,c(grep("active",colnames(pbmono))[1:4],grep("mock",colnames(pbmono))[1:4])]
head(pb3m)
## active0 active1 active2 active3 mock0 mock1 mock2 mock3
## MIR1302-2HG 0 0 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0 0 0
## AL627309.1 54 37 30 5 61 30 6 15
## AL627309.3 0 2 0 0 0 0 0 0
## AL627309.2 0 0 0 0 0 0 0 0
pb3mf <- pb3m[which(rowMeans(pb3m)>=10),]
head(pb3mf)
## active0 active1 active2 active3 mock0 mock1 mock2 mock3
## AL627309.1 54 37 30 5 61 30 6 15
## AL627309.5 15 14 15 6 17 27 1 16
## LINC01409 129 114 104 27 119 116 33 57
## LINC01128 262 164 167 94 173 118 57 159
## FAM41C 49 39 63 68 61 25 14 85
## NOC2L 183 126 246 155 185 153 75 221
colSums(pb3mf)
## active0 active1 active2 active3 mock0 mock1 mock2 mock3
## 30838055 22850851 26192488 13877828 29112212 21615267 7549870 20729914
des3m <- as.data.frame(grepl("active",colnames(pb3mf)))
colnames(des3m) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb3mf , colData = des3m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3mf <- de
write.table(de3mf,"de3mf.tsv",sep="\t")
nrow(subset(de3mf,padj<0.05 & log2FoldChange>0))
## [1] 102
nrow(subset(de3mf,padj<0.05 & log2FoldChange<0))
## [1] 220
head(subset(de3mf,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active MDM cells compared to mock") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active MDM cells compared to mock
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
PLAAT3
|
35.54458
|
2.6018024
|
0.4350913
|
5.979899
|
0.0e+00
|
0.0000031
|
LYRM1
|
207.51046
|
0.9010836
|
0.1550569
|
5.811308
|
0.0e+00
|
0.0000060
|
FAM241B
|
98.31075
|
1.4123422
|
0.2609457
|
5.412399
|
1.0e-07
|
0.0000431
|
CDKN1A
|
2611.64337
|
1.1118551
|
0.2071428
|
5.367578
|
1.0e-07
|
0.0000484
|
GRIP1
|
33.91692
|
2.0231034
|
0.3825670
|
5.288233
|
1.0e-07
|
0.0000641
|
DDB2
|
470.91041
|
0.6805458
|
0.1292762
|
5.264279
|
1.0e-07
|
0.0000706
|
KCNMB2-AS1
|
215.01389
|
1.5174273
|
0.2887412
|
5.255319
|
1.0e-07
|
0.0000716
|
FAS
|
177.02499
|
0.9989147
|
0.1966111
|
5.080661
|
4.0e-07
|
0.0001505
|
MDM2
|
3009.65137
|
1.0736852
|
0.2190317
|
4.901962
|
9.0e-07
|
0.0003209
|
SFXN1
|
174.71064
|
0.8474226
|
0.1751359
|
4.838657
|
1.3e-06
|
0.0004321
|
head(subset(de1mf,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in active MDM cells compared to mock") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in active MDM cells compared to mock
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AL031123.1
|
522.7177
|
-0.9662151
|
0.1225730
|
-7.882770
|
0
|
0.0e+00
|
HIST1H1C
|
231.6309
|
-2.1253675
|
0.2958066
|
-7.184990
|
0
|
0.0e+00
|
EVI2A
|
562.4969
|
-1.1141044
|
0.1595318
|
-6.983587
|
0
|
0.0e+00
|
TGFBI
|
459.4321
|
-2.3312427
|
0.3338677
|
-6.982533
|
0
|
0.0e+00
|
IGF1R
|
387.8538
|
-1.0023515
|
0.1441265
|
-6.954663
|
0
|
0.0e+00
|
RCBTB2
|
798.4883
|
-1.3656222
|
0.2027045
|
-6.737011
|
0
|
0.0e+00
|
IGFBP7
|
201.0449
|
-1.0455119
|
0.1618281
|
-6.460632
|
0
|
2.0e-07
|
CFD
|
3142.4873
|
-1.5714825
|
0.2440764
|
-6.438487
|
0
|
2.0e-07
|
HVCN1
|
883.5545
|
-0.8124014
|
0.1316376
|
-6.171499
|
0
|
1.1e-06
|
MLLT3
|
126.9514
|
-1.2348104
|
0.2115674
|
-5.836487
|
0
|
7.2e-06
|
m3m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 14592
## Note: no. genes in output = 14592
## Note: estimated proportion of input genes in output = 1
mres3m <- mitch_calc(m3m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres3m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("MDM_mock_vs_active.html") ) {
mitch_report(mres3m,outfile="MDM_mock_vs_active.html")
}
pb3a <- pbmono[,c(grep("active",colnames(pbmono))[5:7],grep("mock",colnames(pbmono))[5:7])]
head(pb3a)
## active4 active5 active6 mock4 mock5 mock6
## MIR1302-2HG 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0
## AL627309.1 16 11 27 46 26 52
## AL627309.3 0 0 0 0 0 1
## AL627309.2 0 0 0 0 0 0
pb3af <- pb3a[which(rowMeans(pb3a)>=10),]
head(pb3af)
## active4 active5 active6 mock4 mock5 mock6
## AL627309.1 16 11 27 46 26 52
## AL627309.5 26 23 15 63 29 50
## LINC01409 87 151 92 59 103 140
## LINC01128 293 284 219 176 200 250
## LINC00115 10 7 10 10 12 15
## FAM41C 136 117 84 74 53 98
colSums(pb3af)
## active4 active5 active6 mock4 mock5 mock6
## 30051410 28505235 24028409 20446755 25545005 34873898
des3a <- as.data.frame(grepl("active",colnames(pb3af)))
colnames(des3a) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb3af , colData = des3a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de3af <- de
write.table(de3af,"de3af.tsv",sep="\t")
nrow(subset(de3af,padj<0.05 & log2FoldChange>0))
## [1] 891
nrow(subset(de3af,padj<0.05 & log2FoldChange<0))
## [1] 653
head(subset(de3af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
AL157912.1
|
754.26670
|
2.731970
|
0.2091846
|
13.060089
|
0
|
0
|
LAYN
|
553.09496
|
2.261020
|
0.1871843
|
12.079108
|
0
|
0
|
HES4
|
382.20667
|
2.359190
|
0.2058619
|
11.460064
|
0
|
0
|
CDKN1A
|
5303.58884
|
1.555197
|
0.1361511
|
11.422583
|
0
|
0
|
OCIAD2
|
353.43173
|
2.483107
|
0.2219754
|
11.186406
|
0
|
0
|
SDS
|
4763.87880
|
2.107153
|
0.1953024
|
10.789184
|
0
|
0
|
IL6R-AS1
|
455.12951
|
3.520018
|
0.3307596
|
10.642225
|
0
|
0
|
CCL7
|
1444.11412
|
1.947067
|
0.1938562
|
10.043870
|
0
|
0
|
TNFRSF9
|
169.65948
|
2.509770
|
0.2514923
|
9.979509
|
0
|
0
|
CLIC6
|
64.19776
|
4.054497
|
0.4350349
|
9.319935
|
0
|
0
|
head(subset(de3af,padj<0.05 & log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active Alv cells compared to mock") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells compared to mock
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
NDRG2
|
317.0287
|
-1.8033036
|
0.1656273
|
-10.887716
|
0
|
0
|
HIST1H1C
|
791.4543
|
-1.1579180
|
0.1105796
|
-10.471351
|
0
|
0
|
CEBPD
|
686.4608
|
-1.4769083
|
0.1637149
|
-9.021221
|
0
|
0
|
ADA2
|
1791.4889
|
-1.0408494
|
0.1158943
|
-8.981020
|
0
|
0
|
CRIM1
|
388.2713
|
-1.6778340
|
0.1948415
|
-8.611277
|
0
|
0
|
LYZ
|
46011.7884
|
-1.1761399
|
0.1385676
|
-8.487841
|
0
|
0
|
RPL10A
|
8121.1434
|
-0.8692328
|
0.1046647
|
-8.304927
|
0
|
0
|
CHST13
|
459.7071
|
-1.6239334
|
0.1970679
|
-8.240475
|
0
|
0
|
TGFBI
|
395.6418
|
-2.3302926
|
0.2874197
|
-8.107631
|
0
|
0
|
PIK3CD
|
583.8467
|
-1.0404514
|
0.1382467
|
-7.526050
|
0
|
0
|
m3a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 15748
## Note: no. genes in output = 15748
## Note: estimated proportion of input genes in output = 1
mres3a <- mitch_calc(m3a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres3a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("Alv_mock_vs_active.html") ) {
mitch_report(mres3a,outfile="Alv_mock_vs_active.html")
}
DE4 Mock vs bystander
MDM group.
pb4m <- pbmono[,c(grep("mock",colnames(pbmono))[1:4],grep("bystander",colnames(pbmono))[1:4])]
head(pb4m)
## mock0 mock1 mock2 mock3 bystander0 bystander1 bystander2 bystander3
## MIR1302-2HG 0 0 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0 0 0
## AL627309.1 61 30 6 15 65 51 19 20
## AL627309.3 0 0 0 0 2 4 0 0
## AL627309.2 0 0 0 0 0 0 0 0
pb4mf <- pb4m[which(rowMeans(pb4m)>=10),]
head(pb4mf)
## mock0 mock1 mock2 mock3 bystander0 bystander1 bystander2 bystander3
## AL627309.1 61 30 6 15 65 51 19 20
## AL627309.5 17 27 1 16 33 35 8 27
## LINC01409 119 116 33 57 136 246 63 58
## LINC01128 173 118 57 159 216 208 111 141
## FAM41C 61 25 14 85 43 69 19 82
## NOC2L 185 153 75 221 261 253 105 221
colSums(pb4mf)
## mock0 mock1 mock2 mock3 bystander0 bystander1 bystander2
## 29115390 21618583 7551101 20732742 40541968 36888193 16232018
## bystander3
## 20812550
des4m <- as.data.frame(grepl("bystander",colnames(pb4mf)))
colnames(des4m) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb4mf , colData = des4m, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4mf <- de
write.table(de4mf,"de4mf.tsv",sep="\t")
nrow(subset(de4mf,padj<0.05 & log2FoldChange>0))
## [1] 0
nrow(subset(de4mf,padj<0.05 & log2FoldChange<0))
## [1] 0
head(subset(de4mf, log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in bystander MDM cells compared to mock") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in bystander MDM cells compared to mock
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
ISG15
|
9.581964e+02
|
0.7481468
|
0.1770234
|
4.226260
|
0.0000238
|
0.3512317
|
PLAAT3
|
2.158600e+01
|
1.5238610
|
0.3771661
|
4.040292
|
0.0000534
|
0.3945669
|
CCL8
|
9.105935e+00
|
3.0204487
|
0.8337410
|
3.622766
|
0.0002915
|
0.8616994
|
PSME2
|
3.488322e+03
|
0.3628331
|
0.1041101
|
3.485090
|
0.0004920
|
0.9999825
|
RBP7
|
3.255822e+02
|
0.7310547
|
0.2625760
|
2.784164
|
0.0053666
|
0.9999825
|
IL3RA
|
2.090766e+01
|
1.3943247
|
0.5148105
|
2.708423
|
0.0067604
|
0.9999825
|
AL022724.3
|
9.883659e+00
|
1.5278869
|
0.5704803
|
2.678247
|
0.0074009
|
0.9999825
|
IFI6
|
1.175490e+04
|
0.8461631
|
0.3376461
|
2.506065
|
0.0122083
|
0.9999825
|
APOE
|
2.866074e+05
|
0.2505801
|
0.1007699
|
2.486658
|
0.0128949
|
0.9999825
|
SLC9B1
|
4.880829e+01
|
0.7097613
|
0.2861211
|
2.480632
|
0.0131150
|
0.9999825
|
head(subset(de4mf, log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top downregulated genes in bystander MDM cells compared to mock") %>%
kable_paper("hover", full_width = F)
Top downregulated genes in bystander MDM cells compared to mock
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
MKI67
|
142.22093
|
-2.4532927
|
0.6692499
|
-3.665735
|
0.0002466
|
0.8616994
|
ANLN
|
34.25600
|
-1.8308757
|
0.5006972
|
-3.656652
|
0.0002555
|
0.8616994
|
TOP2A
|
74.55230
|
-2.2512332
|
0.6515213
|
-3.455349
|
0.0005496
|
0.9999825
|
CENPF
|
123.39692
|
-2.1330043
|
0.6199216
|
-3.440765
|
0.0005801
|
0.9999825
|
TPX2
|
71.13683
|
-1.3763558
|
0.4157971
|
-3.310162
|
0.0009324
|
0.9999825
|
TYMS
|
129.47857
|
-1.6430364
|
0.5091321
|
-3.227132
|
0.0012504
|
0.9999825
|
CDK1
|
59.07470
|
-2.2656029
|
0.7147499
|
-3.169784
|
0.0015255
|
0.9999825
|
CCNB1
|
141.60884
|
-0.8643030
|
0.2739723
|
-3.154709
|
0.0016066
|
0.9999825
|
UBE2C
|
68.40282
|
-2.5544715
|
0.8122793
|
-3.144819
|
0.0016619
|
0.9999825
|
ATAD2
|
196.22099
|
-0.6122296
|
0.1958850
|
-3.125455
|
0.0017753
|
0.9999825
|
m4m <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 14829
## Note: no. genes in output = 14829
## Note: estimated proportion of input genes in output = 1
mres4m <- mitch_calc(m4m,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres4m$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("MDM_mock_vs_bystander.html") ) {
mitch_report(mres4m,outfile="MDM_mock_vs_bystander.html")
}
Alv cells.
pb4a <- pbmono[,c(grep("mock",colnames(pbmono))[5:7],grep("bystander",colnames(pbmono))[5:7])]
head(pb4a)
## mock4 mock5 mock6 bystander4 bystander5 bystander6
## MIR1302-2HG 0 0 0 0 0 0
## FAM138A 0 0 0 0 0 0
## OR4F5 0 0 0 0 0 0
## AL627309.1 46 26 52 38 9 14
## AL627309.3 0 0 1 1 0 0
## AL627309.2 0 0 0 0 0 0
pb4af <- pb4a[which(rowMeans(pb4a)>=10),]
head(pb4af)
## mock4 mock5 mock6 bystander4 bystander5 bystander6
## AL627309.1 46 26 52 38 9 14
## AL627309.5 63 29 50 51 24 12
## LINC01409 59 103 140 83 75 73
## LINC01128 176 200 250 162 109 105
## FAM41C 74 53 98 60 27 37
## SAMD11 19 44 37 29 16 24
colSums(pb4af)
## mock4 mock5 mock6 bystander4 bystander5 bystander6
## 20441633 25539944 34866574 22487395 14121087 13506744
des4a <- as.data.frame(grepl("bystander",colnames(pb4af)))
colnames(des4a) <- "case"
dds <- DESeqDataSetFromMatrix(countData = pb4af , colData = des4a, design = ~ case)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
de <- as.data.frame(zz[order(zz$pvalue),])
de4af <- de
write.table(de4af,"de4af.tsv",sep="\t")
nrow(subset(de4af,padj<0.05 & log2FoldChange>0))
## [1] 34
nrow(subset(de4af,padj<0.05 & log2FoldChange<0))
## [1] 1
head(subset(de4af,padj<0.05 & log2FoldChange>0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in latent Alv cells compared to ") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in latent Alv cells compared to
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
IFI6
|
14056.2140
|
2.519812
|
0.3516368
|
7.165952
|
0e+00
|
0.0000000
|
PARP14
|
1373.6146
|
1.158578
|
0.1699413
|
6.817516
|
0e+00
|
0.0000001
|
EPSTI1
|
451.5036
|
3.158925
|
0.4698560
|
6.723178
|
0e+00
|
0.0000001
|
IFI44L
|
110.5209
|
4.268213
|
0.7249179
|
5.887858
|
0e+00
|
0.0000148
|
LY6E
|
7066.7899
|
1.224629
|
0.2212063
|
5.536140
|
0e+00
|
0.0000935
|
OAS1
|
515.0690
|
1.508904
|
0.2764750
|
5.457649
|
0e+00
|
0.0001216
|
OAS2
|
433.7604
|
2.170933
|
0.4130633
|
5.255691
|
1e-07
|
0.0002860
|
PLSCR1
|
626.7749
|
1.356400
|
0.2583158
|
5.250936
|
2e-07
|
0.0002860
|
IRF7
|
151.9566
|
1.638111
|
0.3201803
|
5.116214
|
3e-07
|
0.0004805
|
IFITM3
|
363.0933
|
1.758384
|
0.3439334
|
5.112572
|
3e-07
|
0.0004805
|
head(subset(de4af, log2FoldChange<0),10)[,1:6] %>%
kbl(caption="Top upregulated genes in active Alv cells") %>%
kable_paper("hover", full_width = F)
Top upregulated genes in active Alv cells
|
baseMean
|
log2FoldChange
|
lfcSE
|
stat
|
pvalue
|
padj
|
SPP1
|
42572.64456
|
-0.3791828
|
0.0816333
|
-4.644950
|
0.0000034
|
0.0030256
|
PPBP
|
25.43916
|
-6.1176830
|
1.6189686
|
-3.778753
|
0.0001576
|
0.0595826
|
MTSS1
|
204.32602
|
-0.7552875
|
0.2151692
|
-3.510203
|
0.0004478
|
0.1538782
|
FCGBP
|
24.74282
|
-4.0993571
|
1.2982279
|
-3.157656
|
0.0015904
|
0.4537531
|
CCL7
|
377.23383
|
-0.7745713
|
0.2736480
|
-2.830539
|
0.0046470
|
0.9999996
|
C3orf14
|
248.32783
|
-0.4720003
|
0.1708214
|
-2.763122
|
0.0057251
|
0.9999996
|
F13A1
|
36.21284
|
-5.1869923
|
1.8823361
|
-2.755614
|
0.0058582
|
0.9999996
|
CLEC5A
|
29.78097
|
-2.0391123
|
0.7478480
|
-2.726640
|
0.0063983
|
0.9999996
|
MT1H
|
898.86105
|
-0.4389781
|
0.1650434
|
-2.659773
|
0.0078193
|
0.9999996
|
STEAP1B
|
36.95284
|
-1.1214690
|
0.4341759
|
-2.582983
|
0.0097950
|
0.9999996
|
m4a <- mitch_import(de,DEtype="deseq2",joinType="full")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 15135
## Note: no. genes in output = 15135
## Note: estimated proportion of input genes in output = 1
mres4a <- mitch_calc(m4a,genesets=go,minsetsize=5,cores=4,priority="effect")
## Note: Enrichments with large effect sizes may not be
## statistically significant.
res <- mres4a$enrichment_result
res <- subset(res,p.adjustANOVA<0.05)
resup <- subset(res,s.dist>0)
resdn <- subset(res,s.dist<0)
s <- c(head(resup$s.dist,10), head(resdn$s.dist,10))
names(s) <- c(head(resup$set,10),head(resdn$set,10))
s <- s[order(s)]
cols <- gsub("1","red",gsub("-1","blue",as.character(sign(s))))
par(mar=c(5,27,3,1))
barplot(abs(s),las=1,horiz=TRUE,col=cols,xlab="ES",cex.names=0.8,main="")
if (! file.exists("Alv_mock_vs_bystander.html") ) {
mitch_report(mres4a,outfile="Alv_mock_vs_bystander.html")
}