Joint RNA and ATAC analysis using 10x multiomic technology. This workflow is adapted from the Signac guide.
Source code: TBA.
There are datasets in two batches, April and August.
library("irlba")
## Loading required package: Matrix
library("Signac")
## Signac built for for SeuratObject v4 was just loaded with SeuratObject
## v5; disabling v5 assays and validation routines, and ensuring assays
## work in strict v3/v4 compatibility mode
library("Seurat")
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following object is masked from 'package:base':
##
## intersect
library("hdf5r")
library("EnsDb.Hsapiens.v86")
## Loading required package: ensembldb
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:SeuratObject':
##
## intersect
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:hdf5r':
##
## values
## The following objects are masked from 'package:Matrix':
##
## expand, unname
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:sp':
##
## %over%
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
##
## filter
library("BSgenome.Hsapiens.UCSC.hg38")
## Loading required package: BSgenome
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: BiocIO
## Loading required package: rtracklayer
##
## Attaching package: 'rtracklayer'
## The following object is masked from 'package:BiocIO':
##
## FileForFormat
library("glmGamPoi")
library("SeuratDisk") #devtools::install_github("mojaveazure/seurat-disk")
## Registered S3 method overwritten by 'SeuratDisk':
## method from
## as.sparse.H5Group Seurat
library("qlcMatrix") #devtools::install_github("cysouw/qlcMatrix")
## Loading required package: slam
## Loading required package: sparsesvd
##
## Attaching package: 'qlcMatrix'
## The following objects are masked from 'package:Biobase':
##
## rowMax, rowMin
Load the RNA and ATAC data.
Annotate genes by organism.
counts <- Read10X_h5("FLW_april/outs/filtered_feature_bc_matrix.h5")
## Genome matrix has multiple modalities, returning a list of matrices for this genome
fragpath <- "FLW_april/outs/atac_fragments.tsv.gz"
Annotate genes by organism.
#Mycoplasma pneumoniae chromosome
MPstart="gene-AXA72_00005"
MPend="gene-AXA72_03910"
MPgenes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == MPstart) : which(rownames(counts$`Gene Expression`) == MPend)]
MPstart="dnaA"
MPend="gene-EXC43_RS04465"
MPplas1genes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == MPstart) : which(rownames(counts$`Gene Expression`) == MPend)]
MPstart="dnaN"
MPend="gene-EXC43_RS04315"
MPplas2genes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == MPstart) : which(rownames(counts$`Gene Expression`) == MPend)]
MPgenes_full <- unique(c(MPgenes,MPplas1genes,MPplas2genes))
str(MPgenes_full)
## chr [1:1566] "gene-AXA72_00005" "gene-AXA72_00010" "gyrB" ...
table(colSums(as.data.frame(counts$`Gene Expression`[rownames(counts$`Gene Expression`) %in% MPgenes_full,])))
##
## 0 1
## 12287 3
#lambda
Lstart="nu1"
Lend="orf206b"
Lgenes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == Lstart) : which(rownames(counts$`Gene Expression`) == Lend)]
str(Lgenes)
## chr [1:92] "nu1" "A" "W" "B" "C" "nu3" "D" "E" "Fi" "Fii" "Z" "U" "V" "G" ...
table(colSums(as.data.frame(counts$`Gene Expression`[rownames(counts$`Gene Expression`) %in% Lgenes,])))
##
## 0
## 12290
#HHV5
H5start="RL10"
H5end="RL8A"
H5genes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == H5start) : which(rownames(counts$`Gene Expression`) == H5end)]
str(H5genes)
## chr [1:180] "RL10" "RL11" "RL12" "RL13" "UL1" "UL4" "UL5" "UL6" "UL7" ...
table(colSums(as.data.frame(counts$`Gene Expression`[rownames(counts$`Gene Expression`) %in% H5genes,])))
##
## 0
## 12290
#Human gammaherpesvirus 4 (EBV)
Estart="BNRF1"
Eend="LMP-1"
Egenes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == Estart) : which(rownames(counts$`Gene Expression`) == Eend)]
str(Egenes)
## chr [1:77] "BNRF1" "BORF1" "BORF2" "BRRF1" "BRRF2" "BSRF1" "BTRF1" "BVRF1" ...
table(colSums(as.data.frame(counts$`Gene Expression`[rownames(counts$`Gene Expression`) %in% Egenes,])))
##
## 0
## 12290
#Human gene expression
HUstart="MIR1302-2HG"
HUend="LINC00266-4P"
HUgenes <- rownames(counts$`Gene Expression`)[which(rownames(counts$`Gene Expression`) == HUstart) : which(rownames(counts$`Gene Expression`) == HUend)]
str(HUgenes)
## chr [1:36572] "MIR1302-2HG" "OR4F5" "AL627309.4" "AP006222.2" "AL732372.1" ...
mycolsums <- colSums(as.data.frame(counts$`Gene Expression`[rownames(counts$`Gene Expression`) %in% HUgenes,]))
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.3 GiB
str(mycolsums)
## Named num [1:12290] 1733 2753 7394 1390 15216 ...
## - attr(*, "names")= chr [1:12290] "AAACAGCCACATTAAC-1" "AAACATGCAATAATCC-1" "AAACATGCACCGGTAT-1" "AAACATGCATAATGTC-1" ...
summary(mycolsums)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 49 1549 2384 4479 4048 854493
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
## Warning in .merge_two_Seqinfo_objects(x, y): The 2 combined objects have no sequence levels in common. (Use
## suppressWarnings() to suppress this warning.)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))
annotation
## GRanges object with 3021151 ranges and 5 metadata columns:
## seqnames ranges strand | tx_id gene_name
## <Rle> <IRanges> <Rle> | <character> <character>
## ENSE00001489430 chrX 276322-276394 + | ENST00000399012 PLCXD1
## ENSE00001536003 chrX 276324-276394 + | ENST00000484611 PLCXD1
## ENSE00002160563 chrX 276353-276394 + | ENST00000430923 PLCXD1
## ENSE00001750899 chrX 281055-281121 + | ENST00000445062 PLCXD1
## ENSE00001489388 chrX 281192-281684 + | ENST00000381657 PLCXD1
## ... ... ... ... . ... ...
## ENST00000361739 chrMT 7586-8269 + | ENST00000361739 MT-CO2
## ENST00000361789 chrMT 14747-15887 + | ENST00000361789 MT-CYB
## ENST00000361851 chrMT 8366-8572 + | ENST00000361851 MT-ATP8
## ENST00000361899 chrMT 8527-9207 + | ENST00000361899 MT-ATP6
## ENST00000362079 chrMT 9207-9990 + | ENST00000362079 MT-CO3
## gene_id gene_biotype type
## <character> <character> <factor>
## ENSE00001489430 ENSG00000182378 protein_coding exon
## ENSE00001536003 ENSG00000182378 protein_coding exon
## ENSE00002160563 ENSG00000182378 protein_coding exon
## ENSE00001750899 ENSG00000182378 protein_coding exon
## ENSE00001489388 ENSG00000182378 protein_coding exon
## ... ... ... ...
## ENST00000361739 ENSG00000198712 protein_coding cds
## ENST00000361789 ENSG00000198727 protein_coding cds
## ENST00000361851 ENSG00000228253 protein_coding cds
## ENST00000361899 ENSG00000198899 protein_coding cds
## ENST00000362079 ENSG00000198938 protein_coding cds
## -------
## seqinfo: 25 sequences (1 circular) from GRCh38 genome
It will contain RNA and ATAC data.
pbmc <- CreateSeuratObject( counts = counts$`Gene Expression`, assay = "RNA")
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
head(pbmc)
## orig.ident nCount_RNA nFeature_RNA
## AAACAGCCACATTAAC-1 SeuratProject 1733 1134
## AAACATGCAATAATCC-1 SeuratProject 2753 1398
## AAACATGCACCGGTAT-1 SeuratProject 7394 2904
## AAACATGCATAATGTC-1 SeuratProject 1390 942
## AAACATGCATACTCCT-1 SeuratProject 15216 4875
## AAACATGCATGAGCAG-1 SeuratProject 1930 1032
## AAACATGCATTATGGT-1 SeuratProject 1465 849
## AAACATGCATTCCTCG-1 SeuratProject 3956 1811
## AAACATGCATTGTGTG-1 SeuratProject 2055 1024
## AAACCAACAATTAAGG-1 SeuratProject 1860 1116
pbmc[["ATAC"]] <- CreateChromatinAssay(counts = counts$Peaks,sep = c(":", "-"),
fragments = fragpath,annotation = annotation)
## Computing hash
pbmc
## An object of class Seurat
## 164481 features across 12290 samples within 2 assays
## Active assay: RNA (38544 features, 0 variable features)
## 2 layers present: counts, data
## 1 other assay present: ATAC
Compute per-cell quality metrics using DNA accessibility data.
DefaultAssay(pbmc) <- "ATAC"
pbmc <- NucleosomeSignal(pbmc)
pbmc <- TSSEnrichment(pbmc)
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
VlnPlot(
object = pbmc,
features = c("nCount_RNA", "nCount_ATAC", "TSS.enrichment", "nucleosome_signal"),
ncol = 4,
pt.size = 0
)
Remove cells that are outliers for these metrics, as well as cells with low or unusually high counts for either the RNA or ATAC assay.
pbmc <- subset(
x = pbmc,
subset = nCount_ATAC < 100000 &
nCount_RNA < 25000 &
nCount_ATAC > 1000 &
nCount_RNA > 1000 &
nucleosome_signal < 2 &
TSS.enrichment > 1
)
pbmc
## An object of class Seurat
## 164481 features across 10462 samples within 2 assays
## Active assay: ATAC (125937 features, 0 variable features)
## 2 layers present: counts, data
## 1 other assay present: RNA
Using MACS2. We will remove peaks on nonstandard chromosomes and in genomic blacklist regions. Then quantify counts in each peak and create a new assay using the MACS2 peak set and add it to the Seurat object
peaks <- CallPeaks(pbmc)
peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse")
peaks <- subsetByOverlaps(x = peaks, ranges = blacklist_hg38_unified, invert = TRUE)
macs2_counts <- FeatureMatrix(
fragments = Fragments(pbmc),
features = peaks,
cells = colnames(pbmc)
)
## Extracting reads overlapping genomic regions
pbmc[["peaks"]] <- CreateChromatinAssay(
counts = macs2_counts,
fragments = fragpath,
annotation = annotation
)
## Computing hash
We can normalize the gene expression data using SCTransform, and reduce the dimensionality using PCA.
DefaultAssay(pbmc) <- "RNA"
pbmc <- SCTransform(pbmc)
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 24389 by 10462
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## Found 187 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 24389 genes
## Computing corrected count matrix for 24389 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 30.70272 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Set default assay to SCT
pbmc <- RunPCA(pbmc)
## PC_ 1
## Positive: VCAN, ZEB2, DPYD, PLXDC2, SLC8A1, NEAT1, LRMDA, CD36, PLCB1, AOAH
## PID1, LYST, DMXL2, FCN1, CSF3R, LRRK2, HDAC9, ARHGAP26, MCTP1, VMP1
## IRAK3, RBM47, TBXAS1, TMTC2, RTN1, WDFY3, PICALM, LYN, ZSWIM6, LYZ
## Negative: BACH2, SKAP1, LEF1, CAMK4, B2M, BCL11B, ANK3, INPP4B, BANK1, PRKCH
## BCL2, ARHGAP15, CD247, MAML2, NELL2, AFF3, THEMIS, TSHZ2, IL7R, TC2N
## RPS27, RORA, CD96, RPL13A, PCED1B, KLF12, PDE7A, ETS1, HLA-B, SYNE2
## PC_ 2
## Positive: BANK1, AFF3, RALGPS2, IGHM, IGKC, EBF1, CD74, MS4A1, FCRL1, OSBPL10
## COL19A1, PAX5, SOX5, HLA-DRA, CCSER1, BLK, BACH2, CDK14, LINC00926, COBLL1
## ARHGAP24, KHDRBS2, CD79A, ADAM28, AUTS2, AP002075.1, PLEKHG1, GPM6A, STEAP1B, IGLC2
## Negative: PRKCH, LEF1, CAMK4, BCL11B, CD247, INPP4B, THEMIS, SKAP1, IL7R, PDE3B
## NELL2, FYB1, TC2N, RORA, PRKCA, TNIK, MAML2, TSHZ2, NCALD, ITK
## TXK, PLCL1, AAK1, STAT4, SYNE2, PITPNC1, CD96, SERINC5, DPYD, LINC01934
## PC_ 3
## Positive: LEF1, MAML2, CAMK4, FOXP1, TSHZ2, BACH2, FHIT, VCAN, NELL2, INPP4B
## PRKCA, ANK3, PLCL1, DPYD, ARHGAP15, PDE3B, BCL11B, SERINC5, BCL2, AL589693.1
## PDE7A, CSGALNACT1, MLLT3, DOCK10, TCF7, AC139720.1, IL6ST, IL7R, THEMIS, IGF1R
## Negative: AC105402.3, GNLY, NKG7, CCL5, PRF1, TGFBR3, KLRD1, KLRF1, PDGFD, ACTB
## TMSB4X, B2M, BNC2, FCGR3A, HLA-C, GZMA, C1orf21, GZMH, FGFBP2, NCALD
## PPP2R2B, MYBL1, CTSW, GZMB, IL2RB, NCAM1, SPON2, TCF7L2, ADGRG1, DTHD1
## PC_ 4
## Positive: GNLY, NKG7, BNC2, TGFBR3, PDGFD, KLRD1, KLRF1, PRF1, NCALD, TOX
## PPP2R2B, MCTP2, C1orf21, NCAM1, SYNE1, SAMD3, PYHIN1, MYBL1, CCL5, IKZF2
## SPON2, CD247, IL2RB, LINGO2, VCAN, FGFBP2, LINC00299, PRKCH, GZMA, AL392086.3
## Negative: AC105402.3, RPS27, RPL13A, EEF1A1, RPS12, RPS3A, RPL32, RPL19, RPL13, RPL28
## RPLP2, RPL23A, RPS8, RPS18, RPS25, RPL41, TCF7L2, TPT1, RPL10, RPL31
## RPS15A, RPL21, RPS23, RPS11, RPS20, RPS14, RPL18A, RPS15, RPL30, RPL23
## PC_ 5
## Positive: TCF4, LINC01374, AC023590.1, LINC01478, FAM160A1, RHEX, EPHB1, CUX2, ZFAT, BX284613.2
## TCF7L2, UGCG, PDE4B, LINC00996, COL24A1, CCDC50, PTPRS, SCN9A, PLXNA4, RUNX2
## NRP1, NEGR1, DACH1, SCN1A-AS1, SLC41A2, FLT3, CLEC4C, RBMS3, SLC35F3, COL26A1
## Negative: VCAN, BANK1, PLCB1, IGHM, ARHGAP24, IGKC, DPYD, RALGPS2, CSF3R, CD36
## AC105402.3, DYSF, EBF1, VCAN-AS1, ACSL1, MEGF9, FCRL1, MS4A1, ARHGAP26, PDE4D
## COL19A1, RPS27, PAX5, NRG1, LRRK2, SLC2A3, BACH2, RPL13A, OSBPL10, CREB5
Here we process the DNA accessibility assay the same way we would process a scATAC-seq dataset, by performing latent semantic indexing (LSI).
DefaultAssay(pbmc) <- "peaks"
pbmc <- FindTopFeatures(pbmc, min.cutoff = 5)
pbmc <- RunTFIDF(pbmc)
## Performing TF-IDF normalization
## Warning in RunTFIDF.default(object = GetAssayData(object = object, slot =
## "counts"), : Some features contain 0 total counts
pbmc <- RunSVD(pbmc)
## Running SVD
## Scaling cell embeddings
To annotate cell types in the dataset we can transfer cell labels from an existing PBMC reference dataset using tools in the Seurat package. See the Seurat reference mapping vignette for more information.
We’ll use an annotated PBMC reference dataset from Hao et al. (2020), available for download.
Note that the SeuratDisk package is required to load the reference dataset. Installation instructions for SeuratDisk can be found here.
Load reference data.
options(timeout=10000)
if (! file.exists("pbmc_multimodal.h5seurat")) {
download.file("https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat",
destfile="pbmc_multimodal.h5seurat")
}
reference <- LoadH5Seurat("pbmc_multimodal.h5seurat", assays = list("SCT" = "counts"), reductions = 'spca')
## Validating h5Seurat file
## Initializing SCT with counts
## Adding variable feature information for SCT
## Adding miscellaneous information for SCT
## Adding reduction spca
## Adding cell embeddings for spca
## Adding feature loadings for spca
## Adding miscellaneous information for spca
## Adding graph wknn
## Adding graph wsnn
## Adding command information
## Adding cell-level metadata
reference <- UpdateSeuratObject(reference)
## Validating object structure
## Updating object slots
## Ensuring keys are in the proper structure
## Updating matrix keys for DimReduc 'spca'
## Ensuring keys are in the proper structure
## Ensuring feature names don't have underscores or pipes
## Updating slots in SCT
## Updating slots in wknn
## Cannot find wknn in the object, setting default assay of wknn to SCT
## Updating slots in wsnn
## Cannot find wsnn in the object, setting default assay of wsnn to SCT
## Updating slots in spca
## Validating object structure for SCTAssay 'SCT'
## Validating object structure for Graph 'wknn'
## Validating object structure for Graph 'wsnn'
## Validating object structure for DimReduc 'spca'
## Object representation is consistent with the most current Seurat version
DefaultAssay(pbmc) <- "SCT"
# transfer cell type labels from reference to query
transfer_anchors <- FindTransferAnchors(
reference = reference,
query = pbmc,
normalization.method = "SCT",
reference.reduction = "spca",
recompute.residuals = FALSE,
dims = 1:50
)
## Projecting cell embeddings
## Counts matrix provided is not sparse; vreating v5 assay in Seurat object
## Finding neighborhoods
## Finding anchors
## Found 9413 anchors
predictions <- TransferData(
anchorset = transfer_anchors,
refdata = reference$celltype.l2,
weight.reduction = pbmc[['pca']],
dims = 1:50
)
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
pbmc <- AddMetaData(
object = pbmc,
metadata = predictions
)
# set the cell identities to the cell type predictions
Idents(pbmc) <- "predicted.id"
Using the weighted nearest neighbor methods in Seurat v4, we can compute a joint neighbor graph that represent both the gene expression and DNA accessibility measurements.
# build a joint neighbor graph using both assays
pbmc <- FindMultiModalNeighbors(
object = pbmc,
reduction.list = list("pca", "lsi"),
dims.list = list(1:50, 2:40),
modality.weight.name = "RNA.weight",
verbose = TRUE
)
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Warning in FindMultiModalNeighbors(object = pbmc, reduction.list = list("pca",
## : The number of provided modality.weight.name is not equal to the number of
## modalities. SCT.weight peaks.weight are used to store the modality weights
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
# build a joint UMAP visualization
pbmc <- RunUMAP(
object = pbmc,
nn.name = "weighted.nn",
assay = "RNA",
verbose = TRUE
)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:39:47 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
## Also defined by 'spam'
## 13:39:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 13:39:51 Initializing from normalized Laplacian + noise (using RSpectra)
## 13:39:51 Commencing optimization for 200 epochs, with 326568 positive edges
## 13:39:55 Optimization finished
DimPlot(pbmc, label = TRUE, repel = TRUE, reduction = "umap") + NoLegend()
For each gene, we can find the set of peaks that may regulate the gene by by computing the correlation between gene expression and accessibility at nearby peaks, and correcting for bias due to GC content, overall accessibility, and peak size. See the Signac paper for a full description of the method we use to link peaks to genes.
Running this step on the whole genome can be time consuming, so here we demonstrate peak-gene links for a subset of genes as an example. The same function can be used to find links for all genes by omitting the genes.use parameter.
DefaultAssay(pbmc) <- "peaks"
# first compute the GC content for each peak
pbmc <- RegionStats(pbmc, genome = BSgenome.Hsapiens.UCSC.hg38)
# link peaks to genes
pbmc <- LinkPeaks(
object = pbmc,
peak.assay = "peaks",
expression.assay = "SCT"
)
## Testing 13132 genes and 120708 peaks
## Found gene coordinates for 11040 genes
## Warning in .merge_two_Seqinfo_objects(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': chrM
## - in 'y': chrY, chrMT
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
## 'as(<dgCMatrix>, "dgTMatrix")' is deprecated.
## Use 'as(., "TsparseMatrix")' instead.
## See help("Deprecated") and help("Matrix-deprecated").
We can visualize these links using the CoveragePlot()
function, or alternatively we could use the CoverageBrowser()
function in an interactive analysis.
idents.plot <- c("B naive", "B intermediate", "B memory",
"CD14 Mono", "CD16 Mono", "CD8 TEM", "CD8 Naive")
p1 <- CoveragePlot(
object = pbmc,
region = "MS4A1",
features = "MS4A1",
expression.assay = "SCT",
idents = idents.plot,
extend.upstream = 500,
extend.downstream = 10000
)
p2 <- CoveragePlot(
object = pbmc,
region = "LYZ",
features = "LYZ",
expression.assay = "SCT",
idents = idents.plot,
extend.upstream = 8000,
extend.downstream = 5000
)
patchwork::wrap_plots(p1, p2, ncol = 1)
gene.activities <- GeneActivity(pbmc)
## Extracting gene coordinates
## Extracting reads overlapping genomic regions
# add the gene activity matrix to the Seurat object as a new assay and normalize it
pbmc[['RNA']] <- CreateAssayObject(counts = gene.activities)
## Warning: Different cells and/or features from existing assay RNA
pbmc <- NormalizeData(
object = pbmc,
assay = 'RNA',
normalization.method = 'LogNormalize',
scale.factor = median(pbmc$nCount_RNA)
)
Now we can visualize the activities of canonical marker genes.
DefaultAssay(pbmc) <- 'RNA'
FeaturePlot(
object = pbmc,
features = c('MS4A1', 'CD3D', 'LEF1', 'NKG7', 'TREM1', 'LYZ'),
pt.size = 0.1,
max.cutoff = 'q95',
ncol = 3
)
Now we can visualize the activities of pathogen genes.
CP014267.1 = Mycoplasma pneumoniae chromosome
NZ_LR214945.1 = Mycoplasmoides pneumoniae strain NCTC10119 plasmid 1
NZ_LR214946.1 = Mycoplasmoides pneumoniae strain NCTC10119 plasmid 2
NC_001416.1 = lambda phage
NC_006273.2 = Human herpesvirus 5
NC_007605.1 = Human gammaherpesvirus 4
FeaturePlot(
object = pbmc,
features = c('MS4A1', 'CD3D', 'LEF1', 'NKG7', 'TREM1', 'LYZ'),
pt.size = 0.1,
max.cutoff = 'q95',
ncol = 3
)
save.image("scmulti.Rdata")
Show versions of packages and key dependancies.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] qlcMatrix_0.9.7 sparsesvd_0.2-2
## [3] slam_0.1-50 SeuratDisk_0.0.0.9021
## [5] glmGamPoi_1.14.0 BSgenome.Hsapiens.UCSC.hg38_1.4.5
## [7] BSgenome_1.70.1 rtracklayer_1.62.0
## [9] BiocIO_1.12.0 Biostrings_2.70.1
## [11] XVector_0.42.0 EnsDb.Hsapiens.v86_2.99.0
## [13] ensembldb_2.26.0 AnnotationFilter_1.26.0
## [15] GenomicFeatures_1.54.1 AnnotationDbi_1.64.1
## [17] Biobase_2.62.0 GenomicRanges_1.54.1
## [19] GenomeInfoDb_1.38.5 IRanges_2.36.0
## [21] S4Vectors_0.40.2 BiocGenerics_0.48.1
## [23] hdf5r_1.3.9 Seurat_5.0.1
## [25] SeuratObject_5.0.1 sp_2.1-3
## [27] Signac_1.12.0 irlba_2.3.5.1
## [29] Matrix_1.6-5
##
## loaded via a namespace (and not attached):
## [1] ProtGenerics_1.34.0 matrixStats_1.2.0
## [3] spatstat.sparse_3.0-3 bitops_1.0-7
## [5] httr_1.4.7 RColorBrewer_1.1-3
## [7] docopt_0.7.1 tools_4.3.2
## [9] sctransform_0.4.1 backports_1.4.1
## [11] utf8_1.2.4 R6_2.5.1
## [13] lazyeval_0.2.2 uwot_0.1.16
## [15] withr_3.0.0 prettyunits_1.2.0
## [17] gridExtra_2.3 progressr_0.14.0
## [19] cli_3.6.2 spatstat.explore_3.2-5
## [21] fastDummies_1.7.3 labeling_0.4.3
## [23] sass_0.4.8 spatstat.data_3.0-4
## [25] ggridges_0.5.6 pbapply_1.7-2
## [27] Rsamtools_2.18.0 foreign_0.8-86
## [29] dichromat_2.0-0.1 parallelly_1.36.0
## [31] rstudioapi_0.15.0 RSQLite_2.3.5
## [33] generics_0.1.3 ica_1.0-3
## [35] spatstat.random_3.2-2 dplyr_1.1.4
## [37] fansi_1.0.6 abind_1.4-5
## [39] lifecycle_1.0.4 yaml_2.3.8
## [41] SummarizedExperiment_1.32.0 SparseArray_1.2.3
## [43] BiocFileCache_2.10.1 Rtsne_0.17
## [45] grid_4.3.2 blob_1.2.4
## [47] promises_1.2.1 crayon_1.5.2
## [49] miniUI_0.1.1.1 lattice_0.22-5
## [51] cowplot_1.1.3 KEGGREST_1.42.0
## [53] pillar_1.9.0 knitr_1.45
## [55] rjson_0.2.21 future.apply_1.11.1
## [57] codetools_0.2-19 fastmatch_1.1-4
## [59] leiden_0.4.3.1 glue_1.7.0
## [61] data.table_1.15.0 vctrs_0.6.5
## [63] png_0.1-8 spam_2.10-0
## [65] gtable_0.3.4 cachem_1.0.8
## [67] xfun_0.41 S4Arrays_1.2.0
## [69] mime_0.12 survival_3.5-7
## [71] RcppRoll_0.3.0 ellipsis_0.3.2
## [73] fitdistrplus_1.1-11 ROCR_1.0-11
## [75] nlme_3.1-164 bit64_4.0.5
## [77] progress_1.2.3 filelock_1.0.3
## [79] RcppAnnoy_0.0.22 bslib_0.6.1
## [81] KernSmooth_2.23-22 rpart_4.1.23
## [83] colorspace_2.1-0 DBI_1.2.1
## [85] Hmisc_5.1-1 nnet_7.3-19
## [87] tidyselect_1.2.0 bit_4.0.5
## [89] compiler_4.3.2 curl_5.2.0
## [91] htmlTable_2.4.2 xml2_1.3.6
## [93] DelayedArray_0.28.0 plotly_4.10.4
## [95] checkmate_2.3.1 scales_1.3.0
## [97] lmtest_0.9-40 rappdirs_0.3.3
## [99] stringr_1.5.1 digest_0.6.34
## [101] goftest_1.2-3 spatstat.utils_3.0-4
## [103] rmarkdown_2.25 htmltools_0.5.7
## [105] pkgconfig_2.0.3 base64enc_0.1-3
## [107] sparseMatrixStats_1.14.0 MatrixGenerics_1.14.0
## [109] highr_0.10 dbplyr_2.4.0
## [111] fastmap_1.1.1 rlang_1.1.3
## [113] htmlwidgets_1.6.4 DelayedMatrixStats_1.24.0
## [115] shiny_1.8.0 farver_2.1.1
## [117] jquerylib_0.1.4 zoo_1.8-12
## [119] jsonlite_1.8.8 BiocParallel_1.36.0
## [121] VariantAnnotation_1.48.1 RCurl_1.98-1.14
## [123] magrittr_2.0.3 Formula_1.2-5
## [125] GenomeInfoDbData_1.2.11 dotCall64_1.1-1
## [127] patchwork_1.2.0 munsell_0.5.0
## [129] Rcpp_1.0.12 reticulate_1.34.0
## [131] stringi_1.8.3 zlibbioc_1.48.0
## [133] MASS_7.3-60.0.1 plyr_1.8.9
## [135] parallel_4.3.2 listenv_0.9.1
## [137] ggrepel_0.9.5 deldir_2.0-2
## [139] splines_4.3.2 tensor_1.5
## [141] hms_1.1.3 igraph_2.0.1.1
## [143] spatstat.geom_3.2-8 RcppHNSW_0.5.0
## [145] reshape2_1.4.4 biomaRt_2.58.0
## [147] XML_3.99-0.16.1 evaluate_0.23
## [149] biovizBase_1.50.0 tweenr_2.0.2
## [151] httpuv_1.6.14 RANN_2.6.1
## [153] tidyr_1.3.1 purrr_1.0.2
## [155] polyclip_1.10-6 future_1.33.1
## [157] scattermore_1.2 ggplot2_3.4.4
## [159] ggforce_0.4.1 xtable_1.8-4
## [161] restfulr_0.0.15 RSpectra_0.16-1
## [163] later_1.3.2 viridisLite_0.4.2
## [165] tibble_3.2.1 memoise_2.0.1.9000
## [167] GenomicAlignments_1.38.1 cluster_2.1.6
## [169] globals_0.16.2