Source codes: https://github.com/Anne-LiseGerard/dftd_rnaseq Data: https://doi.org/10.1007/s00018-019-03259-2
suppressPackageStartupMessages({
library("reshape2")
library("gplots")
library("DESeq2")
library("mitch")
library("limma")
library("kableExtra")
library("dplyr")
library("tidyr")
library("ComplexHeatmap")
library("RColorBrewer")
library("circlize")
library("pathview")
library("stringr")
library("grid")
})
Goal: re-analyse previously published DFTD tumour biopsy transcriptomes and compare to DFTD cell line transcriptomes, focusing on genes involved in metabolism. Below is a list of the samples for this project.
ss <- read.table("../ss_patchett.txt",sep="\t",fill=TRUE,header=TRUE)
ss$DFT <- as.factor(ss$DFT)
ss %>%
kbl(caption="Sample sheet for all samples") %>%
kable_paper("hover", full_width = F)
run_accession | sample_id | DFT | sample | sample_type |
---|---|---|---|---|
ERR2804403 | sha_DFT1_1-1 | DFT1 | DFT1_1 | biopsy |
ERR2804404 | sha_DFT1_1-2 | DFT1 | DFT1_1 | biopsy |
ERR2804405 | sha_DFT1_2-1 | DFT1 | DFT1_2 | biopsy |
ERR2804406 | sha_DFT1_2-2 | DFT1 | DFT1_2 | biopsy |
ERR2804407 | sha_DFT2_1-1 | DFT2 | DFT2_1 | biopsy |
ERR2804408 | sha_DFT2_1-2 | DFT2 | DFT2_1 | biopsy |
ERR2804409 | sha_DFT2_2-1 | DFT2 | DFT2_2 | biopsy |
ERR2804410 | sha_DFT2_2-2 | DFT2 | DFT2_2 | biopsy |
ERR2852729 | sha_DFT2_RV_2-2 | DFT2 | RV_2 | cell_line |
ERR2852728 | sha_DFT2_RV_2-1 | DFT2 | RV_2 | cell_line |
ERR2852727 | sha_DFT2_RV_1-2 | DFT2 | RV_1 | cell_line |
ERR2852726 | sha_DFT2_RV_1-1 | DFT2 | RV_1 | cell_line |
SRR6266557 | C5065_UT_01 | DFT1 | C5065 | cell_line |
SRR6266556 | C5065_UT_02 | DFT1 | C5065 | cell_line |
maplot <- function(de,contrast_name) {
sig <-subset(de, padj < 0.05 )
up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
GENESUP <- length(up)
GENESDN <- length(dn)
DET=nrow(de)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=1, col="dark gray",
main=contrast_name, cex.main=0.7)
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=1, col="red")
mtext(SUBHEADER,cex = 0.7)
grid()
}
make_volcano <- function(de,name) {
sig <- subset(de,padj<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,log2FoldChange>0))
N_DN=nrow(subset(sig,log2FoldChange<0))
DET=nrow(de)
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
main=name, xlab="log2 FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}
Import data from the aligner.
tmp <- read.table("../fastq/3col_patchett.tsv.gz")
tmp$V1 <- gsub('.fastq-trimmed.fastq','',tmp$V1)
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3",fun.aggregate=sum))
tmp_old <- read.table("../fastq/3col_old.tsv.gz")
tmp_old$V1 <- gsub('.fastq-trimmed.fastq','',tmp_old$V1)
x_old <- as.data.frame(acast(tmp_old, V2~V1, value.var="V3",fun.aggregate=sum))
Load gene names.
gn <- read.table("../ref/Sarcophilus_harrisii.mSarHar1.11.cdna+ncrna.genenames.tsv",fill=TRUE)
gn <- gn[order(gn$V1),]
gn_old <- read.table("../ref/Sarcophilus_harrisii.DEVIL7.0.cdna+ncrna.genenames.tsv",fill=TRUE)
gn_old <- gn_old[order(gn_old$V1),]
hm <- read.table("../ref/mart_export_ensembl109_2023-07-14.txt",sep="\t",header=TRUE)
hm_old <- read.table("../ref/mart_export_ensembl101_2023-09-06.txt",sep="\t",header=TRUE)
Now need to collapse transcript data to genes.
x$gene <- paste(gn$V2,gn$V3)
y <- aggregate(. ~ gene,x,sum)
rownames(y) <- y$gene
y$gene = NULL
x_old$gene <- paste(gn_old$V2,gn_old$V3)
y_old <- aggregate(. ~ gene,x_old,sum)
rownames(y_old) <- y_old$gene
y_old$gene = NULL
Samples with <1M reads should be omitted. Will also round values to integers.
cs <- colSums(y)
cs <- cs[order(cs)]
par(mar=c(5,10,5,2))
barplot(cs,main="All samples",horiz=TRUE,las=1)
abline(v=1e7,col="red",lty=2)
y <- round(y)
cs_old <- colSums(y_old)
cs_old <- cs_old[order(cs_old)]
par(mar=c(5,10,5,2))
barplot(cs_old,main="All samples - old genome ",horiz=TRUE,las=1, xli=c(0,4e+07))
abline(v=1e7,col="red",lty=2)
y_old <- round(y_old)
This will help us to visualise the sources of variability in the overall dataset.
Plot MDS. Also fix the sample names.
Plot MDS by DFT.
par(mar = c(5.1, 4.1, 4.1, 2.1))
colnames(y) <- ss$sample_id
cs <- colSums(y)
cs <- cs[order(cs)]
par(mar=c(5,10,5,2))
barplot(cs,main="All samples",horiz=TRUE,las=1)
par(mar = c(5.1, 4.1, 4.1, 2.1))
colnames(y_old) <- ss$sample_id[1:8]
cs_old <- colSums(y_old)
cs_old <- cs_old[order(cs_old)]
par(mar=c(5,10,5,2))
barplot(cs_old,main="All samples - old genome",horiz=TRUE,las=1, xlim=c(0,4e+07))
par(mar = c(5.1, 4.1, 4.1, 2.1) )
cols <- ss$DFT
cols <- gsub("DFT1","lightblue",cols)
cols <- gsub("DFT2","pink",cols)
mymds <- plotMDS(y,plot=FALSE)
mymds_old <- plotMDS(y_old,plot=FALSE)
# fix the xlims
XMIN=min(mymds$x)
XMAX=max(mymds$x)
XMID=(XMAX+XMIN)/2
XMIN <- XMID + (XMIN-XMID)*1.1
XMAX <- XMID+(XMAX-XMID)*1.1
plotMDS(mymds,pch=19,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mymds,labels=colnames(y))
mtext("blue=DFT1, pink=DFT2")
plotMDS(mymds_old,pch=19,cex=3,col=cols,main="MDS plot - old genome",xlim=c(XMIN,XMAX))
text(mymds_old,labels=colnames(y_old))
mtext("blue=DFT1, pink=DFT2")
y_DFT1 <- y[,colnames(y) %in% c("sha_DFT1_1-1","sha_DFT1_1-2","sha_DFT1_2-1","sha_DFT1_2-2",
"C5065_UT_01", "C5065_UT_02")]
y_DFT2 <- y[,colnames(y) %in% c("sha_DFT2_1-1","sha_DFT2_1-2","sha_DFT2_2-1","sha_DFT2_2-2",
"sha_DFT2_RV_1-1","sha_DFT2_RV_1-2","sha_DFT2_RV_2-1","sha_DFT2_RV_2-2")]
y_biopsy <- y[,colnames(y) %in% c("sha_DFT1_1-1","sha_DFT1_1-2","sha_DFT1_2-1","sha_DFT1_2-2",
"sha_DFT2_1-1","sha_DFT2_1-2","sha_DFT2_2-1","sha_DFT2_2-2")]
y_cline <- y[,colnames(y) %in% c( "C5065_UT_01", "C5065_UT_02",
"sha_DFT2_RV_1-1","sha_DFT2_RV_1-2","sha_DFT2_RV_2-1","sha_DFT2_RV_2-2")]
mdsDFT1 <- plotMDS(y_DFT1,plot=FALSE)
plotMDS(mdsDFT1,pch=19,cex=3,col="pink",main="MDS plot",xlim=c(XMIN,XMAX))
text(mdsDFT1,labels=colnames(y_DFT1))
mtext("DFT1")
mdsDFT2 <- plotMDS(y_DFT2,plot=FALSE)
plotMDS(mdsDFT2,pch=19,cex=3,col="lightblue",main="MDS plot",xlim=c(XMIN,XMAX))
text(mdsDFT2, labels=colnames(y_DFT2))
mtext("DFT2")
mds_biopsy <- plotMDS(y_biopsy,plot=FALSE)
plotMDS(mds_biopsy,pch=19,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mds_biopsy,labels=colnames(y_biopsy))
mtext("Tumour biopsies: blue=DFT1, pink=DFT2")
mds_cline <- plotMDS(y_cline,plot=FALSE)
plotMDS(mds_cline,pch=19,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mds_cline,labels=colnames(y_cline))
mtext("Cell lines: blue=DFT1, pink=DFT2")
Sum replicates then run differential expression analysis. For now, ignore alignment on old genome and cell line data.
y_cell <- y[,9:14]
y <- y[,1:8]
# combine replicates for each sample
DFT1_1 <- rowSums(y[,ss$sample=="DFT1_1"])
DFT1_2 <- rowSums(y[,ss$sample=="DFT1_2"])
DFT2_1 <- rowSums(y[,ss$sample=="DFT2_1"])
DFT2_2 <- rowSums(y[,ss$sample=="DFT2_2"])
y2 <- data.frame(DFT1_1,DFT1_2,DFT2_1,DFT2_2)
ss2 <- as.data.frame(colnames(y2))
colnames(ss2) <- "sample"
ss2$DFT <- factor(c("DFT1","DFT1","DFT2","DFT2"))
y2 <- y2[which(rowMeans(y2)>10),] # remove genes with average count < 10
dim(y2)
## [1] 15931 4
dds <- DESeqDataSetFromMatrix(countData = y2 , colData = ss2, design = ~ DFT )
## converting counts to integer mode
dds2 <- dds
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge2 <- dge
sig <- subset(dge,padj<0.05)
sig2_up <- rownames(subset(sig,log2FoldChange>0))
sig2_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig2_up)
## [1] 2536
length(sig2_dn)
## [1] 2033
DE visualisation.
maplot(dge2,"DFT1 vs DFT2 biopsies")
make_volcano(dge2,"DFT1 vs DFT2 biopsies")
sig[1:50,1:6] %>%
kbl(caption="DFT1 vs DFT2 biopsies") %>%
kable_paper("hover", full_width = F)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSSHAG00000031627.1 PRX | 19373.4283 | -8.968977 | 0.2894364 | -30.98773 | 0 | 0 |
ENSSHAG00000001230.2 NA | 10348.7183 | -6.799811 | 0.2540229 | -26.76849 | 0 | 0 |
ENSSHAG00000024692.1 NA | 4999.3676 | -6.180271 | 0.2714882 | -22.76442 | 0 | 0 |
ENSSHAG00000028855.1 NA | 7373.2103 | 8.218378 | 0.3645190 | 22.54582 | 0 | 0 |
ENSSHAG00000012247.2 COL9A1 | 4015.0597 | 8.997323 | 0.4019274 | 22.38544 | 0 | 0 |
ENSSHAG00000022172.1 HAPLN2 | 6695.1868 | -10.289516 | 0.4742283 | -21.69739 | 0 | 0 |
ENSSHAG00000008158.2 NA | 13177.6995 | 9.309236 | 0.4323846 | 21.52999 | 0 | 0 |
ENSSHAG00000021199.1 UGT8 | 12350.1972 | -10.371276 | 0.4882968 | -21.23970 | 0 | 0 |
ENSSHAG00000025070.1 NA | 1894.6094 | -7.072079 | 0.3410819 | -20.73425 | 0 | 0 |
ENSSHAG00000011362.2 NA | 2280.0991 | 7.982211 | 0.3989326 | 20.00892 | 0 | 0 |
ENSSHAG00000030052.1 CFTR | 3494.4469 | -9.939796 | 0.5041830 | -19.71466 | 0 | 0 |
ENSSHAG00000028043.1 NA | 9433.1352 | -7.017523 | 0.3623763 | -19.36530 | 0 | 0 |
ENSSHAG00000000720.2 ANO1 | 4017.4307 | -4.669392 | 0.2435018 | -19.17601 | 0 | 0 |
ENSSHAG00000010203.2 NA | 3321.1501 | -5.790986 | 0.3031099 | -19.10523 | 0 | 0 |
ENSSHAG00000013813.2 NA | 1817.3236 | 8.295411 | 0.4353132 | 19.05619 | 0 | 0 |
ENSSHAG00000028708.1 NA | 3398.8519 | 6.491294 | 0.3425299 | 18.95103 | 0 | 0 |
ENSSHAG00000024629.1 NA | 1343.7559 | 6.383444 | 0.3399438 | 18.77794 | 0 | 0 |
ENSSHAG00000016586.2 NA | 1835.1747 | -6.920324 | 0.3884226 | -17.81648 | 0 | 0 |
ENSSHAG00000006184.2 ATCAY | 1670.3603 | 9.361726 | 0.5279151 | 17.73339 | 0 | 0 |
ENSSHAG00000024135.1 STK32B | 2034.3346 | 9.519390 | 0.5470800 | 17.40036 | 0 | 0 |
ENSSHAG00000028637.1 NA | 4152.7889 | -5.630734 | 0.3245807 | -17.34772 | 0 | 0 |
ENSSHAG00000014038.2 NA | 12218.7615 | 5.503863 | 0.3213211 | 17.12886 | 0 | 0 |
ENSSHAG00000014560.2 PLLP | 2929.6765 | -7.645683 | 0.4475932 | -17.08177 | 0 | 0 |
ENSSHAG00000028146.1 NSG1 | 2831.2843 | -6.946736 | 0.4078914 | -17.03085 | 0 | 0 |
ENSSHAG00000021888.1 PRSS35 | 1185.1224 | 5.763210 | 0.3446391 | 16.72245 | 0 | 0 |
ENSSHAG00000004800.2 ALKAL2 | 1461.3473 | 9.459052 | 0.5799044 | 16.31140 | 0 | 0 |
ENSSHAG00000028187.1 NA | 1148.8575 | -5.776665 | 0.3563146 | -16.21226 | 0 | 0 |
ENSSHAG00000012895.2 NA | 901.0142 | -6.245494 | 0.3879777 | -16.09756 | 0 | 0 |
ENSSHAG00000006714.2 P3H2 | 2310.5733 | 6.110694 | 0.3858657 | 15.83632 | 0 | 0 |
ENSSHAG00000011465.2 NA | 2880.8002 | -6.766093 | 0.4335532 | -15.60614 | 0 | 0 |
ENSSHAG00000027357.1 DHH | 1255.9489 | -5.030988 | 0.3229230 | -15.57953 | 0 | 0 |
ENSSHAG00000013967.2 ALDOC | 2233.7205 | -5.021646 | 0.3241752 | -15.49053 | 0 | 0 |
ENSSHAG00000016224.2 NECTIN4 | 882.6687 | -4.862932 | 0.3156142 | -15.40783 | 0 | 0 |
ENSSHAG00000029081.1 FGF10 | 632.5331 | 7.164789 | 0.4652379 | 15.40027 | 0 | 0 |
ENSSHAG00000031559.1 FHDC1 | 1379.8599 | 5.027249 | 0.3269925 | 15.37420 | 0 | 0 |
ENSSHAG00000014878.2 EFS | 987.1933 | 4.920944 | 0.3247012 | 15.15530 | 0 | 0 |
ENSSHAG00000020854.1 FMN2 | 765.5943 | -7.665817 | 0.5113521 | -14.99127 | 0 | 0 |
ENSSHAG00000008456.2 NOTCH3 | 3770.3176 | 4.007920 | 0.2717511 | 14.74850 | 0 | 0 |
ENSSHAG00000012189.2 NA | 1655.7477 | 4.669329 | 0.3205060 | 14.56862 | 0 | 0 |
ENSSHAG00000023680.1 EGR3 | 5429.7319 | -4.718157 | 0.3271562 | -14.42173 | 0 | 0 |
ENSSHAG00000006005.2 NA | 940.6342 | 7.989749 | 0.5545270 | 14.40822 | 0 | 0 |
ENSSHAG00000017676.2 ARHGAP22 | 727.6463 | 5.957871 | 0.4175935 | 14.26715 | 0 | 0 |
ENSSHAG00000010663.2 MAP1B | 5140.0041 | 4.022572 | 0.2836538 | 14.18127 | 0 | 0 |
ENSSHAG00000007917.2 EDNRB | 13288.1593 | 4.328769 | 0.3052459 | 14.18125 | 0 | 0 |
ENSSHAG00000021589.1 EBF2 | 996.6855 | -6.138741 | 0.4339999 | -14.14457 | 0 | 0 |
ENSSHAG00000024394.1 NA | 1939.7779 | 4.717647 | 0.3350385 | 14.08091 | 0 | 0 |
ENSSHAG00000016955.2 CCDC68 | 3306.2354 | -7.239744 | 0.5152135 | -14.05193 | 0 | 0 |
ENSSHAG00000030008.1 NA | 533.2343 | 5.281241 | 0.3765384 | 14.02577 | 0 | 0 |
ENSSHAG00000010841.2 PCDH7 | 506.1298 | 5.613175 | 0.4019355 | 13.96536 | 0 | 0 |
ENSSHAG00000009869.2 PXDC1 | 1400.8423 | 3.930310 | 0.2815769 | 13.95821 | 0 | 0 |
write.table(dge2,file="dge2.tsv",sep="\t")
# heatmap
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
col=colfunc(25),ColSideColors=cols[3:6],mar=c(8,14),
cexRow=0.7,cexCol=1)
Need to get human homologs of these genes. These were obtained from Ensembl v109 biomart (website).
rownames(dge2) <- sapply(strsplit(rownames(dge2),"\\."),"[[",1)
hm2 <- hm[hm$Tasmanian.devil.gene.stable.ID != "",]
gt <- hm2[,2:3]
length(unique(gt$Tasmanian.devil.gene.stable.ID))
## [1] 16979
length(unique(gt$Gene.name))
## [1] 16646
for(i in 1:length(gt$Gene.name)){
if(gt$Gene.name[i]==""){gt$Gene.name[i] <- gt$Tasmanian.devil.gene.stable.ID[i]}
}
Now run mitch for DGE2. The pathways are sourced from Reactome 7th July 2023.
genesets <- gmt_import("../ref/ReactomePathways_2023-07-14.gmt")
m2 <- mitch_import(dge2, DEtype="deseq2", geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 15931
## Note: no. genes in output = 12859
## Note: estimated proportion of input genes in output = 0.807
head(m2)
## x
## A2M -0.3375456
## A4GALT 4.2591767
## AAAS 1.7285141
## AACS 0.7228111
## AADAC 1.5043061
## AADAT -0.3392250
res2 <- mitch_calc(m2, genesets, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res2$enrichment_result,20) %>%
kbl(caption="metabolic pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
674 | Metallothioneins bind metals | 10 | 3.00e-07 | 0.9370379 | 0.0000060 |
195 | Classical antibody-mediated complement activation | 35 | 0.00e+00 | 0.8419660 | 0.0000000 |
222 | Creation of C4 and C2 activators | 38 | 0.00e+00 | 0.8380987 | 0.0000000 |
121 | CD22 mediated BCR regulation | 25 | 0.00e+00 | 0.8306093 | 0.0000000 |
1052 | Response to metal ions | 13 | 2.00e-07 | 0.8292075 | 0.0000049 |
549 | Initial triggering of complement | 44 | 0.00e+00 | 0.8284078 | 0.0000000 |
1100 | Scavenging of heme from plasma | 35 | 0.00e+00 | 0.8250913 | 0.0000000 |
1335 | Translocation of ZAP-70 to Immunological synapse | 11 | 4.10e-06 | 0.8018156 | 0.0000711 |
842 | Phosphorylation of CD3 and TCR zeta chains | 14 | 3.00e-07 | 0.7923984 | 0.0000060 |
371 | FCGR activation | 41 | 0.00e+00 | 0.7907896 | 0.0000000 |
533 | Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell | 97 | 0.00e+00 | 0.7731094 | 0.0000000 |
206 | Complement cascade | 60 | 0.00e+00 | 0.7653827 | 0.0000000 |
802 | PD-1 signaling | 15 | 3.00e-07 | 0.7627063 | 0.0000064 |
988 | Regulation of Complement cascade | 56 | 0.00e+00 | 0.7608513 | 0.0000000 |
180 | Chemokine receptors bind chemokines | 37 | 0.00e+00 | 0.7327229 | 0.0000000 |
577 | Interleukin-2 signaling | 10 | 7.58e-05 | 0.7226555 | 0.0010743 |
110 | Binding and Uptake of Ligands by Scavenger Receptors | 63 | 0.00e+00 | 0.6951552 | 0.0000000 |
1099 | Scavenging by Class A Receptors | 17 | 2.80e-06 | 0.6566505 | 0.0000496 |
459 | Generation of second messenger molecules | 24 | 0.00e+00 | 0.6497955 | 0.0000008 |
368 | FCERI mediated Ca+2 mobilization | 57 | 0.00e+00 | 0.6414869 | 0.0000000 |
top <- subset(res2$enrichment_result,p.adjustANOVA<0.05)
if ( !file.exists("mitch2.html") & nrow(top)>1 ) {
mitch_report(res2, "mitch2.html")
}
topup <- head(subset(top,s.dist>0),20)
topup %>%
kbl(caption="Top 20 upregulated pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
674 | Metallothioneins bind metals | 10 | 3.00e-07 | 0.9370379 | 0.0000060 |
195 | Classical antibody-mediated complement activation | 35 | 0.00e+00 | 0.8419660 | 0.0000000 |
222 | Creation of C4 and C2 activators | 38 | 0.00e+00 | 0.8380987 | 0.0000000 |
121 | CD22 mediated BCR regulation | 25 | 0.00e+00 | 0.8306093 | 0.0000000 |
1052 | Response to metal ions | 13 | 2.00e-07 | 0.8292075 | 0.0000049 |
549 | Initial triggering of complement | 44 | 0.00e+00 | 0.8284078 | 0.0000000 |
1100 | Scavenging of heme from plasma | 35 | 0.00e+00 | 0.8250913 | 0.0000000 |
1335 | Translocation of ZAP-70 to Immunological synapse | 11 | 4.10e-06 | 0.8018156 | 0.0000711 |
842 | Phosphorylation of CD3 and TCR zeta chains | 14 | 3.00e-07 | 0.7923984 | 0.0000060 |
371 | FCGR activation | 41 | 0.00e+00 | 0.7907896 | 0.0000000 |
533 | Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell | 97 | 0.00e+00 | 0.7731094 | 0.0000000 |
206 | Complement cascade | 60 | 0.00e+00 | 0.7653827 | 0.0000000 |
802 | PD-1 signaling | 15 | 3.00e-07 | 0.7627063 | 0.0000064 |
988 | Regulation of Complement cascade | 56 | 0.00e+00 | 0.7608513 | 0.0000000 |
180 | Chemokine receptors bind chemokines | 37 | 0.00e+00 | 0.7327229 | 0.0000000 |
577 | Interleukin-2 signaling | 10 | 7.58e-05 | 0.7226555 | 0.0010743 |
110 | Binding and Uptake of Ligands by Scavenger Receptors | 63 | 0.00e+00 | 0.6951552 | 0.0000000 |
1099 | Scavenging by Class A Receptors | 17 | 2.80e-06 | 0.6566505 | 0.0000496 |
459 | Generation of second messenger molecules | 24 | 0.00e+00 | 0.6497955 | 0.0000008 |
368 | FCERI mediated Ca+2 mobilization | 57 | 0.00e+00 | 0.6414869 | 0.0000000 |
topdn <- head(subset(top,s.dist<0),20)
topdn %>%
kbl(caption="Top 20 downregulated pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
281 | Degradation of cysteine and homocysteine | 11 | 0.0056613 | -0.4818154 | 0.0456124 |
330 | EGR2 and SOX10-mediated initiation of Schwann cell myelination | 28 | 0.0001144 | -0.4213122 | 0.0015897 |
194 | Class I peroxisomal membrane protein import | 17 | 0.0037161 | -0.4065062 | 0.0323276 |
178 | Chaperone Mediated Autophagy | 18 | 0.0061237 | -0.3732575 | 0.0482409 |
555 | Insulin processing | 21 | 0.0033346 | -0.3700621 | 0.0295527 |
590 | Intraflagellar transport | 45 | 0.0001932 | -0.3213395 | 0.0025135 |
152 | Cargo trafficking to the periciliary membrane | 43 | 0.0027614 | -0.2639506 | 0.0252628 |
189 | Circadian Clock | 60 | 0.0017280 | -0.2340743 | 0.0170157 |
837 | Peroxisomal protein import | 53 | 0.0037920 | -0.2300455 | 0.0327866 |
1055 | Retrograde transport at the Trans-Golgi-Network | 48 | 0.0060099 | -0.2293537 | 0.0476094 |
188 | Cilium Assembly | 175 | 0.0000147 | -0.1903699 | 0.0002340 |
643 | Macroautophagy | 115 | 0.0040649 | -0.1553679 | 0.0349336 |
96 | Autophagy | 127 | 0.0025907 | -0.1550925 | 0.0240109 |
792 | Organelle biogenesis and maintenance | 259 | 0.0002103 | -0.1342986 | 0.0027114 |
top2 <- rbind(head(topup,10),head(topdn,10))
top2 <- top2[order(-top2$s.dist),]
par(mar=c(5,15,5,2))
if ( nrow(top2)>1 ) {
barplot(rev(abs(top2$s.dist)),horiz=TRUE,las=1,names.arg=rev(top2$set),
main="Top REACTOME Pathways",cex.names=0.6,xlab="Enrichment Score",
col=c(rep("blue",nrow(head(topdn,10))),rep("red",nrow(head(topup,10)))),
xlim=c(0,0.8))
}
Now to extract metabolism sets and analyse with another mitch run.
hierarchy <- read.table("../ref/gsea/ReactomePathwaysRelation.txt", sep ="\t", header = F, dec =".")
pathways <- read.delim("../ref/gsea/ReactomePathways.txt", header=FALSE)
hierarchy <- hierarchy[grep("HSA", hierarchy$V1), ] # keep only homo sapiens data
pathways <- pathways[grep("HSA", pathways$V1), 1:2]
metabo_pathways <- "R-HSA-1430728" # reactome code for metabolism
# this is very bad coding - but it works
# why 325? that is when while loop crashes because all branches of the hierarchy have been looped over
# will have to change this if Reactome data is updated
while(length(metabo_pathways) < 325){
for(i in 1:length(metabo_pathways)){
metabo_pathways <- unique(c(metabo_pathways,filter(hierarchy, V1 == metabo_pathways[i])$V2))
}
}
mpathways <- c()
for(i in 1:length(metabo_pathways)){mpathways <- c(mpathways,filter(pathways, V1 == metabo_pathways[i])$V2)}
library(magrittr)
genesets3 <- extract(genesets, mpathways)
# remove empty sets (ie hierarchy exists but no gene set is associated to it)
genesets3 <- genesets3[lapply(genesets3,length)>0]
Now run mitch on metabolic pathways only.
# TODO: for some reason this doesn't give any results - except when reducing number of sets
# I am confused by this, ask Mark
# also when no sets enriched, get error when produce mitch report
m3 <- mitch_import(dge2, DEtype="deseq2", geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 15931
## Note: no. genes in output = 12859
## Note: estimated proportion of input genes in output = 0.807
head(m3)
## x
## A2M -0.3375456
## A4GALT 4.2591767
## AAAS 1.7285141
## AACS 0.7228111
## AADAC 1.5043061
## AADAT -0.3392250
# maybe something off with 1st row of m3 - fixed it, doesn't change anything
res3 <- mitch_calc(m3, genesets3, priority="effect", cores=16)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res3$enrichment_result,20) %>%
kbl(caption="metabolic pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA | |
---|---|---|---|---|---|
39 | Biosynthesis of specialized proresolving mediators (SPMs) | 11 | 0.0030352 | 0.5161751 | 0.0827414 |
88 | Degradation of cysteine and homocysteine | 11 | 0.0056613 | -0.4818154 | 0.0842911 |
26 | Heme degradation | 12 | 0.0056385 | 0.4615410 | 0.0842911 |
107 | Acyl chain remodelling of PI | 11 | 0.0105634 | 0.4452621 | 0.1177028 |
120 | Synthesis of Prostaglandins (PG) and Thromboxanes (TX) | 13 | 0.0085667 | 0.4211547 | 0.1043579 |
119 | Synthesis of Leukotrienes (LT) and Eoxins (EX) | 12 | 0.0155762 | 0.4032978 | 0.1357629 |
47 | Glutamate and glutamine metabolism | 12 | 0.0230345 | -0.3789860 | 0.1543310 |
82 | alpha-linolenic (omega3) and linoleic (omega6) acid metabolism | 13 | 0.0225596 | -0.3654056 | 0.1543310 |
118 | alpha-linolenic acid (ALA) metabolism | 13 | 0.0225596 | -0.3654056 | 0.1543310 |
87 | Triglyceride catabolism | 16 | 0.0114189 | 0.3653352 | 0.1177028 |
129 | Dermatan sulfate biosynthesis | 10 | 0.0472731 | 0.3623473 | 0.2346148 |
113 | Synthesis of PIPs at the early endosome membrane | 15 | 0.0152200 | -0.3620160 | 0.1357629 |
20 | Nucleotide catabolism | 27 | 0.0012633 | 0.3585827 | 0.0616802 |
108 | Acyl chain remodelling of PG | 12 | 0.0330634 | 0.3553878 | 0.2043665 |
40 | Sulfur amino acid metabolism | 22 | 0.0073914 | -0.3299907 | 0.0990445 |
104 | Acyl chain remodelling of PC | 19 | 0.0162105 | 0.3186670 | 0.1357629 |
105 | Acyl chain remodelling of PS | 15 | 0.0335527 | 0.3170248 | 0.2043665 |
83 | Arachidonic acid metabolism | 35 | 0.0030874 | 0.2891320 | 0.0827414 |
81 | Carnitine metabolism | 14 | 0.0767701 | 0.2732358 | 0.3318450 |
102 | Glutathione synthesis and recycling | 10 | 0.1400903 | 0.2694996 | 0.3910854 |
top <- subset(res3$enrichment_result,p.adjustANOVA<0.05)
if ( !file.exists("mitch3_biopsy.html") & nrow(top)>1 ) {
mitch_report(res3, "mitch3_biopsy.html")
}
topup <- head(subset(top,s.dist>0),20)
topup %>%
kbl(caption="Top 20 upregulated metabolic pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA |
---|---|---|---|---|
topdn <- head(subset(top,s.dist<0),20)
topdn %>%
kbl(caption="Top 20 downregulated metabolic pathways") %>%
kable_paper("hover", full_width = F)
set | setSize | pANOVA | s.dist | p.adjustANOVA |
---|---|---|---|---|
top2 <- rbind(head(topup,10),head(topdn,10))
top2 <- top2[order(-top2$s.dist),]
par(mar=c(5,15,5,2))
if ( nrow(top2)>1 ) {
barplot(rev(abs(top2$s.dist)),horiz=TRUE,las=1,names.arg=rev(top2$set),
main="Top REACTOME Metabolic Pathways",cex.names=0.6,xlab="Enrichment Score",
col=c(rep("blue",nrow(head(topdn,10))),rep("red",nrow(head(topup,10)))),
xlim=c(0,0.8))
}
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pkgload_1.3.2.1 GGally_2.1.2
## [3] ggplot2_3.4.3 beeswarm_0.4.0
## [5] gtools_3.9.4 tibble_3.2.1
## [7] echarts4r_0.4.5 magrittr_2.0.3
## [9] stringr_1.5.0 pathview_1.40.0
## [11] circlize_0.4.15 RColorBrewer_1.1-3
## [13] ComplexHeatmap_2.16.0 tidyr_1.3.0
## [15] dplyr_1.1.3 kableExtra_1.3.4
## [17] limma_3.56.2 mitch_1.12.0
## [19] DESeq2_1.40.2 SummarizedExperiment_1.30.2
## [21] Biobase_2.60.0 MatrixGenerics_1.12.3
## [23] matrixStats_1.0.0 GenomicRanges_1.52.0
## [25] GenomeInfoDb_1.36.3 IRanges_2.34.1
## [27] S4Vectors_0.38.1 BiocGenerics_0.46.0
## [29] gplots_3.1.3 reshape2_1.4.4
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.15.0 jsonlite_1.8.7 shape_1.4.6
## [4] magick_2.7.5 rmarkdown_2.24 GlobalOptions_0.1.2
## [7] zlibbioc_1.46.0 vctrs_0.6.3 memoise_2.0.1
## [10] Cairo_1.6-1 RCurl_1.98-1.12 webshot_0.5.5
## [13] htmltools_0.5.6 S4Arrays_1.0.6 sass_0.4.7
## [16] KernSmooth_2.23-22 bslib_0.5.1 htmlwidgets_1.6.2
## [19] plyr_1.8.8 cachem_1.0.8 mime_0.12
## [22] lifecycle_1.0.3 iterators_1.0.14 pkgconfig_2.0.3
## [25] Matrix_1.6-1 R6_2.5.1 fastmap_1.1.1
## [28] GenomeInfoDbData_1.2.10 shiny_1.7.5 clue_0.3-64
## [31] digest_0.6.33 colorspace_2.1-0 reshape_0.8.9
## [34] AnnotationDbi_1.62.2 RSQLite_2.3.1 org.Hs.eg.db_3.17.0
## [37] fansi_1.0.4 httr_1.4.7 abind_1.4-5
## [40] compiler_4.3.1 bit64_4.0.5 withr_2.5.0
## [43] doParallel_1.0.17 BiocParallel_1.34.2 DBI_1.1.3
## [46] highr_0.10 MASS_7.3-60 DelayedArray_0.26.7
## [49] rjson_0.2.21 caTools_1.18.2 tools_4.3.1
## [52] httpuv_1.6.11 glue_1.6.2 promises_1.2.1
## [55] cluster_2.1.4 generics_0.1.3 gtable_0.3.4
## [58] xml2_1.3.5 utf8_1.2.3 XVector_0.40.0
## [61] foreach_1.5.2 pillar_1.9.0 later_1.3.1
## [64] lattice_0.21-8 bit_4.0.5 tidyselect_1.2.0
## [67] locfit_1.5-9.8 Biostrings_2.68.1 knitr_1.44
## [70] gridExtra_2.3 svglite_2.1.1 xfun_0.40
## [73] KEGGgraph_1.60.0 stringi_1.7.12 yaml_2.3.7
## [76] evaluate_0.21 codetools_0.2-19 Rgraphviz_2.44.0
## [79] graph_1.78.0 cli_3.6.1 xtable_1.8-4
## [82] systemfonts_1.0.4 munsell_0.5.0 jquerylib_0.1.4
## [85] Rcpp_1.0.11 png_0.1-8 XML_3.99-0.14
## [88] parallel_4.3.1 ellipsis_0.3.2 blob_1.2.4
## [91] bitops_1.0-7 viridisLite_0.4.2 scales_1.2.1
## [94] purrr_1.0.2 crayon_1.5.2 GetoptLong_1.0.5
## [97] rlang_1.1.1 KEGGREST_1.40.0 rvest_1.0.3