suppressPackageStartupMessages({
library("zoo")
library("tidyverse")
library("reshape2")
library("DESeq2")
library("gplots")
library("fgsea")
library("MASS")
library("mitch")
library("eulerr")
library("limma")
library("topconfects")
library("gplots")
})
tmp <- read.table("3col.tsv",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
Here we’re manipulating the sample sheet. Making sure that the samplesheet matches the names of the data sets and vice versa. Ensuring that each patient has a baseline and postop sample.
samplesheet<-read.csv("samplesheet.tsv",sep="\t",stringsAsFactors=FALSE)
mysamples <- colnames(xx)
baseline <- mysamples[grep("T0",mysamples)]
postop <- mysamples[grep("POD",mysamples)]
baseline <- sapply(strsplit(baseline,"-"),"[[",1)
postop <- sapply(strsplit(postop,"-"),"[[",1)
mysamples <- intersect(baseline,postop)
samplesheet2 <- samplesheet
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
samplesheet3 <- samplesheet2
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
rownames(samplesheet3) <- paste(samplesheet2$RG_number,"POD",sep="-")
samplesheet2$timepoint <- 0
samplesheet3$timepoint <- 1
samplesheet <- rbind(samplesheet2,samplesheet3)
# filter the data matrix
xx <- xx[,which(colnames(xx) %in% rownames(samplesheet))]
samplesheet <- samplesheet[which(rownames(samplesheet) %in% colnames(xx)),]
samplesheet <- samplesheet[order(rownames(samplesheet)),]
Here the idea is to use the concept of Monaco et al, 2019 to deconvolute blood RNA-seq signature.
DOI:https://doi.org/10.1016/j.celrep.2019.01.041
xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps
dec <- t(dec/colSums(dec)*100)
dec <- signif(dec, 3)
cs <- colSums(dec)
order(-cs)
## [1] 1 4 8 13 2 6 3 14 10 12 5 16 17 9 15 11 7
cv <- apply(dec,2,function(x) {sd(x)/mean(x)} )
cv[order(-cv)]
## T.CD8.Naive T.CD4.Naive pDCs Monocytes.C B.Naive
## 12.309866 6.935020 6.768844 5.828486 5.773624
## MAIT Monocytes.NC.I NK Basophils.LD B.Memory
## 5.611708 5.598135 5.555667 5.480702 5.163804
## T.CD8.Memory Neutrophils.LD T.CD4.Memory T.gd.non.Vd2 mDCs
## 3.524342 2.972512 -5.555388 -6.493188 -7.442438
## T.gd.Vd2 Plasmablasts
## -7.869309 -18.095614
plot(cs,cv,main="cell abundance versus SD")
text(cs,cv+0.5,label=names(cs))
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( dec, col=colfunc(25),scale="row",
trace="none",margins = c(10,15), cexRow=.7, cexCol=.8, main="cell type abundances")
samplesheet <- cbind(samplesheet,dec)
dec_t0_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==0 )] ),])
dec_po_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==1 )] ),])
dec_t0_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==0 )] ),])
dec_po_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==1 )] ),])
boxplot(dec_t0_lo$Monocytes.C , dec_t0_hi$Monocytes.C, dec_po_lo$Monocytes.C, dec_po_hi$Monocytes.C ,
ylim=c(0,100),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.C")
t.test(dec_t0_hi$Monocytes.C , dec_po_hi$Monocytes.C)
##
## Welch Two Sample t-test
##
## data: dec_t0_hi$Monocytes.C and dec_po_hi$Monocytes.C
## t = -2.7175, df = 25.773, p-value = 0.0116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -55.076844 -7.627109
## sample estimates:
## mean of x mean of y
## 44.13850 75.49048
boxplot(dec_t0_lo$NK , dec_t0_hi$NK, dec_po_lo$NK, dec_po_hi$NK ,
ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="NK")
t.test(dec_t0_lo$NK , dec_po_lo$NK)
##
## Welch Two Sample t-test
##
## data: dec_t0_lo$NK and dec_po_lo$NK
## t = 2.4688, df = 47.994, p-value = 0.01717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4820947 4.7136093
## sample estimates:
## mean of x mean of y
## 6.707104 4.109252
boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory, dec_po_hi$T.CD8.Memory ,
ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")
boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory, dec_po_hi$T.CD8.Memory ,
ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")
boxplot(dec_t0_lo$T.CD4.Naive , dec_t0_hi$T.CD4.Naive, dec_po_lo$T.CD4.Naive, dec_po_hi$T.CD4.Naive ,
ylim=c(0,25),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Naive")
boxplot(dec_t0_lo$T.CD8.Naive , dec_t0_hi$T.CD8.Naive, dec_po_lo$T.CD8.Naive, dec_po_hi$T.CD8.Naive ,
ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Naive")
boxplot(dec_t0_lo$B.Naive , dec_t0_hi$B.Naive, dec_po_lo$B.Naive, dec_po_hi$B.Naive ,
ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Naive")
boxplot(dec_t0_lo$T.CD4.Memory , dec_t0_hi$T.CD4.Memory, dec_po_lo$T.CD4.Memory, dec_po_hi$T.CD4.Memory ,
ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Memory")
boxplot(dec_t0_lo$MAIT , dec_t0_hi$MAIT, dec_po_lo$MAIT, dec_po_hi$MAIT ,
ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="MAIT")
boxplot(dec_t0_lo$T.gd.Vd2 , dec_t0_hi$T.gd.Vd2 , dec_po_lo$T.gd.Vd2 , dec_po_hi$T.gd.Vd2 ,
ylim=c(0,5),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.Vd2")
boxplot(dec_t0_lo$Neutrophils.LD , dec_t0_hi$Neutrophils.LD , dec_po_lo$Neutrophils.LD , dec_po_hi$Neutrophils.LD ,
ylim=c(0,50),names=c("t0 lo","t0 hi","po lo","po hi"),main="Neutrophils.LD")
boxplot(dec_t0_lo$T.gd.non.Vd2 , dec_t0_hi$T.gd.non.Vd2 , dec_po_lo$T.gd.non.Vd2 , dec_po_hi$T.gd.non.Vd2 ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.non.Vd2")
boxplot(dec_t0_lo$Basophils.LD , dec_t0_hi$Basophils.LD , dec_po_lo$Basophils.LD , dec_po_hi$Basophils.LD ,
ylim=c(0,4),names=c("t0 lo","t0 hi","po lo","po hi"),main="Basophils.LD")
boxplot(dec_t0_lo$Monocytes.NC.I , dec_t0_hi$Monocytes.NC.I , dec_po_lo$Monocytes.NC.I , dec_po_hi$Monocytes.NC.I ,
ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.NC.I")
boxplot(dec_t0_lo$B.Memory , dec_t0_hi$B.Memory , dec_po_lo$B.Memory , dec_po_hi$B.Memory ,
ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Memory")
boxplot(dec_t0_lo$mDCs , dec_t0_hi$mDCs , dec_po_lo$mDCs , dec_po_hi$mDCs ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="mDCs")
boxplot(dec_t0_lo$pDCs , dec_t0_hi$pDCs , dec_po_lo$pDCs , dec_po_hi$pDCs ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="pDCs")
boxplot(dec_t0_lo$Plasmablasts , dec_t0_hi$Plasmablasts , dec_po_lo$Plasmablasts , dec_po_hi$Plasmablasts ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="Plasmablasts")
plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='l')
text(cmdscale(dist(t(xx))), labels=colnames(xx),cex=1.3)
heatmap.2(cor(xx),trace="none",margins = c(10,10), main="pearson correlation heatmap")
heatmap.2(cor(xx,method="s"),trace="none",margins = c(10,10), main="spearman correlation heatmap")
# filter genes with fewer than 10 reads per sample
NAME = "t0_v_pod"
# no correction for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet,
design = ~ Sex + age + timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000087116.16 ADAMTS2 634.35845 4.5151731 0.35054106 12.88058
## ENSG00000108950.12 FAM20A 792.00129 2.5933863 0.21519289 12.05145
## ENSG00000170439.7 METTL7B 126.30501 3.7994825 0.32669823 11.62995
## ENSG00000132170.21 PPARG 70.86749 2.5437784 0.21970131 11.57835
## ENSG00000121316.11 PLBD1 10689.06659 1.4627439 0.12773015 11.45183
## ENSG00000112290.13 WASF1 113.84190 1.2704505 0.11958889 10.62348
## ENSG00000156414.19 TDRD9 595.16883 1.6598788 0.15732025 10.55095
## ENSG00000166033.13 HTRA1 131.84384 2.0268764 0.19324218 10.48879
## ENSG00000110013.13 SIAE 451.39075 0.8257222 0.07915764 10.43137
## ENSG00000163220.11 S100A9 114258.61851 1.5350647 0.14750800 10.40665
## pvalue padj
## ENSG00000087116.16 ADAMTS2 5.789681e-38 1.267245e-33
## ENSG00000108950.12 FAM20A 1.905681e-33 2.085577e-29
## ENSG00000170439.7 METTL7B 2.902842e-31 2.117913e-27
## ENSG00000132170.21 PPARG 5.305835e-31 2.903353e-27
## ENSG00000121316.11 PLBD1 2.302339e-30 1.007872e-26
## ENSG00000112290.13 WASF1 2.317491e-26 8.454208e-23
## ENSG00000156414.19 TDRD9 5.028360e-26 1.572296e-22
## ENSG00000166033.13 HTRA1 9.726683e-26 2.661221e-22
## ENSG00000110013.13 SIAE 1.783074e-25 4.336435e-22
## ENSG00000163220.11 S100A9 2.312053e-25 5.060621e-22
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3,
xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
t0_v_pod <- dge
write.table(dge,file="t0_v_pod_rna.tsv",sep="\t",quote=FALSE)
# filter genes with fewer than 10 reads per sample
NAME = "pod_crp"
ss <- subset(samplesheet,timepoint==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + CrpGroup )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000135424.17 ITGA7 387.1708 2.2131744 0.2570353 8.610391
## ENSG00000108950.12 FAM20A 1327.9835 2.0134443 0.2649032 7.600680
## ENSG00000143365.19 RORC 146.5522 -1.4132086 0.2085281 -6.777064
## ENSG00000110079.18 MS4A4A 673.1434 1.5505614 0.2295571 6.754577
## ENSG00000123643.13 SLC36A1 1610.6451 0.9818768 0.1518341 6.466773
## ENSG00000281852.1 LINC00891 116.2132 -0.9573707 0.1488960 -6.429796
## ENSG00000018280.17 SLC11A1 17883.6005 1.1018384 0.1722874 6.395353
## ENSG00000014257.16 ACPP 863.6512 0.7083577 0.1121526 6.316019
## ENSG00000178562.18 CD28 1015.3327 -1.0429829 0.1694328 -6.155733
## ENSG00000121316.11 PLBD1 15404.6149 1.0790204 0.1756857 6.141765
## pvalue padj
## ENSG00000135424.17 ITGA7 7.281202e-18 1.574560e-13
## ENSG00000108950.12 FAM20A 2.945794e-14 3.185140e-10
## ENSG00000143365.19 RORC 1.226425e-11 7.744568e-08
## ENSG00000110079.18 MS4A4A 1.432521e-11 7.744568e-08
## ENSG00000123643.13 SLC36A1 1.001180e-10 4.330105e-07
## ENSG00000281852.1 LINC00891 1.277753e-10 4.605234e-07
## ENSG00000018280.17 SLC11A1 1.601768e-10 4.948320e-07
## ENSG00000014257.16 ACPP 2.683869e-10 7.254833e-07
## ENSG00000178562.18 CD28 7.473115e-10 1.764803e-06
## ENSG00000121316.11 PLBD1 8.160938e-10 1.764803e-06
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
pod_crp <- dge
write.table(dge,file="pod_crp_rna.tsv",sep="\t",quote=FALSE)
Run a ROC based on PCA distance classification.
# library size normalised
yy <- log2( (y/colSums(y)*1000000) + 0.001 )
yy <- yy[,which(rowMeans(cor(yy))>0.75)]
# crp group
crpgrp <- ss$CrpGroup
names(crpgrp) <- rownames(ss)
crpgrp <- crpgrp[which(names(crpgrp) %in% colnames(yy))]
# confects
confects <- deseq2_confects(res)
# Limma p-values
ROCp <- function(ngenes){
pnames <- rownames(dge)[1:ngenes]
topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
mds <- plotMDS(topgenes,ndim=2)
mds <- mds$cmdscale.out
hypotenuse <- function(x) {
sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
}
ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[(nrow(mds)-1),1] )^2
b2=(x[1] - mds[(nrow(mds)-1),2] )^2
sqrt(a2+b2)
})
case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[nrow(mds),1] )^2
b2=(x[1] - mds[nrow(mds),2] )^2
sqrt(a2+b2)
})
df <- data.frame(ctrl_dist,case_dist)
df$classification_case <- unname(unlist(apply(df,1,function(x) {
if (x[2]<x[1]) {TRUE} else {FALSE}
}))) *1
df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
df$conf <- unname(apply(df,1,function(x) {
if (x[4]>x[5]) {x[4]} else {x[5]}
}))
df$crpgrp <- crpgrp
df <- df[order(-df$conf),]
df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
tpr <- cumsum(df$correct) / nrow(df)
tpr <- c(tpr,1)
fpr <- cumsum((df$correct -1)*-1) / nrow(df)
fpr <- c(fpr,1)
# AUC
id <- order(fpr)
AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
HEADER=paste("AUC =",AUC)
plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
mtext(HEADER)
AUC
}
# topconfect genes
ROCc <- function(ngenes){
pnames <- confects$table$name[1:ngenes]
#pnames <- rownames(dge)[1:ngenes]
topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
mds <- plotMDS(topgenes,ndim=2)
mds <- mds$cmdscale.out
hypotenuse <- function(x) {
sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
}
ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[(nrow(mds)-1),1] )^2
b2=(x[1] - mds[(nrow(mds)-1),2] )^2
sqrt(a2+b2)
})
case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[nrow(mds),1] )^2
b2=(x[1] - mds[nrow(mds),2] )^2
sqrt(a2+b2)
})
df <- data.frame(ctrl_dist,case_dist)
df$classification_case <- unname(unlist(apply(df,1,function(x) {
if (x[2]<x[1]) {TRUE} else {FALSE}
}))) *1
df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
df$conf <- unname(apply(df,1,function(x) {
if (x[4]>x[5]) {x[4]} else {x[5]}
}))
df$crpgrp <- crpgrp
df <- df[order(-df$conf),]
df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
tpr <- cumsum(df$correct) / nrow(df)
tpr <- c(tpr,1)
fpr <- cumsum((df$correct -1)*-1) / nrow(df)
fpr <- c(fpr,1)
# AUC
id <- order(fpr)
AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
HEADER=paste("AUC =",AUC)
plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
mtext(HEADER)
AUC
}
ng <- c(2,3,5,7,10,13,15,17,20,30,40,60,80,100,150,200,300,500)
p_roc <- lapply(ng,ROCp)
p_roc <- unlist(p_roc)
c_roc <- lapply(ng,ROCc)
c_roc <- unlist(c_roc)
Now need to summarise what is the optimal number of genes in the panel.
plot(ng,p_roc,type="b",ylim=c(0.7,1),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()
# zoom in
plot(ng,p_roc,type="b",xlim=c(0,100),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()
From this we can see that p-value genes show better discriminitory power compared to topconfect genes at least when the number of gene$ In the range of ~150 genes shows the best result for topconfect genes with AUC of ~0.95 I also ran the p_roc analysis with more dimensions but the result was the same:
2d:0.951 (15g)
3d:0.945 (15g)
4d:0.953 (15g)
5d:0.953 (15g)
Now let’s have a look at the heatmap of the top 15 genes with the best discriminitory power.
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:15,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(10,15), cexRow=1, cexCol=.7, main="Top 15 genes")
# filter genes with fewer than 10 reads per sample
NAME = "t0_crp"
ss <- subset(samplesheet,timepoint==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + CrpGroup )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000167680.16 SEMA6B 116.93734 -1.7545755 0.3476538 -5.046904
## ENSG00000165949.12 IFI27 67.17018 2.4903519 0.5349850 4.654994
## ENSG00000279358.1 AP002833.3 37.34539 0.6664108 0.1548064 4.304802
## ENSG00000198010.12 DLGAP2 15.39018 -2.9860007 0.7063975 -4.227083
## ENSG00000285755.2 AC132153.1 16.27930 0.7771681 0.1867644 4.161222
## ENSG00000112812.16 PRSS16 40.93576 0.7922314 0.1906324 4.155806
## ENSG00000169245.6 CXCL10 83.86172 1.4237493 0.3426822 4.154722
## ENSG00000211821.2 TRDV2 114.06718 2.8845574 0.7018804 4.109756
## ENSG00000174482.10 LINGO2 25.14387 -1.2019485 0.2936643 -4.092933
## ENSG00000225886.3 AL445490.1 40.31291 1.0396377 0.2557416 4.065188
## pvalue padj
## ENSG00000167680.16 SEMA6B 4.490270e-07 0.00973760
## ENSG00000165949.12 IFI27 3.239900e-06 0.03513024
## ENSG00000279358.1 AP002833.3 1.671350e-05 0.09840478
## ENSG00000198010.12 DLGAP2 2.367404e-05 0.09840478
## ENSG00000285755.2 AC132153.1 3.165494e-05 0.09840478
## ENSG00000112812.16 PRSS16 3.241423e-05 0.09840478
## ENSG00000169245.6 CXCL10 3.256838e-05 0.09840478
## ENSG00000211821.2 TRDV2 3.960772e-05 0.09840478
## ENSG00000174482.10 LINGO2 4.259502e-05 0.09840478
## ENSG00000225886.3 AL445490.1 4.799366e-05 0.09840478
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
t0_crp <- dge
write.table(dge,file="t0_crp_rna.tsv",sep="\t",quote=FALSE)
NAME = "t0_vs_pod_lowCRP"
ss <- subset(samplesheet,CrpGroup==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000137441.8 FGFBP2 2290.7736 -1.0441286 0.13932527 -7.494180
## ENSG00000180739.14 S1PR5 2194.3357 -0.8996463 0.12148727 -7.405273
## ENSG00000087116.16 ADAMTS2 152.2537 3.2666803 0.45463999 7.185202
## ENSG00000180644.8 PRF1 13508.0087 -0.8692873 0.12117649 -7.173729
## ENSG00000073861.3 TBX21 2627.3291 -0.7986587 0.11394462 -7.009184
## ENSG00000189430.13 NCR1 325.8155 -0.9846467 0.14472353 -6.803640
## ENSG00000197471.12 SPN 7497.6862 -0.4540182 0.06795644 -6.681018
## ENSG00000203747.11 FCGR3A 8210.7779 -0.7321138 0.11070981 -6.612909
## ENSG00000198821.10 CD247 5280.3102 -0.5512978 0.08580645 -6.424899
## ENSG00000105374.10 NKG7 6479.5414 -0.8431169 0.13188449 -6.392844
## pvalue padj
## ENSG00000137441.8 FGFBP2 6.671429e-14 1.258686e-09
## ENSG00000180739.14 S1PR5 1.308814e-13 1.258686e-09
## ENSG00000087116.16 ADAMTS2 6.710777e-13 3.509335e-09
## ENSG00000180644.8 PRF1 7.298192e-13 3.509335e-09
## ENSG00000073861.3 TBX21 2.397127e-12 9.221270e-09
## ENSG00000189430.13 NCR1 1.020086e-11 3.270055e-08
## ENSG00000197471.12 SPN 2.372881e-11 6.519999e-08
## ENSG00000203747.11 FCGR3A 3.768410e-11 9.060200e-08
## ENSG00000198821.10 CD247 1.319572e-10 2.820071e-07
## ENSG00000105374.10 NKG7 1.628288e-10 3.006036e-07
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
t0_vs_pod_lowCRP <- dge
write.table(dge,file="t0_vs_pod_lowCRP_rna.tsv",sep="\t",quote=FALSE)
t0_vs_pod_lowCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_lowCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )
NAME = "t0_vs_pod_highCRP"
ss <- subset(samplesheet,CrpGroup==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1489.80876 3.618686 0.1833149 19.74027
## ENSG00000170439.7 METTL7B 272.04841 5.136126 0.3178545 16.15873
## ENSG00000136160.17 EDNRB 63.34773 3.868843 0.2740075 14.11948
## ENSG00000121316.11 PLBD1 16960.02791 2.087370 0.1545156 13.50912
## ENSG00000132170.21 PPARG 131.38193 3.638899 0.2733249 13.31346
## ENSG00000135424.17 ITGA7 475.12859 2.765605 0.2191406 12.62023
## ENSG00000178342.4 KCNG2 75.80036 2.650524 0.2111098 12.55519
## ENSG00000156414.19 TDRD9 930.02624 2.436293 0.1965318 12.39643
## ENSG00000138061.12 CYP1B1 7208.87837 1.995175 0.1633779 12.21203
## ENSG00000112290.13 WASF1 153.31662 1.799185 0.1483442 12.12845
## pvalue padj
## ENSG00000108950.12 FAM20A 9.725497e-87 2.146806e-82
## ENSG00000170439.7 METTL7B 9.856739e-59 1.087888e-54
## ENSG00000136160.17 EDNRB 2.881075e-45 2.119895e-41
## ENSG00000121316.11 PLBD1 1.381559e-41 7.624134e-38
## ENSG00000132170.21 PPARG 1.933286e-40 8.535071e-37
## ENSG00000135424.17 ITGA7 1.633424e-36 6.009366e-33
## ENSG00000178342.4 KCNG2 3.722802e-36 1.173959e-32
## ENSG00000156414.19 TDRD9 2.732331e-35 7.539185e-32
## ENSG00000138061.12 CYP1B1 2.681226e-34 6.576154e-31
## ENSG00000112290.13 WASF1 7.464895e-34 1.647801e-30
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
t0_vs_pod_highCRP <- dge
write.table(dge,file="t0_vs_pod_highCRP_rna.tsv",sep="\t",quote=FALSE)
t0_vs_pod_highCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_highCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )
DE4 and DE5 will be compared.
v1 <- list("low CRP up"=t0_vs_pod_lowCRP_up,
"low CRP dn"=t0_vs_pod_lowCRP_dn,
"high CRP up"=t0_vs_pod_highCRP_up,
"high CRP dn"=t0_vs_pod_highCRP_dn)
plot(euler(v1),quantities = TRUE, main="Gene overlap")
Set up mitch data.
#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")
# set up the gene table
gt <- as.data.frame(sapply(strsplit(rownames(xx)," "),"[[",2))
gt$id <- rownames(xx)
colnames(gt) <- c("gene name","id")
Now mitch analysis with DE4 and DE5
l <- list("low CRP"=t0_vs_pod_lowCRP, "high CRP"=t0_vs_pod_highCRP)
y <- mitch_import( l , DEtype="DESeq2", geneTable=gt)
## Note: Mean no. genes in input = 21933.5
## Note: no. genes in output = 21404
## Note: estimated proportion of input genes in output = 0.976
# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="significance",resrows=50)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(res$enrichment_result,20)
## set setSize pMANOVA
## 736 Neutrophil degranulation 456 1.263804e-111
## 536 Innate Immune System 963 6.130762e-91
## 519 Immune System 1885 2.765539e-52
## 631 Membrane Trafficking 555 5.938118e-44
## 1070 Signal Transduction 1869 1.004496e-35
## 1312 Vesicle-mediated transport 644 3.036553e-33
## 634 Metabolism 1758 1.903167e-30
## 649 Metabolism of proteins 1706 1.534282e-28
## 841 Post-translational protein modification 1183 2.919397e-28
## 496 Hemostasis 542 1.799656e-22
## 829 Platelet activation, signaling and aggregation 219 2.664013e-22
## 1132 Signaling by Receptor Tyrosine Kinases 408 7.112960e-22
## 1103 Signaling by Interleukins 385 1.982991e-21
## 643 Metabolism of lipids 619 2.020443e-20
## 283 Disease 1339 2.210546e-19
## 85 Asparagine N-linked glycosylation 269 4.801739e-18
## 1057 Scavenging of heme from plasma 70 2.156204e-17
## 1010 Response to elevated platelet cytosolic Ca2+ 108 8.500126e-17
## 1245 Toll-like Receptor Cascades 143 4.513541e-16
## 831 Platelet degranulation 104 4.561114e-16
## s.low.CRP s.high.CRP p.low.CRP p.high.CRP s.dist SD
## 736 0.49112778 0.5926683 9.831146e-73 2.069228e-105 0.7697156 0.07179997
## 536 0.31921271 0.3677640 1.696475e-63 7.543273e-84 0.4869775 0.03433092
## 519 0.15849504 0.2108636 4.255092e-30 4.620841e-52 0.2637881 0.03703018
## 631 0.19402464 0.3485897 5.330055e-15 5.804668e-45 0.3989490 0.10929397
## 1070 0.09033584 0.1757052 1.012719e-10 2.342824e-36 0.1975674 0.06036526
## 1312 0.20651231 0.2786978 3.639864e-19 1.279883e-33 0.3468715 0.05104287
## 634 0.10348522 0.1678910 5.845616e-13 1.286778e-31 0.1972222 0.04554178
## 649 0.09665358 0.1645692 3.215319e-11 1.162926e-29 0.1908532 0.04802360
## 841 0.09714527 0.1923267 1.835523e-08 7.003482e-29 0.2154687 0.06730346
## 496 0.21747030 0.2327536 4.532923e-18 1.781809e-20 0.3185398 0.01080696
## 829 0.25216106 0.3901526 1.251285e-10 2.279381e-23 0.4645474 0.09757478
## 1132 0.16146563 0.2840002 2.185823e-08 6.761141e-23 0.3266914 0.08664501
## 1103 0.21956189 0.2839877 1.374644e-13 1.029543e-21 0.3589658 0.04555593
## 643 0.11291144 0.2220172 1.620118e-06 3.826076e-21 0.2490796 0.07714945
## 283 0.09709103 0.1508045 2.520117e-09 1.989476e-20 0.1793563 0.03798117
## 85 0.18687003 0.3159482 1.316744e-07 4.393851e-19 0.3670745 0.09127204
## 1057 0.25037432 -0.2801283 2.915726e-04 5.049103e-05 0.3757115 0.37512200
## 1010 0.37973833 0.4634969 9.020070e-12 8.213220e-17 0.5991916 0.05922624
## 1245 0.28561077 0.4042238 3.681181e-09 6.785557e-17 0.4949449 0.08387210
## 831 0.38321867 0.4592344 1.421157e-11 5.601182e-16 0.5981244 0.05375122
## p.adjustMANOVA
## 736 1.722565e-108
## 536 4.178114e-88
## 519 1.256477e-49
## 631 2.023414e-41
## 1070 2.738255e-33
## 1312 6.898036e-31
## 634 3.705739e-28
## 649 2.614033e-26
## 841 4.421264e-26
## 496 2.452931e-20
## 829 3.300954e-20
## 1132 8.079138e-20
## 1103 2.079090e-19
## 643 1.967046e-18
## 283 2.008650e-17
## 85 4.090482e-16
## 1057 1.728768e-15
## 1010 6.436484e-15
## 1245 3.108399e-14
## 831 3.108399e-14
lo_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP>0),1]
lo_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP<0),1]
hi_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP>0),1]
hi_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP<0),1]
v1 <- list("low CRP up"=lo_up,
"low CRP dn"=lo_dn,
"high CRP up"=hi_up,
"high CRP dn"=hi_dn)
plot(euler(v1),quantities = TRUE, main="Gene set overlap")
unlink("t0_v_pod_lohi_mitch_sig.html")
capture.output(
mitch_report(res, "t0_v_pod_lohi_mitch_sig.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_sig.rds ".
##
##
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
##
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="effect",resrows=50)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res$enrichment_result,20)
## set setSize
## 513 IRAK4 deficiency (TLR2/4) 10
## 686 MyD88 deficiency (TLR2/4) 10
## 968 Regulation of TLR by endogenous ligand 11
## 1330 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism 12
## 1331 alpha-linolenic acid (ALA) metabolism 12
## 1278 Translocation of ZAP-70 to Immunological synapse 24
## 345 Erythrocytes take up carbon dioxide and release oxygen 11
## 765 O2/CO2 exchange in erythrocytes 11
## 505 Hyaluronan uptake and degradation 12
## 1306 Uptake and function of anthrax toxins 10
## 1342 p130Cas linkage to MAPK signaling for integrins 11
## 1323 WNT5A-dependent internalization of FZD2, FZD5 and ROR2 11
## 1304 Unwinding of DNA 12
## 434 GRB2:SOS provides linkage to MAPK signaling for Integrins 12
## 889 RHO GTPases Activate WASPs and WAVEs 35
## 902 RNA Polymerase I Promoter Opening 19
## 61 Advanced glycosylation endproduct receptor signaling 12
## 779 PD-1 signaling 28
## 925 ROS and RNS production in phagocytes 31
## 1105 Signaling by Leptin 10
## pMANOVA s.low.CRP s.high.CRP p.low.CRP p.high.CRP s.dist
## 513 8.775903e-07 0.8445452 0.8863981 3.735703e-06 1.204349e-06 1.2243194
## 686 8.775903e-07 0.8445452 0.8863981 3.735703e-06 1.204349e-06 1.2243194
## 968 1.120943e-05 0.7701542 0.7203843 9.697309e-06 3.506286e-05 1.0545573
## 1330 7.056715e-06 0.6555878 0.7808449 8.392555e-05 2.803937e-06 1.0195656
## 1331 7.056715e-06 0.6555878 0.7808449 8.392555e-05 2.803937e-06 1.0195656
## 1278 7.245946e-11 -0.6794473 -0.7589726 8.215895e-09 1.200844e-10 1.0186697
## 345 3.194632e-05 0.7410538 0.6765722 2.075703e-05 1.018797e-04 1.0034494
## 765 3.194632e-05 0.7410538 0.6765722 2.075703e-05 1.018797e-04 1.0034494
## 505 3.641898e-05 0.5909140 0.7322831 3.929058e-04 1.116940e-05 0.9409665
## 1306 1.565554e-04 0.5050388 0.7630364 5.680935e-03 2.928526e-05 0.9150348
## 1342 2.208711e-04 0.6487891 0.6348848 1.942043e-04 2.657830e-04 0.9077478
## 1323 1.020922e-04 0.3951887 0.7422776 2.323143e-02 2.011407e-05 0.8409222
## 1304 4.411617e-04 -0.5820634 -0.5953939 4.799668e-04 3.546911e-04 0.8326414
## 434 4.557212e-04 0.6265816 0.5329485 1.707055e-04 1.388701e-03 0.8225804
## 889 1.351546e-10 0.5016305 0.6445158 2.787695e-07 4.059385e-11 0.8167213
## 902 4.750649e-06 0.4994745 0.6425163 1.634559e-04 1.235941e-06 0.8138194
## 61 2.239946e-04 0.4324436 0.6832227 9.487021e-03 4.153842e-05 0.8085794
## 779 3.993373e-08 -0.5351162 -0.6018064 9.472041e-07 3.514198e-08 0.8053076
## 925 1.324318e-08 0.5765444 0.5445837 2.734382e-08 1.526342e-07 0.7930794
## 1105 2.940561e-03 0.5869870 0.5268860 1.306677e-03 3.910265e-03 0.7887729
## SD p.adjustMANOVA
## 513 0.029594438 8.306636e-06
## 686 0.029594438 8.306636e-06
## 968 0.035192627 7.275453e-05
## 1330 0.088570149 4.809151e-05
## 1331 0.088570149 4.809151e-05
## 1278 0.056232849 1.673936e-09
## 345 0.045595366 1.762868e-04
## 765 0.045595366 1.762868e-04
## 505 0.099963012 1.946630e-04
## 1306 0.182431831 7.160573e-04
## 1342 0.009831820 9.806101e-04
## 1323 0.245428911 4.956164e-04
## 1304 0.009426107 1.805716e-03
## 434 0.066208624 1.843169e-03
## 889 0.101035109 3.019930e-09
## 902 0.101145776 3.444221e-05
## 61 0.177327607 9.880410e-04
## 779 0.047157149 5.284434e-07
## 925 0.022599652 1.962005e-07
## 1105 0.042497798 9.088402e-03
unlink("t0_v_pod_lohi_mitch_eff.html")
capture.output(
mitch_report(res, "t0_v_pod_lohi_mitch_eff.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_eff.rds ".
##
##
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
##
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
# prioritisation by discordance
res <- mitch_calc(y, genesets, priority="SD",resrows=50)
## Note: Prioritisation by SD after selecting sets with
## p.adjustMANOVA<=0.05.
head(res$enrichment_result,20)
## set setSize
## 122 CD22 mediated BCR regulation 58
## 1310 VLDLR internalisation and degradation 11
## 1057 Scavenging of heme from plasma 70
## 189 Classical antibody-mediated complement activation 68
## 217 Creation of C4 and C2 activators 70
## 847 Pre-NOTCH Processing in Golgi 18
## 1360 tRNA processing in the mitochondrion 32
## 1017 Role of LAT2/NTAL/LAB on calcium mobilization 73
## 755 Nucleobase biosynthesis 13
## 362 FCGR activation 75
## 533 Initial triggering of complement 77
## 871 Purine ribonucleoside monophosphate biosynthesis 10
## 691 N-Glycan antennae elongation 13
## 637 Metabolism of amine-derived hormones 10
## 108 Binding and Uptake of Ligands by Scavenger Receptors 91
## 806 Pausing and recovery of Tat-mediated HIV elongation 30
## 1218 Tat-mediated HIV elongation arrest and recovery 30
## 186 Class C/3 (Metabotropic glutamate/pheromone receptors) 11
## 971 Regulation of TP53 Activity through Acetylation 29
## 332 ERKs are inactivated 13
## pMANOVA s.low.CRP s.high.CRP p.low.CRP p.high.CRP s.dist
## 122 1.546499e-15 0.19671294 -0.34782419 9.559860e-03 4.588960e-06 0.3995969
## 1310 1.280994e-04 0.09540504 0.63485932 5.837679e-01 2.659345e-04 0.6419879
## 1057 2.156204e-17 0.25037432 -0.28012830 2.915726e-04 5.049103e-05 0.3757115
## 189 7.313925e-15 0.16106580 -0.32812259 2.162847e-02 2.868729e-06 0.3655224
## 217 4.098656e-15 0.17406688 -0.31430580 1.178960e-02 5.413165e-06 0.3592874
## 847 2.798792e-05 0.04791816 0.51934807 7.248705e-01 1.360539e-04 0.5215540
## 1360 1.762783e-07 -0.40487261 0.06653565 7.356381e-05 5.147939e-01 0.4103033
## 1017 4.307860e-14 0.22984813 -0.23564585 6.840450e-04 4.984687e-04 0.3291795
## 755 1.934385e-03 0.04726287 -0.41566367 7.679557e-01 9.456809e-03 0.4183420
## 362 3.316828e-14 0.14305593 -0.31083751 3.218818e-02 3.235201e-06 0.3421768
## 533 3.368771e-14 0.15802967 -0.29232441 1.650559e-02 9.177028e-06 0.3323055
## 871 1.670738e-02 0.11325605 -0.33231747 5.351586e-01 6.880217e-02 0.3510867
## 691 4.127884e-03 -0.37812811 0.06089908 1.824009e-02 7.038136e-01 0.3830007
## 637 1.802667e-02 0.04661120 -0.37761989 7.985488e-01 3.865802e-02 0.3804857
## 108 9.227454e-15 0.27979673 -0.14230854 3.949152e-06 1.895915e-02 0.3139075
## 806 2.004984e-05 -0.11793456 0.30277596 2.635703e-01 4.098493e-03 0.3249336
## 1218 2.004984e-05 -0.11793456 0.30277596 2.635703e-01 4.098493e-03 0.3249336
## 186 1.195640e-02 0.01671320 -0.39788291 9.235367e-01 2.230803e-02 0.3982338
## 971 8.634356e-06 -0.39999516 0.01328816 1.924198e-04 9.014320e-01 0.4002158
## 332 1.092471e-03 0.12135585 0.53101053 4.486918e-01 9.151715e-04 0.5447012
## SD p.adjustMANOVA
## 122 0.3850459 9.581264e-14
## 1310 0.3814518 5.999981e-04
## 1057 0.3751220 1.728768e-15
## 189 0.3459084 3.560314e-13
## 217 0.3453316 2.428899e-13
## 847 0.3333513 1.609601e-04
## 1360 0.3333360 2.053567e-06
## 1017 0.3291539 1.779277e-12
## 755 0.3273385 6.446375e-03
## 362 0.3209511 1.434886e-12
## 533 0.3184484 1.434886e-12
## 871 0.3150681 3.808053e-02
## 691 0.3104391 1.223110e-02
## 637 0.2999767 4.008214e-02
## 108 0.2984735 4.336903e-13
## 806 0.2974873 1.193359e-04
## 1218 0.2974873 1.193359e-04
## 186 0.2931637 2.936320e-02
## 971 0.2922354 5.768935e-05
## 332 0.2896696 3.939254e-03
unlink("t0_v_pod_lohi_mitch_sd.html")
capture.output(
mitch_report(res, "t0_v_pod_lohi_mitch_sd.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_sd.rds ".
##
##
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
##
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
Not correcting for blood compostion changes.
# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set setSize pANOVA
## 737 Neutrophil degranulation 457 1.319240e-103
## 537 Innate Immune System 967 5.848762e-84
## 520 Immune System 1893 3.240735e-46
## 632 Membrane Trafficking 559 5.327421e-36
## 1314 Vesicle-mediated transport 650 1.642242e-31
## 1071 Signal Transduction 1894 7.041601e-28
## 497 Hemostasis 549 9.700746e-23
## 635 Metabolism 1772 1.645387e-22
## 830 Platelet activation, signaling and aggregation 221 1.353176e-20
## 1133 Signaling by Receptor Tyrosine Kinases 414 1.678123e-20
## 1104 Signaling by Interleukins 386 1.710343e-19
## 650 Metabolism of proteins 1718 2.214499e-19
## 842 Post-translational protein modification 1193 6.814624e-19
## 1011 Response to elevated platelet cytosolic Ca2+ 110 1.253521e-16
## 832 Platelet degranulation 106 5.814015e-16
## 1247 Toll-like Receptor Cascades 143 2.293691e-15
## 284 Disease 1355 2.716874e-15
## 86 Asparagine N-linked glycosylation 269 5.316672e-15
## 644 Metabolism of lipids 626 6.696163e-15
## 1290 Transport of small molecules 561 1.571805e-14
## s.dist p.adjustANOVA
## 737 0.5868303 1.802082e-100
## 537 0.3671208 3.994704e-81
## 520 0.1977009 1.475615e-43
## 632 0.3093383 1.819314e-33
## 1314 0.2680762 4.486606e-29
## 1071 0.1517126 1.603138e-25
## 497 0.2446828 1.893031e-20
## 635 0.1395073 2.809498e-20
## 830 0.3628343 2.053820e-18
## 1133 0.2656301 2.292316e-18
## 1104 0.2675063 2.123935e-17
## 650 0.1305055 2.520838e-17
## 842 0.1524833 7.160598e-17
## 1011 0.4565015 1.223079e-14
## 832 0.4546295 5.294630e-14
## 1247 0.3835923 1.958239e-13
## 284 0.1278957 2.183088e-13
## 86 0.2767766 4.034764e-13
## 644 0.1822696 4.814189e-13
## 1290 0.1895711 1.073543e-12
unlink("t0_v_pod_mitch.html")
capture.output(
mitch_report(res, "t0_v_pod_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set
## 735 Neutrophil degranulation
## 1355 rRNA processing in the nucleus and cytosol
## 1353 rRNA processing
## 625 Major pathway of rRNA processing in the nucleolus and cytosol
## 535 Innate Immune System
## 1268 Translation
## 635 Metabolism of RNA
## 392 Formation of a pool of free 40S subunits
## 350 Eukaryotic Translation Elongation
## 807 Peptide chain elongation
## 1060 Selenocysteine synthesis
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 352 Eukaryotic Translation Termination
## 1315 Viral mRNA Translation
## 142 Cap-dependent Translation Initiation
## 351 Eukaryotic Translation Initiation
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 527 Influenza Viral RNA Transcription and Replication
## setSize pANOVA s.dist p.adjustANOVA
## 735 457 4.743920e-61 0.4485963 6.465964e-58
## 1355 190 7.192148e-52 -0.6358614 4.901449e-49
## 1353 217 2.080594e-51 -0.5926305 9.452831e-49
## 625 180 6.055674e-50 -0.6405213 2.063471e-47
## 535 966 3.101139e-44 0.2645613 8.453705e-42
## 1268 295 9.421413e-40 -0.4457170 2.140231e-37
## 635 685 1.652325e-39 -0.2942649 3.217313e-37
## 392 100 4.047027e-38 -0.7455901 6.895122e-36
## 350 93 1.440793e-37 -0.7671592 2.182002e-35
## 807 88 5.055188e-36 -0.7714444 6.890221e-34
## 1060 92 1.357431e-35 -0.7498431 1.681981e-33
## 588 110 7.550069e-35 -0.6784815 8.575620e-33
## 434 111 1.109822e-34 -0.6737332 1.163606e-32
## 352 92 3.299311e-34 -0.7344070 3.212114e-32
## 1315 88 6.764758e-34 -0.7472455 6.146911e-32
## 142 118 2.647147e-33 -0.6398039 2.122389e-31
## 351 118 2.647147e-33 -0.6398039 2.122389e-31
## 743 94 1.583940e-32 -0.7076135 1.199395e-30
## 1007 100 3.169485e-31 -0.6715684 2.273688e-29
## 527 135 5.440710e-31 -0.5761699 3.707844e-29
unlink("pod_crp_mitch.html")
capture.output(
mitch_report(res, "pod_crp_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set
## 353 Eukaryotic Translation Elongation
## 395 Formation of a pool of free 40S subunits
## 810 Peptide chain elongation
## 591 L13a-mediated translational silencing of Ceruloplasmin expression
## 1318 Viral mRNA Translation
## 355 Eukaryotic Translation Termination
## 636 Metabolism
## 1063 Selenocysteine synthesis
## 437 GTP hydrolysis and joining of the 60S ribosomal subunit
## 145 Cap-dependent Translation Initiation
## 354 Eukaryotic Translation Initiation
## 738 Neutrophil degranulation
## 746 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1010 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 1043 SRP-dependent cotranslational protein targeting to membrane
## 285 Disease
## 745 Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 747 Nonsense-Mediated Decay (NMD)
## 983 Regulation of expression of SLITs and ROBOs
## 1062 Selenoamino acid metabolism
## setSize pANOVA s.dist p.adjustANOVA
## 353 93 7.235821e-42 -0.8116867 9.891367e-39
## 395 100 1.055679e-40 -0.7714938 7.215568e-38
## 810 88 6.985114e-40 -0.8135221 3.182884e-37
## 591 110 1.096103e-37 -0.7068229 3.635947e-35
## 1318 88 1.329900e-37 -0.7889383 3.635947e-35
## 355 92 5.983625e-37 -0.7646378 1.282281e-34
## 636 1771 6.566177e-37 -0.1812196 1.282281e-34
## 1063 92 2.245441e-36 -0.7584028 3.579908e-34
## 437 111 2.356925e-36 -0.6905391 3.579908e-34
## 145 118 9.577584e-36 -0.6639584 1.190233e-33
## 354 118 9.577584e-36 -0.6639584 1.190233e-33
## 738 458 2.062828e-34 -0.3329542 2.349905e-32
## 746 94 3.773216e-34 -0.7259279 3.967682e-32
## 1010 100 6.511606e-31 -0.6680040 6.358118e-29
## 1043 111 7.472133e-30 -0.6226349 6.809604e-28
## 285 1354 4.559478e-29 -0.1809621 3.895504e-27
## 745 114 1.578986e-28 -0.5998573 1.199152e-26
## 747 114 1.578986e-28 -0.5998573 1.199152e-26
## 983 162 5.020894e-28 -0.4990337 3.612401e-26
## 1062 114 6.775910e-27 -0.5814016 4.631335e-25
unlink("t0_crp_mitch.html")
capture.output(
mitch_report(res, "t0_crp_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## prioritise by effect size
# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set setSize
## 514 IRAK4 deficiency (TLR2/4) 10
## 687 MyD88 deficiency (TLR2/4) 10
## 1333 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism 12
## 1334 alpha-linolenic acid (ALA) metabolism 12
## 969 Regulation of TLR by endogenous ligand 11
## 1280 Translocation of ZAP-70 to Immunological synapse 24
## 506 Hyaluronan uptake and degradation 12
## 1308 Uptake and function of anthrax toxins 10
## 346 Erythrocytes take up carbon dioxide and release oxygen 11
## 766 O2/CO2 exchange in erythrocytes 11
## 1326 WNT5A-dependent internalization of FZD2, FZD5 and ROR2 11
## 1345 p130Cas linkage to MAPK signaling for integrins 11
## 61 Advanced glycosylation endproduct receptor signaling 12
## 1306 Unwinding of DNA 12
## 903 RNA Polymerase I Promoter Opening 19
## 890 RHO GTPases Activate WASPs and WAVEs 35
## 926 ROS and RNS production in phagocytes 31
## 737 Neutrophil degranulation 457
## 435 GRB2:SOS provides linkage to MAPK signaling for Integrins 12
## 780 PD-1 signaling 28
## pANOVA s.dist p.adjustANOVA
## 514 8.367911e-07 0.8994748 1.048676e-05
## 687 8.367911e-07 0.8994748 1.048676e-05
## 1333 2.461377e-06 0.7852782 2.626751e-05
## 1334 2.461377e-06 0.7852782 2.626751e-05
## 969 1.930406e-05 0.7438719 1.448865e-04
## 1280 2.809953e-10 -0.7436252 8.344338e-09
## 506 8.850181e-06 0.7406638 7.239130e-05
## 1308 1.141313e-04 0.7045257 6.523155e-04
## 346 9.462990e-05 0.6796841 5.533223e-04
## 766 9.462990e-05 0.6796841 5.533223e-04
## 1326 1.165844e-04 0.6708490 6.580757e-04
## 1345 1.324011e-04 0.6654100 7.219060e-04
## 61 9.478582e-05 0.6506965 5.533223e-04
## 1306 1.323109e-04 -0.6371242 7.219060e-04
## 903 3.287422e-06 0.6163218 3.277824e-05
## 890 4.567968e-10 0.6085563 1.327626e-08
## 926 8.233487e-09 0.5978890 1.757335e-07
## 737 1.319240e-103 0.5868303 1.802082e-100
## 435 4.971813e-04 0.5804902 2.286699e-03
## 780 1.109028e-07 -0.5793553 1.870286e-06
unlink("t0_v_pod_mitche.html")
capture.output(
mitch_report(res, "t0_v_pod_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set
## 512 IRAK4 deficiency (TLR2/4)
## 685 MyD88 deficiency (TLR2/4)
## 807 Peptide chain elongation
## 350 Eukaryotic Translation Elongation
## 1060 Selenocysteine synthesis
## 1315 Viral mRNA Translation
## 392 Formation of a pool of free 40S subunits
## 352 Eukaryotic Translation Termination
## 1330 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1331 alpha-linolenic acid (ALA) metabolism
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1278 Translocation of ZAP-70 to Immunological synapse
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 398 Formation of the ternary complex, and subsequently, the 43S complex
## 1323 WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 967 Regulation of TLR by endogenous ligand
## 625 Major pathway of rRNA processing in the nucleolus and cytosol
## 142 Cap-dependent Translation Initiation
## setSize pANOVA s.dist p.adjustANOVA
## 512 10 2.191838e-06 0.8644929 3.111953e-05
## 685 10 2.191838e-06 0.8644929 3.111953e-05
## 807 88 5.055188e-36 -0.7714444 6.890221e-34
## 350 93 1.440793e-37 -0.7671592 2.182002e-35
## 1060 92 1.357431e-35 -0.7498431 1.681981e-33
## 1315 88 6.764758e-34 -0.7472455 6.146911e-32
## 392 100 4.047027e-38 -0.7455901 6.895122e-36
## 352 92 3.299311e-34 -0.7344070 3.212114e-32
## 1330 12 1.516159e-05 0.7211404 1.796977e-04
## 1331 12 1.516159e-05 0.7211404 1.796977e-04
## 743 94 1.583940e-32 -0.7076135 1.199395e-30
## 1278 24 1.982172e-09 -0.7071589 5.097548e-08
## 588 110 7.550069e-35 -0.6784815 8.575620e-33
## 434 111 1.109822e-34 -0.6737332 1.163606e-32
## 1007 100 3.169485e-31 -0.6715684 2.273688e-29
## 398 51 1.916444e-16 -0.6654328 8.426172e-15
## 1323 11 1.548886e-04 0.6586494 1.336159e-03
## 967 11 2.337332e-04 0.6406105 1.873990e-03
## 625 180 6.055674e-50 -0.6405213 2.063471e-47
## 142 118 2.647147e-33 -0.6398039 2.122389e-31
unlink("pod_crp_mitche.html")
capture.output(
mitch_report(res, "pod_crp_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set
## 810 Peptide chain elongation
## 353 Eukaryotic Translation Elongation
## 1318 Viral mRNA Translation
## 395 Formation of a pool of free 40S subunits
## 355 Eukaryotic Translation Termination
## 1063 Selenocysteine synthesis
## 746 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 591 L13a-mediated translational silencing of Ceruloplasmin expression
## 437 GTP hydrolysis and joining of the 60S ribosomal subunit
## 1313 VLDLR internalisation and degradation
## 1010 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 145 Cap-dependent Translation Initiation
## 354 Eukaryotic Translation Initiation
## 401 Formation of the ternary complex, and subsequently, the 43S complex
## 585 Josephin domain DUBs
## 1043 SRP-dependent cotranslational protein targeting to membrane
## 387 Formation of ATP by chemiosmotic coupling
## 1328 WNT5A-dependent internalization of FZD4
## 745 Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 747 Nonsense-Mediated Decay (NMD)
## setSize pANOVA s.dist p.adjustANOVA
## 810 88 6.985114e-40 -0.8135221 3.182884e-37
## 353 93 7.235821e-42 -0.8116867 9.891367e-39
## 1318 88 1.329900e-37 -0.7889383 3.635947e-35
## 395 100 1.055679e-40 -0.7714938 7.215568e-38
## 355 92 5.983625e-37 -0.7646378 1.282281e-34
## 1063 92 2.245441e-36 -0.7584028 3.579908e-34
## 746 94 3.773216e-34 -0.7259279 3.967682e-32
## 591 110 1.096103e-37 -0.7068229 3.635947e-35
## 437 111 2.356925e-36 -0.6905391 3.579908e-34
## 1313 11 8.083211e-05 -0.6862890 8.743742e-04
## 1010 100 6.511606e-31 -0.6680040 6.358118e-29
## 145 118 9.577584e-36 -0.6639584 1.190233e-33
## 354 118 9.577584e-36 -0.6639584 1.190233e-33
## 401 51 3.041275e-15 -0.6381277 1.014006e-13
## 585 10 5.527420e-04 -0.6306684 4.244934e-03
## 1043 111 7.472133e-30 -0.6226349 6.809604e-28
## 387 18 7.323691e-06 -0.6103471 1.220913e-04
## 1328 13 1.508432e-04 -0.6069482 1.472876e-03
## 745 114 1.578986e-28 -0.5998573 1.199152e-26
## 747 114 1.578986e-28 -0.5998573 1.199152e-26
unlink("t0_crp_mitche.html")
capture.output(
mitch_report(res, "t0_crp_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# 3D analysys significance
z <- list("t0_v_pod"=t0_v_pod, "pod_crp"=pod_crp, "t0_crp"=t0_crp)
zz <- mitch_import(z, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(zz, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set
## 735 Neutrophil degranulation
## 535 Innate Immune System
## 1267 Translation
## 392 Formation of a pool of free 40S subunits
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 350 Eukaryotic Translation Elongation
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 142 Cap-dependent Translation Initiation
## 351 Eukaryotic Translation Initiation
## 807 Peptide chain elongation
## 1352 rRNA processing
## 1354 rRNA processing in the nucleus and cytosol
## 1314 Viral mRNA Translation
## 352 Eukaryotic Translation Termination
## 1060 Selenocysteine synthesis
## 625 Major pathway of rRNA processing in the nucleolus and cytosol
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 635 Metabolism of RNA
## 1040 SRP-dependent cotranslational protein targeting to membrane
## setSize pMANOVA s.t0_v_pod s.pod_crp s.t0_crp p.t0_v_pod
## 735 457 4.706799e-120 0.58657151 0.4537116 -0.3280584 1.730727e-103
## 535 965 1.590800e-93 0.36728929 0.2699262 -0.1920147 8.532771e-84
## 1267 295 5.553482e-85 -0.05584812 -0.4411146 -0.3437255 9.896795e-02
## 392 100 9.217889e-83 -0.20789625 -0.7431061 -0.7691941 3.271811e-04
## 588 110 6.806914e-80 -0.14314838 -0.6756506 -0.7044555 9.492889e-03
## 350 93 8.413439e-79 -0.27289442 -0.7649026 -0.8094742 5.386406e-06
## 434 111 4.937354e-78 -0.14719204 -0.6708793 -0.6881875 7.382262e-03
## 142 118 3.480058e-76 -0.13160443 -0.6367120 -0.6614609 1.353731e-02
## 351 118 3.480058e-76 -0.13160443 -0.6367120 -0.6614609 1.353731e-02
## 807 88 9.119092e-75 -0.28134726 -0.7692423 -0.8113112 5.048100e-06
## 1352 217 8.405394e-73 -0.28892642 -0.5892016 -0.3942535 2.159088e-13
## 1354 190 2.793885e-71 -0.30718205 -0.6328397 -0.3926592 2.765619e-13
## 1314 88 9.813154e-71 -0.26210705 -0.7449685 -0.7866499 2.131873e-05
## 352 92 4.201006e-70 -0.26476727 -0.7319569 -0.7623451 1.134012e-05
## 1060 92 9.105403e-70 -0.29852322 -0.7475191 -0.7560906 7.425644e-07
## 625 180 2.038796e-68 -0.30640536 -0.6374809 -0.3909947 1.303462e-12
## 743 94 3.193283e-66 -0.24433952 -0.7051462 -0.7235822 4.227299e-05
## 1007 100 2.150994e-62 -0.22168319 -0.6687223 -0.6656059 1.275668e-04
## 635 685 1.758396e-61 -0.08857003 -0.2889896 -0.1753195 7.791790e-05
## 1040 111 8.284156e-60 -0.13387379 -0.5828477 -0.6201011 1.482425e-02
## p.pod_crp p.t0_crp s.dist SD p.adjustMANOVA
## 735 2.089278e-62 2.293735e-33 0.8108900 0.4941936 6.410660e-117
## 535 6.441357e-46 5.142292e-24 0.4946021 0.2988003 1.083335e-90
## 1267 5.790719e-39 2.801611e-24 0.5620039 0.2003282 2.521281e-82
## 392 7.088903e-38 1.808414e-40 1.0895352 0.3168032 3.138691e-80
## 588 1.430899e-34 1.918844e-37 0.9865357 0.3160839 1.854203e-77
## 350 2.345639e-37 1.199836e-41 1.1466455 0.2977629 1.909851e-76
## 434 2.117018e-34 4.079941e-36 0.9722894 0.3074692 9.606681e-76
## 142 5.363232e-33 1.738052e-35 0.9274980 0.2990245 5.266487e-74
## 351 5.363232e-33 1.738052e-35 0.9274980 0.2990245 5.266487e-74
## 807 7.948652e-36 1.127988e-39 1.1528729 0.2945825 1.242020e-72
## 1352 7.881898e-51 1.254523e-23 0.7655539 0.1523503 1.040741e-70
## 1354 2.169626e-51 9.466998e-21 0.8056228 0.1688421 3.171060e-69
## 1314 1.064541e-33 2.152672e-37 1.1146731 0.2915584 1.028117e-68
## 352 5.446050e-34 9.761602e-37 1.0895103 0.2789185 4.086979e-68
## 1060 2.205517e-35 3.664096e-36 1.1043432 0.2617374 8.267706e-68
## 625 1.748041e-49 1.356738e-19 0.8081726 0.1720087 1.735525e-66
## 743 2.603588e-32 6.137300e-34 1.0394730 0.2715254 2.558383e-64
## 1007 5.652620e-31 1.060551e-30 0.9692081 0.2572033 1.627585e-60
## 635 3.785681e-38 5.101082e-15 0.3494232 0.1005107 1.260492e-59
## 1040 2.362030e-26 1.274256e-29 0.8614865 0.2706112 5.641510e-58
unlink("3d_mitch.html")
capture.output(
mitch_report(res, "3d_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# 3D analysys effect size
res <- mitch_calc(zz, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set
## 512 IRAK4 deficiency (TLR2/4)
## 685 MyD88 deficiency (TLR2/4)
## 807 Peptide chain elongation
## 350 Eukaryotic Translation Elongation
## 1314 Viral mRNA Translation
## 1060 Selenocysteine synthesis
## 392 Formation of a pool of free 40S subunits
## 352 Eukaryotic Translation Termination
## 1329 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1330 alpha-linolenic acid (ALA) metabolism
## 1322 WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 504 Hyaluronan uptake and degradation
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1277 Translocation of ZAP-70 to Immunological synapse
## 1305 Uptake and function of anthrax toxins
## 967 Regulation of TLR by endogenous ligand
## 1309 VLDLR internalisation and degradation
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## setSize pMANOVA s.t0_v_pod s.pod_crp s.t0_crp p.t0_v_pod
## 512 10 7.496826e-06 0.8994993 0.8681297 -0.05981002 8.362207e-07
## 685 10 7.496826e-06 0.8994993 0.8681297 -0.05981002 8.362207e-07
## 807 88 9.119092e-75 -0.2813473 -0.7692423 -0.81131123 5.048100e-06
## 350 93 8.413439e-79 -0.2728944 -0.7649026 -0.80947416 5.386406e-06
## 1314 88 9.813154e-71 -0.2621071 -0.7449685 -0.78664990 2.131873e-05
## 1060 92 9.105403e-70 -0.2985232 -0.7475191 -0.75609061 7.425644e-07
## 392 100 9.217889e-83 -0.2078962 -0.7431061 -0.76919412 3.271811e-04
## 352 92 4.201006e-70 -0.2647673 -0.7319569 -0.76234512 1.134012e-05
## 1329 12 3.031252e-05 0.7850157 0.7256774 -0.13326158 2.480595e-06
## 1330 12 3.031252e-05 0.7850157 0.7256774 -0.13326158 2.480595e-06
## 1322 11 2.038886e-05 0.6698260 0.6642957 -0.51599949 1.194226e-04
## 504 10 1.402368e-04 0.7290627 0.5743765 -0.47099340 6.529266e-05
## 743 94 3.193283e-66 -0.2443395 -0.7051462 -0.72358217 4.227299e-05
## 1277 24 1.274233e-09 -0.7433199 -0.7045621 -0.08165161 2.857833e-10
## 1305 10 3.139948e-04 0.7035422 0.6166487 -0.40281690 1.166786e-04
## 967 11 2.542348e-04 0.7434466 0.6445144 -0.20197388 1.951771e-05
## 1309 11 9.652355e-06 0.4681329 0.5392096 -0.68189901 7.175299e-03
## 588 110 6.806914e-80 -0.1431484 -0.6756506 -0.70445549 9.492889e-03
## 434 111 4.937354e-78 -0.1471920 -0.6708793 -0.68818748 7.382262e-03
## 1007 100 2.150994e-62 -0.2216832 -0.6687223 -0.66560594 1.275668e-04
## p.pod_crp p.t0_crp s.dist SD p.adjustMANOVA
## 512 1.986341e-06 7.432915e-01 1.2515292 0.5450276 4.326558e-05
## 685 1.986341e-06 7.432915e-01 1.2515292 0.5450276 4.326558e-05
## 807 7.948652e-36 1.127988e-39 1.1528729 0.2945825 1.242020e-72
## 350 2.345639e-37 1.199836e-41 1.1466455 0.2977629 1.909851e-76
## 1314 1.064541e-33 2.152672e-37 1.1146731 0.2915584 1.028117e-68
## 1060 2.205517e-35 3.664096e-36 1.1043432 0.2617374 8.267706e-68
## 392 7.088903e-38 1.808414e-40 1.0895352 0.3168032 3.138691e-80
## 352 5.446050e-34 9.761602e-37 1.0895103 0.2789185 4.086979e-68
## 1329 1.339490e-05 4.241140e-01 1.0773189 0.5138953 1.413892e-04
## 1330 1.339490e-05 4.241140e-01 1.0773189 0.5138953 1.413892e-04
## 1322 1.358892e-04 3.041029e-03 1.0752726 0.6830458 1.002514e-04
## 504 1.658407e-03 9.903414e-03 1.0408053 0.6527967 5.472852e-04
## 743 2.603588e-32 6.137300e-34 1.0394730 0.2715254 2.558383e-64
## 1277 2.270074e-09 4.886791e-01 1.0274236 0.3713320 1.606950e-08
## 1305 7.328448e-04 2.739804e-02 1.0185719 0.6152088 1.099385e-03
## 967 2.140099e-04 2.460881e-01 1.0044427 0.5196393 9.075512e-04
## 1309 1.955607e-03 8.977778e-05 0.9873610 0.6854113 5.387913e-05
## 588 1.430899e-34 1.918844e-37 0.9865357 0.3160839 1.854203e-77
## 434 2.117018e-34 4.079941e-36 0.9722894 0.3074692 9.606681e-76
## 1007 5.652620e-31 1.060551e-30 0.9692081 0.2572033 1.627585e-60
unlink("3d_mitche.html")
capture.output(
mitch_report(res, "3d_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pkgload_1.2.0 GGally_2.1.1
## [3] beeswarm_0.3.1 gtools_3.8.2
## [5] echarts4r_0.4.0 topconfects_1.6.0
## [7] limma_3.46.0 eulerr_6.1.0
## [9] mitch_1.2.2 MASS_7.3-53.1
## [11] fgsea_1.16.0 gplots_3.1.1
## [13] DESeq2_1.30.0 SummarizedExperiment_1.20.0
## [15] Biobase_2.50.0 MatrixGenerics_1.2.0
## [17] matrixStats_0.58.0 GenomicRanges_1.42.0
## [19] GenomeInfoDb_1.26.0 IRanges_2.24.0
## [21] S4Vectors_0.28.0 BiocGenerics_0.36.0
## [23] reshape2_1.4.4 forcats_0.5.1
## [25] stringr_1.4.0 dplyr_1.0.5
## [27] purrr_0.3.4 readr_1.4.0
## [29] tidyr_1.1.3 tibble_3.1.0
## [31] ggplot2_3.3.3 tidyverse_1.3.0
## [33] zoo_1.8-9
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-0 ellipsis_0.3.1 rprojroot_2.0.2
## [4] XVector_0.30.0 fs_1.5.0 rstudioapi_0.13
## [7] farver_2.1.0 bit64_4.0.5 AnnotationDbi_1.52.0
## [10] fansi_0.4.2 lubridate_1.7.10 xml2_1.3.2
## [13] splines_4.0.3 cachem_1.0.4 geneplotter_1.68.0
## [16] knitr_1.31 polyclip_1.10-0 jsonlite_1.7.2
## [19] broom_0.7.5 annotate_1.68.0 dbplyr_2.1.0
## [22] shiny_1.6.0 compiler_4.0.3 httr_1.4.2
## [25] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-2
## [28] fastmap_1.1.0 cli_2.3.1 later_1.1.0.1
## [31] htmltools_0.5.1.1 tools_4.0.3 gtable_0.3.0
## [34] glue_1.4.2 GenomeInfoDbData_1.2.4 fastmatch_1.1-0
## [37] Rcpp_1.0.6 cellranger_1.1.0 jquerylib_0.1.3
## [40] vctrs_0.3.6 polylabelr_0.2.0 xfun_0.22
## [43] ps_1.6.0 testthat_3.0.2 rvest_1.0.0
## [46] mime_0.10 lifecycle_1.0.0 XML_3.99-0.6
## [49] zlibbioc_1.36.0 scales_1.1.1 promises_1.2.0.1
## [52] hms_1.0.0 RColorBrewer_1.1-2 yaml_2.2.1
## [55] memoise_2.0.0 gridExtra_2.3 sass_0.3.1
## [58] reshape_0.8.8 stringi_1.5.3 RSQLite_2.2.4
## [61] highr_0.8 genefilter_1.72.0 desc_1.3.0
## [64] caTools_1.18.1 BiocParallel_1.24.1 rlang_0.4.10
## [67] pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14
## [70] lattice_0.20-41 labeling_0.4.2 htmlwidgets_1.5.3
## [73] bit_4.0.4 tidyselect_1.1.0 plyr_1.8.6
## [76] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
## [79] DelayedArray_0.16.0 DBI_1.1.1 pillar_1.5.1
## [82] haven_2.3.1 withr_2.4.1 survival_3.2-10
## [85] RCurl_1.98-1.3 modelr_0.1.8 crayon_1.4.1
## [88] KernSmooth_2.23-18 utf8_1.2.1 rmarkdown_2.7
## [91] locfit_1.5-9.4 grid_4.0.3 readxl_1.3.1
## [94] data.table_1.14.0 blob_1.2.1 reprex_1.0.0
## [97] digest_0.6.27 xtable_1.8-4 httpuv_1.5.5
## [100] munsell_0.5.0 bslib_0.2.4
save.image("rnaseq_analysis_full.Rdata")