suppressPackageStartupMessages({
library("zoo")
library("tidyverse")
library("reshape2")
library("DESeq2")
library("gplots")
library("fgsea")
library("MASS")
library("mitch")
library("eulerr")
library("limma")
library("topconfects")
library("gplots")
})
tmp <- read.table("3col.tsv",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
Here we’re manipulating the sample sheet. Making sure that the samplesheet matches the names of the data sets and vice versa. Ensuring that each patient has a baseline and postop sample.
samplesheet<-read.csv("samplesheet.tsv",sep="\t",stringsAsFactors=FALSE)
mysamples <- colnames(xx)
baseline <- mysamples[grep("T0",mysamples)]
postop <- mysamples[grep("POD",mysamples)]
baseline <- sapply(strsplit(baseline,"-"),"[[",1)
postop <- sapply(strsplit(postop,"-"),"[[",1)
mysamples <- intersect(baseline,postop)
samplesheet2 <- samplesheet
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
samplesheet3 <- samplesheet2
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
rownames(samplesheet3) <- paste(samplesheet2$RG_number,"POD",sep="-")
samplesheet2$timepoint <- 0
samplesheet3$timepoint <- 1
samplesheet <- rbind(samplesheet2,samplesheet3)
# filter the data matrix
xx <- xx[,which(colnames(xx) %in% rownames(samplesheet))]
samplesheet <- samplesheet[which(rownames(samplesheet) %in% colnames(xx)),]
samplesheet <- samplesheet[order(rownames(samplesheet)),]
Here the idea is to use the concept of Monaco et al, 2019 to deconvolute blood RNA-seq signature.
DOI:https://doi.org/10.1016/j.celrep.2019.01.041
xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps
dec <- t(dec/colSums(dec)*100)
dec <- signif(dec, 3)
cs <- colSums(dec)
order(-cs)
## [1] 1 4 8 13 2 6 3 14 10 12 5 16 17 9 15 11 7
cv <- apply(dec,2,function(x) {sd(x)/mean(x)} )
cv[order(-cv)]
## T.CD8.Naive T.CD4.Naive pDCs Monocytes.C B.Naive
## 12.309866 6.935020 6.768844 5.828486 5.773624
## MAIT Monocytes.NC.I NK Basophils.LD B.Memory
## 5.611708 5.598135 5.555667 5.480702 5.163804
## T.CD8.Memory Neutrophils.LD T.CD4.Memory T.gd.non.Vd2 mDCs
## 3.524342 2.972512 -5.555388 -6.493188 -7.442438
## T.gd.Vd2 Plasmablasts
## -7.869309 -18.095614
plot(cs,cv,main="cell abundance versus SD")
text(cs,cv+0.5,label=names(cs))
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( dec, col=colfunc(25),scale="row",
trace="none",margins = c(10,15), cexRow=.7, cexCol=.8, main="cell type abundances")
samplesheet <- cbind(samplesheet,dec)
dec_t0_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==0 )] ),])
dec_po_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==1 )] ),])
dec_t0_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==0 )] ),])
dec_po_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==1 )] ),])
boxplot(dec_t0_lo$Monocytes.C , dec_t0_hi$Monocytes.C, dec_po_lo$Monocytes.C, dec_po_hi$Monocytes.C ,
ylim=c(0,100),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.C")
t.test(dec_t0_hi$Monocytes.C , dec_po_hi$Monocytes.C)
##
## Welch Two Sample t-test
##
## data: dec_t0_hi$Monocytes.C and dec_po_hi$Monocytes.C
## t = -2.7175, df = 25.773, p-value = 0.0116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -55.076844 -7.627109
## sample estimates:
## mean of x mean of y
## 44.13850 75.49048
boxplot(dec_t0_lo$NK , dec_t0_hi$NK, dec_po_lo$NK, dec_po_hi$NK ,
ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="NK")
t.test(dec_t0_lo$NK , dec_po_lo$NK)
##
## Welch Two Sample t-test
##
## data: dec_t0_lo$NK and dec_po_lo$NK
## t = 2.4688, df = 47.994, p-value = 0.01717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4820947 4.7136093
## sample estimates:
## mean of x mean of y
## 6.707104 4.109252
boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory, dec_po_hi$T.CD8.Memory ,
ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")
boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory, dec_po_hi$T.CD8.Memory ,
ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")
boxplot(dec_t0_lo$T.CD4.Naive , dec_t0_hi$T.CD4.Naive, dec_po_lo$T.CD4.Naive, dec_po_hi$T.CD4.Naive ,
ylim=c(0,25),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Naive")
boxplot(dec_t0_lo$T.CD8.Naive , dec_t0_hi$T.CD8.Naive, dec_po_lo$T.CD8.Naive, dec_po_hi$T.CD8.Naive ,
ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Naive")
boxplot(dec_t0_lo$B.Naive , dec_t0_hi$B.Naive, dec_po_lo$B.Naive, dec_po_hi$B.Naive ,
ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Naive")
boxplot(dec_t0_lo$T.CD4.Memory , dec_t0_hi$T.CD4.Memory, dec_po_lo$T.CD4.Memory, dec_po_hi$T.CD4.Memory ,
ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Memory")
boxplot(dec_t0_lo$MAIT , dec_t0_hi$MAIT, dec_po_lo$MAIT, dec_po_hi$MAIT ,
ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="MAIT")
boxplot(dec_t0_lo$T.gd.Vd2 , dec_t0_hi$T.gd.Vd2 , dec_po_lo$T.gd.Vd2 , dec_po_hi$T.gd.Vd2 ,
ylim=c(0,5),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.Vd2")
boxplot(dec_t0_lo$Neutrophils.LD , dec_t0_hi$Neutrophils.LD , dec_po_lo$Neutrophils.LD , dec_po_hi$Neutrophils.LD ,
ylim=c(0,50),names=c("t0 lo","t0 hi","po lo","po hi"),main="Neutrophils.LD")
boxplot(dec_t0_lo$T.gd.non.Vd2 , dec_t0_hi$T.gd.non.Vd2 , dec_po_lo$T.gd.non.Vd2 , dec_po_hi$T.gd.non.Vd2 ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.non.Vd2")
boxplot(dec_t0_lo$Basophils.LD , dec_t0_hi$Basophils.LD , dec_po_lo$Basophils.LD , dec_po_hi$Basophils.LD ,
ylim=c(0,4),names=c("t0 lo","t0 hi","po lo","po hi"),main="Basophils.LD")
boxplot(dec_t0_lo$Monocytes.NC.I , dec_t0_hi$Monocytes.NC.I , dec_po_lo$Monocytes.NC.I , dec_po_hi$Monocytes.NC.I ,
ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.NC.I")
boxplot(dec_t0_lo$B.Memory , dec_t0_hi$B.Memory , dec_po_lo$B.Memory , dec_po_hi$B.Memory ,
ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Memory")
boxplot(dec_t0_lo$mDCs , dec_t0_hi$mDCs , dec_po_lo$mDCs , dec_po_hi$mDCs ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="mDCs")
boxplot(dec_t0_lo$pDCs , dec_t0_hi$pDCs , dec_po_lo$pDCs , dec_po_hi$pDCs ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="pDCs")
boxplot(dec_t0_lo$Plasmablasts , dec_t0_hi$Plasmablasts , dec_po_lo$Plasmablasts , dec_po_hi$Plasmablasts ,
ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="Plasmablasts")
plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='l')
text(cmdscale(dist(t(xx))), labels=colnames(xx),cex=1.3)
heatmap.2(cor(xx),trace="none",margins = c(10,10), main="pearson correlation heatmap")
heatmap.2(cor(xx,method="s"),trace="none",margins = c(10,10), main="spearman correlation heatmap")
# filter genes with fewer than 10 reads per sample
NAME = "t0_v_pod"
# correct for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet,
design = ~ Sex + age +
Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000087116.16 ADAMTS2 634.35845 4.5019885 0.37141368 12.121224
## ENSG00000108950.12 FAM20A 792.00129 2.5116433 0.22711255 11.059025
## ENSG00000132170.21 PPARG 70.86749 2.4378861 0.23401914 10.417465
## ENSG00000121316.11 PLBD1 10689.06659 1.4121371 0.13716326 10.295302
## ENSG00000170439.7 METTL7B 126.30501 3.5819063 0.35557548 10.073547
## ENSG00000163221.9 S100A12 8507.68558 1.9626611 0.20144015 9.743148
## ENSG00000110013.13 SIAE 451.39075 0.8022923 0.08249229 9.725664
## ENSG00000156414.19 TDRD9 595.16883 1.4700444 0.15147776 9.704688
## ENSG00000064601.18 CTSA 7464.21198 0.7151836 0.07508842 9.524554
## ENSG00000163220.11 S100A9 114258.61851 1.3846404 0.14657753 9.446471
## pvalue padj
## ENSG00000087116.16 ADAMTS2 8.153188e-34 1.788809e-29
## ENSG00000108950.12 FAM20A 1.982416e-28 2.174710e-24
## ENSG00000132170.21 PPARG 2.063815e-25 1.509337e-21
## ENSG00000121316.11 PLBD1 7.398729e-25 4.058203e-21
## ENSG00000170439.7 METTL7B 7.232195e-24 3.173487e-20
## ENSG00000163221.9 S100A12 1.973452e-22 7.216257e-19
## ENSG00000110013.13 SIAE 2.343714e-22 7.345869e-19
## ENSG00000156414.19 TDRD9 2.879560e-22 7.897192e-19
## ENSG00000064601.18 CTSA 1.657529e-21 4.040686e-18
## ENSG00000163220.11 S100A9 3.504468e-21 7.688802e-18
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3,
xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(samplesheet$timepoint)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
#t0_v_pod <- dge
# filter genes with fewer than 10 reads per sample
NAME = "pod_crp"
ss <- subset(samplesheet,timepoint==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
CrpGroup )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000135424.17 ITGA7 387.17081 2.2645794 0.3013376 7.515092
## ENSG00000108950.12 FAM20A 1327.98353 2.0851707 0.2987978 6.978534
## ENSG00000143365.19 RORC 146.55223 -1.3879091 0.2165874 -6.408079
## ENSG00000251158.1 AC131392.2 154.40395 2.2065217 0.3607701 6.116144
## ENSG00000281852.1 LINC00891 116.21318 -1.0464139 0.1722441 -6.075179
## ENSG00000067836.12 ROGDI 2043.57403 0.8532526 0.1446784 5.897582
## ENSG00000131042.14 LILRB2 11720.09783 0.7387328 0.1252981 5.895802
## ENSG00000018280.17 SLC11A1 17883.60054 1.1110849 0.1964892 5.654687
## ENSG00000110079.18 MS4A4A 673.14339 1.5599668 0.2780122 5.611144
## ENSG00000156453.13 PCDH1 85.29966 -1.4272987 0.2648852 -5.388367
## pvalue padj
## ENSG00000135424.17 ITGA7 5.687096e-14 1.184281e-09
## ENSG00000108950.12 FAM20A 2.982763e-12 3.105652e-08
## ENSG00000143365.19 RORC 1.473646e-10 1.022907e-06
## ENSG00000251158.1 AC131392.2 9.586692e-10 4.990832e-06
## ENSG00000281852.1 LINC00891 1.238495e-09 5.158082e-06
## ENSG00000067836.12 ROGDI 3.688660e-09 1.109224e-05
## ENSG00000131042.14 LILRB2 3.728664e-09 1.109224e-05
## ENSG00000018280.17 SLC11A1 1.561302e-08 4.064068e-05
## ENSG00000110079.18 MS4A4A 2.009929e-08 4.650529e-05
## ENSG00000156453.13 PCDH1 7.110086e-08 1.357363e-04
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
pod_crp <- dge
write.table(dge,file="pod_crp_rna.tsv",sep="\t",quote=FALSE)
Run a ROC based on PCA distance classification.
# library size normalised
yy <- log2( (y/colSums(y)*1000000) + 0.001 )
yy <- yy[,which(rowMeans(cor(yy))>0.75)]
# crp group
crpgrp <- ss$CrpGroup
names(crpgrp) <- rownames(ss)
crpgrp <- crpgrp[which(names(crpgrp) %in% colnames(yy))]
# confects
confects <- deseq2_confects(res)
# Limma p-values
ROCp <- function(ngenes){
pnames <- rownames(dge)[1:ngenes]
topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
mds <- plotMDS(topgenes,ndim=2)
mds <- mds$cmdscale.out
hypotenuse <- function(x) {
sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
}
ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[(nrow(mds)-1),1] )^2
b2=(x[1] - mds[(nrow(mds)-1),2] )^2
sqrt(a2+b2)
})
case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[nrow(mds),1] )^2
b2=(x[1] - mds[nrow(mds),2] )^2
sqrt(a2+b2)
})
df <- data.frame(ctrl_dist,case_dist)
df$classification_case <- unname(unlist(apply(df,1,function(x) {
if (x[2]<x[1]) {TRUE} else {FALSE}
}))) *1
df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
df$conf <- unname(apply(df,1,function(x) {
if (x[4]>x[5]) {x[4]} else {x[5]}
}))
df$crpgrp <- crpgrp
df <- df[order(-df$conf),]
df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
tpr <- cumsum(df$correct) / nrow(df)
tpr <- c(tpr,1)
fpr <- cumsum((df$correct -1)*-1) / nrow(df)
fpr <- c(fpr,1)
# AUC
id <- order(fpr)
AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
HEADER=paste("AUC =",AUC)
plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
mtext(HEADER)
AUC
}
# topconfect genes
ROCc <- function(ngenes){
pnames <- confects$table$name[1:ngenes]
#pnames <- rownames(dge)[1:ngenes]
topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
mds <- plotMDS(topgenes,ndim=2)
mds <- mds$cmdscale.out
hypotenuse <- function(x) {
sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
}
ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[(nrow(mds)-1),1] )^2
b2=(x[1] - mds[(nrow(mds)-1),2] )^2
sqrt(a2+b2)
})
case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
a2=(x[1] - mds[nrow(mds),1] )^2
b2=(x[1] - mds[nrow(mds),2] )^2
sqrt(a2+b2)
})
df <- data.frame(ctrl_dist,case_dist)
df$classification_case <- unname(unlist(apply(df,1,function(x) {
if (x[2]<x[1]) {TRUE} else {FALSE}
}))) *1
df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
df$conf <- unname(apply(df,1,function(x) {
if (x[4]>x[5]) {x[4]} else {x[5]}
}))
df$crpgrp <- crpgrp
df <- df[order(-df$conf),]
df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
tpr <- cumsum(df$correct) / nrow(df)
tpr <- c(tpr,1)
fpr <- cumsum((df$correct -1)*-1) / nrow(df)
fpr <- c(fpr,1)
# AUC
id <- order(fpr)
AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
HEADER=paste("AUC =",AUC)
plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
mtext(HEADER)
AUC
}
ng <- c(2,3,5,7,10,13,15,17,20,30,40,60,80,100,150,200,300,500)
p_roc <- lapply(ng,ROCp)
p_roc <- unlist(p_roc)
c_roc <- lapply(ng,ROCc)
c_roc <- unlist(c_roc)
Now need to summarise what is the optimal number of genes in the panel.
plot(ng,p_roc,type="b",ylim=c(0.7,1),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()
# zoom in
plot(ng,p_roc,type="b",xlim=c(0,100),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()
From this we can see that p-value genes show better discriminitory power compared to topconfect genes at least when the number of gene$ In the range of ~150 genes shows the best result for topconfect genes with AUC of ~0.95 I also ran the p_roc analysis with more dimensions but the result was the same:
2d:0.951 (15g)
3d:0.945 (15g)
4d:0.953 (15g)
5d:0.953 (15g)
Now let’s have a look at the heatmap of the top 15 genes with the best discriminitory power.
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:15,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(10,15), cexRow=1, cexCol=.7, main="Top 15 genes")
# filter genes with fewer than 10 reads per sample
NAME = "t0_crp"
ss <- subset(samplesheet,timepoint==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
CrpGroup )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000227827.3 AC138969.3 310.45967 -3.2636406 0.76288080 -4.278048
## ENSG00000156453.13 PCDH1 185.05744 -1.2022277 0.28166914 -4.268226
## ENSG00000157404.15 KIT 47.13602 -0.9528291 0.23238345 -4.100245
## ENSG00000169100.14 SLC25A6 7806.94126 -0.3764844 0.09323009 -4.038229
## ENSG00000169100.14_PAR_Y SLC25A6 7806.94126 -0.3764844 0.09323009 -4.038229
## ENSG00000231125.2 AF129075.2 19.98241 0.7527834 0.18663459 4.033462
## ENSG00000066382.16 MPPED2 20.74194 -1.2574132 0.31314927 -4.015380
## ENSG00000125740.14 FOSB 2088.89783 -2.1258699 0.53411514 -3.980172
## ENSG00000153234.14 NR4A2 247.42024 -1.0109749 0.25453655 -3.971826
## ENSG00000197467.15 COL13A1 55.61892 -1.5021959 0.38258304 -3.926457
## pvalue padj
## ENSG00000227827.3 AC138969.3 1.885395e-05 0.1755993
## ENSG00000156453.13 PCDH1 1.970332e-05 0.1755993
## ENSG00000157404.15 KIT 4.127124e-05 0.1755993
## ENSG00000169100.14 SLC25A6 5.385631e-05 0.1755993
## ENSG00000169100.14_PAR_Y SLC25A6 5.385631e-05 0.1755993
## ENSG00000231125.2 AF129075.2 5.496116e-05 0.1755993
## ENSG00000066382.16 MPPED2 5.935018e-05 0.1755993
## ENSG00000125740.14 FOSB 6.886554e-05 0.1755993
## ENSG00000153234.14 NR4A2 7.132383e-05 0.1755993
## ENSG00000197467.15 COL13A1 8.620630e-05 0.1774872
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
t0_crp <- dge
write.table(dge,file="t0_crp_rna.tsv",sep="\t",quote=FALSE)
NAME = "t0_vs_pod_lowCRP"
ss <- subset(samplesheet,CrpGroup==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000100027.16 YPEL1 587.582974 -0.8771291 0.1220744 -7.185201
## ENSG00000163221.9 S100A12 3581.305710 1.4029544 0.2038427 6.882534
## ENSG00000265531.3 FCGR1CP 114.277543 1.5657930 0.2290683 6.835485
## ENSG00000180739.14 S1PR5 2194.335665 -0.9534996 0.1453946 -6.558013
## ENSG00000150337.13 FCGR1A 1030.416486 1.2367991 0.1936081 6.388158
## ENSG00000165490.13 DDIAS 157.706355 0.7442192 0.1201683 6.193141
## ENSG00000189430.13 NCR1 325.815524 -1.0295640 0.1673096 -6.153647
## ENSG00000203747.11 FCGR3A 8210.777875 -0.7230627 0.1177461 -6.140865
## ENSG00000158292.7 GPR153 160.263063 -0.9855601 0.1618960 -6.087612
## ENSG00000156564.9 LRFN2 9.801471 -1.8690801 0.3080525 -6.067407
## pvalue padj
## ENSG00000100027.16 YPEL1 6.710815e-13 1.150569e-08
## ENSG00000163221.9 S100A12 5.879716e-12 4.670768e-08
## ENSG00000265531.3 FCGR1CP 8.172823e-12 4.670768e-08
## ENSG00000180739.14 S1PR5 5.452950e-11 2.337271e-07
## ENSG00000150337.13 FCGR1A 1.678961e-10 5.757158e-07
## ENSG00000165490.13 DDIAS 5.897683e-10 1.685263e-06
## ENSG00000189430.13 NCR1 7.572112e-10 1.758929e-06
## ENSG00000203747.11 FCGR3A 8.207311e-10 1.758929e-06
## ENSG00000158292.7 GPR153 1.146069e-09 2.183262e-06
## ENSG00000156564.9 LRFN2 1.299919e-09 NA
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
t0_vs_pod_lowCRP <- dge
write.table(dge,file="t0_vs_pod_lowCRP_rna.tsv",sep="\t",quote=FALSE)
t0_vs_pod_lowCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_lowCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )
NAME = "t0_vs_pod_highCRP"
ss <- subset(samplesheet,CrpGroup==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## it is generally a good idea to center and scale numeric variables in the design
## to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1489.80876 3.472027 0.2099241 16.539443
## ENSG00000136160.17 EDNRB 63.34773 3.893434 0.3436883 11.328387
## ENSG00000170439.7 METTL7B 272.04841 5.060484 0.4687387 10.795960
## ENSG00000137869.15 CYP19A1 58.71920 5.801243 0.5450131 10.644226
## ENSG00000156414.19 TDRD9 930.02624 2.151162 0.2116347 10.164502
## ENSG00000161944.16 ASGR2 2830.13257 1.946087 0.1941712 10.022533
## ENSG00000121316.11 PLBD1 16960.02791 2.004487 0.2029243 9.878005
## ENSG00000112290.13 WASF1 153.31662 1.731543 0.1763302 9.819892
## ENSG00000138061.12 CYP1B1 7208.87837 1.849424 0.1943215 9.517344
## ENSG00000135838.13 NPL 2502.04493 1.054939 0.1143205 9.227907
## pvalue padj
## ENSG00000108950.12 FAM20A 1.907787e-61 4.211248e-57
## ENSG00000136160.17 EDNRB 9.493731e-30 1.047823e-25
## ENSG00000170439.7 METTL7B 3.596832e-27 2.646549e-23
## ENSG00000137869.15 CYP19A1 1.855191e-26 1.023787e-22
## ENSG00000156414.19 TDRD9 2.855790e-24 1.260774e-20
## ENSG00000161944.16 ASGR2 1.213526e-23 4.464563e-20
## ENSG00000121316.11 PLBD1 5.185516e-23 1.635215e-19
## ENSG00000112290.13 WASF1 9.244293e-23 2.550732e-19
## ENSG00000138061.12 CYP1B1 1.776612e-21 4.357436e-18
## ENSG00000135838.13 NPL 2.759713e-20 6.091790e-17
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)
# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
ColSideColors=colCols ,
trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")
t0_vs_pod_highCRP <- dge
write.table(dge,file="t0_vs_pod_highCRP_rna.tsv",sep="\t",quote=FALSE)
t0_vs_pod_highCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_highCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )
DE4 and DE5 will be compared.
v1 <- list("low CRP up"=t0_vs_pod_lowCRP_up,
"low CRP dn"=t0_vs_pod_lowCRP_dn,
"high CRP up"=t0_vs_pod_highCRP_up,
"high CRP dn"=t0_vs_pod_highCRP_dn)
plot(euler(v1),quantities = TRUE, main="Gene overlap")
Set up mitch data.
#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")
# set up the gene table
gt <- as.data.frame(sapply(strsplit(rownames(xx)," "),"[[",2))
gt$id <- rownames(xx)
colnames(gt) <- c("gene name","id")
Now mitch analysis with DE4 and DE5
l <- list("low CRP"=t0_vs_pod_lowCRP, "high CRP"=t0_vs_pod_highCRP)
y <- mitch_import( l , DEtype="DESeq2", geneTable=gt)
## Note: Mean no. genes in input = 21933.5
## Note: no. genes in output = 21404
## Note: estimated proportion of input genes in output = 0.976
# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="significance",resrows=50)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(res$enrichment_result,20)
## set setSize pMANOVA
## 736 Neutrophil degranulation 456 5.754140e-107
## 536 Innate Immune System 963 8.818497e-86
## 519 Immune System 1885 3.546889e-42
## 631 Membrane Trafficking 555 5.190870e-40
## 634 Metabolism 1758 1.930516e-34
## 649 Metabolism of proteins 1706 2.395980e-31
## 1057 Scavenging of heme from plasma 70 3.831245e-27
## 1312 Vesicle-mediated transport 644 5.082246e-27
## 189 Classical antibody-mediated complement activation 68 3.039237e-23
## 1070 Signal Transduction 1869 7.702513e-23
## 362 FCGR activation 75 8.303094e-23
## 217 Creation of C4 and C2 activators 70 8.437836e-23
## 533 Initial triggering of complement 77 1.261178e-22
## 841 Post-translational protein modification 1183 1.179254e-21
## 1103 Signaling by Interleukins 385 3.304270e-21
## 122 CD22 mediated BCR regulation 58 2.233107e-20
## 1268 Translation 294 4.716228e-20
## 950 Regulation of Complement cascade 93 1.300963e-19
## 1355 rRNA processing in the nucleus and cytosol 189 1.337292e-19
## 1017 Role of LAT2/NTAL/LAB on calcium mobilization 73 1.981698e-19
## s.low.CRP s.high.CRP p.low.CRP p.high.CRP s.dist SD
## 736 0.53553808 0.53896888 3.007777e-86 2.421775e-87 0.7597950 0.002425943
## 536 0.35705939 0.30554523 4.153588e-79 2.684591e-58 0.4699461 0.036426015
## 519 0.18203861 0.15768821 3.379750e-39 8.288395e-30 0.2408394 0.017218334
## 631 0.19703851 0.33329704 2.013692e-15 3.074616e-41 0.3871835 0.096349329
## 634 0.17031688 0.14537224 1.719468e-32 4.238467e-24 0.2239217 0.017638530
## 649 0.16437577 0.14086091 1.353674e-29 3.765622e-22 0.2164745 0.016627519
## 1057 0.19236765 -0.47558291 5.382575e-03 5.820328e-12 0.5130150 0.472312367
## 1312 0.19985654 0.24495282 4.779854e-18 2.486368e-26 0.3161400 0.031887885
## 189 0.09857683 -0.50743427 1.598219e-01 4.483618e-13 0.5169206 0.428514562
## 1070 0.06840218 0.13965007 9.865631e-07 1.507210e-23 0.1555024 0.050379873
## 362 0.07991061 -0.48771844 2.314986e-01 2.730166e-13 0.4942216 0.401374350
## 217 0.11159383 -0.48415675 1.064284e-01 2.413596e-12 0.4968510 0.421259274
## 533 0.10865015 -0.45794094 9.928653e-02 3.624877e-12 0.4706535 0.400640406
## 841 0.12609043 0.16601946 2.767794e-13 6.394407e-22 0.2084736 0.028234087
## 1103 0.27385711 0.23533958 2.664372e-20 2.170608e-15 0.3610851 0.027236008
## 122 0.06903821 -0.53446660 3.631403e-01 1.865165e-12 0.5389071 0.426742348
## 1268 0.24093201 -0.02428742 1.162237e-12 4.738116e-01 0.2421531 0.187538462
## 950 0.17364398 -0.32220576 3.800867e-03 7.800262e-08 0.3660175 0.350618712
## 1355 0.06610302 -0.27098551 1.171105e-01 1.303053e-10 0.2789315 0.238357591
## 1017 0.14956623 -0.39926649 2.713312e-02 3.622380e-09 0.4263611 0.388083336
## p.adjustMANOVA
## 736 7.842893e-104
## 536 6.009806e-83
## 519 1.611470e-39
## 631 1.768789e-37
## 634 5.262588e-32
## 649 5.442868e-29
## 1057 7.459980e-25
## 1312 8.658876e-25
## 189 4.602755e-21
## 1070 9.583976e-21
## 362 9.583976e-21
## 217 9.583976e-21
## 533 1.322297e-20
## 841 1.148088e-19
## 1103 3.002480e-19
## 122 1.902328e-18
## 1268 3.781305e-18
## 950 9.593314e-18
## 1355 9.593314e-18
## 1017 1.303160e-17
lo_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP>0),1]
lo_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP<0),1]
hi_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP>0),1]
hi_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP<0),1]
v1 <- list("low CRP up"=lo_up,
"low CRP dn"=lo_dn,
"high CRP up"=hi_up,
"high CRP dn"=hi_dn)
plot(euler(v1),quantities = TRUE, main="Gene set overlap")
unlink("t0_v_pod_lohi_mitch_sig.html")
capture.output(
mitch_report(res, "t0_v_pod_lohi_mitch_sig.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_sig.rds ".
##
##
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
##
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="effect",resrows=50)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res$enrichment_result,20)
## set setSize
## 513 IRAK4 deficiency (TLR2/4) 10
## 686 MyD88 deficiency (TLR2/4) 10
## 968 Regulation of TLR by endogenous ligand 11
## 1330 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism 12
## 1331 alpha-linolenic acid (ALA) metabolism 12
## 1278 Translocation of ZAP-70 to Immunological synapse 24
## 345 Erythrocytes take up carbon dioxide and release oxygen 11
## 765 O2/CO2 exchange in erythrocytes 11
## 505 Hyaluronan uptake and degradation 12
## 1306 Uptake and function of anthrax toxins 10
## 1323 WNT5A-dependent internalization of FZD2, FZD5 and ROR2 11
## 61 Advanced glycosylation endproduct receptor signaling 12
## 1342 p130Cas linkage to MAPK signaling for integrins 11
## 278 Detoxification of Reactive Oxygen Species 31
## 925 ROS and RNS production in phagocytes 31
## 1304 Unwinding of DNA 12
## 540 Insulin receptor recycling 21
## 779 PD-1 signaling 28
## 736 Neutrophil degranulation 456
## 821 Phosphorylation of CD3 and TCR zeta chains 27
## pMANOVA s.low.CRP s.high.CRP p.low.CRP p.high.CRP s.dist
## 513 1.447591e-06 0.8128447 0.8750210 8.512796e-06 1.646553e-06 1.1943108
## 686 1.447591e-06 0.8128447 0.8750210 8.512796e-06 1.646553e-06 1.1943108
## 968 4.259879e-06 0.7977673 0.7464506 4.594433e-06 1.806135e-05 1.0925297
## 1330 2.219901e-06 0.7127742 0.7979852 1.901873e-05 1.688120e-06 1.0699661
## 1331 2.219901e-06 0.7127742 0.7979852 1.901873e-05 1.688120e-06 1.0699661
## 1278 2.610194e-11 -0.5498792 -0.8193756 3.098568e-06 3.595030e-12 0.9867844
## 345 1.135891e-04 0.6699430 0.6562087 1.190951e-04 1.638579e-04 0.9377812
## 765 1.135891e-04 0.6699430 0.6562087 1.190951e-04 1.638579e-04 0.9377812
## 505 6.178042e-05 0.5365869 0.7221002 1.287191e-03 1.477053e-05 0.8996411
## 1306 5.787056e-04 0.5175657 0.6932037 4.593165e-03 1.468342e-04 0.8651044
## 1323 1.165879e-04 0.4032203 0.7393880 2.057255e-02 2.166367e-05 0.8421883
## 61 2.064121e-04 0.5009895 0.6759770 2.653752e-03 5.007707e-05 0.8413889
## 1342 8.769271e-04 0.5595246 0.6047475 1.310831e-03 5.139015e-04 0.8238855
## 278 4.787371e-09 0.6053900 0.5333757 5.349439e-09 2.729172e-07 0.8068374
## 925 4.569586e-09 0.6097911 0.5276585 4.142987e-09 3.654867e-07 0.8063924
## 1304 4.921115e-04 -0.4919830 -0.6353855 3.165437e-03 1.380320e-04 0.8035932
## 540 3.683764e-06 0.5678365 0.5589153 6.616819e-06 9.214259e-06 0.7967588
## 779 2.287924e-09 -0.3716752 -0.6867381 6.628712e-04 3.124280e-10 0.7808660
## 736 5.754140e-107 0.5355381 0.5389689 3.007777e-86 2.421775e-87 0.7597950
## 821 2.341986e-07 -0.4628460 -0.6003250 3.132882e-05 6.625526e-08 0.7580347
## SD p.adjustMANOVA
## 513 0.043965291 9.177053e-06
## 686 0.043965291 9.177053e-06
## 968 0.036286387 2.481665e-05
## 1330 0.060253263 1.371851e-05
## 1331 0.060253263 1.371851e-05
## 1278 0.190562742 4.867471e-10
## 345 0.009711627 4.964991e-04
## 765 0.009711627 4.964991e-04
## 505 0.131177695 2.864174e-04
## 1306 0.124194842 2.149253e-03
## 1323 0.237706485 5.012913e-04
## 61 0.123734872 8.373206e-04
## 1342 0.031977454 3.112635e-03
## 278 0.050921789 4.595202e-08
## 925 0.058076495 4.448819e-08
## 1304 0.101400893 1.852895e-03
## 540 0.006308237 2.164212e-05
## 779 0.222783152 2.380489e-08
## 736 0.002425943 7.842893e-104
## 821 0.097212343 1.725474e-06
unlink("t0_v_pod_lohi_mitch_eff.html")
capture.output(
mitch_report(res, "t0_v_pod_lohi_mitch_eff.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_eff.rds ".
##
##
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
##
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
# prioritisation by discordance
res <- mitch_calc(y, genesets, priority="SD",resrows=50)
## Note: Prioritisation by SD after selecting sets with
## p.adjustMANOVA<=0.05.
head(res$enrichment_result,20)
## set setSize pMANOVA
## 1057 Scavenging of heme from plasma 70 3.831245e-27
## 189 Classical antibody-mediated complement activation 68 3.039237e-23
## 122 CD22 mediated BCR regulation 58 2.233107e-20
## 217 Creation of C4 and C2 activators 70 8.437836e-23
## 362 FCGR activation 75 8.303094e-23
## 533 Initial triggering of complement 77 1.261178e-22
## 1017 Role of LAT2/NTAL/LAB on calcium mobilization 73 1.981698e-19
## 108 Binding and Uptake of Ligands by Scavenger Receptors 91 2.007803e-19
## 871 Purine ribonucleoside monophosphate biosynthesis 10 9.570688e-03
## 950 Regulation of Complement cascade 93 1.300963e-19
## 476 HDMs demethylate histones 22 4.716467e-05
## 1248 Trafficking of GluR2-containing AMPA receptors 12 4.259959e-04
## 201 Complement cascade 97 2.430425e-19
## 755 Nucleobase biosynthesis 13 3.618006e-03
## 995 Removal of the Flap Intermediate 14 1.541114e-03
## 847 Pre-NOTCH Processing in Golgi 18 2.113565e-04
## 860 Processive synthesis on the lagging strand 15 8.157076e-04
## 1018 Role of phospholipids in phagocytosis 87 3.749672e-17
## 338 Elevation of cytosolic Ca2+ levels 12 1.463138e-02
## 360 FCERI mediated MAPK activation 89 2.278582e-14
## s.low.CRP s.high.CRP p.low.CRP p.high.CRP s.dist SD
## 1057 0.19236765 -0.4755829 0.0053825746 5.820328e-12 0.5130150 0.4723124
## 189 0.09857683 -0.5074343 0.1598218928 4.483618e-13 0.5169206 0.4285146
## 122 0.06903821 -0.5344666 0.3631403287 1.865165e-12 0.5389071 0.4267423
## 217 0.11159383 -0.4841567 0.1064284191 2.413596e-12 0.4968510 0.4212593
## 362 0.07991061 -0.4877184 0.2314986407 2.730166e-13 0.4942216 0.4013744
## 533 0.10865015 -0.4579409 0.0992865261 3.624877e-12 0.4706535 0.4006404
## 1017 0.14956623 -0.3992665 0.0271331171 3.622380e-09 0.4263611 0.3880833
## 108 0.21418749 -0.2887703 0.0004128422 1.915912e-06 0.3595338 0.3556448
## 871 0.27046836 -0.2285501 0.1386008325 2.107603e-01 0.3541020 0.3528593
## 950 0.17364398 -0.3222058 0.0038008666 7.800262e-08 0.3660175 0.3506187
## 476 -0.22790410 0.2647490 0.0642392383 3.157768e-02 0.3493313 0.3483584
## 1248 0.08172058 0.5712727 0.6240215562 6.104116e-04 0.5770882 0.3461657
## 201 0.16930741 -0.3128303 0.0039550164 1.004921e-07 0.3557074 0.3409228
## 755 0.17551235 -0.3020213 0.2732132689 5.935994e-02 0.3493157 0.3376673
## 995 0.06243238 -0.4051493 0.6858797354 8.668296e-03 0.4099314 0.3306302
## 847 -0.05057826 0.4168823 0.7102686008 2.196323e-03 0.4199393 0.3305445
## 860 0.02133495 -0.4377546 0.8862422078 3.328581e-03 0.4382742 0.3246253
## 1018 0.05436436 -0.3974331 0.3807838117 1.451699e-10 0.4011340 0.3194690
## 338 -0.24170251 0.1922448 0.1471256321 2.488711e-01 0.3088335 0.3068471
## 360 0.15245212 -0.2786438 0.0129210518 5.508367e-06 0.3176224 0.3048308
## p.adjustMANOVA
## 1057 7.459980e-25
## 189 4.602755e-21
## 122 1.902328e-18
## 217 9.583976e-21
## 362 9.583976e-21
## 533 1.322297e-20
## 1017 1.303160e-17
## 108 1.303160e-17
## 871 2.598575e-02
## 950 9.593314e-18
## 476 2.262932e-04
## 1248 1.621878e-03
## 201 1.505759e-17
## 755 1.098294e-02
## 995 5.142932e-03
## 847 8.523044e-04
## 860 2.918135e-03
## 1018 1.892890e-15
## 338 3.734565e-02
## 360 7.963352e-13
unlink("t0_v_pod_lohi_mitch_sd.html")
capture.output(
mitch_report(res, "t0_v_pod_lohi_mitch_sd.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_sd.rds ".
##
##
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
##
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
Not correcting for blood compostion changes.
# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set setSize pANOVA
## 737 Neutrophil degranulation 457 1.319240e-103
## 537 Innate Immune System 967 5.848762e-84
## 520 Immune System 1893 3.240735e-46
## 632 Membrane Trafficking 559 5.327421e-36
## 1314 Vesicle-mediated transport 650 1.642242e-31
## 1071 Signal Transduction 1894 7.041601e-28
## 497 Hemostasis 549 9.700746e-23
## 635 Metabolism 1772 1.645387e-22
## 830 Platelet activation, signaling and aggregation 221 1.353176e-20
## 1133 Signaling by Receptor Tyrosine Kinases 414 1.678123e-20
## 1104 Signaling by Interleukins 386 1.710343e-19
## 650 Metabolism of proteins 1718 2.214499e-19
## 842 Post-translational protein modification 1193 6.814624e-19
## 1011 Response to elevated platelet cytosolic Ca2+ 110 1.253521e-16
## 832 Platelet degranulation 106 5.814015e-16
## 1247 Toll-like Receptor Cascades 143 2.293691e-15
## 284 Disease 1355 2.716874e-15
## 86 Asparagine N-linked glycosylation 269 5.316672e-15
## 644 Metabolism of lipids 626 6.696163e-15
## 1290 Transport of small molecules 561 1.571805e-14
## s.dist p.adjustANOVA
## 737 0.5868303 1.802082e-100
## 537 0.3671208 3.994704e-81
## 520 0.1977009 1.475615e-43
## 632 0.3093383 1.819314e-33
## 1314 0.2680762 4.486606e-29
## 1071 0.1517126 1.603138e-25
## 497 0.2446828 1.893031e-20
## 635 0.1395073 2.809498e-20
## 830 0.3628343 2.053820e-18
## 1133 0.2656301 2.292316e-18
## 1104 0.2675063 2.123935e-17
## 650 0.1305055 2.520838e-17
## 842 0.1524833 7.160598e-17
## 1011 0.4565015 1.223079e-14
## 832 0.4546295 5.294630e-14
## 1247 0.3835923 1.958239e-13
## 284 0.1278957 2.183088e-13
## 86 0.2767766 4.034764e-13
## 644 0.1822696 4.814189e-13
## 1290 0.1895711 1.073543e-12
unlink("t0_v_pod_mitch.html")
capture.output(
mitch_report(res, "t0_v_pod_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set
## 1355 rRNA processing in the nucleus and cytosol
## 1353 rRNA processing
## 735 Neutrophil degranulation
## 625 Major pathway of rRNA processing in the nucleolus and cytosol
## 635 Metabolism of RNA
## 1268 Translation
## 392 Formation of a pool of free 40S subunits
## 350 Eukaryotic Translation Elongation
## 807 Peptide chain elongation
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 1060 Selenocysteine synthesis
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 352 Eukaryotic Translation Termination
## 1315 Viral mRNA Translation
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 527 Influenza Viral RNA Transcription and Replication
## 142 Cap-dependent Translation Initiation
## 351 Eukaryotic Translation Initiation
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 443 Gene expression (Transcription)
## setSize pANOVA s.dist p.adjustANOVA
## 1355 190 6.057107e-45 -0.5904904 8.255837e-42
## 1353 217 4.380955e-44 -0.5473800 2.985621e-41
## 735 457 3.041773e-43 0.3755493 1.161753e-40
## 625 180 3.409400e-43 -0.5941666 1.161753e-40
## 635 685 1.565493e-38 -0.2904518 4.267534e-36
## 1268 295 4.363036e-32 -0.3984506 9.911364e-30
## 392 100 5.671059e-32 -0.6799824 1.104236e-29
## 350 93 7.889298e-32 -0.7033290 1.344139e-29
## 807 88 2.456173e-30 -0.7048767 3.719737e-28
## 434 111 8.894827e-30 -0.6218167 1.095512e-27
## 1060 92 9.578986e-30 -0.6823235 1.095512e-27
## 588 110 9.645004e-30 -0.6242326 1.095512e-27
## 352 92 2.593358e-29 -0.6770630 2.719036e-27
## 1315 88 1.665544e-28 -0.6820598 1.621526e-26
## 743 94 3.687034e-28 -0.6557809 3.350285e-26
## 527 135 4.188769e-28 -0.5471626 3.568307e-26
## 142 118 5.136865e-28 -0.5840418 3.889748e-26
## 351 118 5.136865e-28 -0.5840418 3.889748e-26
## 1007 100 9.213024e-28 -0.6311161 6.609133e-26
## 443 1323 4.436643e-27 -0.1763157 3.023572e-25
unlink("pod_crp_mitch.html")
capture.output(
mitch_report(res, "pod_crp_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set
## 353 Eukaryotic Translation Elongation
## 810 Peptide chain elongation
## 395 Formation of a pool of free 40S subunits
## 355 Eukaryotic Translation Termination
## 1063 Selenocysteine synthesis
## 1318 Viral mRNA Translation
## 746 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 591 L13a-mediated translational silencing of Ceruloplasmin expression
## 437 GTP hydrolysis and joining of the 60S ribosomal subunit
## 145 Cap-dependent Translation Initiation
## 354 Eukaryotic Translation Initiation
## 1010 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 745 Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 747 Nonsense-Mediated Decay (NMD)
## 1062 Selenoamino acid metabolism
## 1357 rRNA processing
## 530 Influenza Viral RNA Transcription and Replication
## 1043 SRP-dependent cotranslational protein targeting to membrane
## 1359 rRNA processing in the nucleus and cytosol
## 983 Regulation of expression of SLITs and ROBOs
## setSize pANOVA s.dist p.adjustANOVA
## 353 93 5.984883e-26 -0.6312625 8.181335e-23
## 810 88 6.359892e-25 -0.6350662 4.346986e-22
## 395 100 1.321350e-23 -0.5788531 6.020950e-21
## 355 92 5.460741e-23 -0.5948916 1.866208e-20
## 1063 92 8.975646e-23 -0.5918881 2.373209e-20
## 1318 88 1.041643e-22 -0.6042127 2.373209e-20
## 746 94 1.406713e-20 -0.5544663 2.747110e-18
## 591 110 5.779516e-20 -0.5044096 9.875749e-18
## 437 111 6.917771e-19 -0.4872317 1.050733e-16
## 145 118 1.451766e-17 -0.4542798 1.804150e-15
## 354 118 1.451766e-17 -0.4542798 1.804150e-15
## 1010 100 1.775376e-17 -0.4919270 2.022449e-15
## 745 114 4.576291e-17 -0.4549009 4.468421e-15
## 747 114 4.576291e-17 -0.4549009 4.468421e-15
## 1062 114 6.820977e-15 -0.4219374 6.216184e-13
## 1357 219 8.592078e-15 -0.3040087 7.340857e-13
## 530 135 9.538981e-15 -0.3858047 7.670463e-13
## 1043 111 1.484250e-13 -0.4056829 1.127205e-11
## 1359 190 6.118865e-12 -0.2891464 4.402362e-10
## 983 162 7.623572e-12 -0.3115117 5.210711e-10
unlink("t0_crp_mitch.html")
capture.output(
mitch_report(res, "t0_crp_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## prioritise by effect size
# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set setSize
## 514 IRAK4 deficiency (TLR2/4) 10
## 687 MyD88 deficiency (TLR2/4) 10
## 1333 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism 12
## 1334 alpha-linolenic acid (ALA) metabolism 12
## 969 Regulation of TLR by endogenous ligand 11
## 1280 Translocation of ZAP-70 to Immunological synapse 24
## 506 Hyaluronan uptake and degradation 12
## 1308 Uptake and function of anthrax toxins 10
## 346 Erythrocytes take up carbon dioxide and release oxygen 11
## 766 O2/CO2 exchange in erythrocytes 11
## 1326 WNT5A-dependent internalization of FZD2, FZD5 and ROR2 11
## 1345 p130Cas linkage to MAPK signaling for integrins 11
## 61 Advanced glycosylation endproduct receptor signaling 12
## 1306 Unwinding of DNA 12
## 903 RNA Polymerase I Promoter Opening 19
## 890 RHO GTPases Activate WASPs and WAVEs 35
## 926 ROS and RNS production in phagocytes 31
## 737 Neutrophil degranulation 457
## 435 GRB2:SOS provides linkage to MAPK signaling for Integrins 12
## 780 PD-1 signaling 28
## pANOVA s.dist p.adjustANOVA
## 514 8.367911e-07 0.8994748 1.048676e-05
## 687 8.367911e-07 0.8994748 1.048676e-05
## 1333 2.461377e-06 0.7852782 2.626751e-05
## 1334 2.461377e-06 0.7852782 2.626751e-05
## 969 1.930406e-05 0.7438719 1.448865e-04
## 1280 2.809953e-10 -0.7436252 8.344338e-09
## 506 8.850181e-06 0.7406638 7.239130e-05
## 1308 1.141313e-04 0.7045257 6.523155e-04
## 346 9.462990e-05 0.6796841 5.533223e-04
## 766 9.462990e-05 0.6796841 5.533223e-04
## 1326 1.165844e-04 0.6708490 6.580757e-04
## 1345 1.324011e-04 0.6654100 7.219060e-04
## 61 9.478582e-05 0.6506965 5.533223e-04
## 1306 1.323109e-04 -0.6371242 7.219060e-04
## 903 3.287422e-06 0.6163218 3.277824e-05
## 890 4.567968e-10 0.6085563 1.327626e-08
## 926 8.233487e-09 0.5978890 1.757335e-07
## 737 1.319240e-103 0.5868303 1.802082e-100
## 435 4.971813e-04 0.5804902 2.286699e-03
## 780 1.109028e-07 -0.5793553 1.870286e-06
unlink("t0_v_pod_mitche.html")
capture.output(
mitch_report(res, "t0_v_pod_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set
## 512 IRAK4 deficiency (TLR2/4)
## 685 MyD88 deficiency (TLR2/4)
## 807 Peptide chain elongation
## 350 Eukaryotic Translation Elongation
## 1060 Selenocysteine synthesis
## 1315 Viral mRNA Translation
## 392 Formation of a pool of free 40S subunits
## 352 Eukaryotic Translation Termination
## 1278 Translocation of ZAP-70 to Immunological synapse
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1330 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1331 alpha-linolenic acid (ALA) metabolism
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 967 Regulation of TLR by endogenous ligand
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 1304 Unwinding of DNA
## 398 Formation of the ternary complex, and subsequently, the 43S complex
## 625 Major pathway of rRNA processing in the nucleolus and cytosol
## 1355 rRNA processing in the nucleus and cytosol
## setSize pANOVA s.dist p.adjustANOVA
## 512 10 6.624388e-06 0.8226169 1.142917e-04
## 685 10 6.624388e-06 0.8226169 1.142917e-04
## 807 88 2.456173e-30 -0.7048767 3.719737e-28
## 350 93 7.889298e-32 -0.7033290 1.344139e-29
## 1060 92 9.578986e-30 -0.6823235 1.095512e-27
## 1315 88 1.665544e-28 -0.6820598 1.621526e-26
## 392 100 5.671059e-32 -0.6799824 1.104236e-29
## 352 92 2.593358e-29 -0.6770630 2.719036e-27
## 1278 24 1.862173e-08 -0.6629989 5.902655e-07
## 743 94 3.687034e-28 -0.6557809 3.350285e-26
## 1330 12 1.314716e-04 0.6373869 1.518609e-03
## 1331 12 1.314716e-04 0.6373869 1.518609e-03
## 1007 100 9.213024e-28 -0.6311161 6.609133e-26
## 588 110 9.645004e-30 -0.6242326 1.095512e-27
## 967 11 3.448433e-04 0.6231351 3.133476e-03
## 434 111 8.894827e-30 -0.6218167 1.095512e-27
## 1304 12 2.114502e-04 -0.6175972 2.150795e-03
## 398 51 2.797660e-14 -0.6153683 1.314900e-12
## 625 180 3.409400e-43 -0.5941666 1.161753e-40
## 1355 190 6.057107e-45 -0.5904904 8.255837e-42
unlink("pod_crp_mitche.html")
capture.output(
mitch_report(res, "pod_crp_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set
## 810 Peptide chain elongation
## 353 Eukaryotic Translation Elongation
## 1318 Viral mRNA Translation
## 355 Eukaryotic Translation Termination
## 1063 Selenocysteine synthesis
## 395 Formation of a pool of free 40S subunits
## 746 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 525 Incretin synthesis, secretion, and inactivation
## 1185 Synthesis, secretion, and inactivation of Glucagon-like Peptide-1 (GLP-1)
## 700 NF-kB is activated and signals survival
## 1328 WNT5A-dependent internalization of FZD4
## 591 L13a-mediated translational silencing of Ceruloplasmin expression
## 247 DNA methylation
## 1010 Response of EIF2AK4 (GCN2) to amino acid deficiency
## 1313 VLDLR internalisation and degradation
## 1364 tRNA processing in the mitochondrion
## 437 GTP hydrolysis and joining of the 60S ribosomal subunit
## 9 AKT phosphorylates targets in the nucleus
## 127 CDC6 association with the ORC:origin complex
## 585 Josephin domain DUBs
## setSize pANOVA s.dist p.adjustANOVA
## 810 88 6.359892e-25 -0.6350662 4.346986e-22
## 353 93 5.984883e-26 -0.6312625 8.181335e-23
## 1318 88 1.041643e-22 -0.6042127 2.373209e-20
## 355 92 5.460741e-23 -0.5948916 1.866208e-20
## 1063 92 8.975646e-23 -0.5918881 2.373209e-20
## 395 100 1.321350e-23 -0.5788531 6.020950e-21
## 746 94 1.406713e-20 -0.5544663 2.747110e-18
## 525 10 3.599313e-03 0.5316270 4.057041e-02
## 1185 10 3.599313e-03 0.5316270 4.057041e-02
## 700 12 2.070944e-03 -0.5134241 2.808190e-02
## 1328 13 1.527289e-03 -0.5076380 2.372505e-02
## 591 110 5.779516e-20 -0.5044096 9.875749e-18
## 247 20 1.264460e-04 -0.4950439 2.880862e-03
## 1010 100 1.775376e-17 -0.4919270 2.022449e-15
## 1313 11 5.001247e-03 -0.4887335 5.102339e-02
## 1364 32 1.707306e-06 -0.4886526 5.692407e-05
## 437 111 6.917771e-19 -0.4872317 1.050733e-16
## 9 10 8.214748e-03 -0.4826626 6.805794e-02
## 127 11 5.967394e-03 0.4787435 5.622505e-02
## 585 10 8.887143e-03 -0.4777788 7.102600e-02
unlink("t0_crp_mitche.html")
capture.output(
mitch_report(res, "t0_crp_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# 3D analysys significance
z <- list("t0_v_pod"=t0_v_pod, "pod_crp"=pod_crp, "t0_crp"=t0_crp)
zz <- mitch_import(z, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(zz, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
## set
## 735 Neutrophil degranulation
## 535 Innate Immune System
## 648 Metabolism of proteins
## 392 Formation of a pool of free 40S subunits
## 350 Eukaryotic Translation Elongation
## 518 Immune System
## 1352 rRNA processing
## 588 L13a-mediated translational silencing of Ceruloplasmin expression
## 1354 rRNA processing in the nucleus and cytosol
## 434 GTP hydrolysis and joining of the 60S ribosomal subunit
## 807 Peptide chain elongation
## 1267 Translation
## 625 Major pathway of rRNA processing in the nucleolus and cytosol
## 352 Eukaryotic Translation Termination
## 142 Cap-dependent Translation Initiation
## 351 Eukaryotic Translation Initiation
## 1314 Viral mRNA Translation
## 1060 Selenocysteine synthesis
## 635 Metabolism of RNA
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## setSize pMANOVA s.t0_v_pod s.pod_crp s.t0_crp p.t0_v_pod
## 735 457 4.618407e-105 0.58657151 0.38000420 -0.09806816 1.730727e-103
## 535 965 1.019944e-85 0.36728929 0.20130112 -0.04234340 8.532771e-84
## 648 1709 9.511033e-54 0.13024363 -0.04556259 -0.03560888 3.452131e-19
## 392 100 3.115070e-52 -0.20789625 -0.67710164 -0.57696255 3.271811e-04
## 350 93 3.790254e-51 -0.27289442 -0.70065421 -0.62924440 5.386406e-06
## 518 1887 7.628286e-51 0.19712633 0.07975329 -0.02452675 9.887493e-46
## 1352 217 3.109490e-50 -0.28892642 -0.54393079 -0.29892470 2.159088e-13
## 588 110 7.423309e-50 -0.14314838 -0.62114616 -0.50269723 9.492889e-03
## 1354 190 1.282164e-49 -0.30718205 -0.58732978 -0.28780888 2.765619e-13
## 434 111 1.016227e-48 -0.14719204 -0.61871349 -0.48555467 7.382262e-03
## 807 88 1.950166e-48 -0.28134726 -0.70222181 -0.63296623 5.048100e-06
## 1267 295 6.440216e-48 -0.05584812 -0.39411771 -0.16014353 9.896795e-02
## 625 180 1.523489e-47 -0.30640536 -0.59097632 -0.28617308 1.303462e-12
## 352 92 5.578268e-46 -0.26476727 -0.67426227 -0.59291387 1.134012e-05
## 142 118 6.710127e-46 -0.13160443 -0.58074861 -0.45265469 1.353731e-02
## 351 118 6.710127e-46 -0.13160443 -0.58074861 -0.45265469 1.353731e-02
## 1314 88 4.236706e-45 -0.26210705 -0.67930669 -0.60218147 2.131873e-05
## 1060 92 4.468833e-45 -0.29852322 -0.67953138 -0.58980449 7.425644e-07
## 635 685 4.612815e-45 -0.08857003 -0.28572495 -0.05179886 7.791790e-05
## 743 94 2.694113e-43 -0.24433952 -0.65294271 -0.55251982 4.227299e-05
## p.pod_crp p.t0_crp s.dist SD p.adjustMANOVA
## 735 3.141156e-44 3.275366e-04 0.7057526 0.35117771 6.290271e-102
## 535 3.074075e-26 2.599497e-02 0.4209709 0.20603951 6.945816e-83
## 648 1.751055e-03 1.445666e-02 0.1425038 0.09875387 4.318009e-51
## 392 1.025970e-31 1.847446e-23 0.9135498 0.24711383 1.060681e-49
## 350 1.339923e-31 8.594033e-26 0.9804775 0.22915174 1.032465e-48
## 518 9.950923e-09 7.806409e-02 0.2140583 0.11089097 1.731621e-48
## 1352 1.508627e-43 3.129581e-14 0.6846131 0.14442713 6.050180e-48
## 588 1.831485e-29 7.741170e-20 0.8117996 0.24892683 1.263818e-47
## 1354 1.768750e-44 7.696288e-12 0.7226002 0.16761605 1.940342e-47
## 434 1.700567e-29 9.146209e-19 0.8001470 0.24308881 1.384101e-46
## 807 4.044170e-30 9.099313e-25 0.9863661 0.22567219 2.414661e-46
## 1267 2.010406e-31 2.222264e-06 0.4290615 0.17322806 7.309645e-46
## 625 9.560263e-43 3.471971e-11 0.7245911 0.17043815 1.596148e-45
## 352 4.398222e-29 7.603884e-23 0.9360973 0.21678865 5.426858e-44
## 142 1.020572e-27 1.899295e-17 0.7479873 0.23137697 5.711996e-44
## 351 1.020572e-27 1.899295e-17 0.7479873 0.23137697 5.711996e-44
## 1314 2.747234e-28 1.449075e-22 0.9448705 0.22198140 3.306660e-43
## 1060 1.628508e-29 1.269695e-22 0.9480234 0.19919124 3.306660e-43
## 635 2.509810e-37 2.087489e-02 0.3035894 0.12579321 3.306660e-43
## 743 6.262770e-28 1.919514e-20 0.8895584 0.21292269 1.834691e-41
unlink("3d_mitch.html")
capture.output(
mitch_report(res, "3d_mitch.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
# 3D analysys effect size
res <- mitch_calc(zz, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
## set
## 512 IRAK4 deficiency (TLR2/4)
## 685 MyD88 deficiency (TLR2/4)
## 1329 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1330 alpha-linolenic acid (ALA) metabolism
## 967 Regulation of TLR by endogenous ligand
## 1277 Translocation of ZAP-70 to Immunological synapse
## 807 Peptide chain elongation
## 350 Eukaryotic Translation Elongation
## 1322 WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 1060 Selenocysteine synthesis
## 1314 Viral mRNA Translation
## 352 Eukaryotic Translation Termination
## 504 Hyaluronan uptake and degradation
## 392 Formation of a pool of free 40S subunits
## 743 Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1303 Unwinding of DNA
## 1305 Uptake and function of anthrax toxins
## 619 MET activates RAP1 and RAC1
## 1309 VLDLR internalisation and degradation
## 1007 Response of EIF2AK4 (GCN2) to amino acid deficiency
## setSize pMANOVA s.t0_v_pod s.pod_crp s.t0_crp p.t0_v_pod
## 512 10 2.399517e-06 0.8994993 0.8257826 0.340349071 8.362207e-07
## 685 10 2.399517e-06 0.8994993 0.8257826 0.340349071 8.362207e-07
## 1329 12 2.897763e-05 0.7850157 0.6416616 0.198176486 2.480595e-06
## 1330 12 2.897763e-05 0.7850157 0.6416616 0.198176486 2.480595e-06
## 967 11 1.227313e-04 0.7434466 0.6262900 0.269451653 1.951771e-05
## 1277 24 2.630091e-09 -0.7433199 -0.6603807 -0.008084781 2.857833e-10
## 807 88 1.950166e-48 -0.2813473 -0.7022218 -0.632966233 5.048100e-06
## 350 93 3.790254e-51 -0.2728944 -0.7006542 -0.629244402 5.386406e-06
## 1322 11 6.637017e-05 0.6698260 0.5756923 -0.388684222 1.194226e-04
## 1060 92 4.468833e-45 -0.2985232 -0.6795314 -0.589804492 7.425644e-07
## 1314 88 4.236706e-45 -0.2621071 -0.6793067 -0.602181468 2.131873e-05
## 352 92 5.578268e-46 -0.2647673 -0.6742623 -0.592913875 1.134012e-05
## 504 10 4.894291e-04 0.7290627 0.5139582 -0.217453559 6.529266e-05
## 392 100 3.115070e-52 -0.2078962 -0.6771016 -0.576962549 3.271811e-04
## 743 94 2.694113e-43 -0.2443395 -0.6529427 -0.552519818 4.227299e-05
## 1303 12 8.238305e-04 -0.6370599 -0.6141841 -0.051172259 1.325263e-04
## 1305 10 9.385080e-04 0.7035422 0.4327921 -0.199635019 1.166786e-04
## 619 10 4.320319e-03 0.5299424 0.5145571 0.384586589 3.707275e-03
## 1309 11 3.497482e-04 0.4681329 0.4836687 -0.485846769 7.175299e-03
## 1007 100 5.739204e-41 -0.2216832 -0.6280664 -0.490073775 1.275668e-04
## p.pod_crp p.t0_crp s.dist SD p.adjustMANOVA
## 512 6.103929e-06 6.236393e-02 1.2676172 0.30378964 1.458992e-05
## 685 6.103929e-06 6.236393e-02 1.2676172 0.30378964 1.458992e-05
## 1329 1.184409e-04 2.345674e-01 1.0330794 0.30594381 1.252938e-04
## 1330 1.184409e-04 2.345674e-01 1.0330794 0.30594381 1.252938e-04
## 967 3.216973e-04 1.217580e-01 1.0087400 0.24689085 4.517839e-04
## 1277 2.117729e-08 9.453406e-01 0.9943302 0.40268671 2.985153e-08
## 807 4.044170e-30 9.099313e-25 0.9863661 0.22567219 2.414661e-46
## 350 1.339923e-31 8.594033e-26 0.9804775 0.22915174 1.032465e-48
## 1322 9.447632e-04 2.559871e-02 0.9649684 0.58585084 2.590148e-04
## 1060 1.628508e-29 1.269695e-22 0.9480234 0.19919124 3.306660e-43
## 1314 2.747234e-28 1.449075e-22 0.9448705 0.22198140 3.306660e-43
## 352 4.398222e-29 7.603884e-23 0.9360973 0.21678865 5.426858e-44
## 504 4.885278e-03 2.337660e-01 0.9181348 0.49617306 1.455079e-03
## 392 1.025970e-31 1.847446e-23 0.9135498 0.24711383 1.060681e-49
## 743 6.262770e-28 1.919514e-20 0.8895584 0.21292269 1.834691e-41
## 1303 2.292062e-04 7.588968e-01 0.8863893 0.33185589 2.332759e-03
## 1305 1.778988e-02 2.743316e-01 0.8497851 0.46350094 2.603356e-03
## 619 4.835647e-03 3.520775e-02 0.8327754 0.07985129 9.684082e-03
## 1309 5.472180e-03 5.265108e-03 0.8301391 0.55531955 1.118209e-03
## 1007 1.649839e-27 2.349345e-17 0.8269118 0.20664895 3.722284e-39
unlink("3d_mitche.html")
capture.output(
mitch_report(res, "3d_mitche.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pkgload_1.2.0 GGally_2.1.1
## [3] beeswarm_0.3.1 gtools_3.8.2
## [5] echarts4r_0.4.0 topconfects_1.6.0
## [7] limma_3.46.0 eulerr_6.1.0
## [9] mitch_1.2.2 MASS_7.3-53.1
## [11] fgsea_1.16.0 gplots_3.1.1
## [13] DESeq2_1.30.0 SummarizedExperiment_1.20.0
## [15] Biobase_2.50.0 MatrixGenerics_1.2.0
## [17] matrixStats_0.58.0 GenomicRanges_1.42.0
## [19] GenomeInfoDb_1.26.0 IRanges_2.24.0
## [21] S4Vectors_0.28.0 BiocGenerics_0.36.0
## [23] reshape2_1.4.4 forcats_0.5.1
## [25] stringr_1.4.0 dplyr_1.0.5
## [27] purrr_0.3.4 readr_1.4.0
## [29] tidyr_1.1.3 tibble_3.1.0
## [31] ggplot2_3.3.3 tidyverse_1.3.0
## [33] zoo_1.8-9
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-0 ellipsis_0.3.1 rprojroot_2.0.2
## [4] XVector_0.30.0 fs_1.5.0 rstudioapi_0.13
## [7] farver_2.1.0 bit64_4.0.5 AnnotationDbi_1.52.0
## [10] fansi_0.4.2 lubridate_1.7.10 xml2_1.3.2
## [13] splines_4.0.3 cachem_1.0.4 geneplotter_1.68.0
## [16] knitr_1.31 polyclip_1.10-0 jsonlite_1.7.2
## [19] broom_0.7.5 annotate_1.68.0 dbplyr_2.1.0
## [22] shiny_1.6.0 compiler_4.0.3 httr_1.4.2
## [25] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-2
## [28] fastmap_1.1.0 cli_2.3.1 later_1.1.0.1
## [31] htmltools_0.5.1.1 tools_4.0.3 gtable_0.3.0
## [34] glue_1.4.2 GenomeInfoDbData_1.2.4 fastmatch_1.1-0
## [37] Rcpp_1.0.6 cellranger_1.1.0 jquerylib_0.1.3
## [40] vctrs_0.3.6 polylabelr_0.2.0 xfun_0.22
## [43] ps_1.6.0 testthat_3.0.2 rvest_1.0.0
## [46] mime_0.10 lifecycle_1.0.0 XML_3.99-0.6
## [49] zlibbioc_1.36.0 scales_1.1.1 promises_1.2.0.1
## [52] hms_1.0.0 RColorBrewer_1.1-2 yaml_2.2.1
## [55] memoise_2.0.0 gridExtra_2.3 sass_0.3.1
## [58] reshape_0.8.8 stringi_1.5.3 RSQLite_2.2.4
## [61] highr_0.8 genefilter_1.72.0 desc_1.3.0
## [64] caTools_1.18.1 BiocParallel_1.24.1 rlang_0.4.10
## [67] pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14
## [70] lattice_0.20-41 labeling_0.4.2 htmlwidgets_1.5.3
## [73] bit_4.0.4 tidyselect_1.1.0 plyr_1.8.6
## [76] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
## [79] DelayedArray_0.16.0 DBI_1.1.1 pillar_1.5.1
## [82] haven_2.3.1 withr_2.4.1 survival_3.2-10
## [85] RCurl_1.98-1.3 modelr_0.1.8 crayon_1.4.1
## [88] KernSmooth_2.23-18 utf8_1.2.1 rmarkdown_2.7
## [91] locfit_1.5-9.4 grid_4.0.3 readxl_1.3.1
## [94] data.table_1.14.0 blob_1.2.1 reprex_1.0.0
## [97] digest_0.6.27 xtable_1.8-4 httpuv_1.5.5
## [100] munsell_0.5.0 bslib_0.2.4
save.image("rnaseq_analysis_full.Rdata")