Introduction

Packages

suppressPackageStartupMessages({
    library("zoo")
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
    library("eulerr")
    library("limma")
    library("topconfects")
    library("gplots")
})

Import read counts

tmp <- read.table("3col.tsv",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)

xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)

Curate the samplesheet

Here we’re manipulating the sample sheet. Making sure that the samplesheet matches the names of the data sets and vice versa. Ensuring that each patient has a baseline and postop sample.

samplesheet<-read.csv("samplesheet.tsv",sep="\t",stringsAsFactors=FALSE)
mysamples <- colnames(xx)
baseline <- mysamples[grep("T0",mysamples)]
postop <- mysamples[grep("POD",mysamples)]
baseline <- sapply(strsplit(baseline,"-"),"[[",1)
postop <- sapply(strsplit(postop,"-"),"[[",1)
mysamples <- intersect(baseline,postop)
samplesheet2 <- samplesheet
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
samplesheet3 <- samplesheet2
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
rownames(samplesheet3) <- paste(samplesheet2$RG_number,"POD",sep="-")
samplesheet2$timepoint <- 0       
samplesheet3$timepoint <- 1
samplesheet <- rbind(samplesheet2,samplesheet3)

# filter the data matrix
xx <- xx[,which(colnames(xx) %in% rownames(samplesheet))]
samplesheet <- samplesheet[which(rownames(samplesheet) %in% colnames(xx)),]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

Blood cell composition correction

Here the idea is to use the concept of Monaco et al, 2019 to deconvolute blood RNA-seq signature.

DOI:https://doi.org/10.1016/j.celrep.2019.01.041

xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps

## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps
dec <- t(dec/colSums(dec)*100)

dec <- signif(dec, 3)

cs <- colSums(dec)
order(-cs)
##  [1]  1  4  8 13  2  6  3 14 10 12  5 16 17  9 15 11  7
cv <- apply(dec,2,function(x) {sd(x)/mean(x)} )
cv[order(-cv)]
##    T.CD8.Naive    T.CD4.Naive           pDCs    Monocytes.C        B.Naive 
##      12.309866       6.935020       6.768844       5.828486       5.773624 
##           MAIT Monocytes.NC.I             NK   Basophils.LD       B.Memory 
##       5.611708       5.598135       5.555667       5.480702       5.163804 
##   T.CD8.Memory Neutrophils.LD   T.CD4.Memory   T.gd.non.Vd2           mDCs 
##       3.524342       2.972512      -5.555388      -6.493188      -7.442438 
##       T.gd.Vd2   Plasmablasts 
##      -7.869309     -18.095614
plot(cs,cv,main="cell abundance versus SD")
text(cs,cv+0.5,label=names(cs))

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2( dec, col=colfunc(25),scale="row",
 trace="none",margins = c(10,15), cexRow=.7, cexCol=.8,  main="cell type abundances")

samplesheet <- cbind(samplesheet,dec)

dec_t0_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==0 )] ),])
dec_po_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==1 )] ),])
dec_t0_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==0 )] ),])
dec_po_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==1 )] ),])


boxplot(dec_t0_lo$Monocytes.C , dec_t0_hi$Monocytes.C, dec_po_lo$Monocytes.C,  dec_po_hi$Monocytes.C ,
  ylim=c(0,100),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.C")

t.test(dec_t0_hi$Monocytes.C , dec_po_hi$Monocytes.C)
## 
##  Welch Two Sample t-test
## 
## data:  dec_t0_hi$Monocytes.C and dec_po_hi$Monocytes.C
## t = -2.7175, df = 25.773, p-value = 0.0116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -55.076844  -7.627109
## sample estimates:
## mean of x mean of y 
##  44.13850  75.49048
boxplot(dec_t0_lo$NK , dec_t0_hi$NK, dec_po_lo$NK,  dec_po_hi$NK ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="NK")

t.test(dec_t0_lo$NK , dec_po_lo$NK)
## 
##  Welch Two Sample t-test
## 
## data:  dec_t0_lo$NK and dec_po_lo$NK
## t = 2.4688, df = 47.994, p-value = 0.01717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4820947 4.7136093
## sample estimates:
## mean of x mean of y 
##  6.707104  4.109252
boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory,  dec_po_hi$T.CD8.Memory ,
  ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")

boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory,  dec_po_hi$T.CD8.Memory ,
  ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")

boxplot(dec_t0_lo$T.CD4.Naive , dec_t0_hi$T.CD4.Naive, dec_po_lo$T.CD4.Naive,  dec_po_hi$T.CD4.Naive ,
  ylim=c(0,25),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Naive")

boxplot(dec_t0_lo$T.CD8.Naive , dec_t0_hi$T.CD8.Naive, dec_po_lo$T.CD8.Naive,  dec_po_hi$T.CD8.Naive ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Naive")

boxplot(dec_t0_lo$B.Naive , dec_t0_hi$B.Naive, dec_po_lo$B.Naive,  dec_po_hi$B.Naive ,
  ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Naive")

boxplot(dec_t0_lo$T.CD4.Memory , dec_t0_hi$T.CD4.Memory, dec_po_lo$T.CD4.Memory,  dec_po_hi$T.CD4.Memory ,
  ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Memory")

boxplot(dec_t0_lo$MAIT , dec_t0_hi$MAIT, dec_po_lo$MAIT,  dec_po_hi$MAIT ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="MAIT")

boxplot(dec_t0_lo$T.gd.Vd2 , dec_t0_hi$T.gd.Vd2 , dec_po_lo$T.gd.Vd2 ,  dec_po_hi$T.gd.Vd2 ,
  ylim=c(0,5),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.Vd2")

boxplot(dec_t0_lo$Neutrophils.LD , dec_t0_hi$Neutrophils.LD , dec_po_lo$Neutrophils.LD ,  dec_po_hi$Neutrophils.LD ,
  ylim=c(0,50),names=c("t0 lo","t0 hi","po lo","po hi"),main="Neutrophils.LD")

boxplot(dec_t0_lo$T.gd.non.Vd2 , dec_t0_hi$T.gd.non.Vd2 , dec_po_lo$T.gd.non.Vd2 ,  dec_po_hi$T.gd.non.Vd2 ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.non.Vd2")

boxplot(dec_t0_lo$Basophils.LD , dec_t0_hi$Basophils.LD , dec_po_lo$Basophils.LD ,  dec_po_hi$Basophils.LD ,
  ylim=c(0,4),names=c("t0 lo","t0 hi","po lo","po hi"),main="Basophils.LD")

boxplot(dec_t0_lo$Monocytes.NC.I , dec_t0_hi$Monocytes.NC.I , dec_po_lo$Monocytes.NC.I ,  dec_po_hi$Monocytes.NC.I ,
  ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.NC.I")

boxplot(dec_t0_lo$B.Memory , dec_t0_hi$B.Memory , dec_po_lo$B.Memory ,  dec_po_hi$B.Memory ,
  ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Memory")

boxplot(dec_t0_lo$mDCs , dec_t0_hi$mDCs , dec_po_lo$mDCs ,  dec_po_hi$mDCs ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="mDCs")

boxplot(dec_t0_lo$pDCs , dec_t0_hi$pDCs , dec_po_lo$pDCs ,  dec_po_hi$pDCs ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="pDCs")

boxplot(dec_t0_lo$Plasmablasts , dec_t0_hi$Plasmablasts , dec_po_lo$Plasmablasts ,  dec_po_hi$Plasmablasts ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="Plasmablasts")

MDS analysis

plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='l')
text(cmdscale(dist(t(xx))), labels=colnames(xx),cex=1.3)

heatmap.2(cor(xx),trace="none",margins = c(10,10), main="pearson correlation heatmap")

heatmap.2(cor(xx,method="s"),trace="none",margins = c(10,10), main="spearman correlation heatmap")

DE analysis baseline versus post-op

# filter genes with fewer than 10 reads per sample
NAME = "t0_v_pod"

# correct for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, 
  design = ~ Sex + age + 
  Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
  Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
  timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                                baseMean log2FoldChange      lfcSE      stat
## ENSG00000087116.16 ADAMTS2    634.35845      4.5019885 0.37141368 12.121224
## ENSG00000108950.12 FAM20A     792.00129      2.5116433 0.22711255 11.059025
## ENSG00000132170.21 PPARG       70.86749      2.4378861 0.23401914 10.417465
## ENSG00000121316.11 PLBD1    10689.06659      1.4121371 0.13716326 10.295302
## ENSG00000170439.7 METTL7B     126.30501      3.5819063 0.35557548 10.073547
## ENSG00000163221.9 S100A12    8507.68558      1.9626611 0.20144015  9.743148
## ENSG00000110013.13 SIAE       451.39075      0.8022923 0.08249229  9.725664
## ENSG00000156414.19 TDRD9      595.16883      1.4700444 0.15147776  9.704688
## ENSG00000064601.18 CTSA      7464.21198      0.7151836 0.07508842  9.524554
## ENSG00000163220.11 S100A9  114258.61851      1.3846404 0.14657753  9.446471
##                                  pvalue         padj
## ENSG00000087116.16 ADAMTS2 8.153188e-34 1.788809e-29
## ENSG00000108950.12 FAM20A  1.982416e-28 2.174710e-24
## ENSG00000132170.21 PPARG   2.063815e-25 1.509337e-21
## ENSG00000121316.11 PLBD1   7.398729e-25 4.058203e-21
## ENSG00000170439.7 METTL7B  7.232195e-24 3.173487e-20
## ENSG00000163221.9 S100A12  1.973452e-22 7.216257e-19
## ENSG00000110013.13 SIAE    2.343714e-22 7.345869e-19
## ENSG00000156414.19 TDRD9   2.879560e-22 7.897192e-19
## ENSG00000064601.18 CTSA    1.657529e-21 4.040686e-18
## ENSG00000163220.11 S100A9  3.504468e-21 7.688802e-18
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
  xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")  
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(samplesheet$timepoint)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

#t0_v_pod <- dge

DE analysis post-op low versus high CRP

# filter genes with fewer than 10 reads per sample
NAME = "pod_crp"
ss <- subset(samplesheet,timepoint==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + 
  Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
  Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
  CrpGroup )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                                 baseMean log2FoldChange     lfcSE      stat
## ENSG00000135424.17 ITGA7       387.17081      2.2645794 0.3013376  7.515092
## ENSG00000108950.12 FAM20A     1327.98353      2.0851707 0.2987978  6.978534
## ENSG00000143365.19 RORC        146.55223     -1.3879091 0.2165874 -6.408079
## ENSG00000251158.1 AC131392.2   154.40395      2.2065217 0.3607701  6.116144
## ENSG00000281852.1 LINC00891    116.21318     -1.0464139 0.1722441 -6.075179
## ENSG00000067836.12 ROGDI      2043.57403      0.8532526 0.1446784  5.897582
## ENSG00000131042.14 LILRB2    11720.09783      0.7387328 0.1252981  5.895802
## ENSG00000018280.17 SLC11A1   17883.60054      1.1110849 0.1964892  5.654687
## ENSG00000110079.18 MS4A4A      673.14339      1.5599668 0.2780122  5.611144
## ENSG00000156453.13 PCDH1        85.29966     -1.4272987 0.2648852 -5.388367
##                                    pvalue         padj
## ENSG00000135424.17 ITGA7     5.687096e-14 1.184281e-09
## ENSG00000108950.12 FAM20A    2.982763e-12 3.105652e-08
## ENSG00000143365.19 RORC      1.473646e-10 1.022907e-06
## ENSG00000251158.1 AC131392.2 9.586692e-10 4.990832e-06
## ENSG00000281852.1 LINC00891  1.238495e-09 5.158082e-06
## ENSG00000067836.12 ROGDI     3.688660e-09 1.109224e-05
## ENSG00000131042.14 LILRB2    3.728664e-09 1.109224e-05
## ENSG00000018280.17 SLC11A1   1.561302e-08 4.064068e-05
## ENSG00000110079.18 MS4A4A    2.009929e-08 4.650529e-05
## ENSG00000156453.13 PCDH1     7.110086e-08 1.357363e-04
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,  
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

pod_crp <- dge
write.table(dge,file="pod_crp_rna.tsv",sep="\t",quote=FALSE)

Run a ROC based on PCA distance classification.

# library size normalised
yy <- log2( (y/colSums(y)*1000000) + 0.001 )
yy <- yy[,which(rowMeans(cor(yy))>0.75)]

# crp group
crpgrp <- ss$CrpGroup
names(crpgrp) <- rownames(ss)
crpgrp <- crpgrp[which(names(crpgrp) %in% colnames(yy))]


# confects
confects <- deseq2_confects(res)

# Limma p-values
ROCp <- function(ngenes){
  pnames <- rownames(dge)[1:ngenes]
  topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
  topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
  topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
  mds <- plotMDS(topgenes,ndim=2)
  mds <- mds$cmdscale.out
  hypotenuse <- function(x) {
    sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
  }
  ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) { 
    a2=(x[1] - mds[(nrow(mds)-1),1] )^2
    b2=(x[1] - mds[(nrow(mds)-1),2] )^2
    sqrt(a2+b2)
  })
  case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
    a2=(x[1] - mds[nrow(mds),1] )^2
    b2=(x[1] - mds[nrow(mds),2] )^2
    sqrt(a2+b2)
  })
  df <- data.frame(ctrl_dist,case_dist)
  df$classification_case <- unname(unlist(apply(df,1,function(x) {
    if (x[2]<x[1]) {TRUE} else {FALSE}
  }))) *1
  df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
  df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
  df$conf <- unname(apply(df,1,function(x) {
    if (x[4]>x[5]) {x[4]} else {x[5]}
  }))
  df$crpgrp <- crpgrp
  df <- df[order(-df$conf),]
  df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
  tpr <- cumsum(df$correct) / nrow(df)
  tpr <- c(tpr,1)
  fpr <- cumsum((df$correct -1)*-1) / nrow(df)
  fpr <- c(fpr,1)
  # AUC 
  id <- order(fpr)
  AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
  HEADER=paste("AUC =",AUC)
  plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
  mtext(HEADER)
  AUC
}

# topconfect genes
ROCc <- function(ngenes){
  pnames <- confects$table$name[1:ngenes]
  #pnames <- rownames(dge)[1:ngenes]
  topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
  topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
  topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
  mds <- plotMDS(topgenes,ndim=2)
  mds <- mds$cmdscale.out
  hypotenuse <- function(x) {
    sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
  }
  ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) { 
    a2=(x[1] - mds[(nrow(mds)-1),1] )^2
    b2=(x[1] - mds[(nrow(mds)-1),2] )^2
    sqrt(a2+b2)
  })
  case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) { 
    a2=(x[1] - mds[nrow(mds),1] )^2
    b2=(x[1] - mds[nrow(mds),2] )^2
    sqrt(a2+b2)
  })
  df <- data.frame(ctrl_dist,case_dist)
  df$classification_case <- unname(unlist(apply(df,1,function(x) {
    if (x[2]<x[1]) {TRUE} else {FALSE}
  }))) *1
  df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
  df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
  df$conf <- unname(apply(df,1,function(x) {
    if (x[4]>x[5]) {x[4]} else {x[5]}
  }))
  df$crpgrp <- crpgrp
  df <- df[order(-df$conf),]
  df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
  tpr <- cumsum(df$correct) / nrow(df)
  tpr <- c(tpr,1)
  fpr <- cumsum((df$correct -1)*-1) / nrow(df)
  fpr <- c(fpr,1)
  # AUC 
  id <- order(fpr)
  AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
  HEADER=paste("AUC =",AUC)
  plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
  mtext(HEADER)
  AUC
}

ng <- c(2,3,5,7,10,13,15,17,20,30,40,60,80,100,150,200,300,500)

p_roc <- lapply(ng,ROCp)

p_roc <- unlist(p_roc)

c_roc <- lapply(ng,ROCc)

c_roc <- unlist(c_roc)

Now need to summarise what is the optimal number of genes in the panel.

plot(ng,p_roc,type="b",ylim=c(0.7,1),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()

# zoom in
plot(ng,p_roc,type="b",xlim=c(0,100),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()

From this we can see that p-value genes show better discriminitory power compared to topconfect genes at least when the number of gene$ In the range of ~150 genes shows the best result for topconfect genes with AUC of ~0.95 I also ran the p_roc analysis with more dimensions but the result was the same:

2d:0.951 (15g)

3d:0.945 (15g)

4d:0.953 (15g)

5d:0.953 (15g)

Now let’s have a look at the heatmap of the top 15 genes with the best discriminitory power.

colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:15,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
 trace="none",margins = c(10,15), cexRow=1, cexCol=.7, main="Top 15 genes")

DE analysis baseline low versus high CRP

# filter genes with fewer than 10 reads per sample
NAME = "t0_crp"
ss <- subset(samplesheet,timepoint==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
  Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
  Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
  CrpGroup )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                                    baseMean log2FoldChange      lfcSE      stat
## ENSG00000227827.3 AC138969.3      310.45967     -3.2636406 0.76288080 -4.278048
## ENSG00000156453.13 PCDH1          185.05744     -1.2022277 0.28166914 -4.268226
## ENSG00000157404.15 KIT             47.13602     -0.9528291 0.23238345 -4.100245
## ENSG00000169100.14 SLC25A6       7806.94126     -0.3764844 0.09323009 -4.038229
## ENSG00000169100.14_PAR_Y SLC25A6 7806.94126     -0.3764844 0.09323009 -4.038229
## ENSG00000231125.2 AF129075.2       19.98241      0.7527834 0.18663459  4.033462
## ENSG00000066382.16 MPPED2          20.74194     -1.2574132 0.31314927 -4.015380
## ENSG00000125740.14 FOSB          2088.89783     -2.1258699 0.53411514 -3.980172
## ENSG00000153234.14 NR4A2          247.42024     -1.0109749 0.25453655 -3.971826
## ENSG00000197467.15 COL13A1         55.61892     -1.5021959 0.38258304 -3.926457
##                                        pvalue      padj
## ENSG00000227827.3 AC138969.3     1.885395e-05 0.1755993
## ENSG00000156453.13 PCDH1         1.970332e-05 0.1755993
## ENSG00000157404.15 KIT           4.127124e-05 0.1755993
## ENSG00000169100.14 SLC25A6       5.385631e-05 0.1755993
## ENSG00000169100.14_PAR_Y SLC25A6 5.385631e-05 0.1755993
## ENSG00000231125.2 AF129075.2     5.496116e-05 0.1755993
## ENSG00000066382.16 MPPED2        5.935018e-05 0.1755993
## ENSG00000125740.14 FOSB          6.886554e-05 0.1755993
## ENSG00000153234.14 NR4A2         7.132383e-05 0.1755993
## ENSG00000197467.15 COL13A1       8.620630e-05 0.1774872
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,  
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_crp <- dge
write.table(dge,file="t0_crp_rna.tsv",sep="\t",quote=FALSE)

DE analysis baseline versus post-op (low CRP)

NAME = "t0_vs_pod_lowCRP"
ss <- subset(samplesheet,CrpGroup==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
  Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
  Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
  timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                              baseMean log2FoldChange     lfcSE      stat
## ENSG00000100027.16 YPEL1   587.582974     -0.8771291 0.1220744 -7.185201
## ENSG00000163221.9 S100A12 3581.305710      1.4029544 0.2038427  6.882534
## ENSG00000265531.3 FCGR1CP  114.277543      1.5657930 0.2290683  6.835485
## ENSG00000180739.14 S1PR5  2194.335665     -0.9534996 0.1453946 -6.558013
## ENSG00000150337.13 FCGR1A 1030.416486      1.2367991 0.1936081  6.388158
## ENSG00000165490.13 DDIAS   157.706355      0.7442192 0.1201683  6.193141
## ENSG00000189430.13 NCR1    325.815524     -1.0295640 0.1673096 -6.153647
## ENSG00000203747.11 FCGR3A 8210.777875     -0.7230627 0.1177461 -6.140865
## ENSG00000158292.7 GPR153   160.263063     -0.9855601 0.1618960 -6.087612
## ENSG00000156564.9 LRFN2      9.801471     -1.8690801 0.3080525 -6.067407
##                                 pvalue         padj
## ENSG00000100027.16 YPEL1  6.710815e-13 1.150569e-08
## ENSG00000163221.9 S100A12 5.879716e-12 4.670768e-08
## ENSG00000265531.3 FCGR1CP 8.172823e-12 4.670768e-08
## ENSG00000180739.14 S1PR5  5.452950e-11 2.337271e-07
## ENSG00000150337.13 FCGR1A 1.678961e-10 5.757158e-07
## ENSG00000165490.13 DDIAS  5.897683e-10 1.685263e-06
## ENSG00000189430.13 NCR1   7.572112e-10 1.758929e-06
## ENSG00000203747.11 FCGR3A 8.207311e-10 1.758929e-06
## ENSG00000158292.7 GPR153  1.146069e-09 2.183262e-06
## ENSG00000156564.9 LRFN2   1.299919e-09           NA
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_vs_pod_lowCRP <- dge
write.table(dge,file="t0_vs_pod_lowCRP_rna.tsv",sep="\t",quote=FALSE)

t0_vs_pod_lowCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_lowCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )

DE analysis baseline versus post-op (high CRP)

NAME = "t0_vs_pod_highCRP"
ss <- subset(samplesheet,CrpGroup==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age +
  Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive + B.Naive + MAIT +
  Neutrophils.LD + Basophils.LD + Monocytes.NC.I + B.Memory + pDCs +
  timepoint )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                               baseMean log2FoldChange     lfcSE      stat
## ENSG00000108950.12 FAM20A   1489.80876       3.472027 0.2099241 16.539443
## ENSG00000136160.17 EDNRB      63.34773       3.893434 0.3436883 11.328387
## ENSG00000170439.7 METTL7B    272.04841       5.060484 0.4687387 10.795960
## ENSG00000137869.15 CYP19A1    58.71920       5.801243 0.5450131 10.644226
## ENSG00000156414.19 TDRD9     930.02624       2.151162 0.2116347 10.164502
## ENSG00000161944.16 ASGR2    2830.13257       1.946087 0.1941712 10.022533
## ENSG00000121316.11 PLBD1   16960.02791       2.004487 0.2029243  9.878005
## ENSG00000112290.13 WASF1     153.31662       1.731543 0.1763302  9.819892
## ENSG00000138061.12 CYP1B1   7208.87837       1.849424 0.1943215  9.517344
## ENSG00000135838.13 NPL      2502.04493       1.054939 0.1143205  9.227907
##                                  pvalue         padj
## ENSG00000108950.12 FAM20A  1.907787e-61 4.211248e-57
## ENSG00000136160.17 EDNRB   9.493731e-30 1.047823e-25
## ENSG00000170439.7 METTL7B  3.596832e-27 2.646549e-23
## ENSG00000137869.15 CYP19A1 1.855191e-26 1.023787e-22
## ENSG00000156414.19 TDRD9   2.855790e-24 1.260774e-20
## ENSG00000161944.16 ASGR2   1.213526e-23 4.464563e-20
## ENSG00000121316.11 PLBD1   5.185516e-23 1.635215e-19
## ENSG00000112290.13 WASF1   9.244293e-23 2.550732e-19
## ENSG00000138061.12 CYP1B1  1.776612e-21 4.357436e-18
## ENSG00000135838.13 NPL     2.759713e-20 6.091790e-17
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_vs_pod_highCRP <- dge
write.table(dge,file="t0_vs_pod_highCRP_rna.tsv",sep="\t",quote=FALSE)

t0_vs_pod_highCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_highCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )

Compare low and high patterns

DE4 and DE5 will be compared.

v1 <- list("low CRP up"=t0_vs_pod_lowCRP_up, 
    "low CRP dn"=t0_vs_pod_lowCRP_dn,
    "high CRP up"=t0_vs_pod_highCRP_up,
    "high CRP dn"=t0_vs_pod_highCRP_dn)

plot(euler(v1),quantities = TRUE, main="Gene overlap")

Set up mitch data.

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

# set up the gene table
gt <- as.data.frame(sapply(strsplit(rownames(xx)," "),"[[",2))
gt$id <- rownames(xx)
colnames(gt) <- c("gene name","id")

Now mitch analysis with DE4 and DE5

l <- list("low CRP"=t0_vs_pod_lowCRP, "high CRP"=t0_vs_pod_highCRP)
y <- mitch_import( l , DEtype="DESeq2", geneTable=gt)
## Note: Mean no. genes in input = 21933.5
## Note: no. genes in output = 21404
## Note: estimated proportion of input genes in output = 0.976
# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="significance",resrows=50)
## Note: When prioritising by significance (ie: small 
##             p-values), large effect sizes might be missed.
head(res$enrichment_result,20)
##                                                    set setSize       pMANOVA
## 736                           Neutrophil degranulation     456 5.754140e-107
## 536                               Innate Immune System     963  8.818497e-86
## 519                                      Immune System    1885  3.546889e-42
## 631                               Membrane Trafficking     555  5.190870e-40
## 634                                         Metabolism    1758  1.930516e-34
## 649                             Metabolism of proteins    1706  2.395980e-31
## 1057                    Scavenging of heme from plasma      70  3.831245e-27
## 1312                        Vesicle-mediated transport     644  5.082246e-27
## 189  Classical antibody-mediated complement activation      68  3.039237e-23
## 1070                               Signal Transduction    1869  7.702513e-23
## 362                                    FCGR activation      75  8.303094e-23
## 217                   Creation of C4 and C2 activators      70  8.437836e-23
## 533                   Initial triggering of complement      77  1.261178e-22
## 841            Post-translational protein modification    1183  1.179254e-21
## 1103                         Signaling by Interleukins     385  3.304270e-21
## 122                       CD22 mediated BCR regulation      58  2.233107e-20
## 1268                                       Translation     294  4.716228e-20
## 950                   Regulation of Complement cascade      93  1.300963e-19
## 1355        rRNA processing in the nucleus and cytosol     189  1.337292e-19
## 1017     Role of LAT2/NTAL/LAB on calcium mobilization      73  1.981698e-19
##       s.low.CRP  s.high.CRP    p.low.CRP   p.high.CRP    s.dist          SD
## 736  0.53553808  0.53896888 3.007777e-86 2.421775e-87 0.7597950 0.002425943
## 536  0.35705939  0.30554523 4.153588e-79 2.684591e-58 0.4699461 0.036426015
## 519  0.18203861  0.15768821 3.379750e-39 8.288395e-30 0.2408394 0.017218334
## 631  0.19703851  0.33329704 2.013692e-15 3.074616e-41 0.3871835 0.096349329
## 634  0.17031688  0.14537224 1.719468e-32 4.238467e-24 0.2239217 0.017638530
## 649  0.16437577  0.14086091 1.353674e-29 3.765622e-22 0.2164745 0.016627519
## 1057 0.19236765 -0.47558291 5.382575e-03 5.820328e-12 0.5130150 0.472312367
## 1312 0.19985654  0.24495282 4.779854e-18 2.486368e-26 0.3161400 0.031887885
## 189  0.09857683 -0.50743427 1.598219e-01 4.483618e-13 0.5169206 0.428514562
## 1070 0.06840218  0.13965007 9.865631e-07 1.507210e-23 0.1555024 0.050379873
## 362  0.07991061 -0.48771844 2.314986e-01 2.730166e-13 0.4942216 0.401374350
## 217  0.11159383 -0.48415675 1.064284e-01 2.413596e-12 0.4968510 0.421259274
## 533  0.10865015 -0.45794094 9.928653e-02 3.624877e-12 0.4706535 0.400640406
## 841  0.12609043  0.16601946 2.767794e-13 6.394407e-22 0.2084736 0.028234087
## 1103 0.27385711  0.23533958 2.664372e-20 2.170608e-15 0.3610851 0.027236008
## 122  0.06903821 -0.53446660 3.631403e-01 1.865165e-12 0.5389071 0.426742348
## 1268 0.24093201 -0.02428742 1.162237e-12 4.738116e-01 0.2421531 0.187538462
## 950  0.17364398 -0.32220576 3.800867e-03 7.800262e-08 0.3660175 0.350618712
## 1355 0.06610302 -0.27098551 1.171105e-01 1.303053e-10 0.2789315 0.238357591
## 1017 0.14956623 -0.39926649 2.713312e-02 3.622380e-09 0.4263611 0.388083336
##      p.adjustMANOVA
## 736   7.842893e-104
## 536    6.009806e-83
## 519    1.611470e-39
## 631    1.768789e-37
## 634    5.262588e-32
## 649    5.442868e-29
## 1057   7.459980e-25
## 1312   8.658876e-25
## 189    4.602755e-21
## 1070   9.583976e-21
## 362    9.583976e-21
## 217    9.583976e-21
## 533    1.322297e-20
## 841    1.148088e-19
## 1103   3.002480e-19
## 122    1.902328e-18
## 1268   3.781305e-18
## 950    9.593314e-18
## 1355   9.593314e-18
## 1017   1.303160e-17
lo_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP>0),1]
lo_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP<0),1]
hi_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP>0),1]
hi_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP<0),1]

v1 <- list("low CRP up"=lo_up,
    "low CRP dn"=lo_dn,
    "high CRP up"=hi_up,
    "high CRP dn"=hi_dn)

plot(euler(v1),quantities = TRUE, main="Gene set overlap")

unlink("t0_v_pod_lohi_mitch_sig.html")
capture.output(
    mitch_report(res, "t0_v_pod_lohi_mitch_sig.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_sig.rds ".
## 
## 
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
## 
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="effect",resrows=50)
## Note: Enrichments with large effect sizes may not be 
##             statistically significant.
head(res$enrichment_result,20)
##                                                                 set setSize
## 513                                       IRAK4 deficiency (TLR2/4)      10
## 686                                       MyD88 deficiency (TLR2/4)      10
## 968                          Regulation of TLR by endogenous ligand      11
## 1330 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism      12
## 1331                          alpha-linolenic acid (ALA) metabolism      12
## 1278               Translocation of ZAP-70 to Immunological synapse      24
## 345          Erythrocytes take up carbon dioxide and release oxygen      11
## 765                                 O2/CO2 exchange in erythrocytes      11
## 505                               Hyaluronan uptake and degradation      12
## 1306                          Uptake and function of anthrax toxins      10
## 1323         WNT5A-dependent internalization of FZD2, FZD5 and ROR2      11
## 61             Advanced glycosylation endproduct receptor signaling      12
## 1342                p130Cas linkage to MAPK signaling for integrins      11
## 278                       Detoxification of Reactive Oxygen Species      31
## 925                            ROS and RNS production in phagocytes      31
## 1304                                               Unwinding of DNA      12
## 540                                      Insulin receptor recycling      21
## 779                                                  PD-1 signaling      28
## 736                                        Neutrophil degranulation     456
## 821                      Phosphorylation of CD3 and TCR zeta chains      27
##            pMANOVA  s.low.CRP s.high.CRP    p.low.CRP   p.high.CRP    s.dist
## 513   1.447591e-06  0.8128447  0.8750210 8.512796e-06 1.646553e-06 1.1943108
## 686   1.447591e-06  0.8128447  0.8750210 8.512796e-06 1.646553e-06 1.1943108
## 968   4.259879e-06  0.7977673  0.7464506 4.594433e-06 1.806135e-05 1.0925297
## 1330  2.219901e-06  0.7127742  0.7979852 1.901873e-05 1.688120e-06 1.0699661
## 1331  2.219901e-06  0.7127742  0.7979852 1.901873e-05 1.688120e-06 1.0699661
## 1278  2.610194e-11 -0.5498792 -0.8193756 3.098568e-06 3.595030e-12 0.9867844
## 345   1.135891e-04  0.6699430  0.6562087 1.190951e-04 1.638579e-04 0.9377812
## 765   1.135891e-04  0.6699430  0.6562087 1.190951e-04 1.638579e-04 0.9377812
## 505   6.178042e-05  0.5365869  0.7221002 1.287191e-03 1.477053e-05 0.8996411
## 1306  5.787056e-04  0.5175657  0.6932037 4.593165e-03 1.468342e-04 0.8651044
## 1323  1.165879e-04  0.4032203  0.7393880 2.057255e-02 2.166367e-05 0.8421883
## 61    2.064121e-04  0.5009895  0.6759770 2.653752e-03 5.007707e-05 0.8413889
## 1342  8.769271e-04  0.5595246  0.6047475 1.310831e-03 5.139015e-04 0.8238855
## 278   4.787371e-09  0.6053900  0.5333757 5.349439e-09 2.729172e-07 0.8068374
## 925   4.569586e-09  0.6097911  0.5276585 4.142987e-09 3.654867e-07 0.8063924
## 1304  4.921115e-04 -0.4919830 -0.6353855 3.165437e-03 1.380320e-04 0.8035932
## 540   3.683764e-06  0.5678365  0.5589153 6.616819e-06 9.214259e-06 0.7967588
## 779   2.287924e-09 -0.3716752 -0.6867381 6.628712e-04 3.124280e-10 0.7808660
## 736  5.754140e-107  0.5355381  0.5389689 3.007777e-86 2.421775e-87 0.7597950
## 821   2.341986e-07 -0.4628460 -0.6003250 3.132882e-05 6.625526e-08 0.7580347
##               SD p.adjustMANOVA
## 513  0.043965291   9.177053e-06
## 686  0.043965291   9.177053e-06
## 968  0.036286387   2.481665e-05
## 1330 0.060253263   1.371851e-05
## 1331 0.060253263   1.371851e-05
## 1278 0.190562742   4.867471e-10
## 345  0.009711627   4.964991e-04
## 765  0.009711627   4.964991e-04
## 505  0.131177695   2.864174e-04
## 1306 0.124194842   2.149253e-03
## 1323 0.237706485   5.012913e-04
## 61   0.123734872   8.373206e-04
## 1342 0.031977454   3.112635e-03
## 278  0.050921789   4.595202e-08
## 925  0.058076495   4.448819e-08
## 1304 0.101400893   1.852895e-03
## 540  0.006308237   2.164212e-05
## 779  0.222783152   2.380489e-08
## 736  0.002425943  7.842893e-104
## 821  0.097212343   1.725474e-06
unlink("t0_v_pod_lohi_mitch_eff.html")
capture.output(
    mitch_report(res, "t0_v_pod_lohi_mitch_eff.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_eff.rds ".
## 
## 
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
## 
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html
# prioritisation by discordance
res <- mitch_calc(y, genesets, priority="SD",resrows=50)
## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.
head(res$enrichment_result,20)
##                                                       set setSize      pMANOVA
## 1057                       Scavenging of heme from plasma      70 3.831245e-27
## 189     Classical antibody-mediated complement activation      68 3.039237e-23
## 122                          CD22 mediated BCR regulation      58 2.233107e-20
## 217                      Creation of C4 and C2 activators      70 8.437836e-23
## 362                                       FCGR activation      75 8.303094e-23
## 533                      Initial triggering of complement      77 1.261178e-22
## 1017        Role of LAT2/NTAL/LAB on calcium mobilization      73 1.981698e-19
## 108  Binding and Uptake of Ligands by Scavenger Receptors      91 2.007803e-19
## 871      Purine ribonucleoside monophosphate biosynthesis      10 9.570688e-03
## 950                      Regulation of Complement cascade      93 1.300963e-19
## 476                             HDMs demethylate histones      22 4.716467e-05
## 1248       Trafficking of GluR2-containing AMPA receptors      12 4.259959e-04
## 201                                    Complement cascade      97 2.430425e-19
## 755                               Nucleobase biosynthesis      13 3.618006e-03
## 995                      Removal of the Flap Intermediate      14 1.541114e-03
## 847                         Pre-NOTCH Processing in Golgi      18 2.113565e-04
## 860            Processive synthesis on the lagging strand      15 8.157076e-04
## 1018                Role of phospholipids in phagocytosis      87 3.749672e-17
## 338                    Elevation of cytosolic Ca2+ levels      12 1.463138e-02
## 360                        FCERI mediated MAPK activation      89 2.278582e-14
##        s.low.CRP s.high.CRP    p.low.CRP   p.high.CRP    s.dist        SD
## 1057  0.19236765 -0.4755829 0.0053825746 5.820328e-12 0.5130150 0.4723124
## 189   0.09857683 -0.5074343 0.1598218928 4.483618e-13 0.5169206 0.4285146
## 122   0.06903821 -0.5344666 0.3631403287 1.865165e-12 0.5389071 0.4267423
## 217   0.11159383 -0.4841567 0.1064284191 2.413596e-12 0.4968510 0.4212593
## 362   0.07991061 -0.4877184 0.2314986407 2.730166e-13 0.4942216 0.4013744
## 533   0.10865015 -0.4579409 0.0992865261 3.624877e-12 0.4706535 0.4006404
## 1017  0.14956623 -0.3992665 0.0271331171 3.622380e-09 0.4263611 0.3880833
## 108   0.21418749 -0.2887703 0.0004128422 1.915912e-06 0.3595338 0.3556448
## 871   0.27046836 -0.2285501 0.1386008325 2.107603e-01 0.3541020 0.3528593
## 950   0.17364398 -0.3222058 0.0038008666 7.800262e-08 0.3660175 0.3506187
## 476  -0.22790410  0.2647490 0.0642392383 3.157768e-02 0.3493313 0.3483584
## 1248  0.08172058  0.5712727 0.6240215562 6.104116e-04 0.5770882 0.3461657
## 201   0.16930741 -0.3128303 0.0039550164 1.004921e-07 0.3557074 0.3409228
## 755   0.17551235 -0.3020213 0.2732132689 5.935994e-02 0.3493157 0.3376673
## 995   0.06243238 -0.4051493 0.6858797354 8.668296e-03 0.4099314 0.3306302
## 847  -0.05057826  0.4168823 0.7102686008 2.196323e-03 0.4199393 0.3305445
## 860   0.02133495 -0.4377546 0.8862422078 3.328581e-03 0.4382742 0.3246253
## 1018  0.05436436 -0.3974331 0.3807838117 1.451699e-10 0.4011340 0.3194690
## 338  -0.24170251  0.1922448 0.1471256321 2.488711e-01 0.3088335 0.3068471
## 360   0.15245212 -0.2786438 0.0129210518 5.508367e-06 0.3176224 0.3048308
##      p.adjustMANOVA
## 1057   7.459980e-25
## 189    4.602755e-21
## 122    1.902328e-18
## 217    9.583976e-21
## 362    9.583976e-21
## 533    1.322297e-20
## 1017   1.303160e-17
## 108    1.303160e-17
## 871    2.598575e-02
## 950    9.593314e-18
## 476    2.262932e-04
## 1248   1.621878e-03
## 201    1.505759e-17
## 755    1.098294e-02
## 995    5.142932e-03
## 847    8.523044e-04
## 860    2.918135e-03
## 1018   1.892890e-15
## 338    3.734565e-02
## 360    7.963352e-13
unlink("t0_v_pod_lohi_mitch_sd.html")
capture.output(
    mitch_report(res, "t0_v_pod_lohi_mitch_sd.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/Rtmp4JD0SS/t0_v_pod_lohi_mitch_sd.rds ".
## 
## 
## processing file: mitch.Rmd
## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md
## 
## Output created: /tmp/Rtmp4JD0SS/mitch_report.html

One dimensional enrichment analysis

Not correcting for blood compostion changes.

# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100) 
head(res$enrichment_result,20)
##                                                 set setSize        pANOVA
## 737                        Neutrophil degranulation     457 1.319240e-103
## 537                            Innate Immune System     967  5.848762e-84
## 520                                   Immune System    1893  3.240735e-46
## 632                            Membrane Trafficking     559  5.327421e-36
## 1314                     Vesicle-mediated transport     650  1.642242e-31
## 1071                            Signal Transduction    1894  7.041601e-28
## 497                                      Hemostasis     549  9.700746e-23
## 635                                      Metabolism    1772  1.645387e-22
## 830  Platelet activation, signaling and aggregation     221  1.353176e-20
## 1133         Signaling by Receptor Tyrosine Kinases     414  1.678123e-20
## 1104                      Signaling by Interleukins     386  1.710343e-19
## 650                          Metabolism of proteins    1718  2.214499e-19
## 842         Post-translational protein modification    1193  6.814624e-19
## 1011   Response to elevated platelet cytosolic Ca2+     110  1.253521e-16
## 832                          Platelet degranulation     106  5.814015e-16
## 1247                    Toll-like Receptor Cascades     143  2.293691e-15
## 284                                         Disease    1355  2.716874e-15
## 86                Asparagine N-linked glycosylation     269  5.316672e-15
## 644                            Metabolism of lipids     626  6.696163e-15
## 1290                   Transport of small molecules     561  1.571805e-14
##         s.dist p.adjustANOVA
## 737  0.5868303 1.802082e-100
## 537  0.3671208  3.994704e-81
## 520  0.1977009  1.475615e-43
## 632  0.3093383  1.819314e-33
## 1314 0.2680762  4.486606e-29
## 1071 0.1517126  1.603138e-25
## 497  0.2446828  1.893031e-20
## 635  0.1395073  2.809498e-20
## 830  0.3628343  2.053820e-18
## 1133 0.2656301  2.292316e-18
## 1104 0.2675063  2.123935e-17
## 650  0.1305055  2.520838e-17
## 842  0.1524833  7.160598e-17
## 1011 0.4565015  1.223079e-14
## 832  0.4546295  5.294630e-14
## 1247 0.3835923  1.958239e-13
## 284  0.1278957  2.183088e-13
## 86   0.2767766  4.034764e-13
## 644  0.1822696  4.814189e-13
## 1290 0.1895711  1.073543e-12
unlink("t0_v_pod_mitch.html")
capture.output(
    mitch_report(res, "t0_v_pod_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
##                                                                               set
## 1355                                   rRNA processing in the nucleus and cytosol
## 1353                                                              rRNA processing
## 735                                                      Neutrophil degranulation
## 625                 Major pathway of rRNA processing in the nucleolus and cytosol
## 635                                                             Metabolism of RNA
## 1268                                                                  Translation
## 392                                      Formation of a pool of free 40S subunits
## 350                                             Eukaryotic Translation Elongation
## 807                                                      Peptide chain elongation
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1060                                                     Selenocysteine synthesis
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 352                                            Eukaryotic Translation Termination
## 1315                                                       Viral mRNA Translation
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 527                             Influenza Viral RNA Transcription and Replication
## 142                                          Cap-dependent Translation Initiation
## 351                                             Eukaryotic Translation Initiation
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 443                                               Gene expression (Transcription)
##      setSize       pANOVA     s.dist p.adjustANOVA
## 1355     190 6.057107e-45 -0.5904904  8.255837e-42
## 1353     217 4.380955e-44 -0.5473800  2.985621e-41
## 735      457 3.041773e-43  0.3755493  1.161753e-40
## 625      180 3.409400e-43 -0.5941666  1.161753e-40
## 635      685 1.565493e-38 -0.2904518  4.267534e-36
## 1268     295 4.363036e-32 -0.3984506  9.911364e-30
## 392      100 5.671059e-32 -0.6799824  1.104236e-29
## 350       93 7.889298e-32 -0.7033290  1.344139e-29
## 807       88 2.456173e-30 -0.7048767  3.719737e-28
## 434      111 8.894827e-30 -0.6218167  1.095512e-27
## 1060      92 9.578986e-30 -0.6823235  1.095512e-27
## 588      110 9.645004e-30 -0.6242326  1.095512e-27
## 352       92 2.593358e-29 -0.6770630  2.719036e-27
## 1315      88 1.665544e-28 -0.6820598  1.621526e-26
## 743       94 3.687034e-28 -0.6557809  3.350285e-26
## 527      135 4.188769e-28 -0.5471626  3.568307e-26
## 142      118 5.136865e-28 -0.5840418  3.889748e-26
## 351      118 5.136865e-28 -0.5840418  3.889748e-26
## 1007     100 9.213024e-28 -0.6311161  6.609133e-26
## 443     1323 4.436643e-27 -0.1763157  3.023572e-25
unlink("pod_crp_mitch.html")
capture.output(
    mitch_report(res, "pod_crp_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
##                                                                               set
## 353                                             Eukaryotic Translation Elongation
## 810                                                      Peptide chain elongation
## 395                                      Formation of a pool of free 40S subunits
## 355                                            Eukaryotic Translation Termination
## 1063                                                     Selenocysteine synthesis
## 1318                                                       Viral mRNA Translation
## 746  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 591             L13a-mediated translational silencing of Ceruloplasmin expression
## 437                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 145                                          Cap-dependent Translation Initiation
## 354                                             Eukaryotic Translation Initiation
## 1010                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 745     Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 747                                                 Nonsense-Mediated Decay (NMD)
## 1062                                                  Selenoamino acid metabolism
## 1357                                                              rRNA processing
## 530                             Influenza Viral RNA Transcription and Replication
## 1043                  SRP-dependent cotranslational protein targeting to membrane
## 1359                                   rRNA processing in the nucleus and cytosol
## 983                                   Regulation of expression of SLITs and ROBOs
##      setSize       pANOVA     s.dist p.adjustANOVA
## 353       93 5.984883e-26 -0.6312625  8.181335e-23
## 810       88 6.359892e-25 -0.6350662  4.346986e-22
## 395      100 1.321350e-23 -0.5788531  6.020950e-21
## 355       92 5.460741e-23 -0.5948916  1.866208e-20
## 1063      92 8.975646e-23 -0.5918881  2.373209e-20
## 1318      88 1.041643e-22 -0.6042127  2.373209e-20
## 746       94 1.406713e-20 -0.5544663  2.747110e-18
## 591      110 5.779516e-20 -0.5044096  9.875749e-18
## 437      111 6.917771e-19 -0.4872317  1.050733e-16
## 145      118 1.451766e-17 -0.4542798  1.804150e-15
## 354      118 1.451766e-17 -0.4542798  1.804150e-15
## 1010     100 1.775376e-17 -0.4919270  2.022449e-15
## 745      114 4.576291e-17 -0.4549009  4.468421e-15
## 747      114 4.576291e-17 -0.4549009  4.468421e-15
## 1062     114 6.820977e-15 -0.4219374  6.216184e-13
## 1357     219 8.592078e-15 -0.3040087  7.340857e-13
## 530      135 9.538981e-15 -0.3858047  7.670463e-13
## 1043     111 1.484250e-13 -0.4056829  1.127205e-11
## 1359     190 6.118865e-12 -0.2891464  4.402362e-10
## 983      162 7.623572e-12 -0.3115117  5.210711e-10
unlink("t0_crp_mitch.html")
capture.output(
    mitch_report(res, "t0_crp_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


## prioritise by effect size

# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
##                                                                 set setSize
## 514                                       IRAK4 deficiency (TLR2/4)      10
## 687                                       MyD88 deficiency (TLR2/4)      10
## 1333 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism      12
## 1334                          alpha-linolenic acid (ALA) metabolism      12
## 969                          Regulation of TLR by endogenous ligand      11
## 1280               Translocation of ZAP-70 to Immunological synapse      24
## 506                               Hyaluronan uptake and degradation      12
## 1308                          Uptake and function of anthrax toxins      10
## 346          Erythrocytes take up carbon dioxide and release oxygen      11
## 766                                 O2/CO2 exchange in erythrocytes      11
## 1326         WNT5A-dependent internalization of FZD2, FZD5 and ROR2      11
## 1345                p130Cas linkage to MAPK signaling for integrins      11
## 61             Advanced glycosylation endproduct receptor signaling      12
## 1306                                               Unwinding of DNA      12
## 903                               RNA Polymerase I Promoter Opening      19
## 890                            RHO GTPases Activate WASPs and WAVEs      35
## 926                            ROS and RNS production in phagocytes      31
## 737                                        Neutrophil degranulation     457
## 435       GRB2:SOS provides linkage to MAPK signaling for Integrins      12
## 780                                                  PD-1 signaling      28
##             pANOVA     s.dist p.adjustANOVA
## 514   8.367911e-07  0.8994748  1.048676e-05
## 687   8.367911e-07  0.8994748  1.048676e-05
## 1333  2.461377e-06  0.7852782  2.626751e-05
## 1334  2.461377e-06  0.7852782  2.626751e-05
## 969   1.930406e-05  0.7438719  1.448865e-04
## 1280  2.809953e-10 -0.7436252  8.344338e-09
## 506   8.850181e-06  0.7406638  7.239130e-05
## 1308  1.141313e-04  0.7045257  6.523155e-04
## 346   9.462990e-05  0.6796841  5.533223e-04
## 766   9.462990e-05  0.6796841  5.533223e-04
## 1326  1.165844e-04  0.6708490  6.580757e-04
## 1345  1.324011e-04  0.6654100  7.219060e-04
## 61    9.478582e-05  0.6506965  5.533223e-04
## 1306  1.323109e-04 -0.6371242  7.219060e-04
## 903   3.287422e-06  0.6163218  3.277824e-05
## 890   4.567968e-10  0.6085563  1.327626e-08
## 926   8.233487e-09  0.5978890  1.757335e-07
## 737  1.319240e-103  0.5868303 1.802082e-100
## 435   4.971813e-04  0.5804902  2.286699e-03
## 780   1.109028e-07 -0.5793553  1.870286e-06
unlink("t0_v_pod_mitche.html")
capture.output(
    mitch_report(res, "t0_v_pod_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
##                                                                               set
## 512                                                     IRAK4 deficiency (TLR2/4)
## 685                                                     MyD88 deficiency (TLR2/4)
## 807                                                      Peptide chain elongation
## 350                                             Eukaryotic Translation Elongation
## 1060                                                     Selenocysteine synthesis
## 1315                                                       Viral mRNA Translation
## 392                                      Formation of a pool of free 40S subunits
## 352                                            Eukaryotic Translation Termination
## 1278                             Translocation of ZAP-70 to Immunological synapse
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1330               alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1331                                        alpha-linolenic acid (ALA) metabolism
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 967                                        Regulation of TLR by endogenous ligand
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1304                                                             Unwinding of DNA
## 398           Formation of the ternary complex, and subsequently, the 43S complex
## 625                 Major pathway of rRNA processing in the nucleolus and cytosol
## 1355                                   rRNA processing in the nucleus and cytosol
##      setSize       pANOVA     s.dist p.adjustANOVA
## 512       10 6.624388e-06  0.8226169  1.142917e-04
## 685       10 6.624388e-06  0.8226169  1.142917e-04
## 807       88 2.456173e-30 -0.7048767  3.719737e-28
## 350       93 7.889298e-32 -0.7033290  1.344139e-29
## 1060      92 9.578986e-30 -0.6823235  1.095512e-27
## 1315      88 1.665544e-28 -0.6820598  1.621526e-26
## 392      100 5.671059e-32 -0.6799824  1.104236e-29
## 352       92 2.593358e-29 -0.6770630  2.719036e-27
## 1278      24 1.862173e-08 -0.6629989  5.902655e-07
## 743       94 3.687034e-28 -0.6557809  3.350285e-26
## 1330      12 1.314716e-04  0.6373869  1.518609e-03
## 1331      12 1.314716e-04  0.6373869  1.518609e-03
## 1007     100 9.213024e-28 -0.6311161  6.609133e-26
## 588      110 9.645004e-30 -0.6242326  1.095512e-27
## 967       11 3.448433e-04  0.6231351  3.133476e-03
## 434      111 8.894827e-30 -0.6218167  1.095512e-27
## 1304      12 2.114502e-04 -0.6175972  2.150795e-03
## 398       51 2.797660e-14 -0.6153683  1.314900e-12
## 625      180 3.409400e-43 -0.5941666  1.161753e-40
## 1355     190 6.057107e-45 -0.5904904  8.255837e-42
unlink("pod_crp_mitche.html")
capture.output(
    mitch_report(res, "pod_crp_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
##                                                                               set
## 810                                                      Peptide chain elongation
## 353                                             Eukaryotic Translation Elongation
## 1318                                                       Viral mRNA Translation
## 355                                            Eukaryotic Translation Termination
## 1063                                                     Selenocysteine synthesis
## 395                                      Formation of a pool of free 40S subunits
## 746  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 525                               Incretin synthesis, secretion, and inactivation
## 1185    Synthesis, secretion, and inactivation of Glucagon-like Peptide-1 (GLP-1)
## 700                                       NF-kB is activated and signals survival
## 1328                                      WNT5A-dependent internalization of FZD4
## 591             L13a-mediated translational silencing of Ceruloplasmin expression
## 247                                                               DNA methylation
## 1010                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 1313                                        VLDLR internalisation and degradation
## 1364                                         tRNA processing in the mitochondrion
## 437                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 9                                       AKT phosphorylates targets in the nucleus
## 127                                  CDC6 association with the ORC:origin complex
## 585                                                          Josephin domain DUBs
##      setSize       pANOVA     s.dist p.adjustANOVA
## 810       88 6.359892e-25 -0.6350662  4.346986e-22
## 353       93 5.984883e-26 -0.6312625  8.181335e-23
## 1318      88 1.041643e-22 -0.6042127  2.373209e-20
## 355       92 5.460741e-23 -0.5948916  1.866208e-20
## 1063      92 8.975646e-23 -0.5918881  2.373209e-20
## 395      100 1.321350e-23 -0.5788531  6.020950e-21
## 746       94 1.406713e-20 -0.5544663  2.747110e-18
## 525       10 3.599313e-03  0.5316270  4.057041e-02
## 1185      10 3.599313e-03  0.5316270  4.057041e-02
## 700       12 2.070944e-03 -0.5134241  2.808190e-02
## 1328      13 1.527289e-03 -0.5076380  2.372505e-02
## 591      110 5.779516e-20 -0.5044096  9.875749e-18
## 247       20 1.264460e-04 -0.4950439  2.880862e-03
## 1010     100 1.775376e-17 -0.4919270  2.022449e-15
## 1313      11 5.001247e-03 -0.4887335  5.102339e-02
## 1364      32 1.707306e-06 -0.4886526  5.692407e-05
## 437      111 6.917771e-19 -0.4872317  1.050733e-16
## 9         10 8.214748e-03 -0.4826626  6.805794e-02
## 127       11 5.967394e-03  0.4787435  5.622505e-02
## 585       10 8.887143e-03 -0.4777788  7.102600e-02
unlink("t0_crp_mitche.html")
capture.output(
    mitch_report(res, "t0_crp_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# 3D analysys significance
z <- list("t0_v_pod"=t0_v_pod, "pod_crp"=pod_crp, "t0_crp"=t0_crp)
zz <- mitch_import(z, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(zz, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)
##                                                                               set
## 735                                                      Neutrophil degranulation
## 535                                                          Innate Immune System
## 648                                                        Metabolism of proteins
## 392                                      Formation of a pool of free 40S subunits
## 350                                             Eukaryotic Translation Elongation
## 518                                                                 Immune System
## 1352                                                              rRNA processing
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 1354                                   rRNA processing in the nucleus and cytosol
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 807                                                      Peptide chain elongation
## 1267                                                                  Translation
## 625                 Major pathway of rRNA processing in the nucleolus and cytosol
## 352                                            Eukaryotic Translation Termination
## 142                                          Cap-dependent Translation Initiation
## 351                                             Eukaryotic Translation Initiation
## 1314                                                       Viral mRNA Translation
## 1060                                                     Selenocysteine synthesis
## 635                                                             Metabolism of RNA
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
##      setSize       pMANOVA  s.t0_v_pod   s.pod_crp    s.t0_crp    p.t0_v_pod
## 735      457 4.618407e-105  0.58657151  0.38000420 -0.09806816 1.730727e-103
## 535      965  1.019944e-85  0.36728929  0.20130112 -0.04234340  8.532771e-84
## 648     1709  9.511033e-54  0.13024363 -0.04556259 -0.03560888  3.452131e-19
## 392      100  3.115070e-52 -0.20789625 -0.67710164 -0.57696255  3.271811e-04
## 350       93  3.790254e-51 -0.27289442 -0.70065421 -0.62924440  5.386406e-06
## 518     1887  7.628286e-51  0.19712633  0.07975329 -0.02452675  9.887493e-46
## 1352     217  3.109490e-50 -0.28892642 -0.54393079 -0.29892470  2.159088e-13
## 588      110  7.423309e-50 -0.14314838 -0.62114616 -0.50269723  9.492889e-03
## 1354     190  1.282164e-49 -0.30718205 -0.58732978 -0.28780888  2.765619e-13
## 434      111  1.016227e-48 -0.14719204 -0.61871349 -0.48555467  7.382262e-03
## 807       88  1.950166e-48 -0.28134726 -0.70222181 -0.63296623  5.048100e-06
## 1267     295  6.440216e-48 -0.05584812 -0.39411771 -0.16014353  9.896795e-02
## 625      180  1.523489e-47 -0.30640536 -0.59097632 -0.28617308  1.303462e-12
## 352       92  5.578268e-46 -0.26476727 -0.67426227 -0.59291387  1.134012e-05
## 142      118  6.710127e-46 -0.13160443 -0.58074861 -0.45265469  1.353731e-02
## 351      118  6.710127e-46 -0.13160443 -0.58074861 -0.45265469  1.353731e-02
## 1314      88  4.236706e-45 -0.26210705 -0.67930669 -0.60218147  2.131873e-05
## 1060      92  4.468833e-45 -0.29852322 -0.67953138 -0.58980449  7.425644e-07
## 635      685  4.612815e-45 -0.08857003 -0.28572495 -0.05179886  7.791790e-05
## 743       94  2.694113e-43 -0.24433952 -0.65294271 -0.55251982  4.227299e-05
##         p.pod_crp     p.t0_crp    s.dist         SD p.adjustMANOVA
## 735  3.141156e-44 3.275366e-04 0.7057526 0.35117771  6.290271e-102
## 535  3.074075e-26 2.599497e-02 0.4209709 0.20603951   6.945816e-83
## 648  1.751055e-03 1.445666e-02 0.1425038 0.09875387   4.318009e-51
## 392  1.025970e-31 1.847446e-23 0.9135498 0.24711383   1.060681e-49
## 350  1.339923e-31 8.594033e-26 0.9804775 0.22915174   1.032465e-48
## 518  9.950923e-09 7.806409e-02 0.2140583 0.11089097   1.731621e-48
## 1352 1.508627e-43 3.129581e-14 0.6846131 0.14442713   6.050180e-48
## 588  1.831485e-29 7.741170e-20 0.8117996 0.24892683   1.263818e-47
## 1354 1.768750e-44 7.696288e-12 0.7226002 0.16761605   1.940342e-47
## 434  1.700567e-29 9.146209e-19 0.8001470 0.24308881   1.384101e-46
## 807  4.044170e-30 9.099313e-25 0.9863661 0.22567219   2.414661e-46
## 1267 2.010406e-31 2.222264e-06 0.4290615 0.17322806   7.309645e-46
## 625  9.560263e-43 3.471971e-11 0.7245911 0.17043815   1.596148e-45
## 352  4.398222e-29 7.603884e-23 0.9360973 0.21678865   5.426858e-44
## 142  1.020572e-27 1.899295e-17 0.7479873 0.23137697   5.711996e-44
## 351  1.020572e-27 1.899295e-17 0.7479873 0.23137697   5.711996e-44
## 1314 2.747234e-28 1.449075e-22 0.9448705 0.22198140   3.306660e-43
## 1060 1.628508e-29 1.269695e-22 0.9480234 0.19919124   3.306660e-43
## 635  2.509810e-37 2.087489e-02 0.3035894 0.12579321   3.306660e-43
## 743  6.262770e-28 1.919514e-20 0.8895584 0.21292269   1.834691e-41
unlink("3d_mitch.html")
capture.output(
    mitch_report(res, "3d_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# 3D analysys effect size
res <- mitch_calc(zz, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)
##                                                                               set
## 512                                                     IRAK4 deficiency (TLR2/4)
## 685                                                     MyD88 deficiency (TLR2/4)
## 1329               alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1330                                        alpha-linolenic acid (ALA) metabolism
## 967                                        Regulation of TLR by endogenous ligand
## 1277                             Translocation of ZAP-70 to Immunological synapse
## 807                                                      Peptide chain elongation
## 350                                             Eukaryotic Translation Elongation
## 1322                       WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 1060                                                     Selenocysteine synthesis
## 1314                                                       Viral mRNA Translation
## 352                                            Eukaryotic Translation Termination
## 504                                             Hyaluronan uptake and degradation
## 392                                      Formation of a pool of free 40S subunits
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1303                                                             Unwinding of DNA
## 1305                                        Uptake and function of anthrax toxins
## 619                                                   MET activates RAP1 and RAC1
## 1309                                        VLDLR internalisation and degradation
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
##      setSize      pMANOVA s.t0_v_pod  s.pod_crp     s.t0_crp   p.t0_v_pod
## 512       10 2.399517e-06  0.8994993  0.8257826  0.340349071 8.362207e-07
## 685       10 2.399517e-06  0.8994993  0.8257826  0.340349071 8.362207e-07
## 1329      12 2.897763e-05  0.7850157  0.6416616  0.198176486 2.480595e-06
## 1330      12 2.897763e-05  0.7850157  0.6416616  0.198176486 2.480595e-06
## 967       11 1.227313e-04  0.7434466  0.6262900  0.269451653 1.951771e-05
## 1277      24 2.630091e-09 -0.7433199 -0.6603807 -0.008084781 2.857833e-10
## 807       88 1.950166e-48 -0.2813473 -0.7022218 -0.632966233 5.048100e-06
## 350       93 3.790254e-51 -0.2728944 -0.7006542 -0.629244402 5.386406e-06
## 1322      11 6.637017e-05  0.6698260  0.5756923 -0.388684222 1.194226e-04
## 1060      92 4.468833e-45 -0.2985232 -0.6795314 -0.589804492 7.425644e-07
## 1314      88 4.236706e-45 -0.2621071 -0.6793067 -0.602181468 2.131873e-05
## 352       92 5.578268e-46 -0.2647673 -0.6742623 -0.592913875 1.134012e-05
## 504       10 4.894291e-04  0.7290627  0.5139582 -0.217453559 6.529266e-05
## 392      100 3.115070e-52 -0.2078962 -0.6771016 -0.576962549 3.271811e-04
## 743       94 2.694113e-43 -0.2443395 -0.6529427 -0.552519818 4.227299e-05
## 1303      12 8.238305e-04 -0.6370599 -0.6141841 -0.051172259 1.325263e-04
## 1305      10 9.385080e-04  0.7035422  0.4327921 -0.199635019 1.166786e-04
## 619       10 4.320319e-03  0.5299424  0.5145571  0.384586589 3.707275e-03
## 1309      11 3.497482e-04  0.4681329  0.4836687 -0.485846769 7.175299e-03
## 1007     100 5.739204e-41 -0.2216832 -0.6280664 -0.490073775 1.275668e-04
##         p.pod_crp     p.t0_crp    s.dist         SD p.adjustMANOVA
## 512  6.103929e-06 6.236393e-02 1.2676172 0.30378964   1.458992e-05
## 685  6.103929e-06 6.236393e-02 1.2676172 0.30378964   1.458992e-05
## 1329 1.184409e-04 2.345674e-01 1.0330794 0.30594381   1.252938e-04
## 1330 1.184409e-04 2.345674e-01 1.0330794 0.30594381   1.252938e-04
## 967  3.216973e-04 1.217580e-01 1.0087400 0.24689085   4.517839e-04
## 1277 2.117729e-08 9.453406e-01 0.9943302 0.40268671   2.985153e-08
## 807  4.044170e-30 9.099313e-25 0.9863661 0.22567219   2.414661e-46
## 350  1.339923e-31 8.594033e-26 0.9804775 0.22915174   1.032465e-48
## 1322 9.447632e-04 2.559871e-02 0.9649684 0.58585084   2.590148e-04
## 1060 1.628508e-29 1.269695e-22 0.9480234 0.19919124   3.306660e-43
## 1314 2.747234e-28 1.449075e-22 0.9448705 0.22198140   3.306660e-43
## 352  4.398222e-29 7.603884e-23 0.9360973 0.21678865   5.426858e-44
## 504  4.885278e-03 2.337660e-01 0.9181348 0.49617306   1.455079e-03
## 392  1.025970e-31 1.847446e-23 0.9135498 0.24711383   1.060681e-49
## 743  6.262770e-28 1.919514e-20 0.8895584 0.21292269   1.834691e-41
## 1303 2.292062e-04 7.588968e-01 0.8863893 0.33185589   2.332759e-03
## 1305 1.778988e-02 2.743316e-01 0.8497851 0.46350094   2.603356e-03
## 619  4.835647e-03 3.520775e-02 0.8327754 0.07985129   9.684082e-03
## 1309 5.472180e-03 5.265108e-03 0.8301391 0.55531955   1.118209e-03
## 1007 1.649839e-27 2.349345e-17 0.8269118 0.20664895   3.722284e-39
unlink("3d_mitche.html")
capture.output(
    mitch_report(res, "3d_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

Session information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pkgload_1.2.0               GGally_2.1.1               
##  [3] beeswarm_0.3.1              gtools_3.8.2               
##  [5] echarts4r_0.4.0             topconfects_1.6.0          
##  [7] limma_3.46.0                eulerr_6.1.0               
##  [9] mitch_1.2.2                 MASS_7.3-53.1              
## [11] fgsea_1.16.0                gplots_3.1.1               
## [13] DESeq2_1.30.0               SummarizedExperiment_1.20.0
## [15] Biobase_2.50.0              MatrixGenerics_1.2.0       
## [17] matrixStats_0.58.0          GenomicRanges_1.42.0       
## [19] GenomeInfoDb_1.26.0         IRanges_2.24.0             
## [21] S4Vectors_0.28.0            BiocGenerics_0.36.0        
## [23] reshape2_1.4.4              forcats_0.5.1              
## [25] stringr_1.4.0               dplyr_1.0.5                
## [27] purrr_0.3.4                 readr_1.4.0                
## [29] tidyr_1.1.3                 tibble_3.1.0               
## [31] ggplot2_3.3.3               tidyverse_1.3.0            
## [33] zoo_1.8-9                  
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-0       ellipsis_0.3.1         rprojroot_2.0.2       
##   [4] XVector_0.30.0         fs_1.5.0               rstudioapi_0.13       
##   [7] farver_2.1.0           bit64_4.0.5            AnnotationDbi_1.52.0  
##  [10] fansi_0.4.2            lubridate_1.7.10       xml2_1.3.2            
##  [13] splines_4.0.3          cachem_1.0.4           geneplotter_1.68.0    
##  [16] knitr_1.31             polyclip_1.10-0        jsonlite_1.7.2        
##  [19] broom_0.7.5            annotate_1.68.0        dbplyr_2.1.0          
##  [22] shiny_1.6.0            compiler_4.0.3         httr_1.4.2            
##  [25] backports_1.2.1        assertthat_0.2.1       Matrix_1.3-2          
##  [28] fastmap_1.1.0          cli_2.3.1              later_1.1.0.1         
##  [31] htmltools_0.5.1.1      tools_4.0.3            gtable_0.3.0          
##  [34] glue_1.4.2             GenomeInfoDbData_1.2.4 fastmatch_1.1-0       
##  [37] Rcpp_1.0.6             cellranger_1.1.0       jquerylib_0.1.3       
##  [40] vctrs_0.3.6            polylabelr_0.2.0       xfun_0.22             
##  [43] ps_1.6.0               testthat_3.0.2         rvest_1.0.0           
##  [46] mime_0.10              lifecycle_1.0.0        XML_3.99-0.6          
##  [49] zlibbioc_1.36.0        scales_1.1.1           promises_1.2.0.1      
##  [52] hms_1.0.0              RColorBrewer_1.1-2     yaml_2.2.1            
##  [55] memoise_2.0.0          gridExtra_2.3          sass_0.3.1            
##  [58] reshape_0.8.8          stringi_1.5.3          RSQLite_2.2.4         
##  [61] highr_0.8              genefilter_1.72.0      desc_1.3.0            
##  [64] caTools_1.18.1         BiocParallel_1.24.1    rlang_0.4.10          
##  [67] pkgconfig_2.0.3        bitops_1.0-6           evaluate_0.14         
##  [70] lattice_0.20-41        labeling_0.4.2         htmlwidgets_1.5.3     
##  [73] bit_4.0.4              tidyselect_1.1.0       plyr_1.8.6            
##  [76] magrittr_2.0.1         R6_2.5.0               generics_0.1.0        
##  [79] DelayedArray_0.16.0    DBI_1.1.1              pillar_1.5.1          
##  [82] haven_2.3.1            withr_2.4.1            survival_3.2-10       
##  [85] RCurl_1.98-1.3         modelr_0.1.8           crayon_1.4.1          
##  [88] KernSmooth_2.23-18     utf8_1.2.1             rmarkdown_2.7         
##  [91] locfit_1.5-9.4         grid_4.0.3             readxl_1.3.1          
##  [94] data.table_1.14.0      blob_1.2.1             reprex_1.0.0          
##  [97] digest_0.6.27          xtable_1.8-4           httpuv_1.5.5          
## [100] munsell_0.5.0          bslib_0.2.4
save.image("rnaseq_analysis_full.Rdata")