Introduction
Packages
Import read counts
Curate the samplesheet
Blood cell composition correction
MDS analysis
DE analysis baseline versus post-op
DE analysis post-op low versus high CRP
DE analysis baseline low versus high CRP
DE analysis baseline versus post-op (low CRP)
DE analysis baseline versus post-op (high CRP)
Compare low and high patterns
- One dimensional enrichment analysis

Introduction

Packages

suppressPackageStartupMessages({
    library("zoo")
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
    library("eulerr")
    library("limma")
    library("topconfects")
    library("gplots")
})

Import read counts

tmp <- read.table("3col.tsv",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)

xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)

Curate the samplesheet

Here we're manipulating the sample sheet. Making sure that the samplesheet matches the names of the data sets and vice versa. Ensuring that each patient has a baseline and postop sample.

samplesheet<-read.csv("samplesheet.tsv",sep="\t",stringsAsFactors=FALSE)
mysamples <- colnames(xx)
baseline <- mysamples[grep("T0",mysamples)]
postop <- mysamples[grep("POD",mysamples)]
baseline <- sapply(strsplit(baseline,"-"),"[[",1)
postop <- sapply(strsplit(postop,"-"),"[[",1)
mysamples <- intersect(baseline,postop)
samplesheet2 <- samplesheet
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
samplesheet3 <- samplesheet2
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
rownames(samplesheet3) <- paste(samplesheet2$RG_number,"POD",sep="-")
samplesheet2$timepoint <- 0       
samplesheet3$timepoint <- 1
samplesheet <- rbind(samplesheet2,samplesheet3)

# filter the data matrix
xx <- xx[,which(colnames(xx) %in% rownames(samplesheet))]
samplesheet <- samplesheet[which(rownames(samplesheet) %in% colnames(xx)),]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

Blood cell composition correction

Here the idea is to use the concept of Monaco et al, 2019 to deconvolute blood RNA-seq signature.

DOI:https://doi.org/10.1016/j.celrep.2019.01.041

xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100

## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps

## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps

dec <- t(dec/colSums(dec)*100)

dec <- signif(dec, 3)

cs <- colSums(dec)
order(-cs)

##  [1]  1  4  8 13  2  6  3 14 10 12  5 16 17  9 15 11  7

cv <- apply(dec,2,function(x) {sd(x)/mean(x)} )
cv[order(-cv)]

##    T.CD8.Naive    T.CD4.Naive           pDCs    Monocytes.C        B.Naive 
##      12.309866       6.935020       6.768844       5.828486       5.773624 
##           MAIT Monocytes.NC.I             NK   Basophils.LD       B.Memory 
##       5.611708       5.598135       5.555667       5.480702       5.163804 
##   T.CD8.Memory Neutrophils.LD   T.CD4.Memory   T.gd.non.Vd2           mDCs 
##       3.524342       2.972512      -5.555388      -6.493188      -7.442438 
##       T.gd.Vd2   Plasmablasts 
##      -7.869309     -18.095614

plot(cs,cv,main="cell abundance versus SD")
text(cs,cv+0.5,label=names(cs))

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2( dec, col=colfunc(25),scale="row",
 trace="none",margins = c(10,15), cexRow=.7, cexCol=.8,  main="cell type abundances")

samplesheet <- cbind(samplesheet,dec)

dec_t0_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==0 )] ),])
dec_po_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==1 )] ),])
dec_t0_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==0 )] ),])
dec_po_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==1 )] ),])


boxplot(dec_t0_lo$Monocytes.C , dec_t0_hi$Monocytes.C, dec_po_lo$Monocytes.C,  dec_po_hi$Monocytes.C ,
  ylim=c(0,100),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.C")

t.test(dec_t0_hi$Monocytes.C , dec_po_hi$Monocytes.C)

## 
##  Welch Two Sample t-test
## 
## data:  dec_t0_hi$Monocytes.C and dec_po_hi$Monocytes.C
## t = -2.7175, df = 25.773, p-value = 0.0116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -55.076844  -7.627109
## sample estimates:
## mean of x mean of y 
##  44.13850  75.49048

boxplot(dec_t0_lo$NK , dec_t0_hi$NK, dec_po_lo$NK,  dec_po_hi$NK ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="NK")

t.test(dec_t0_lo$NK , dec_po_lo$NK)

## 
##  Welch Two Sample t-test
## 
## data:  dec_t0_lo$NK and dec_po_lo$NK
## t = 2.4688, df = 47.994, p-value = 0.01717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4820947 4.7136093
## sample estimates:
## mean of x mean of y 
##  6.707104  4.109252

boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory,  dec_po_hi$T.CD8.Memory ,
  ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")

boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory,  dec_po_hi$T.CD8.Memory ,
  ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")

boxplot(dec_t0_lo$T.CD4.Naive , dec_t0_hi$T.CD4.Naive, dec_po_lo$T.CD4.Naive,  dec_po_hi$T.CD4.Naive ,
  ylim=c(0,25),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Naive")

boxplot(dec_t0_lo$T.CD8.Naive , dec_t0_hi$T.CD8.Naive, dec_po_lo$T.CD8.Naive,  dec_po_hi$T.CD8.Naive ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Naive")

boxplot(dec_t0_lo$B.Naive , dec_t0_hi$B.Naive, dec_po_lo$B.Naive,  dec_po_hi$B.Naive ,
  ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Naive")

boxplot(dec_t0_lo$T.CD4.Memory , dec_t0_hi$T.CD4.Memory, dec_po_lo$T.CD4.Memory,  dec_po_hi$T.CD4.Memory ,
  ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Memory")

boxplot(dec_t0_lo$MAIT , dec_t0_hi$MAIT, dec_po_lo$MAIT,  dec_po_hi$MAIT ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="MAIT")

boxplot(dec_t0_lo$T.gd.Vd2 , dec_t0_hi$T.gd.Vd2 , dec_po_lo$T.gd.Vd2 ,  dec_po_hi$T.gd.Vd2 ,
  ylim=c(0,5),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.Vd2")

boxplot(dec_t0_lo$Neutrophils.LD , dec_t0_hi$Neutrophils.LD , dec_po_lo$Neutrophils.LD ,  dec_po_hi$Neutrophils.LD ,
  ylim=c(0,50),names=c("t0 lo","t0 hi","po lo","po hi"),main="Neutrophils.LD")

boxplot(dec_t0_lo$T.gd.non.Vd2 , dec_t0_hi$T.gd.non.Vd2 , dec_po_lo$T.gd.non.Vd2 ,  dec_po_hi$T.gd.non.Vd2 ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.non.Vd2")

boxplot(dec_t0_lo$Basophils.LD , dec_t0_hi$Basophils.LD , dec_po_lo$Basophils.LD ,  dec_po_hi$Basophils.LD ,
  ylim=c(0,4),names=c("t0 lo","t0 hi","po lo","po hi"),main="Basophils.LD")

boxplot(dec_t0_lo$Monocytes.NC.I , dec_t0_hi$Monocytes.NC.I , dec_po_lo$Monocytes.NC.I ,  dec_po_hi$Monocytes.NC.I ,
  ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.NC.I")

boxplot(dec_t0_lo$B.Memory , dec_t0_hi$B.Memory , dec_po_lo$B.Memory ,  dec_po_hi$B.Memory ,
  ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Memory")

boxplot(dec_t0_lo$mDCs , dec_t0_hi$mDCs , dec_po_lo$mDCs ,  dec_po_hi$mDCs ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="mDCs")

boxplot(dec_t0_lo$pDCs , dec_t0_hi$pDCs , dec_po_lo$pDCs ,  dec_po_hi$pDCs ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="pDCs")

boxplot(dec_t0_lo$Plasmablasts , dec_t0_hi$Plasmablasts , dec_po_lo$Plasmablasts ,  dec_po_hi$Plasmablasts ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="Plasmablasts")

MDS analysis

plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='l')
text(cmdscale(dist(t(xx))), labels=colnames(xx),cex=1.3)

heatmap.2(cor(xx),trace="none",margins = c(10,10), main="pearson correlation heatmap")

heatmap.2(cor(xx,method="s"),trace="none",margins = c(10,10), main="spearman correlation heatmap")

DE analysis baseline versus post-op

# filter genes with fewer than 10 reads per sample
NAME = "t0_v_pod"

# no correction for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet,
  design = ~ Sex + age + timepoint )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                                baseMean log2FoldChange      lfcSE     stat
## ENSG00000087116.16 ADAMTS2    634.35845      4.5151731 0.35054106 12.88058
## ENSG00000108950.12 FAM20A     792.00129      2.5933863 0.21519289 12.05145
## ENSG00000170439.7 METTL7B     126.30501      3.7994825 0.32669823 11.62995
## ENSG00000132170.21 PPARG       70.86749      2.5437784 0.21970131 11.57835
## ENSG00000121316.11 PLBD1    10689.06659      1.4627439 0.12773015 11.45183
## ENSG00000112290.13 WASF1      113.84190      1.2704505 0.11958889 10.62348
## ENSG00000156414.19 TDRD9      595.16883      1.6598788 0.15732025 10.55095
## ENSG00000166033.13 HTRA1      131.84384      2.0268764 0.19324218 10.48879
## ENSG00000110013.13 SIAE       451.39075      0.8257222 0.07915764 10.43137
## ENSG00000163220.11 S100A9  114258.61851      1.5350647 0.14750800 10.40665
##                                  pvalue         padj
## ENSG00000087116.16 ADAMTS2 5.789681e-38 1.267245e-33
## ENSG00000108950.12 FAM20A  1.905681e-33 2.085577e-29
## ENSG00000170439.7 METTL7B  2.902842e-31 2.117913e-27
## ENSG00000132170.21 PPARG   5.305835e-31 2.903353e-27
## ENSG00000121316.11 PLBD1   2.302339e-30 1.007872e-26
## ENSG00000112290.13 WASF1   2.317491e-26 8.454208e-23
## ENSG00000156414.19 TDRD9   5.028360e-26 1.572296e-22
## ENSG00000166033.13 HTRA1   9.726683e-26 2.661221e-22
## ENSG00000110013.13 SIAE    1.783074e-25 4.336435e-22
## ENSG00000163220.11 S100A9  2.312053e-25 5.060621e-22

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
  xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

t0_v_pod <- dge
write.table(dge,file="t0_v_pod_rna.tsv",sep="\t",quote=FALSE)

# correct for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, 
  design = ~ Sex + age + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive +
  B.Naive + T.CD4.Memory + MAIT + T.gd.Vd2 + Neutrophils.LD + T.gd.non.Vd2 +
  Basophils.LD + Monocytes.NC.I + B.Memory + mDCs + pDCs + Plasmablasts + timepoint )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                               baseMean log2FoldChange      lfcSE      stat
## ENSG00000087116.16 ADAMTS2   634.35845      4.4583888 0.38198506 11.671631
## ENSG00000108950.12 FAM20A    792.00129      2.4139216 0.23631617 10.214797
## ENSG00000121316.11 PLBD1   10689.06659      1.3815172 0.14305201  9.657447
## ENSG00000132170.21 PPARG      70.86749      2.2946466 0.24099431  9.521580
## ENSG00000156414.19 TDRD9     595.16883      1.4892175 0.16054323  9.276115
## ENSG00000170439.7 METTL7B    126.30501      3.3287242 0.36128049  9.213684
## ENSG00000164125.15 GASK1B   3752.55250      0.9813449 0.10907350  8.997097
## ENSG00000110013.13 SIAE      451.39075      0.7805594 0.08685202  8.987234
## ENSG00000198053.11 SIRPA    7106.66137      0.7691877 0.08625109  8.918005
## ENSG00000163221.9 S100A12   8507.68558      1.8593362 0.20909153  8.892451
##                                  pvalue         padj
## ENSG00000087116.16 ADAMTS2 1.779774e-31 3.904824e-27
## ENSG00000108950.12 FAM20A  1.702354e-24 1.867483e-20
## ENSG00000121316.11 PLBD1   4.571125e-22 3.343016e-18
## ENSG00000132170.21 PPARG   1.705655e-21 9.355517e-18
## ENSG00000156414.19 TDRD9   1.757702e-20 7.712798e-17
## ENSG00000170439.7 METTL7B  3.151212e-20 1.152293e-16
## ENSG00000164125.15 GASK1B  2.317645e-19 6.953065e-16
## ENSG00000110013.13 SIAE    2.535302e-19 6.953065e-16
## ENSG00000198053.11 SIRPA   4.747613e-19 1.157363e-15
## ENSG00000163221.9 S100A12  5.977554e-19 1.311475e-15

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
  xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")  
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(samplesheet$timepoint)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

#t0_v_pod <- dge

DE analysis post-op low versus high CRP

# filter genes with fewer than 10 reads per sample
NAME = "pod_crp"
ss <- subset(samplesheet,timepoint==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + CrpGroup )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                               baseMean log2FoldChange     lfcSE      stat
## ENSG00000135424.17 ITGA7      387.1708      2.2131744 0.2570353  8.610391
## ENSG00000108950.12 FAM20A    1327.9835      2.0134443 0.2649032  7.600680
## ENSG00000143365.19 RORC       146.5522     -1.4132086 0.2085281 -6.777064
## ENSG00000110079.18 MS4A4A     673.1434      1.5505614 0.2295571  6.754577
## ENSG00000123643.13 SLC36A1   1610.6451      0.9818768 0.1518341  6.466773
## ENSG00000281852.1 LINC00891   116.2132     -0.9573707 0.1488960 -6.429796
## ENSG00000018280.17 SLC11A1  17883.6005      1.1018384 0.1722874  6.395353
## ENSG00000014257.16 ACPP       863.6512      0.7083577 0.1121526  6.316019
## ENSG00000178562.18 CD28      1015.3327     -1.0429829 0.1694328 -6.155733
## ENSG00000121316.11 PLBD1    15404.6149      1.0790204 0.1756857  6.141765
##                                   pvalue         padj
## ENSG00000135424.17 ITGA7    7.281202e-18 1.574560e-13
## ENSG00000108950.12 FAM20A   2.945794e-14 3.185140e-10
## ENSG00000143365.19 RORC     1.226425e-11 7.744568e-08
## ENSG00000110079.18 MS4A4A   1.432521e-11 7.744568e-08
## ENSG00000123643.13 SLC36A1  1.001180e-10 4.330105e-07
## ENSG00000281852.1 LINC00891 1.277753e-10 4.605234e-07
## ENSG00000018280.17 SLC11A1  1.601768e-10 4.948320e-07
## ENSG00000014257.16 ACPP     2.683869e-10 7.254833e-07
## ENSG00000178562.18 CD28     7.473115e-10 1.764803e-06
## ENSG00000121316.11 PLBD1    8.160938e-10 1.764803e-06

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,  
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

pod_crp <- dge
write.table(dge,file="pod_crp_rna.tsv",sep="\t",quote=FALSE)

Run a ROC based on PCA distance classification.

# library size normalised
yy <- log2( (y/colSums(y)*1000000) + 0.001 )
yy <- yy[,which(rowMeans(cor(yy))>0.75)]

# crp group
crpgrp <- ss$CrpGroup
names(crpgrp) <- rownames(ss)
crpgrp <- crpgrp[which(names(crpgrp) %in% colnames(yy))]


# confects
confects <- deseq2_confects(res)

# Limma p-values
ROCp <- function(ngenes){
  pnames <- rownames(dge)[1:ngenes]
  topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
  topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
  topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
  mds <- plotMDS(topgenes,ndim=2)
  mds <- mds$cmdscale.out
  hypotenuse <- function(x) {
    sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
  }
  ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) { 
    a2=(x[1] - mds[(nrow(mds)-1),1] )^2
    b2=(x[1] - mds[(nrow(mds)-1),2] )^2
    sqrt(a2+b2)
  })
  case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) {
    a2=(x[1] - mds[nrow(mds),1] )^2
    b2=(x[1] - mds[nrow(mds),2] )^2
    sqrt(a2+b2)
  })
  df <- data.frame(ctrl_dist,case_dist)
  df$classification_case <- unname(unlist(apply(df,1,function(x) {
    if (x[2]<x[1]) {TRUE} else {FALSE}
  }))) *1
  df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
  df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
  df$conf <- unname(apply(df,1,function(x) {
    if (x[4]>x[5]) {x[4]} else {x[5]}
  }))
  df$crpgrp <- crpgrp
  df <- df[order(-df$conf),]
  df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
  tpr <- cumsum(df$correct) / nrow(df)
  tpr <- c(tpr,1)
  fpr <- cumsum((df$correct -1)*-1) / nrow(df)
  fpr <- c(fpr,1)
  # AUC 
  id <- order(fpr)
  AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
  HEADER=paste("AUC =",AUC)
  plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
  mtext(HEADER)
  AUC
}

# topconfect genes
ROCc <- function(ngenes){
  pnames <- confects$table$name[1:ngenes]
  #pnames <- rownames(dge)[1:ngenes]
  topgenes <- as.data.frame(yy[which(rownames(yy) %in% pnames),])
  topgenes$ctrl <- rowMeans(topgenes[,which(crpgrp==0)])
  topgenes$case <- rowMeans(topgenes[,which(crpgrp==1)])
  mds <- plotMDS(topgenes,ndim=2)
  mds <- mds$cmdscale.out
  hypotenuse <- function(x) {
    sqrt(sum(unlist(lapply(x, function(x) { x^2 })),na.rm = TRUE ))
  }
  ctrl_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) { 
    a2=(x[1] - mds[(nrow(mds)-1),1] )^2
    b2=(x[1] - mds[(nrow(mds)-1),2] )^2
    sqrt(a2+b2)
  })
  case_dist <- apply(mds[1:(nrow(mds)-2),],1,function(x) { 
    a2=(x[1] - mds[nrow(mds),1] )^2
    b2=(x[1] - mds[nrow(mds),2] )^2
    sqrt(a2+b2)
  })
  df <- data.frame(ctrl_dist,case_dist)
  df$classification_case <- unname(unlist(apply(df,1,function(x) {
    if (x[2]<x[1]) {TRUE} else {FALSE}
  }))) *1
  df$case_conf <- df$ctrl_dist / ( df$ctrl_dist + df$case_dist )
  df$ctrl_conf <- df$case_dist / ( df$ctrl_dist + df$case_dist )
  df$conf <- unname(apply(df,1,function(x) {
    if (x[4]>x[5]) {x[4]} else {x[5]}
  }))
  df$crpgrp <- crpgrp
  df <- df[order(-df$conf),]
  df$correct <- unname(unlist(apply(df,1,function(x) { if (x[3] == x[7]) {1} else {0} } )))
  tpr <- cumsum(df$correct) / nrow(df)
  tpr <- c(tpr,1)
  fpr <- cumsum((df$correct -1)*-1) / nrow(df)
  fpr <- c(fpr,1)
  # AUC 
  id <- order(fpr)
  AUC=round(sum(diff(fpr[id])*rollmean(tpr[id],2)),3)
  HEADER=paste("AUC =",AUC)
  plot(fpr,tpr,type="l",xlim=c(0,1),ylim=c(0,1),xlab="FPR",ylab="TPR",main=paste(ngenes,"genes"))
  mtext(HEADER)
  AUC
}

ng <- c(2,3,5,7,10,13,15,17,20,30,40,60,80,100,150,200,300,500)

p_roc <- lapply(ng,ROCp)

p_roc <- unlist(p_roc)

c_roc <- lapply(ng,ROCc)

c_roc <- unlist(c_roc)

Now need to summarise what is the optimal number of genes in the panel.

plot(ng,p_roc,type="b",ylim=c(0.7,1),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()

# zoom in
plot(ng,p_roc,type="b",xlim=c(0,100),col="blue",xlab="no. genes", ylab="AUC")
points(ng,c_roc,type="b",col="red")
mtext("blue: top p-value genes, red:top confect genes")
grid()

From this we can see that p-value genes show better discriminitory power compared to topconfect genes at least when the number of gene$ In the range of ~150 genes shows the best result for topconfect genes with AUC of ~0.95 I also ran the p_roc analysis with more dimensions but the result was the same:

2d:0.951 (15g)

3d:0.945 (15g)

4d:0.953 (15g)

5d:0.953 (15g)

Now let's have a look at the heatmap of the top 15 genes with the best discriminitory power.

colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:15,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
 trace="none",margins = c(10,15), cexRow=1, cexCol=.7, main="Top 15 genes")

DE analysis baseline low versus high CRP

# filter genes with fewer than 10 reads per sample
NAME = "t0_crp"
ss <- subset(samplesheet,timepoint==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + CrpGroup )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                               baseMean log2FoldChange     lfcSE      stat
## ENSG00000167680.16 SEMA6B    116.93734     -1.7545755 0.3476538 -5.046904
## ENSG00000165949.12 IFI27      67.17018      2.4903519 0.5349850  4.654994
## ENSG00000279358.1 AP002833.3  37.34539      0.6664108 0.1548064  4.304802
## ENSG00000198010.12 DLGAP2     15.39018     -2.9860007 0.7063975 -4.227083
## ENSG00000285755.2 AC132153.1  16.27930      0.7771681 0.1867644  4.161222
## ENSG00000112812.16 PRSS16     40.93576      0.7922314 0.1906324  4.155806
## ENSG00000169245.6 CXCL10      83.86172      1.4237493 0.3426822  4.154722
## ENSG00000211821.2 TRDV2      114.06718      2.8845574 0.7018804  4.109756
## ENSG00000174482.10 LINGO2     25.14387     -1.2019485 0.2936643 -4.092933
## ENSG00000225886.3 AL445490.1  40.31291      1.0396377 0.2557416  4.065188
##                                    pvalue       padj
## ENSG00000167680.16 SEMA6B    4.490270e-07 0.00973760
## ENSG00000165949.12 IFI27     3.239900e-06 0.03513024
## ENSG00000279358.1 AP002833.3 1.671350e-05 0.09840478
## ENSG00000198010.12 DLGAP2    2.367404e-05 0.09840478
## ENSG00000285755.2 AC132153.1 3.165494e-05 0.09840478
## ENSG00000112812.16 PRSS16    3.241423e-05 0.09840478
## ENSG00000169245.6 CXCL10     3.256838e-05 0.09840478
## ENSG00000211821.2 TRDV2      3.960772e-05 0.09840478
## ENSG00000174482.10 LINGO2    4.259502e-05 0.09840478
## ENSG00000225886.3 AL445490.1 4.799366e-05 0.09840478

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,  
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_crp <- dge
write.table(dge,file="t0_crp_rna.tsv",sep="\t",quote=FALSE)

DE analysis baseline versus post-op (low CRP)

NAME = "t0_vs_pod_lowCRP"
ss <- subset(samplesheet,CrpGroup==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + timepoint )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                              baseMean log2FoldChange      lfcSE      stat
## ENSG00000137441.8 FGFBP2    2290.7736     -1.0441286 0.13932527 -7.494180
## ENSG00000180739.14 S1PR5    2194.3357     -0.8996463 0.12148727 -7.405273
## ENSG00000087116.16 ADAMTS2   152.2537      3.2666803 0.45463999  7.185202
## ENSG00000180644.8 PRF1     13508.0087     -0.8692873 0.12117649 -7.173729
## ENSG00000073861.3 TBX21     2627.3291     -0.7986587 0.11394462 -7.009184
## ENSG00000189430.13 NCR1      325.8155     -0.9846467 0.14472353 -6.803640
## ENSG00000197471.12 SPN      7497.6862     -0.4540182 0.06795644 -6.681018
## ENSG00000203747.11 FCGR3A   8210.7779     -0.7321138 0.11070981 -6.612909
## ENSG00000198821.10 CD247    5280.3102     -0.5512978 0.08580645 -6.424899
## ENSG00000105374.10 NKG7     6479.5414     -0.8431169 0.13188449 -6.392844
##                                  pvalue         padj
## ENSG00000137441.8 FGFBP2   6.671429e-14 1.258686e-09
## ENSG00000180739.14 S1PR5   1.308814e-13 1.258686e-09
## ENSG00000087116.16 ADAMTS2 6.710777e-13 3.509335e-09
## ENSG00000180644.8 PRF1     7.298192e-13 3.509335e-09
## ENSG00000073861.3 TBX21    2.397127e-12 9.221270e-09
## ENSG00000189430.13 NCR1    1.020086e-11 3.270055e-08
## ENSG00000197471.12 SPN     2.372881e-11 6.519999e-08
## ENSG00000203747.11 FCGR3A  3.768410e-11 9.060200e-08
## ENSG00000198821.10 CD247   1.319572e-10 2.820071e-07
## ENSG00000105374.10 NKG7    1.628288e-10 3.006036e-07

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_vs_pod_lowCRP <- dge
write.table(dge,file="t0_vs_pod_lowCRP_rna.tsv",sep="\t",quote=FALSE)

t0_vs_pod_lowCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_lowCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )

DE analysis baseline versus post-op (high CRP)

NAME = "t0_vs_pod_highCRP"
ss <- subset(samplesheet,CrpGroup==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + timepoint )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                              baseMean log2FoldChange     lfcSE     stat
## ENSG00000108950.12 FAM20A  1489.80876       3.618686 0.1833149 19.74027
## ENSG00000170439.7 METTL7B   272.04841       5.136126 0.3178545 16.15873
## ENSG00000136160.17 EDNRB     63.34773       3.868843 0.2740075 14.11948
## ENSG00000121316.11 PLBD1  16960.02791       2.087370 0.1545156 13.50912
## ENSG00000132170.21 PPARG    131.38193       3.638899 0.2733249 13.31346
## ENSG00000135424.17 ITGA7    475.12859       2.765605 0.2191406 12.62023
## ENSG00000178342.4 KCNG2      75.80036       2.650524 0.2111098 12.55519
## ENSG00000156414.19 TDRD9    930.02624       2.436293 0.1965318 12.39643
## ENSG00000138061.12 CYP1B1  7208.87837       1.995175 0.1633779 12.21203
## ENSG00000112290.13 WASF1    153.31662       1.799185 0.1483442 12.12845
##                                 pvalue         padj
## ENSG00000108950.12 FAM20A 9.725497e-87 2.146806e-82
## ENSG00000170439.7 METTL7B 9.856739e-59 1.087888e-54
## ENSG00000136160.17 EDNRB  2.881075e-45 2.119895e-41
## ENSG00000121316.11 PLBD1  1.381559e-41 7.624134e-38
## ENSG00000132170.21 PPARG  1.933286e-40 8.535071e-37
## ENSG00000135424.17 ITGA7  1.633424e-36 6.009366e-33
## ENSG00000178342.4 KCNG2   3.722802e-36 1.173959e-32
## ENSG00000156414.19 TDRD9  2.732331e-35 7.539185e-32
## ENSG00000138061.12 CYP1B1 2.681226e-34 6.576154e-31
## ENSG00000112290.13 WASF1  7.464895e-34 1.647801e-30

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$timepoint)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_vs_pod_highCRP <- dge
write.table(dge,file="t0_vs_pod_highCRP_rna.tsv",sep="\t",quote=FALSE)

t0_vs_pod_highCRP_up <- rownames( subset(dge,padj<0.05 & log2FoldChange > 0 ) )
t0_vs_pod_highCRP_dn <- rownames( subset(dge,padj<0.05 & log2FoldChange < 0 ) )

Compare low and high patterns

DE4 and DE5 will be compared.

v1 <- list("low CRP up"=t0_vs_pod_lowCRP_up, 
    "low CRP dn"=t0_vs_pod_lowCRP_dn,
    "high CRP up"=t0_vs_pod_highCRP_up,
    "high CRP dn"=t0_vs_pod_highCRP_dn)

plot(euler(v1),quantities = TRUE, main="Gene overlap")

Set up mitch data.

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

# set up the gene table
gt <- as.data.frame(sapply(strsplit(rownames(xx)," "),"[[",2))
gt$id <- rownames(xx)
colnames(gt) <- c("gene name","id")

Now mitch analysis with DE4 and DE5

l <- list("low CRP"=t0_vs_pod_lowCRP, "high CRP"=t0_vs_pod_highCRP)
y <- mitch_import( l , DEtype="DESeq2", geneTable=gt)

## Note: Mean no. genes in input = 21933.5

## Note: no. genes in output = 21404

## Note: estimated proportion of input genes in output = 0.976

# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="significance",resrows=50)

## Note: When prioritising by significance (ie: small 
##             p-values), large effect sizes might be missed.

head(res$enrichment_result,20)

##                                                 set setSize       pMANOVA
## 736                        Neutrophil degranulation     456 1.263803e-111
## 536                            Innate Immune System     963  6.130758e-91
## 519                                   Immune System    1885  2.765539e-52
## 631                            Membrane Trafficking     555  5.938119e-44
## 1070                            Signal Transduction    1869  1.004496e-35
## 1312                     Vesicle-mediated transport     644  3.036553e-33
## 634                                      Metabolism    1758  1.903167e-30
## 649                          Metabolism of proteins    1706  1.534282e-28
## 841         Post-translational protein modification    1183  2.919397e-28
## 496                                      Hemostasis     542  1.799655e-22
## 829  Platelet activation, signaling and aggregation     219  2.664012e-22
## 1132         Signaling by Receptor Tyrosine Kinases     408  7.112961e-22
## 1103                      Signaling by Interleukins     385  1.982991e-21
## 643                            Metabolism of lipids     619  2.020443e-20
## 283                                         Disease    1339  2.210546e-19
## 85                Asparagine N-linked glycosylation     269  4.801739e-18
## 1057                 Scavenging of heme from plasma      70  2.156205e-17
## 1010   Response to elevated platelet cytosolic Ca2+     108  8.500125e-17
## 1245                    Toll-like Receptor Cascades     143  4.513541e-16
## 831                          Platelet degranulation     104  4.561114e-16
##       s.low.CRP s.high.CRP    p.low.CRP    p.high.CRP    s.dist         SD
## 736  0.49112778  0.5926683 9.831146e-73 2.069228e-105 0.7697156 0.07179997
## 536  0.31921271  0.3677640 1.696475e-63  7.543273e-84 0.4869775 0.03433092
## 519  0.15849504  0.2108636 4.255092e-30  4.620841e-52 0.2637881 0.03703018
## 631  0.19402464  0.3485897 5.330055e-15  5.804668e-45 0.3989490 0.10929397
## 1070 0.09033584  0.1757052 1.012719e-10  2.342824e-36 0.1975674 0.06036526
## 1312 0.20651231  0.2786978 3.639864e-19  1.279883e-33 0.3468715 0.05104287
## 634  0.10348522  0.1678910 5.845616e-13  1.286778e-31 0.1972222 0.04554178
## 649  0.09665358  0.1645692 3.215319e-11  1.162926e-29 0.1908532 0.04802360
## 841  0.09714527  0.1923267 1.835523e-08  7.003482e-29 0.2154687 0.06730346
## 496  0.21747030  0.2327536 4.532923e-18  1.781809e-20 0.3185398 0.01080696
## 829  0.25216106  0.3901526 1.251285e-10  2.279381e-23 0.4645474 0.09757478
## 1132 0.16146563  0.2840002 2.185823e-08  6.761141e-23 0.3266914 0.08664501
## 1103 0.21956189  0.2839877 1.374644e-13  1.029543e-21 0.3589658 0.04555593
## 643  0.11291144  0.2220172 1.620118e-06  3.826076e-21 0.2490796 0.07714945
## 283  0.09709103  0.1508045 2.520117e-09  1.989476e-20 0.1793563 0.03798117
## 85   0.18687003  0.3159482 1.316744e-07  4.393851e-19 0.3670745 0.09127204
## 1057 0.25037432 -0.2801283 2.915726e-04  5.049103e-05 0.3757115 0.37512200
## 1010 0.37973833  0.4634969 9.020070e-12  8.213220e-17 0.5991916 0.05922624
## 1245 0.28561077  0.4042238 3.681181e-09  6.785557e-17 0.4949449 0.08387210
## 831  0.38321867  0.4592344 1.421157e-11  5.601182e-16 0.5981244 0.05375122
##      p.adjustMANOVA
## 736   1.722564e-108
## 536    4.178111e-88
## 519    1.256476e-49
## 631    2.023414e-41
## 1070   2.738255e-33
## 1312   6.898035e-31
## 634    3.705739e-28
## 649    2.614033e-26
## 841    4.421265e-26
## 496    2.452930e-20
## 829    3.300954e-20
## 1132   8.079138e-20
## 1103   2.079090e-19
## 643    1.967046e-18
## 283    2.008650e-17
## 85     4.090482e-16
## 1057   1.728769e-15
## 1010   6.436484e-15
## 1245   3.108399e-14
## 831    3.108399e-14

lo_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP>0),1]
lo_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.low.CRP)<0.05 & res$enrichment_result$s.low.CRP<0),1]
hi_up <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP>0),1]
hi_dn <- res$enrichment_result[which(p.adjust(res$enrichment_result$p.high.CRP)<0.05 & res$enrichment_result$s.high.CRP<0),1]

v1 <- list("low CRP up"=lo_up,
    "low CRP dn"=lo_dn,
    "high CRP up"=hi_up,
    "high CRP dn"=hi_dn)

plot(euler(v1),quantities = TRUE, main="Gene set overlap")

unlink("t0_v_pod_lohi_mitch_sig.html")
capture.output(
    mitch_report(res, "t0_v_pod_lohi_mitch_sig.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

## Dataset saved as " /tmp/RtmpmQpMeM/t0_v_pod_lohi_mitch_sig.rds ".

## 
## 
## processing file: mitch.Rmd

## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md

## 
## Output created: /tmp/RtmpmQpMeM/mitch_report.html

# prioritisation by statistical significance
res <- mitch_calc(y, genesets, priority="effect",resrows=50)

## Note: Enrichments with large effect sizes may not be 
##             statistically significant.

head(res$enrichment_result,20)

##                                                                 set setSize
## 513                                       IRAK4 deficiency (TLR2/4)      10
## 686                                       MyD88 deficiency (TLR2/4)      10
## 968                          Regulation of TLR by endogenous ligand      11
## 1330 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism      12
## 1331                          alpha-linolenic acid (ALA) metabolism      12
## 1278               Translocation of ZAP-70 to Immunological synapse      24
## 345          Erythrocytes take up carbon dioxide and release oxygen      11
## 765                                 O2/CO2 exchange in erythrocytes      11
## 505                               Hyaluronan uptake and degradation      12
## 1306                          Uptake and function of anthrax toxins      10
## 1342                p130Cas linkage to MAPK signaling for integrins      11
## 1323         WNT5A-dependent internalization of FZD2, FZD5 and ROR2      11
## 1304                                               Unwinding of DNA      12
## 434       GRB2:SOS provides linkage to MAPK signaling for Integrins      12
## 889                            RHO GTPases Activate WASPs and WAVEs      35
## 902                               RNA Polymerase I Promoter Opening      19
## 61             Advanced glycosylation endproduct receptor signaling      12
## 779                                                  PD-1 signaling      28
## 925                            ROS and RNS production in phagocytes      31
## 1105                                            Signaling by Leptin      10
##           pMANOVA  s.low.CRP s.high.CRP    p.low.CRP   p.high.CRP    s.dist
## 513  8.775903e-07  0.8445452  0.8863981 3.735703e-06 1.204349e-06 1.2243194
## 686  8.775903e-07  0.8445452  0.8863981 3.735703e-06 1.204349e-06 1.2243194
## 968  1.120943e-05  0.7701542  0.7203843 9.697309e-06 3.506286e-05 1.0545573
## 1330 7.056714e-06  0.6555878  0.7808449 8.392555e-05 2.803937e-06 1.0195656
## 1331 7.056714e-06  0.6555878  0.7808449 8.392555e-05 2.803937e-06 1.0195656
## 1278 7.245946e-11 -0.6794473 -0.7589726 8.215895e-09 1.200844e-10 1.0186697
## 345  3.194632e-05  0.7410538  0.6765722 2.075703e-05 1.018797e-04 1.0034494
## 765  3.194632e-05  0.7410538  0.6765722 2.075703e-05 1.018797e-04 1.0034494
## 505  3.641898e-05  0.5909140  0.7322831 3.929058e-04 1.116940e-05 0.9409665
## 1306 1.565554e-04  0.5050388  0.7630364 5.680935e-03 2.928526e-05 0.9150348
## 1342 2.208711e-04  0.6487891  0.6348848 1.942043e-04 2.657830e-04 0.9077478
## 1323 1.020922e-04  0.3951887  0.7422776 2.323143e-02 2.011407e-05 0.8409222
## 1304 4.411617e-04 -0.5820634 -0.5953939 4.799668e-04 3.546911e-04 0.8326414
## 434  4.557211e-04  0.6265816  0.5329485 1.707055e-04 1.388701e-03 0.8225804
## 889  1.351546e-10  0.5016305  0.6445158 2.787695e-07 4.059385e-11 0.8167213
## 902  4.750649e-06  0.4994745  0.6425163 1.634559e-04 1.235941e-06 0.8138194
## 61   2.239946e-04  0.4324436  0.6832227 9.487021e-03 4.153842e-05 0.8085794
## 779  3.993373e-08 -0.5351162 -0.6018064 9.472041e-07 3.514198e-08 0.8053076
## 925  1.324318e-08  0.5765444  0.5445837 2.734382e-08 1.526342e-07 0.7930794
## 1105 2.940561e-03  0.5869870  0.5268860 1.306677e-03 3.910265e-03 0.7887729
##               SD p.adjustMANOVA
## 513  0.029594438   8.306636e-06
## 686  0.029594438   8.306636e-06
## 968  0.035192627   7.275452e-05
## 1330 0.088570149   4.809151e-05
## 1331 0.088570149   4.809151e-05
## 1278 0.056232849   1.673936e-09
## 345  0.045595366   1.762868e-04
## 765  0.045595366   1.762868e-04
## 505  0.099963012   1.946630e-04
## 1306 0.182431831   7.160573e-04
## 1342 0.009831820   9.806101e-04
## 1323 0.245428911   4.956164e-04
## 1304 0.009426107   1.805716e-03
## 434  0.066208624   1.843169e-03
## 889  0.101035109   3.019930e-09
## 902  0.101145776   3.444221e-05
## 61   0.177327607   9.880410e-04
## 779  0.047157149   5.284434e-07
## 925  0.022599652   1.962005e-07
## 1105 0.042497798   9.088401e-03

unlink("t0_v_pod_lohi_mitch_eff.html")
capture.output(
    mitch_report(res, "t0_v_pod_lohi_mitch_eff.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

## Dataset saved as " /tmp/RtmpmQpMeM/t0_v_pod_lohi_mitch_eff.rds ".

## 
## 
## processing file: mitch.Rmd

## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md

## 
## Output created: /tmp/RtmpmQpMeM/mitch_report.html

# prioritisation by discordance
res <- mitch_calc(y, genesets, priority="SD",resrows=50)

## Note: Prioritisation by SD after selecting sets with 
##             p.adjustMANOVA<=0.05.

head(res$enrichment_result,20)

##                                                         set setSize
## 122                            CD22 mediated BCR regulation      58
## 1310                  VLDLR internalisation and degradation      11
## 1057                         Scavenging of heme from plasma      70
## 189       Classical antibody-mediated complement activation      68
## 217                        Creation of C4 and C2 activators      70
## 847                           Pre-NOTCH Processing in Golgi      18
## 1360                   tRNA processing in the mitochondrion      32
## 1017          Role of LAT2/NTAL/LAB on calcium mobilization      73
## 755                                 Nucleobase biosynthesis      13
## 362                                         FCGR activation      75
## 533                        Initial triggering of complement      77
## 871        Purine ribonucleoside monophosphate biosynthesis      10
## 691                            N-Glycan antennae elongation      13
## 637                    Metabolism of amine-derived hormones      10
## 108    Binding and Uptake of Ligands by Scavenger Receptors      91
## 806     Pausing and recovery of Tat-mediated HIV elongation      30
## 1218        Tat-mediated HIV elongation arrest and recovery      30
## 186  Class C/3 (Metabotropic glutamate/pheromone receptors)      11
## 971         Regulation of TP53 Activity through Acetylation      29
## 332                                    ERKs are inactivated      13
##           pMANOVA   s.low.CRP  s.high.CRP    p.low.CRP   p.high.CRP    s.dist
## 122  1.546500e-15  0.19671294 -0.34782419 9.559860e-03 4.588960e-06 0.3995969
## 1310 1.280994e-04  0.09540504  0.63485932 5.837679e-01 2.659345e-04 0.6419879
## 1057 2.156205e-17  0.25037432 -0.28012830 2.915726e-04 5.049103e-05 0.3757115
## 189  7.313929e-15  0.16106580 -0.32812259 2.162847e-02 2.868729e-06 0.3655224
## 217  4.098658e-15  0.17406688 -0.31430580 1.178960e-02 5.413165e-06 0.3592874
## 847  2.798792e-05  0.04791816  0.51934807 7.248705e-01 1.360539e-04 0.5215540
## 1360 1.762783e-07 -0.40487261  0.06653565 7.356381e-05 5.147939e-01 0.4103033
## 1017 4.307862e-14  0.22984813 -0.23564585 6.840450e-04 4.984687e-04 0.3291795
## 755  1.934385e-03  0.04726287 -0.41566367 7.679557e-01 9.456809e-03 0.4183420
## 362  3.316829e-14  0.14305593 -0.31083751 3.218818e-02 3.235201e-06 0.3421768
## 533  3.368773e-14  0.15802967 -0.29232441 1.650559e-02 9.177028e-06 0.3323055
## 871  1.670738e-02  0.11325605 -0.33231747 5.351586e-01 6.880217e-02 0.3510867
## 691  4.127884e-03 -0.37812811  0.06089908 1.824009e-02 7.038136e-01 0.3830007
## 637  1.802667e-02  0.04661120 -0.37761989 7.985488e-01 3.865802e-02 0.3804857
## 108  9.227458e-15  0.27979673 -0.14230854 3.949152e-06 1.895915e-02 0.3139075
## 806  2.004984e-05 -0.11793456  0.30277596 2.635703e-01 4.098493e-03 0.3249336
## 1218 2.004984e-05 -0.11793456  0.30277596 2.635703e-01 4.098493e-03 0.3249336
## 186  1.195640e-02  0.01671320 -0.39788291 9.235367e-01 2.230803e-02 0.3982338
## 971  8.634357e-06 -0.39999516  0.01328816 1.924198e-04 9.014320e-01 0.4002158
## 332  1.092471e-03  0.12135585  0.53101053 4.486918e-01 9.151715e-04 0.5447012
##             SD p.adjustMANOVA
## 122  0.3850459   9.581269e-14
## 1310 0.3814518   5.999981e-04
## 1057 0.3751220   1.728769e-15
## 189  0.3459084   3.560316e-13
## 217  0.3453316   2.428901e-13
## 847  0.3333513   1.609601e-04
## 1360 0.3333360   2.053567e-06
## 1017 0.3291539   1.779278e-12
## 755  0.3273385   6.446375e-03
## 362  0.3209511   1.434887e-12
## 533  0.3184484   1.434887e-12
## 871  0.3150681   3.808054e-02
## 691  0.3104391   1.223110e-02
## 637  0.2999767   4.008214e-02
## 108  0.2984735   4.336905e-13
## 806  0.2974873   1.193360e-04
## 1218 0.2974873   1.193360e-04
## 186  0.2931637   2.936320e-02
## 971  0.2922354   5.768936e-05
## 332  0.2896696   3.939254e-03

unlink("t0_v_pod_lohi_mitch_sd.html")
capture.output(
    mitch_report(res, "t0_v_pod_lohi_mitch_sd.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

## Dataset saved as " /tmp/RtmpmQpMeM/t0_v_pod_lohi_mitch_sd.rds ".

## 
## 
## processing file: mitch.Rmd

## output file: /mnt/bfx6/bfx/bain/inflam/rna/dge/mitch.knit.md

## 
## Output created: /tmp/RtmpmQpMeM/mitch_report.html

One dimensional enrichment analysis

Not correcting for blood compostion changes.

# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100) 
head(res$enrichment_result,20)

##                                                 set setSize        pANOVA
## 737                        Neutrophil degranulation     457 1.319240e-103
## 537                            Innate Immune System     967  5.848762e-84
## 520                                   Immune System    1893  3.240735e-46
## 632                            Membrane Trafficking     559  5.327421e-36
## 1314                     Vesicle-mediated transport     650  1.642242e-31
## 1071                            Signal Transduction    1894  7.041601e-28
## 497                                      Hemostasis     549  9.700746e-23
## 635                                      Metabolism    1772  1.645387e-22
## 830  Platelet activation, signaling and aggregation     221  1.353176e-20
## 1133         Signaling by Receptor Tyrosine Kinases     414  1.678123e-20
## 1104                      Signaling by Interleukins     386  1.710343e-19
## 650                          Metabolism of proteins    1718  2.214499e-19
## 842         Post-translational protein modification    1193  6.814624e-19
## 1011   Response to elevated platelet cytosolic Ca2+     110  1.253521e-16
## 832                          Platelet degranulation     106  5.814015e-16
## 1247                    Toll-like Receptor Cascades     143  2.293691e-15
## 284                                         Disease    1355  2.716874e-15
## 86                Asparagine N-linked glycosylation     269  5.316672e-15
## 644                            Metabolism of lipids     626  6.696163e-15
## 1290                   Transport of small molecules     561  1.571805e-14
##         s.dist p.adjustANOVA
## 737  0.5868303 1.802082e-100
## 537  0.3671208  3.994704e-81
## 520  0.1977009  1.475615e-43
## 632  0.3093383  1.819314e-33
## 1314 0.2680762  4.486606e-29
## 1071 0.1517126  1.603138e-25
## 497  0.2446828  1.893031e-20
## 635  0.1395073  2.809498e-20
## 830  0.3628343  2.053820e-18
## 1133 0.2656301  2.292316e-18
## 1104 0.2675063  2.123935e-17
## 650  0.1305055  2.520838e-17
## 842  0.1524833  7.160598e-17
## 1011 0.4565015  1.223079e-14
## 832  0.4546295  5.294630e-14
## 1247 0.3835923  1.958239e-13
## 284  0.1278957  2.183088e-13
## 86   0.2767766  4.034764e-13
## 644  0.1822696  4.814189e-13
## 1290 0.1895711  1.073543e-12

unlink("t0_v_pod_mitch.html")
capture.output(
    mitch_report(res, "t0_v_pod_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)

##                                                                               set
## 735                                                      Neutrophil degranulation
## 1355                                   rRNA processing in the nucleus and cytosol
## 1353                                                              rRNA processing
## 625                 Major pathway of rRNA processing in the nucleolus and cytosol
## 535                                                          Innate Immune System
## 1268                                                                  Translation
## 635                                                             Metabolism of RNA
## 392                                      Formation of a pool of free 40S subunits
## 350                                             Eukaryotic Translation Elongation
## 807                                                      Peptide chain elongation
## 1060                                                     Selenocysteine synthesis
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 352                                            Eukaryotic Translation Termination
## 1315                                                       Viral mRNA Translation
## 142                                          Cap-dependent Translation Initiation
## 351                                             Eukaryotic Translation Initiation
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 527                             Influenza Viral RNA Transcription and Replication
##      setSize       pANOVA     s.dist p.adjustANOVA
## 735      457 4.745120e-61  0.4485959  6.467599e-58
## 1355     190 7.192148e-52 -0.6358614  4.901449e-49
## 1353     217 2.080594e-51 -0.5926305  9.452831e-49
## 625      180 6.055674e-50 -0.6405213  2.063471e-47
## 535      966 3.101601e-44  0.2645611  8.454963e-42
## 1268     295 9.419052e-40 -0.4457176  2.139695e-37
## 635      685 1.652188e-39 -0.2942651  3.217046e-37
## 392      100 4.046179e-38 -0.7455910  6.893677e-36
## 350       93 1.440793e-37 -0.7671592  2.182002e-35
## 807       88 5.055188e-36 -0.7714444  6.890221e-34
## 1060      92 1.357145e-35 -0.7498441  1.681626e-33
## 588      110 7.548629e-35 -0.6784823  8.573985e-33
## 434      111 1.109612e-34 -0.6737341  1.163385e-32
## 352       92 3.299311e-34 -0.7344070  3.212114e-32
## 1315      88 6.764758e-34 -0.7472455  6.146911e-32
## 142      118 2.646671e-33 -0.6398047  2.122008e-31
## 351      118 2.646671e-33 -0.6398047  2.122008e-31
## 743       94 1.583940e-32 -0.7076135  1.199395e-30
## 1007     100 3.169485e-31 -0.6715684  2.273688e-29
## 527      135 5.439828e-31 -0.5761706  3.707243e-29

unlink("pod_crp_mitch.html")
capture.output(
    mitch_report(res, "pod_crp_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)

##                                                                               set
## 353                                             Eukaryotic Translation Elongation
## 395                                      Formation of a pool of free 40S subunits
## 810                                                      Peptide chain elongation
## 591             L13a-mediated translational silencing of Ceruloplasmin expression
## 1318                                                       Viral mRNA Translation
## 355                                            Eukaryotic Translation Termination
## 636                                                                    Metabolism
## 1063                                                     Selenocysteine synthesis
## 437                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 145                                          Cap-dependent Translation Initiation
## 354                                             Eukaryotic Translation Initiation
## 738                                                      Neutrophil degranulation
## 746  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1010                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 1043                  SRP-dependent cotranslational protein targeting to membrane
## 285                                                                       Disease
## 745     Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 747                                                 Nonsense-Mediated Decay (NMD)
## 983                                   Regulation of expression of SLITs and ROBOs
## 1062                                                  Selenoamino acid metabolism
##      setSize       pANOVA     s.dist p.adjustANOVA
## 353       93 7.235821e-42 -0.8116867  9.891367e-39
## 395      100 1.055679e-40 -0.7714938  7.215568e-38
## 810       88 6.985114e-40 -0.8135221  3.182884e-37
## 591      110 1.096103e-37 -0.7068229  3.635947e-35
## 1318      88 1.329900e-37 -0.7889383  3.635947e-35
## 355       92 5.983625e-37 -0.7646378  1.282281e-34
## 636     1771 6.566177e-37 -0.1812196  1.282281e-34
## 1063      92 2.245441e-36 -0.7584028  3.579908e-34
## 437      111 2.356925e-36 -0.6905391  3.579908e-34
## 145      118 9.577584e-36 -0.6639584  1.190233e-33
## 354      118 9.577584e-36 -0.6639584  1.190233e-33
## 738      458 2.062828e-34 -0.3329542  2.349905e-32
## 746       94 3.773216e-34 -0.7259279  3.967682e-32
## 1010     100 6.511606e-31 -0.6680040  6.358118e-29
## 1043     111 7.472133e-30 -0.6226349  6.809604e-28
## 285     1354 4.559478e-29 -0.1809621  3.895504e-27
## 745      114 1.578986e-28 -0.5998573  1.199152e-26
## 747      114 1.578986e-28 -0.5998573  1.199152e-26
## 983      162 5.020894e-28 -0.4990337  3.612401e-26
## 1062     114 6.775910e-27 -0.5814016  4.631335e-25

unlink("t0_crp_mitch.html")
capture.output(
    mitch_report(res, "t0_crp_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


## prioritise by effect size

# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)

##                                                                 set setSize
## 514                                       IRAK4 deficiency (TLR2/4)      10
## 687                                       MyD88 deficiency (TLR2/4)      10
## 1333 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism      12
## 1334                          alpha-linolenic acid (ALA) metabolism      12
## 969                          Regulation of TLR by endogenous ligand      11
## 1280               Translocation of ZAP-70 to Immunological synapse      24
## 506                               Hyaluronan uptake and degradation      12
## 1308                          Uptake and function of anthrax toxins      10
## 346          Erythrocytes take up carbon dioxide and release oxygen      11
## 766                                 O2/CO2 exchange in erythrocytes      11
## 1326         WNT5A-dependent internalization of FZD2, FZD5 and ROR2      11
## 1345                p130Cas linkage to MAPK signaling for integrins      11
## 61             Advanced glycosylation endproduct receptor signaling      12
## 1306                                               Unwinding of DNA      12
## 903                               RNA Polymerase I Promoter Opening      19
## 890                            RHO GTPases Activate WASPs and WAVEs      35
## 926                            ROS and RNS production in phagocytes      31
## 737                                        Neutrophil degranulation     457
## 435       GRB2:SOS provides linkage to MAPK signaling for Integrins      12
## 780                                                  PD-1 signaling      28
##             pANOVA     s.dist p.adjustANOVA
## 514   8.367911e-07  0.8994748  1.048676e-05
## 687   8.367911e-07  0.8994748  1.048676e-05
## 1333  2.461377e-06  0.7852782  2.626751e-05
## 1334  2.461377e-06  0.7852782  2.626751e-05
## 969   1.930406e-05  0.7438719  1.448865e-04
## 1280  2.809953e-10 -0.7436252  8.344338e-09
## 506   8.850181e-06  0.7406638  7.239130e-05
## 1308  1.141313e-04  0.7045257  6.523155e-04
## 346   9.462990e-05  0.6796841  5.533223e-04
## 766   9.462990e-05  0.6796841  5.533223e-04
## 1326  1.165844e-04  0.6708490  6.580757e-04
## 1345  1.324011e-04  0.6654100  7.219060e-04
## 61    9.478582e-05  0.6506965  5.533223e-04
## 1306  1.323109e-04 -0.6371242  7.219060e-04
## 903   3.287422e-06  0.6163218  3.277824e-05
## 890   4.567968e-10  0.6085563  1.327626e-08
## 926   8.233487e-09  0.5978890  1.757335e-07
## 737  1.319240e-103  0.5868303 1.802082e-100
## 435   4.971813e-04  0.5804902  2.286699e-03
## 780   1.109028e-07 -0.5793553  1.870286e-06

unlink("t0_v_pod_mitche.html")
capture.output(
    mitch_report(res, "t0_v_pod_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)

##                                                                               set
## 512                                                     IRAK4 deficiency (TLR2/4)
## 685                                                     MyD88 deficiency (TLR2/4)
## 807                                                      Peptide chain elongation
## 350                                             Eukaryotic Translation Elongation
## 1060                                                     Selenocysteine synthesis
## 1315                                                       Viral mRNA Translation
## 392                                      Formation of a pool of free 40S subunits
## 352                                            Eukaryotic Translation Termination
## 1330               alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1331                                        alpha-linolenic acid (ALA) metabolism
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1278                             Translocation of ZAP-70 to Immunological synapse
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 398           Formation of the ternary complex, and subsequently, the 43S complex
## 1323                       WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 967                                        Regulation of TLR by endogenous ligand
## 625                 Major pathway of rRNA processing in the nucleolus and cytosol
## 142                                          Cap-dependent Translation Initiation
##      setSize       pANOVA     s.dist p.adjustANOVA
## 512       10 2.191838e-06  0.8644929  3.111953e-05
## 685       10 2.191838e-06  0.8644929  3.111953e-05
## 807       88 5.055188e-36 -0.7714444  6.890221e-34
## 350       93 1.440793e-37 -0.7671592  2.182002e-35
## 1060      92 1.357145e-35 -0.7498441  1.681626e-33
## 1315      88 6.764758e-34 -0.7472455  6.146911e-32
## 392      100 4.046179e-38 -0.7455910  6.893677e-36
## 352       92 3.299311e-34 -0.7344070  3.212114e-32
## 1330      12 1.516159e-05  0.7211404  1.796977e-04
## 1331      12 1.516159e-05  0.7211404  1.796977e-04
## 743       94 1.583940e-32 -0.7076135  1.199395e-30
## 1278      24 1.982172e-09 -0.7071589  5.097548e-08
## 588      110 7.548629e-35 -0.6784823  8.573985e-33
## 434      111 1.109612e-34 -0.6737341  1.163385e-32
## 1007     100 3.169485e-31 -0.6715684  2.273688e-29
## 398       51 1.916084e-16 -0.6654346  8.424589e-15
## 1323      11 1.548886e-04  0.6586494  1.336159e-03
## 967       11 2.337332e-04  0.6406105  1.873990e-03
## 625      180 6.055674e-50 -0.6405213  2.063471e-47
## 142      118 2.646671e-33 -0.6398047  2.122008e-31

unlink("pod_crp_mitche.html")
capture.output(
    mitch_report(res, "pod_crp_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(y, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)

##                                                                               set
## 810                                                      Peptide chain elongation
## 353                                             Eukaryotic Translation Elongation
## 1318                                                       Viral mRNA Translation
## 395                                      Formation of a pool of free 40S subunits
## 355                                            Eukaryotic Translation Termination
## 1063                                                     Selenocysteine synthesis
## 746  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 591             L13a-mediated translational silencing of Ceruloplasmin expression
## 437                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1313                                        VLDLR internalisation and degradation
## 1010                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 145                                          Cap-dependent Translation Initiation
## 354                                             Eukaryotic Translation Initiation
## 401           Formation of the ternary complex, and subsequently, the 43S complex
## 585                                                          Josephin domain DUBs
## 1043                  SRP-dependent cotranslational protein targeting to membrane
## 387                                     Formation of ATP by chemiosmotic coupling
## 1328                                      WNT5A-dependent internalization of FZD4
## 745     Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 747                                                 Nonsense-Mediated Decay (NMD)
##      setSize       pANOVA     s.dist p.adjustANOVA
## 810       88 6.985114e-40 -0.8135221  3.182884e-37
## 353       93 7.235821e-42 -0.8116867  9.891367e-39
## 1318      88 1.329900e-37 -0.7889383  3.635947e-35
## 395      100 1.055679e-40 -0.7714938  7.215568e-38
## 355       92 5.983625e-37 -0.7646378  1.282281e-34
## 1063      92 2.245441e-36 -0.7584028  3.579908e-34
## 746       94 3.773216e-34 -0.7259279  3.967682e-32
## 591      110 1.096103e-37 -0.7068229  3.635947e-35
## 437      111 2.356925e-36 -0.6905391  3.579908e-34
## 1313      11 8.083211e-05 -0.6862890  8.743742e-04
## 1010     100 6.511606e-31 -0.6680040  6.358118e-29
## 145      118 9.577584e-36 -0.6639584  1.190233e-33
## 354      118 9.577584e-36 -0.6639584  1.190233e-33
## 401       51 3.041275e-15 -0.6381277  1.014006e-13
## 585       10 5.527420e-04 -0.6306684  4.244934e-03
## 1043     111 7.472133e-30 -0.6226349  6.809604e-28
## 387       18 7.323691e-06 -0.6103471  1.220913e-04
## 1328      13 1.508432e-04 -0.6069482  1.472876e-03
## 745      114 1.578986e-28 -0.5998573  1.199152e-26
## 747      114 1.578986e-28 -0.5998573  1.199152e-26

unlink("t0_crp_mitche.html")
capture.output(
    mitch_report(res, "t0_crp_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# 3D analysys significance
z <- list("t0_v_pod"=t0_v_pod, "pod_crp"=pod_crp, "t0_crp"=t0_crp)
zz <- mitch_import(z, DEtype="DESeq2", geneTable=gt)
res <- mitch_calc(zz, genesets, priority="significance",resrows=100)
head(res$enrichment_result,20)

##                                                                               set
## 735                                                      Neutrophil degranulation
## 535                                                          Innate Immune System
## 1267                                                                  Translation
## 392                                      Formation of a pool of free 40S subunits
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 350                                             Eukaryotic Translation Elongation
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 142                                          Cap-dependent Translation Initiation
## 351                                             Eukaryotic Translation Initiation
## 807                                                      Peptide chain elongation
## 1352                                                              rRNA processing
## 1354                                   rRNA processing in the nucleus and cytosol
## 1314                                                       Viral mRNA Translation
## 352                                            Eukaryotic Translation Termination
## 1060                                                     Selenocysteine synthesis
## 625                 Major pathway of rRNA processing in the nucleolus and cytosol
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 635                                                             Metabolism of RNA
## 1040                  SRP-dependent cotranslational protein targeting to membrane
##      setSize       pMANOVA  s.t0_v_pod  s.pod_crp   s.t0_crp    p.t0_v_pod
## 735      457 4.707175e-120  0.58657151  0.4537112 -0.3280584 1.730727e-103
## 535      965  1.590823e-93  0.36728929  0.2699260 -0.1920147  8.532771e-84
## 1267     295  5.549362e-85 -0.05584812 -0.4411152 -0.3437255  9.896795e-02
## 392      100  9.213449e-83 -0.20789625 -0.7431070 -0.7691941  3.271811e-04
## 588      110  6.803565e-80 -0.14314838 -0.6756514 -0.7044555  9.492889e-03
## 350       93  8.412614e-79 -0.27289442 -0.7649026 -0.8094742  5.386406e-06
## 434      111  4.934964e-78 -0.14719204 -0.6708802 -0.6881875  7.382262e-03
## 142      118  3.478425e-76 -0.13160443 -0.6367128 -0.6614609  1.353731e-02
## 351      118  3.478425e-76 -0.13160443 -0.6367128 -0.6614609  1.353731e-02
## 807       88  9.118266e-75 -0.28134726 -0.7692423 -0.8113112  5.048100e-06
## 1352     217  8.404694e-73 -0.28892642 -0.5892016 -0.3942535  2.159088e-13
## 1354     190  2.793641e-71 -0.30718205 -0.6328397 -0.3926592  2.765619e-13
## 1314      88  9.812276e-71 -0.26210705 -0.7449685 -0.7866499  2.131873e-05
## 352       92  4.200639e-70 -0.26476727 -0.7319569 -0.7623451  1.134012e-05
## 1060      92  9.101950e-70 -0.29852322 -0.7475202 -0.7560906  7.425644e-07
## 625      180  2.038620e-68 -0.30640536 -0.6374809 -0.3909947  1.303462e-12
## 743       94  3.193002e-66 -0.24433952 -0.7051462 -0.7235822  4.227299e-05
## 1007     100  2.150801e-62 -0.22168319 -0.6687223 -0.6656059  1.275668e-04
## 635      685  1.757909e-61 -0.08857003 -0.2889897 -0.1753195  7.791790e-05
## 1040     111  8.280906e-60 -0.13387379 -0.5828486 -0.6201011  1.482425e-02
##         p.pod_crp     p.t0_crp    s.dist        SD p.adjustMANOVA
## 735  2.089819e-62 2.293735e-33 0.8108898 0.4941935  6.411172e-117
## 535  6.442346e-46 5.142292e-24 0.4946020 0.2988003   1.083351e-90
## 1267 5.789266e-39 2.801611e-24 0.5620044 0.2003284   2.519411e-82
## 392  7.087404e-38 1.808414e-40 1.0895359 0.3168034   3.137179e-80
## 588  1.430624e-34 1.918844e-37 0.9865363 0.3160841   1.853291e-77
## 350  2.345639e-37 1.199836e-41 1.1466455 0.2977629   1.909663e-76
## 434  2.116614e-34 4.079941e-36 0.9722899 0.3074695   9.602030e-76
## 142  5.362261e-33 1.738052e-35 0.9274985 0.2990248   5.264016e-74
## 351  5.362261e-33 1.738052e-35 0.9274985 0.2990248   5.264016e-74
## 807  7.948652e-36 1.127988e-39 1.1528729 0.2945825   1.241908e-72
## 1352 7.881898e-51 1.254523e-23 0.7655539 0.1523503   1.040654e-70
## 1354 2.169626e-51 9.466998e-21 0.8056228 0.1688421   3.170782e-69
## 1314 1.064541e-33 2.152672e-37 1.1146731 0.2915584   1.028025e-68
## 352  5.446050e-34 9.761602e-37 1.0895103 0.2789185   4.086622e-68
## 1060 2.205048e-35 3.664096e-36 1.1043439 0.2617377   8.264571e-68
## 625  1.748041e-49 1.356738e-19 0.8081726 0.1720087   1.735375e-66
## 743  2.603588e-32 6.137300e-34 1.0394730 0.2715254   2.558158e-64
## 1007 5.652620e-31 1.060551e-30 0.9692081 0.2572033   1.627439e-60
## 635  3.785370e-38 5.101082e-15 0.3494233 0.1005107   1.260143e-59
## 1040 2.361639e-26 1.274256e-29 0.8614871 0.2706114   5.639297e-58

unlink("3d_mitch.html")
capture.output(
    mitch_report(res, "3d_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# 3D analysys effect size
res <- mitch_calc(zz, genesets, priority="effect",resrows=100)
head(res$enrichment_result,20)

##                                                                               set
## 512                                                     IRAK4 deficiency (TLR2/4)
## 685                                                     MyD88 deficiency (TLR2/4)
## 807                                                      Peptide chain elongation
## 350                                             Eukaryotic Translation Elongation
## 1314                                                       Viral mRNA Translation
## 1060                                                     Selenocysteine synthesis
## 392                                      Formation of a pool of free 40S subunits
## 352                                            Eukaryotic Translation Termination
## 1329               alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1330                                        alpha-linolenic acid (ALA) metabolism
## 1322                       WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 504                                             Hyaluronan uptake and degradation
## 743  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1277                             Translocation of ZAP-70 to Immunological synapse
## 1305                                        Uptake and function of anthrax toxins
## 967                                        Regulation of TLR by endogenous ligand
## 1309                                        VLDLR internalisation and degradation
## 588             L13a-mediated translational silencing of Ceruloplasmin expression
## 434                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1007                          Response of EIF2AK4 (GCN2) to amino acid deficiency
##      setSize      pMANOVA s.t0_v_pod  s.pod_crp    s.t0_crp   p.t0_v_pod
## 512       10 7.496846e-06  0.8994993  0.8681297 -0.05981002 8.362207e-07
## 685       10 7.496846e-06  0.8994993  0.8681297 -0.05981002 8.362207e-07
## 807       88 9.118266e-75 -0.2813473 -0.7692423 -0.81131123 5.048100e-06
## 350       93 8.412614e-79 -0.2728944 -0.7649026 -0.80947416 5.386406e-06
## 1314      88 9.812276e-71 -0.2621071 -0.7449685 -0.78664990 2.131873e-05
## 1060      92 9.101950e-70 -0.2985232 -0.7475202 -0.75609061 7.425644e-07
## 392      100 9.213449e-83 -0.2078962 -0.7431070 -0.76919412 3.271811e-04
## 352       92 4.200639e-70 -0.2647673 -0.7319569 -0.76234512 1.134012e-05
## 1329      12 3.031259e-05  0.7850157  0.7256774 -0.13326158 2.480595e-06
## 1330      12 3.031259e-05  0.7850157  0.7256774 -0.13326158 2.480595e-06
## 1322      11 2.038891e-05  0.6698260  0.6642957 -0.51599949 1.194226e-04
## 504       10 1.402370e-04  0.7290627  0.5743765 -0.47099340 6.529266e-05
## 743       94 3.193002e-66 -0.2443395 -0.7051462 -0.72358217 4.227299e-05
## 1277      24 1.274238e-09 -0.7433199 -0.7045621 -0.08165161 2.857833e-10
## 1305      10 3.139954e-04  0.7035422  0.6166487 -0.40281690 1.166786e-04
## 967       11 2.542352e-04  0.7434466  0.6445144 -0.20197388 1.951771e-05
## 1309      11 9.652365e-06  0.4681329  0.5392096 -0.68189901 7.175299e-03
## 588      110 6.803565e-80 -0.1431484 -0.6756514 -0.70445549 9.492889e-03
## 434      111 4.934964e-78 -0.1471920 -0.6708802 -0.68818748 7.382262e-03
## 1007     100 2.150801e-62 -0.2216832 -0.6687223 -0.66560594 1.275668e-04
##         p.pod_crp     p.t0_crp    s.dist        SD p.adjustMANOVA
## 512  1.986341e-06 7.432915e-01 1.2515292 0.5450276   4.326570e-05
## 685  1.986341e-06 7.432915e-01 1.2515292 0.5450276   4.326570e-05
## 807  7.948652e-36 1.127988e-39 1.1528729 0.2945825   1.241908e-72
## 350  2.345639e-37 1.199836e-41 1.1466455 0.2977629   1.909663e-76
## 1314 1.064541e-33 2.152672e-37 1.1146731 0.2915584   1.028025e-68
## 1060 2.205048e-35 3.664096e-36 1.1043439 0.2617377   8.264571e-68
## 392  7.087404e-38 1.808414e-40 1.0895359 0.3168034   3.137179e-80
## 352  5.446050e-34 9.761602e-37 1.0895103 0.2789185   4.086622e-68
## 1329 1.339490e-05 4.241140e-01 1.0773189 0.5138953   1.413895e-04
## 1330 1.339490e-05 4.241140e-01 1.0773189 0.5138953   1.413895e-04
## 1322 1.358892e-04 3.041029e-03 1.0752726 0.6830458   1.002516e-04
## 504  1.658407e-03 9.903414e-03 1.0408053 0.6527967   5.472859e-04
## 743  2.603588e-32 6.137300e-34 1.0394730 0.2715254   2.558158e-64
## 1277 2.270074e-09 4.886791e-01 1.0274236 0.3713320   1.606956e-08
## 1305 7.328448e-04 2.739804e-02 1.0185719 0.6152088   1.099387e-03
## 967  2.140099e-04 2.460881e-01 1.0044427 0.5196393   9.075529e-04
## 1309 1.955607e-03 8.977778e-05 0.9873610 0.6854113   5.387918e-05
## 588  1.430624e-34 1.918844e-37 0.9865363 0.3160841   1.853291e-77
## 434  2.116614e-34 4.079941e-36 0.9722899 0.3074695   9.602030e-76
## 1007 5.652620e-31 1.060551e-30 0.9692081 0.2572033   1.627439e-60

unlink("3d_mitche.html")
capture.output(
    mitch_report(res, "3d_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

RELIEF: RNA expression analysis

Mark Ziemann

2020-11-13