Introduction
Packages
Import read counts
curate the samplesheet
Blood cell composition correction
MDS analysis
DE analysis baseline versus post-op
DE analysis post-op low versus high CRP
DE analysis baseline low versus high CRP
- One dimensional enrichment analysis

Introduction

Packages

suppressPackageStartupMessages({
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
})

Import read counts

tmp <- read.table("3col.tsv",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)

xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)

curate the samplesheet

Here we're manipulating the sample sheet. Making sure that the samplesheet matches the names of the data sets and vice versa. Ensuring that each patient has a baseline and postop sample.

samplesheet<-read.csv("samplesheet.tsv",sep="\t",stringsAsFactors=FALSE)
mysamples <- colnames(xx)
baseline <- mysamples[grep("T0",mysamples)]
postop <- mysamples[grep("POD",mysamples)]
baseline <- sapply(strsplit(baseline,"-"),"[[",1)
postop <- sapply(strsplit(postop,"-"),"[[",1)
mysamples <- intersect(baseline,postop)
samplesheet2 <- samplesheet
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
samplesheet3 <- samplesheet2
rownames(samplesheet2) <- paste(samplesheet2$RG_number,"T0",sep="-")
rownames(samplesheet3) <- paste(samplesheet2$RG_number,"POD",sep="-")
samplesheet2$timepoint <- 0       
samplesheet3$timepoint <- 1
samplesheet <- rbind(samplesheet2,samplesheet3)

# filter the data matrix
xx <- xx[,which(colnames(xx) %in% rownames(samplesheet))]
samplesheet <- samplesheet[which(rownames(samplesheet) %in% colnames(xx)),]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

Blood cell composition correction

Here the idea is to use the concept of Monaco et al, 2019 to deconvolute blood RNA-seq signature.

DOI:https://doi.org/10.1016/j.celrep.2019.01.041

xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100

## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps

## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm' failed
## to converge in 100 steps

dec <- t(dec/colSums(dec)*100)

dec <- signif(dec, 3)

cs <- colSums(dec)
order(-cs)

##  [1]  1  4  8 13  2  6  3 14 10 12  5 16 17  9 15 11  7

cv <- apply(dec,2,function(x) {sd(x)/mean(x)} )
cv[order(-cv)]

##    T.CD8.Naive    T.CD4.Naive           pDCs    Monocytes.C        B.Naive 
##      12.309866       6.935020       6.768844       5.828486       5.773624 
##           MAIT Monocytes.NC.I             NK   Basophils.LD       B.Memory 
##       5.611708       5.598135       5.555667       5.480702       5.163804 
##   T.CD8.Memory Neutrophils.LD   T.CD4.Memory   T.gd.non.Vd2           mDCs 
##       3.524342       2.972512      -5.555388      -6.493188      -7.442438 
##       T.gd.Vd2   Plasmablasts 
##      -7.869309     -18.095614

plot(cs,cv,main="cell abundance versus SD")
text(cs,cv+0.5,label=names(cs))

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2( dec, col=colfunc(25),scale="row",
 trace="none",margins = c(10,15), cexRow=.7, cexCol=.8,  main="cell type abundances")

samplesheet <- cbind(samplesheet,dec)

dec_t0_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==0 )] ),])
dec_po_lo <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==0 & samplesheet$timepoint==1 )] ),])
dec_t0_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==0 )] ),])
dec_po_hi <- as.data.frame(dec[which(rownames(dec) %in% rownames(samplesheet)[which(samplesheet$CrpGroup==1 & samplesheet$timepoint==1 )] ),])


boxplot(dec_t0_lo$Monocytes.C , dec_t0_hi$Monocytes.C, dec_po_lo$Monocytes.C,  dec_po_hi$Monocytes.C ,
  ylim=c(0,100),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.C")

t.test(dec_t0_hi$Monocytes.C , dec_po_hi$Monocytes.C)

## 
##  Welch Two Sample t-test
## 
## data:  dec_t0_hi$Monocytes.C and dec_po_hi$Monocytes.C
## t = -2.7175, df = 25.773, p-value = 0.0116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -55.076844  -7.627109
## sample estimates:
## mean of x mean of y 
##  44.13850  75.49048

boxplot(dec_t0_lo$NK , dec_t0_hi$NK, dec_po_lo$NK,  dec_po_hi$NK ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="NK")

t.test(dec_t0_lo$NK , dec_po_lo$NK)

## 
##  Welch Two Sample t-test
## 
## data:  dec_t0_lo$NK and dec_po_lo$NK
## t = 2.4688, df = 47.994, p-value = 0.01717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4820947 4.7136093
## sample estimates:
## mean of x mean of y 
##  6.707104  4.109252

boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory,  dec_po_hi$T.CD8.Memory ,
  ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")

boxplot(dec_t0_lo$T.CD8.Memory , dec_t0_hi$T.CD8.Memory, dec_po_lo$T.CD8.Memory,  dec_po_hi$T.CD8.Memory ,
  ylim=c(0,40),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Memory")

boxplot(dec_t0_lo$T.CD4.Naive , dec_t0_hi$T.CD4.Naive, dec_po_lo$T.CD4.Naive,  dec_po_hi$T.CD4.Naive ,
  ylim=c(0,25),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Naive")

boxplot(dec_t0_lo$T.CD8.Naive , dec_t0_hi$T.CD8.Naive, dec_po_lo$T.CD8.Naive,  dec_po_hi$T.CD8.Naive ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD8.Naive")

boxplot(dec_t0_lo$B.Naive , dec_t0_hi$B.Naive, dec_po_lo$B.Naive,  dec_po_hi$B.Naive ,
  ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Naive")

boxplot(dec_t0_lo$T.CD4.Memory , dec_t0_hi$T.CD4.Memory, dec_po_lo$T.CD4.Memory,  dec_po_hi$T.CD4.Memory ,
  ylim=c(0,10),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.CD4.Memory")

boxplot(dec_t0_lo$MAIT , dec_t0_hi$MAIT, dec_po_lo$MAIT,  dec_po_hi$MAIT ,
  ylim=c(0,15),names=c("t0 lo","t0 hi","po lo","po hi"),main="MAIT")

boxplot(dec_t0_lo$T.gd.Vd2 , dec_t0_hi$T.gd.Vd2 , dec_po_lo$T.gd.Vd2 ,  dec_po_hi$T.gd.Vd2 ,
  ylim=c(0,5),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.Vd2")

boxplot(dec_t0_lo$Neutrophils.LD , dec_t0_hi$Neutrophils.LD , dec_po_lo$Neutrophils.LD ,  dec_po_hi$Neutrophils.LD ,
  ylim=c(0,50),names=c("t0 lo","t0 hi","po lo","po hi"),main="Neutrophils.LD")

boxplot(dec_t0_lo$T.gd.non.Vd2 , dec_t0_hi$T.gd.non.Vd2 , dec_po_lo$T.gd.non.Vd2 ,  dec_po_hi$T.gd.non.Vd2 ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="T.gd.non.Vd2")

boxplot(dec_t0_lo$Basophils.LD , dec_t0_hi$Basophils.LD , dec_po_lo$Basophils.LD ,  dec_po_hi$Basophils.LD ,
  ylim=c(0,4),names=c("t0 lo","t0 hi","po lo","po hi"),main="Basophils.LD")

boxplot(dec_t0_lo$Monocytes.NC.I , dec_t0_hi$Monocytes.NC.I , dec_po_lo$Monocytes.NC.I ,  dec_po_hi$Monocytes.NC.I ,
  ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="Monocytes.NC.I")

boxplot(dec_t0_lo$B.Memory , dec_t0_hi$B.Memory , dec_po_lo$B.Memory ,  dec_po_hi$B.Memory ,
  ylim=c(0,20),names=c("t0 lo","t0 hi","po lo","po hi"),main="B.Memory")

boxplot(dec_t0_lo$mDCs , dec_t0_hi$mDCs , dec_po_lo$mDCs ,  dec_po_hi$mDCs ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="mDCs")

boxplot(dec_t0_lo$pDCs , dec_t0_hi$pDCs , dec_po_lo$pDCs ,  dec_po_hi$pDCs ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="pDCs")

boxplot(dec_t0_lo$Plasmablasts , dec_t0_hi$Plasmablasts , dec_po_lo$Plasmablasts ,  dec_po_hi$Plasmablasts ,
  ylim=c(0,1),names=c("t0 lo","t0 hi","po lo","po hi"),main="Plasmablasts")

MDS analysis

plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='l')
text(cmdscale(dist(t(xx))), labels=colnames(xx),cex=1.3)

heatmap.2(cor(xx),trace="none",margins = c(10,10), main="pearson correlation heatmap")

heatmap.2(cor(xx,method="s"),trace="none",margins = c(10,10), main="spearman correlation heatmap")

DE analysis baseline versus post-op

# filter genes with fewer than 10 reads per sample
NAME = "t0_v_pod"

# no correction for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet,
  design = ~ Sex + age + timepoint )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                                baseMean log2FoldChange      lfcSE     stat
## ENSG00000087116.16 ADAMTS2    634.35845      4.5151731 0.35054106 12.88058
## ENSG00000108950.12 FAM20A     792.00129      2.5933863 0.21519289 12.05145
## ENSG00000170439.7 METTL7B     126.30501      3.7994825 0.32669823 11.62995
## ENSG00000132170.21 PPARG       70.86749      2.5437784 0.21970131 11.57835
## ENSG00000121316.11 PLBD1    10689.06659      1.4627439 0.12773015 11.45183
## ENSG00000112290.13 WASF1      113.84190      1.2704505 0.11958889 10.62348
## ENSG00000156414.19 TDRD9      595.16883      1.6598788 0.15732025 10.55095
## ENSG00000166033.13 HTRA1      131.84384      2.0268764 0.19324218 10.48879
## ENSG00000110013.13 SIAE       451.39075      0.8257222 0.07915764 10.43137
## ENSG00000163220.11 S100A9  114258.61851      1.5350647 0.14750800 10.40665
##                                  pvalue         padj
## ENSG00000087116.16 ADAMTS2 5.789681e-38 1.267245e-33
## ENSG00000108950.12 FAM20A  1.905681e-33 2.085577e-29
## ENSG00000170439.7 METTL7B  2.902842e-31 2.117913e-27
## ENSG00000132170.21 PPARG   5.305835e-31 2.903353e-27
## ENSG00000121316.11 PLBD1   2.302339e-30 1.007872e-26
## ENSG00000112290.13 WASF1   2.317491e-26 8.454208e-23
## ENSG00000156414.19 TDRD9   5.028360e-26 1.572296e-22
## ENSG00000166033.13 HTRA1   9.726683e-26 2.661221e-22
## ENSG00000110013.13 SIAE    1.783074e-25 4.336435e-22
## ENSG00000163220.11 S100A9  2.312053e-25 5.060621e-22

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
  xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

t0_v_pod <- dge
write.table(dge,file="t0_v_pod_rna.tsv",sep="\t",quote=FALSE)

# correct for cell types
y <- xx[which(rowSums(xx)/ncol(xx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, 
  design = ~ Sex + age + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + T.CD8.Naive +
  B.Naive + T.CD4.Memory + MAIT + T.gd.Vd2 + Neutrophils.LD + T.gd.non.Vd2 +
  Basophils.LD + Monocytes.NC.I + B.Memory + mDCs + pDCs + Plasmablasts + timepoint )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                               baseMean log2FoldChange      lfcSE      stat
## ENSG00000087116.16 ADAMTS2   634.35845      4.4583888 0.38198506 11.671631
## ENSG00000108950.12 FAM20A    792.00129      2.4139216 0.23631617 10.214797
## ENSG00000121316.11 PLBD1   10689.06659      1.3815172 0.14305201  9.657447
## ENSG00000132170.21 PPARG      70.86749      2.2946466 0.24099431  9.521580
## ENSG00000156414.19 TDRD9     595.16883      1.4892175 0.16054323  9.276115
## ENSG00000170439.7 METTL7B    126.30501      3.3287242 0.36128049  9.213684
## ENSG00000164125.15 GASK1B   3752.55250      0.9813449 0.10907350  8.997097
## ENSG00000110013.13 SIAE      451.39075      0.7805594 0.08685202  8.987234
## ENSG00000198053.11 SIRPA    7106.66137      0.7691877 0.08625109  8.918005
## ENSG00000163221.9 S100A12   8507.68558      1.8593362 0.20909153  8.892451
##                                  pvalue         padj
## ENSG00000087116.16 ADAMTS2 1.779774e-31 3.904824e-27
## ENSG00000108950.12 FAM20A  1.702354e-24 1.867483e-20
## ENSG00000121316.11 PLBD1   4.571125e-22 3.343016e-18
## ENSG00000132170.21 PPARG   1.705655e-21 9.355517e-18
## ENSG00000156414.19 TDRD9   1.757702e-20 7.712798e-17
## ENSG00000170439.7 METTL7B  3.151212e-20 1.152293e-16
## ENSG00000164125.15 GASK1B  2.317645e-19 6.953065e-16
## ENSG00000110013.13 SIAE    2.535302e-19 6.953065e-16
## ENSG00000198053.11 SIRPA   4.747613e-19 1.157363e-15
## ENSG00000163221.9 S100A12  5.977554e-19 1.311475e-15

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
  xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")  
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(samplesheet$timepoint)
colCols <- gsub("0","gray",colCols)
colCols <- gsub("1","black",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

#t0_v_pod <- dge

DE analysis post-op low versus high CRP

# filter genes with fewer than 10 reads per sample
NAME = "pod_crp"
ss <- subset(samplesheet,timepoint==1)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + CrpGroup )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                               baseMean log2FoldChange     lfcSE      stat
## ENSG00000135424.17 ITGA7      387.1708      2.2131744 0.2570353  8.610391
## ENSG00000108950.12 FAM20A    1327.9835      2.0134443 0.2649032  7.600680
## ENSG00000143365.19 RORC       146.5522     -1.4132086 0.2085281 -6.777064
## ENSG00000110079.18 MS4A4A     673.1434      1.5505614 0.2295571  6.754577
## ENSG00000123643.13 SLC36A1   1610.6451      0.9818768 0.1518341  6.466773
## ENSG00000281852.1 LINC00891   116.2132     -0.9573707 0.1488960 -6.429796
## ENSG00000018280.17 SLC11A1  17883.6005      1.1018384 0.1722874  6.395353
## ENSG00000014257.16 ACPP       863.6512      0.7083577 0.1121526  6.316019
## ENSG00000178562.18 CD28      1015.3327     -1.0429829 0.1694328 -6.155733
## ENSG00000121316.11 PLBD1    15404.6149      1.0790204 0.1756857  6.141765
##                                   pvalue         padj
## ENSG00000135424.17 ITGA7    7.281202e-18 1.574560e-13
## ENSG00000108950.12 FAM20A   2.945794e-14 3.185140e-10
## ENSG00000143365.19 RORC     1.226425e-11 7.744568e-08
## ENSG00000110079.18 MS4A4A   1.432521e-11 7.744568e-08
## ENSG00000123643.13 SLC36A1  1.001180e-10 4.330105e-07
## ENSG00000281852.1 LINC00891 1.277753e-10 4.605234e-07
## ENSG00000018280.17 SLC11A1  1.601768e-10 4.948320e-07
## ENSG00000014257.16 ACPP     2.683869e-10 7.254833e-07
## ENSG00000178562.18 CD28     7.473115e-10 1.764803e-06
## ENSG00000121316.11 PLBD1    8.160938e-10 1.764803e-06

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,  
  trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

pod_crp <- dge
write.table(dge,file="pod_crp_rna.tsv",sep="\t",quote=FALSE)

DE analysis baseline low versus high CRP

# filter genes with fewer than 10 reads per sample
NAME = "t0_crp"
ss <- subset(samplesheet,timepoint==0)
xxx <- xx[,colnames(xx) %in% rownames(ss)]
y <- xxx[which(rowSums(xxx)/ncol(xxx)>=(10)),]
dds <- DESeqDataSetFromMatrix(countData = y , colData = ss, design = ~ Sex + age + CrpGroup )

## converting counts to integer mode

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   it is generally a good idea to center and scale numeric variables in the design
##   to improve GLM convergence.

res <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]

##                               baseMean log2FoldChange     lfcSE      stat
## ENSG00000167680.16 SEMA6B    116.93734     -1.7545755 0.3476538 -5.046904
## ENSG00000165949.12 IFI27      67.17018      2.4903519 0.5349850  4.654994
## ENSG00000279358.1 AP002833.3  37.34539      0.6664108 0.1548064  4.304802
## ENSG00000198010.12 DLGAP2     15.39018     -2.9860007 0.7063975 -4.227083
## ENSG00000285755.2 AC132153.1  16.27930      0.7771681 0.1867644  4.161222
## ENSG00000112812.16 PRSS16     40.93576      0.7922314 0.1906324  4.155806
## ENSG00000169245.6 CXCL10      83.86172      1.4237493 0.3426822  4.154722
## ENSG00000211821.2 TRDV2      114.06718      2.8845574 0.7018804  4.109756
## ENSG00000174482.10 LINGO2     25.14387     -1.2019485 0.2936643 -4.092933
## ENSG00000225886.3 AL445490.1  40.31291      1.0396377 0.2557416  4.065188
##                                    pvalue       padj
## ENSG00000167680.16 SEMA6B    4.490270e-07 0.00973760
## ENSG00000165949.12 IFI27     3.239900e-06 0.03513024
## ENSG00000279358.1 AP002833.3 1.671350e-05 0.09840478
## ENSG00000198010.12 DLGAP2    2.367404e-05 0.09840478
## ENSG00000285755.2 AC132153.1 3.165494e-05 0.09840478
## ENSG00000112812.16 PRSS16    3.241423e-05 0.09840478
## ENSG00000169245.6 CXCL10     3.256838e-05 0.09840478
## ENSG00000211821.2 TRDV2      3.960772e-05 0.09840478
## ENSG00000174482.10 LINGO2    4.259502e-05 0.09840478
## ENSG00000225886.3 AL445490.1 4.799366e-05 0.09840478

sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DGEs,", UP ,"upregulated,", DN, "downregulated")
# smear
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, xlab="log2 base mean",
  ylim=c(-3,3),ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top <- head(sig,20)
# volcano
plot(dge$log2FoldChange, -log2(dge$pvalue) ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

# top N gene heatmap
colCols <- as.character(ss$CrpGroup)
colCols <- gsub("1","orange",colCols)
colCols <- gsub("0","yellow",colCols)

colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(dge[1:50,c(7:ncol(dge))]), col=colfunc(25),scale="row",
  ColSideColors=colCols ,  
 trace="none",margins = c(6,10), cexRow=.6, cexCol=.5, main="Top 50 genes")

t0_crp <- dge
write.table(dge,file="t0_crp_rna.tsv",sep="\t",quote=FALSE)

One dimensional enrichment analysis

Not correcting for blood compostion changes.

# reactome
download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
unzip("ReactomePathways.gmt.zip")
genesets <- gmt_import("ReactomePathways.gmt")

# set up the gene table
gt <- as.data.frame(sapply(strsplit(rownames(xx)," "),"[[",2))
gt$id <- rownames(xx)
colnames(gt) <- c("gene name","id")


# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
capture.output( 
    res <- mitch_calc(y, genesets, priority="significance",resrows=100) 
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                 set setSize        pANOVA
## 736                        Neutrophil degranulation     458 1.609422e-103
## 536                            Innate Immune System     968  5.827567e-84
## 519                                   Immune System    1894  3.072226e-46
## 631                            Membrane Trafficking     559  5.327421e-36
## 1312                     Vesicle-mediated transport     650  1.642242e-31
## 1069                            Signal Transduction    1894  7.041601e-28
## 496                                      Hemostasis     550  5.250724e-23
## 634                                      Metabolism    1772  1.783150e-22
## 829  Platelet activation, signaling and aggregation     221  1.353176e-20
## 1131         Signaling by Receptor Tyrosine Kinases     414  1.678123e-20
## 1102                      Signaling by Interleukins     386  1.710343e-19
## 649                          Metabolism of proteins    1719  1.861219e-19
## 841         Post-translational protein modification    1194  5.595484e-19
## 1010   Response to elevated platelet cytosolic Ca2+     110  1.253521e-16
## 831                          Platelet degranulation     106  5.814015e-16
## 1245                    Toll-like Receptor Cascades     143  2.293691e-15
## 86                Asparagine N-linked glycosylation     269  5.316672e-15
## 643                            Metabolism of lipids     625  9.171040e-15
## 284                                         Disease    1303  1.323050e-14
## 1288                   Transport of small molecules     561  1.571805e-14
##         s.dist p.adjustANOVA
## 736  0.5859566 2.195252e-100
## 536  0.3669434  3.974401e-81
## 519  0.1977050  1.396839e-43
## 631  0.3093383  1.816650e-33
## 1312 0.2680762  4.480037e-29
## 1069 0.1517126  1.600791e-25
## 496  0.2459999  1.023141e-20
## 634  0.1393909  3.040271e-20
## 829  0.3628343  2.050813e-18
## 1131 0.2656301  2.288960e-18
## 1102 0.2675063  2.115585e-17
## 649  0.1307464  2.115585e-17
## 841  0.1527985  5.870954e-17
## 1010 0.4565015  1.221288e-14
## 831  0.4546295  5.286878e-14
## 1245 0.3835923  1.955372e-13
## 86   0.2767766  4.265848e-13
## 643  0.1814795  6.949610e-13
## 284  0.1269715  9.498108e-13
## 1288 0.1895711  1.071971e-12

unlink("t0_v_pod_mitch.html")
capture.output(
    mitch_report(res, "t0_v_pod_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
capture.output( 
    res <- mitch_calc(y, genesets, priority="significance",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                               set
## 734                                                      Neutrophil degranulation
## 1353                                   rRNA processing in the nucleus and cytosol
## 1351                                                              rRNA processing
## 624                 Major pathway of rRNA processing in the nucleolus and cytosol
## 534                                                          Innate Immune System
## 1266                                                                  Translation
## 634                                                             Metabolism of RNA
## 391                                      Formation of a pool of free 40S subunits
## 349                                             Eukaryotic Translation Elongation
## 806                                                      Peptide chain elongation
## 1058                                                     Selenocysteine synthesis
## 587             L13a-mediated translational silencing of Ceruloplasmin expression
## 433                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 351                                            Eukaryotic Translation Termination
## 1313                                                       Viral mRNA Translation
## 142                                          Cap-dependent Translation Initiation
## 350                                             Eukaryotic Translation Initiation
## 742  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1006                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 526                             Influenza Viral RNA Transcription and Replication
##      setSize       pANOVA     s.dist p.adjustANOVA
## 734      458 3.162843e-61  0.4487783  4.304629e-58
## 1353     190 7.192148e-52 -0.6358614  4.894256e-49
## 1351     217 2.080594e-51 -0.5926305  9.438961e-49
## 624      180 6.055674e-50 -0.6405213  2.060443e-47
## 534      967 2.264051e-44  0.2648539  6.162746e-42
## 1266     295 9.419052e-40 -0.4457176  2.136555e-37
## 634      685 1.652188e-39 -0.2942651  3.212326e-37
## 391      100 4.046179e-38 -0.7455910  6.883562e-36
## 349       93 1.440793e-37 -0.7671592  2.178800e-35
## 806       88 5.055188e-36 -0.7714444  6.880111e-34
## 1058      92 1.357145e-35 -0.7498441  1.679158e-33
## 587      110 7.548629e-35 -0.6784823  8.561404e-33
## 433      111 1.109612e-34 -0.6737341  1.161678e-32
## 351       92 3.299311e-34 -0.7344070  3.207401e-32
## 1313      88 6.764758e-34 -0.7472455  6.137891e-32
## 142      118 2.646671e-33 -0.6398047  2.118894e-31
## 350      118 2.646671e-33 -0.6398047  2.118894e-31
## 742       94 1.583940e-32 -0.7076135  1.197635e-30
## 1006     100 3.169485e-31 -0.6715684  2.270352e-29
## 526      135 5.439828e-31 -0.5761706  3.701803e-29

unlink("pod_crp_mitch.html")
capture.output(
    mitch_report(res, "pod_crp_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
capture.output(
    res <- mitch_calc(y, genesets, priority="significance",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                               set
## 352                                             Eukaryotic Translation Elongation
## 394                                      Formation of a pool of free 40S subunits
## 809                                                      Peptide chain elongation
## 590             L13a-mediated translational silencing of Ceruloplasmin expression
## 1316                                                       Viral mRNA Translation
## 354                                            Eukaryotic Translation Termination
## 635                                                                    Metabolism
## 1061                                                     Selenocysteine synthesis
## 436                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 145                                          Cap-dependent Translation Initiation
## 353                                             Eukaryotic Translation Initiation
## 745  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 737                                                      Neutrophil degranulation
## 1009                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 1041                  SRP-dependent cotranslational protein targeting to membrane
## 744     Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 746                                                 Nonsense-Mediated Decay (NMD)
## 982                                   Regulation of expression of SLITs and ROBOs
## 285                                                                       Disease
## 1060                                                  Selenoamino acid metabolism
##      setSize       pANOVA     s.dist p.adjustANOVA
## 352       93 7.235821e-42 -0.8116867  9.876896e-39
## 394      100 1.055679e-40 -0.7714938  7.205011e-38
## 809       88 6.985114e-40 -0.8135221  3.178227e-37
## 590      110 1.096103e-37 -0.7068229  3.630628e-35
## 1316      88 1.329900e-37 -0.7889383  3.630628e-35
## 354       92 5.983625e-37 -0.7646378  1.361275e-34
## 635     1771 8.439284e-37 -0.1809398  1.645660e-34
## 1061      92 2.245441e-36 -0.7584028  3.574670e-34
## 436      111 2.356925e-36 -0.6905391  3.574670e-34
## 145      118 9.577584e-36 -0.6639584  1.188491e-33
## 353      118 9.577584e-36 -0.6639584  1.188491e-33
## 745       94 3.773216e-34 -0.7259279  4.292034e-32
## 737      459 5.381654e-34 -0.3304822  5.650737e-32
## 1009     100 6.511606e-31 -0.6680040  6.348815e-29
## 1041     111 7.472133e-30 -0.6226349  6.799641e-28
## 744      114 1.578986e-28 -0.5998573  1.267833e-26
## 746      114 1.578986e-28 -0.5998573  1.267833e-26
## 982      162 5.020894e-28 -0.4990337  3.807512e-26
## 285     1302 4.543638e-27 -0.1774825  3.264245e-25
## 1060     114 6.775910e-27 -0.5814016  4.624559e-25

unlink("t0_crp_mitch.html")
capture.output(
    mitch_report(res, "t0_crp_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


## prioritise by effect size

# t0_v_pod
# baseline versus post-op
y <- mitch_import(t0_v_pod, DEtype="DESeq2", geneTable=gt)
capture.output(
    res <- mitch_calc(y, genesets, priority="effect",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                 set setSize
## 513                                       IRAK4 deficiency (TLR2/4)      10
## 686                                       MyD88 deficiency (TLR2/4)      10
## 1331 alpha-linolenic (omega3) and linoleic (omega6) acid metabolism      12
## 1332                          alpha-linolenic acid (ALA) metabolism      12
## 968                          Regulation of TLR by endogenous ligand      11
## 1278               Translocation of ZAP-70 to Immunological synapse      24
## 505                               Hyaluronan uptake and degradation      12
## 1306                          Uptake and function of anthrax toxins      10
## 345          Erythrocytes take up carbon dioxide and release oxygen      11
## 765                                 O2/CO2 exchange in erythrocytes      11
## 1324         WNT5A-dependent internalization of FZD2, FZD5 and ROR2      11
## 1343                p130Cas linkage to MAPK signaling for integrins      11
## 61             Advanced glycosylation endproduct receptor signaling      12
## 1304                                               Unwinding of DNA      12
## 902                               RNA Polymerase I Promoter Opening      19
## 889                            RHO GTPases Activate WASPs and WAVEs      35
## 925                            ROS and RNS production in phagocytes      31
## 736                                        Neutrophil degranulation     458
## 434       GRB2:SOS provides linkage to MAPK signaling for Integrins      12
## 779                                                  PD-1 signaling      28
##             pANOVA     s.dist p.adjustANOVA
## 513   8.367911e-07  0.8994748  1.047140e-05
## 686   8.367911e-07  0.8994748  1.047140e-05
## 1331  2.461377e-06  0.7852782  2.622905e-05
## 1332  2.461377e-06  0.7852782  2.622905e-05
## 968   1.930406e-05  0.7438719  1.446744e-04
## 1278  2.809953e-10 -0.7436252  8.517279e-09
## 505   8.850181e-06  0.7406638  7.228531e-05
## 1306  1.141313e-04  0.7045257  6.513604e-04
## 345   9.462990e-05  0.6796841  5.525122e-04
## 765   9.462990e-05  0.6796841  5.525122e-04
## 1324  1.165844e-04  0.6708490  6.571121e-04
## 1343  1.324011e-04  0.6654100  7.208490e-04
## 61    9.478582e-05  0.6506965  5.525122e-04
## 1304  1.323109e-04 -0.6371242  7.208490e-04
## 902   3.287422e-06  0.6163218  3.273024e-05
## 889   4.567968e-10  0.6085563  1.325682e-08
## 925   8.233487e-09  0.5978890  1.754762e-07
## 736  1.609422e-103  0.5859566 2.195252e-100
## 434   4.971813e-04  0.5804902  2.283351e-03
## 779   1.109028e-07 -0.5793553  1.867548e-06

unlink("t0_v_pod_mitche.html")
capture.output(
    mitch_report(res, "t0_v_pod_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# pod_crp
# postop low versus high CRP
y <- mitch_import(pod_crp, DEtype="DESeq2", geneTable=gt)
capture.output(
    res <- mitch_calc(y, genesets, priority="effect",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                               set
## 511                                                     IRAK4 deficiency (TLR2/4)
## 684                                                     MyD88 deficiency (TLR2/4)
## 806                                                      Peptide chain elongation
## 349                                             Eukaryotic Translation Elongation
## 1058                                                     Selenocysteine synthesis
## 1313                                                       Viral mRNA Translation
## 391                                      Formation of a pool of free 40S subunits
## 351                                            Eukaryotic Translation Termination
## 1328               alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1329                                        alpha-linolenic acid (ALA) metabolism
## 742  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1276                             Translocation of ZAP-70 to Immunological synapse
## 587             L13a-mediated translational silencing of Ceruloplasmin expression
## 433                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1006                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 397           Formation of the ternary complex, and subsequently, the 43S complex
## 1321                       WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 966                                        Regulation of TLR by endogenous ligand
## 624                 Major pathway of rRNA processing in the nucleolus and cytosol
## 142                                          Cap-dependent Translation Initiation
##      setSize       pANOVA     s.dist p.adjustANOVA
## 511       10 2.191838e-06  0.8644929  3.107386e-05
## 684       10 2.191838e-06  0.8644929  3.107386e-05
## 806       88 5.055188e-36 -0.7714444  6.880111e-34
## 349       93 1.440793e-37 -0.7671592  2.178800e-35
## 1058      92 1.357145e-35 -0.7498441  1.679158e-33
## 1313      88 6.764758e-34 -0.7472455  6.137891e-32
## 391      100 4.046179e-38 -0.7455910  6.883562e-36
## 351       92 3.299311e-34 -0.7344070  3.207401e-32
## 1328      12 1.516159e-05  0.7211404  1.794341e-04
## 1329      12 1.516159e-05  0.7211404  1.794341e-04
## 742       94 1.583940e-32 -0.7076135  1.197635e-30
## 1276      24 1.982172e-09 -0.7071589  5.090068e-08
## 587      110 7.548629e-35 -0.6784823  8.561404e-33
## 433      111 1.109612e-34 -0.6737341  1.161678e-32
## 1006     100 3.169485e-31 -0.6715684  2.270352e-29
## 397       51 1.916084e-16 -0.6654346  8.412228e-15
## 1321      11 1.548886e-04  0.6586494  1.334198e-03
## 966       11 2.337332e-04  0.6406105  1.882313e-03
## 624      180 6.055674e-50 -0.6405213  2.060443e-47
## 142      118 2.646671e-33 -0.6398047  2.118894e-31

unlink("pod_crp_mitche.html")
capture.output(
    mitch_report(res, "pod_crp_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# t0_crp
# baseline low versus high CRP
y <- mitch_import(t0_crp, DEtype="DESeq2", geneTable=gt)
capture.output(
    res <- mitch_calc(y, genesets, priority="effect",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                               set
## 809                                                      Peptide chain elongation
## 352                                             Eukaryotic Translation Elongation
## 1316                                                       Viral mRNA Translation
## 394                                      Formation of a pool of free 40S subunits
## 354                                            Eukaryotic Translation Termination
## 1061                                                     Selenocysteine synthesis
## 745  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 590             L13a-mediated translational silencing of Ceruloplasmin expression
## 436                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1311                                        VLDLR internalisation and degradation
## 1009                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 145                                          Cap-dependent Translation Initiation
## 353                                             Eukaryotic Translation Initiation
## 400           Formation of the ternary complex, and subsequently, the 43S complex
## 584                                                          Josephin domain DUBs
## 1041                  SRP-dependent cotranslational protein targeting to membrane
## 386                                     Formation of ATP by chemiosmotic coupling
## 1326                                      WNT5A-dependent internalization of FZD4
## 744     Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)
## 746                                                 Nonsense-Mediated Decay (NMD)
##      setSize       pANOVA     s.dist p.adjustANOVA
## 809       88 6.985114e-40 -0.8135221  3.178227e-37
## 352       93 7.235821e-42 -0.8116867  9.876896e-39
## 1316      88 1.329900e-37 -0.7889383  3.630628e-35
## 394      100 1.055679e-40 -0.7714938  7.205011e-38
## 354       92 5.983625e-37 -0.7646378  1.361275e-34
## 1061      92 2.245441e-36 -0.7584028  3.574670e-34
## 745       94 3.773216e-34 -0.7259279  4.292034e-32
## 590      110 1.096103e-37 -0.7068229  3.630628e-35
## 436      111 2.356925e-36 -0.6905391  3.574670e-34
## 1311      11 8.083211e-05 -0.6862890  8.800242e-04
## 1009     100 6.511606e-31 -0.6680040  6.348815e-29
## 145      118 9.577584e-36 -0.6639584  1.188491e-33
## 353      118 9.577584e-36 -0.6639584  1.188491e-33
## 400       51 3.041275e-15 -0.6381277  1.012522e-13
## 584       10 5.527420e-04 -0.6306684  4.286891e-03
## 1041     111 7.472133e-30 -0.6226349  6.799641e-28
## 386       18 7.323691e-06 -0.6103471  1.219127e-04
## 1326      13 1.508432e-04 -0.6069482  1.481302e-03
## 744      114 1.578986e-28 -0.5998573  1.267833e-26
## 746      114 1.578986e-28 -0.5998573  1.267833e-26

unlink("t0_crp_mitche.html")
capture.output(
    mitch_report(res, "t0_crp_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)


# 3D analysys significance
z <- list("t0_v_pod"=t0_v_pod, "pod_crp"=pod_crp, "t0_crp"=t0_crp)
zz <- mitch_import(z, DEtype="DESeq2", geneTable=gt)
capture.output(
    res <- mitch_calc(zz, genesets, priority="significance",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                               set
## 734                                                      Neutrophil degranulation
## 534                                                          Innate Immune System
## 1265                                                                  Translation
## 391                                      Formation of a pool of free 40S subunits
## 587             L13a-mediated translational silencing of Ceruloplasmin expression
## 349                                             Eukaryotic Translation Elongation
## 433                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 142                                          Cap-dependent Translation Initiation
## 350                                             Eukaryotic Translation Initiation
## 806                                                      Peptide chain elongation
## 1350                                                              rRNA processing
## 1352                                   rRNA processing in the nucleus and cytosol
## 1312                                                       Viral mRNA Translation
## 351                                            Eukaryotic Translation Termination
## 1058                                                     Selenocysteine synthesis
## 624                 Major pathway of rRNA processing in the nucleolus and cytosol
## 742  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1006                          Response of EIF2AK4 (GCN2) to amino acid deficiency
## 634                                                             Metabolism of RNA
## 1038                  SRP-dependent cotranslational protein targeting to membrane
##      setSize       pMANOVA  s.t0_v_pod  s.pod_crp   s.t0_crp    p.t0_v_pod
## 734      458 1.100900e-119  0.58568651  0.4539032 -0.3255898 2.131658e-103
## 534      966  2.309842e-93  0.36710574  0.2702239 -0.1909662  8.553878e-84
## 1265     295  5.549362e-85 -0.05584812 -0.4411152 -0.3437255  9.896795e-02
## 391      100  9.213449e-83 -0.20789625 -0.7431070 -0.7691941  3.271811e-04
## 587      110  6.803565e-80 -0.14314838 -0.6756514 -0.7044555  9.492889e-03
## 349       93  8.412614e-79 -0.27289442 -0.7649026 -0.8094742  5.386406e-06
## 433      111  4.934964e-78 -0.14719204 -0.6708802 -0.6881875  7.382262e-03
## 142      118  3.478425e-76 -0.13160443 -0.6367128 -0.6614609  1.353731e-02
## 350      118  3.478425e-76 -0.13160443 -0.6367128 -0.6614609  1.353731e-02
## 806       88  9.118266e-75 -0.28134726 -0.7692423 -0.8113112  5.048100e-06
## 1350     217  8.404694e-73 -0.28892642 -0.5892016 -0.3942535  2.159088e-13
## 1352     190  2.793641e-71 -0.30718205 -0.6328397 -0.3926592  2.765619e-13
## 1312      88  9.812276e-71 -0.26210705 -0.7449685 -0.7866499  2.131873e-05
## 351       92  4.200639e-70 -0.26476727 -0.7319569 -0.7623451  1.134012e-05
## 1058      92  9.101950e-70 -0.29852322 -0.7475202 -0.7560906  7.425644e-07
## 624      180  2.038620e-68 -0.30640536 -0.6374809 -0.3909947  1.303462e-12
## 742       94  3.193002e-66 -0.24433952 -0.7051462 -0.7235822  4.227299e-05
## 1006     100  2.150801e-62 -0.22168319 -0.6687223 -0.6656059  1.275668e-04
## 634      685  1.757909e-61 -0.08857003 -0.2889897 -0.1753195  7.791790e-05
## 1038     111  8.280906e-60 -0.13387379 -0.5828486 -0.6201011  1.482425e-02
##         p.pod_crp     p.t0_crp    s.dist        SD p.adjustMANOVA
## 734  1.373656e-62 5.888784e-33 0.8093612 0.4925107  1.497224e-116
## 534  4.646843e-46 8.584734e-24 0.4942223 0.2981965   1.570693e-90
## 1265 5.789266e-39 2.801611e-24 0.5620044 0.2003284   2.515711e-82
## 391  7.087404e-38 1.808414e-40 1.0895359 0.3168034   3.132573e-80
## 587  1.430624e-34 1.918844e-37 0.9865363 0.3160841   1.850570e-77
## 349  2.345639e-37 1.199836e-41 1.1466455 0.2977629   1.906859e-76
## 433  2.116614e-34 4.079941e-36 0.9722899 0.3074695   9.587930e-76
## 142  5.362261e-33 1.738052e-35 0.9274985 0.2990248   5.256287e-74
## 350  5.362261e-33 1.738052e-35 0.9274985 0.2990248   5.256287e-74
## 806  7.948652e-36 1.127988e-39 1.1528729 0.2945825   1.240084e-72
## 1350 7.881898e-51 1.254523e-23 0.7655539 0.1523503   1.039126e-70
## 1352 2.169626e-51 9.466998e-21 0.8056228 0.1688421   3.166126e-69
## 1312 1.064541e-33 2.152672e-37 1.1146731 0.2915584   1.026515e-68
## 351  5.446050e-34 9.761602e-37 1.0895103 0.2789185   4.080621e-68
## 1058 2.205048e-35 3.664096e-36 1.1043439 0.2617377   8.252435e-68
## 624  1.748041e-49 1.356738e-19 0.8081726 0.1720087   1.732827e-66
## 742  2.603588e-32 6.137300e-34 1.0394730 0.2715254   2.554402e-64
## 1006 5.652620e-31 1.060551e-30 0.9692081 0.2572033   1.625050e-60
## 634  3.785370e-38 5.101082e-15 0.3494233 0.1005107   1.258293e-59
## 1038 2.361639e-26 1.274256e-29 0.8614871 0.2706114   5.631016e-58

unlink("3d_mitch.html")
capture.output(
    mitch_report(res, "3d_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# 3D analysys effect size
capture.output(
    res <- mitch_calc(zz, genesets, priority="effect",resrows=100)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
head(res$enrichment_result,20)

##                                                                               set
## 511                                                     IRAK4 deficiency (TLR2/4)
## 684                                                     MyD88 deficiency (TLR2/4)
## 806                                                      Peptide chain elongation
## 349                                             Eukaryotic Translation Elongation
## 1312                                                       Viral mRNA Translation
## 1058                                                     Selenocysteine synthesis
## 391                                      Formation of a pool of free 40S subunits
## 351                                            Eukaryotic Translation Termination
## 1327               alpha-linolenic (omega3) and linoleic (omega6) acid metabolism
## 1328                                        alpha-linolenic acid (ALA) metabolism
## 1320                       WNT5A-dependent internalization of FZD2, FZD5 and ROR2
## 503                                             Hyaluronan uptake and degradation
## 742  Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)
## 1275                             Translocation of ZAP-70 to Immunological synapse
## 1303                                        Uptake and function of anthrax toxins
## 966                                        Regulation of TLR by endogenous ligand
## 1307                                        VLDLR internalisation and degradation
## 587             L13a-mediated translational silencing of Ceruloplasmin expression
## 433                       GTP hydrolysis and joining of the 60S ribosomal subunit
## 1006                          Response of EIF2AK4 (GCN2) to amino acid deficiency
##      setSize      pMANOVA s.t0_v_pod  s.pod_crp    s.t0_crp   p.t0_v_pod
## 511       10 7.496846e-06  0.8994993  0.8681297 -0.05981002 8.362207e-07
## 684       10 7.496846e-06  0.8994993  0.8681297 -0.05981002 8.362207e-07
## 806       88 9.118266e-75 -0.2813473 -0.7692423 -0.81131123 5.048100e-06
## 349       93 8.412614e-79 -0.2728944 -0.7649026 -0.80947416 5.386406e-06
## 1312      88 9.812276e-71 -0.2621071 -0.7449685 -0.78664990 2.131873e-05
## 1058      92 9.101950e-70 -0.2985232 -0.7475202 -0.75609061 7.425644e-07
## 391      100 9.213449e-83 -0.2078962 -0.7431070 -0.76919412 3.271811e-04
## 351       92 4.200639e-70 -0.2647673 -0.7319569 -0.76234512 1.134012e-05
## 1327      12 3.031259e-05  0.7850157  0.7256774 -0.13326158 2.480595e-06
## 1328      12 3.031259e-05  0.7850157  0.7256774 -0.13326158 2.480595e-06
## 1320      11 2.038891e-05  0.6698260  0.6642957 -0.51599949 1.194226e-04
## 503       10 1.402370e-04  0.7290627  0.5743765 -0.47099340 6.529266e-05
## 742       94 3.193002e-66 -0.2443395 -0.7051462 -0.72358217 4.227299e-05
## 1275      24 1.274238e-09 -0.7433199 -0.7045621 -0.08165161 2.857833e-10
## 1303      10 3.139954e-04  0.7035422  0.6166487 -0.40281690 1.166786e-04
## 966       11 2.542352e-04  0.7434466  0.6445144 -0.20197388 1.951771e-05
## 1307      11 9.652365e-06  0.4681329  0.5392096 -0.68189901 7.175299e-03
## 587      110 6.803565e-80 -0.1431484 -0.6756514 -0.70445549 9.492889e-03
## 433      111 4.934964e-78 -0.1471920 -0.6708802 -0.68818748 7.382262e-03
## 1006     100 2.150801e-62 -0.2216832 -0.6687223 -0.66560594 1.275668e-04
##         p.pod_crp     p.t0_crp    s.dist        SD p.adjustMANOVA
## 511  1.986341e-06 7.432915e-01 1.2515292 0.5450276   4.338600e-05
## 684  1.986341e-06 7.432915e-01 1.2515292 0.5450276   4.338600e-05
## 806  7.948652e-36 1.127988e-39 1.1528729 0.2945825   1.240084e-72
## 349  2.345639e-37 1.199836e-41 1.1466455 0.2977629   1.906859e-76
## 1312 1.064541e-33 2.152672e-37 1.1146731 0.2915584   1.026515e-68
## 1058 2.205048e-35 3.664096e-36 1.1043439 0.2617377   8.252435e-68
## 391  7.087404e-38 1.808414e-40 1.0895359 0.3168034   3.132573e-80
## 351  5.446050e-34 9.761602e-37 1.0895103 0.2789185   4.080621e-68
## 1327 1.339490e-05 4.241140e-01 1.0773189 0.5138953   1.416671e-04
## 1328 1.339490e-05 4.241140e-01 1.0773189 0.5138953   1.416671e-04
## 1320 1.358892e-04 3.041029e-03 1.0752726 0.6830458   1.004671e-04
## 503  1.658407e-03 9.903414e-03 1.0408053 0.6527967   5.480526e-04
## 742  2.603588e-32 6.137300e-34 1.0394730 0.2715254   2.554402e-64
## 1275 2.270074e-09 4.886791e-01 1.0274236 0.3713320   1.619592e-08
## 1303 7.328448e-04 2.739804e-02 1.0185719 0.6152088   1.097773e-03
## 966  2.140099e-04 2.460881e-01 1.0044427 0.5196393   9.109898e-04
## 1307 1.955607e-03 8.977778e-05 0.9873610 0.6854113   5.402147e-05
## 587  1.430624e-34 1.918844e-37 0.9865363 0.3160841   1.850570e-77
## 433  2.116614e-34 4.079941e-36 0.9722899 0.3074695   9.587930e-76
## 1006 5.652620e-31 1.060551e-30 0.9692081 0.2572033   1.625050e-60

unlink("3d_mitche.html")
capture.output(
    mitch_report(res, "3d_mitche.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

RELIEF: RNA expression analysis

Mark Ziemann

2020-09-10