PADDI RNA expression analysis - Timecourse Analysis

Introduction

Time course analysis of PADDI genomics data.

suppressPackageStartupMessages({
  library("gplots")
  library("reshape2")
  library("WGCNA")
  library("dplyr")
  library("DESeq2")
  library("mitch")
  library("MASS")
  library("eulerr")
})

Load the data

tmp <- read.table("3col.tsv.gz",header=FALSE)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
xx[1:10,1:6]

##                             3166-POD1 3166-T0 3167-POD1 3167-T0 3171-POD1
## ENSG00000000003.15 TSPAN6           3       1         5       5        23
## ENSG00000000005.6 TNMD              0       0         0       0         0
## ENSG00000000419.14 DPM1           685     577       521     735       811
## ENSG00000000457.14 SCYL3          622     611       550     777       789
## ENSG00000000460.17 C1orf112       181     171       232     263       215
## ENSG00000000938.13 FGR          33797   44344     31524   38959     26402
## ENSG00000000971.16 CFH            106      40        98     183       195
## ENSG00000001036.14 FUCA2         1229     769      1150     868       978
## ENSG00000001084.13 GCLC           944    1085       577     961       908
## ENSG00000001167.15 NFYA          1243    1277      1295    1605      1166
##                             3171-T0
## ENSG00000000003.15 TSPAN6         4
## ENSG00000000005.6 TNMD            1
## ENSG00000000419.14 DPM1         494
## ENSG00000000457.14 SCYL3        575
## ENSG00000000460.17 C1orf112     196
## ENSG00000000938.13 FGR        33751
## ENSG00000000971.16 CFH          130
## ENSG00000001036.14 FUCA2        805
## ENSG00000001084.13 GCLC         798
## ENSG00000001167.15 NFYA        1251

Number of reads per sample

Let’s look at the number of reads per sample

Most samples were in the range of 25-30 million assigned reads. Just 2 samples had less than 20 million reads: PG1452-EOS and PG1423-EOS. The maximum read count was about 40 million for PG7072-EOS.

xxcs <- colSums(xx)
par(mar=c(5,8,3,1))
barplot(xxcs,horiz=TRUE,las=1,main="no. reads per sample")

barplot(head(xxcs[order(xxcs)],20),horiz=TRUE,las=1,main="lowest no. reads per sample")

barplot(head(xxcs[order(-xxcs)],20),horiz=TRUE,las=1,main="highest no. reads per sample")

MDS

Some outliers are apparent.

PG2090-EOS to the left of the chart - this is clearly the effect of rRNA carryover. Other samples over to the left of the chart include PG815-EOS, PG145-EOS and PG702-POD1 which all have elevated rRNA.

heatmap.2( cor(xx),trace="none",scale="none")

mds <- cmdscale(dist(t(xx)))

par(mar=c(5,5,3,1))
minx <- min(mds[,1])
maxx <- max(mds[,1])
miny <- min(mds[,2])
maxy <- max(mds[,2])

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) ,
  type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
text(mds, labels=rownames(mds), cex=0.8)

col <- rownames(mds)
col <- sapply(strsplit(col,"-"),"[[",2)
col <- gsub("T0","lightblue",col)
col <- gsub("POD1","orange",col)
col <- gsub("EOS","pink",col)

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) , cex=1.5 ,
  type = "p", col=col, pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
#text(mds, labels=rownames(mds), cex=0.8) 
mtext("blue=T0, orange=POD1, pink=EOS")

Exclude PG2090-EOS and repeat the analysis.

xx <- xx[,grep("PG2090-EOS",colnames(xx),invert=TRUE)]

mds <- cmdscale(dist(t(xx)))

par(mar=c(5,5,3,1))
minx <- min(mds[,1])
maxx <- max(mds[,1])
miny <- min(mds[,2])
maxy <- max(mds[,2])

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) ,
  type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
text(mds, labels=rownames(mds), cex=0.8)

col <- rownames(mds)
col <- sapply(strsplit(col,"-"),"[[",2)
col <- gsub("T0","lightblue",col)
col <- gsub("POD1","orange",col)
col <- gsub("EOS","pink",col)

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) , cex=1.5 ,
  type = "p", col=col, pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
#text(mds, labels=rownames(mds), cex=0.8) 
mtext("blue=T0, orange=POD1, pink=EOS")

In the MDS plot with PG2090-EOS removed, there appears to be some separation of T0, POD1 and EOS samples. POD1 (orange) are more towards the upper side of the chart and T0 (blue) are toward the bottom right. EOS (pink) are quite spread out.

Load patient info

xx <- xx[,order(colnames(xx))]

ss <- read.csv("PADDIgenomicsData.csv")
ss <- ss[order(ss$PG_number),]
colnames(ss)

##  [1] "PG_number"                        "sexD"                            
##  [3] "ageD"                             "weightD"                         
##  [5] "heightD"                          "asaD"                            
##  [7] "ethnicityD"                       "ethnicity_otherD"                
##  [9] "current_smokerD"                  "diabetes_typeD"                  
## [11] "daily_insulinD"                   "oral_hypoglycemicsD"             
## [13] "non_insulin_injectablesD"         "diabetes_yrs_since_diagnosisD"   
## [15] "DM_years"                         "creatinine_preopD"               
## [17] "crp_preopD"                       "crp_preop_typeD"                 
## [19] "crp_preop_naD"                    "hba1c_doneD"                     
## [21] "surgery_typeD"                    "surgery_procedureD"              
## [23] "surgery_dominantD"                "wound_typeOP"                    
## [25] "non_study_dexameth_steriodPOSTOP" "nonstudy_dexameth_steriodD3"     
## [27] "HbA1c"                            "bmi"                             
## [29] "whodas_total_preop"               "revised_whodas_preop"            
## [31] "neut_lymph_ratio_d0"              "neut_lymph_ratio_d1"             
## [33] "neut_lymph_ratio_change_d1"       "neut_lymph_ratio_d2"             
## [35] "neut_lymph_ratio_change_d2"       "neut_lymph_ratio_d1_2"           
## [37] "neut_lymph_ratio_d2_2"            "ab_noninfection"                 
## [39] "risk"                             "risk_cat"                        
## [41] "bmi_cat"                          "asa_cat"                         
## [43] "wound_type_cat"                   "oxygen_quin"                     
## [45] "duration_sx"                      "duration_sx_quin"                
## [47] "anyDex"                           "anyDex_count"                    
## [49] "anyDexMiss"                       "anyDex2"                         
## [51] "treatment_group"                  "deltacrp"                        
## [53] "crp_group"

str(ss)

## 'data.frame':    117 obs. of  53 variables:
##  $ PG_number                       : chr  "3166" "3167" "3171" "3172" ...
##  $ sexD                            : chr  "Male" "Male" "Male" "Male" ...
##  $ ageD                            : int  62 67 61 78 73 77 84 54 70 62 ...
##  $ weightD                         : num  64.5 78.8 71.1 43 83.6 ...
##  $ heightD                         : num  163 169 165 156 171 167 133 155 170 175 ...
##  $ asaD                            : int  2 2 2 2 2 3 3 2 2 2 ...
##  $ ethnicityD                      : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicity_otherD                : chr  "" "" "" "" ...
##  $ current_smokerD                 : chr  "No" "No" "No" "No" ...
##  $ diabetes_typeD                  : chr  "" "" "" "" ...
##  $ daily_insulinD                  : chr  "" "" "" "" ...
##  $ oral_hypoglycemicsD             : chr  "" "" "" "" ...
##  $ non_insulin_injectablesD        : chr  "" "" "" "" ...
##  $ diabetes_yrs_since_diagnosisD   : int  NA NA NA NA NA 1 NA NA NA NA ...
##  $ DM_years                        : int  NA NA NA NA NA 1 NA NA NA NA ...
##  $ creatinine_preopD               : int  68 82 82 96 105 90 54 47 109 98 ...
##  $ crp_preopD                      : chr  "2.1" "0.6" "2.7" "1.2" ...
##  $ crp_preop_typeD                 : chr  "CRP" "CRP" "CRP" "CRP" ...
##  $ crp_preop_naD                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ hba1c_doneD                     : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ surgery_typeD                   : chr  "Laparoscopic assisted low anterior resection of rectum" "Laparoscopic sigmoidectomy" "Laparoscopic assisted anterior resection of rectum" "Robotic assisted laparoscopic radical prostatectomy, pelvic lymph node dissection" ...
##  $ surgery_procedureD              : chr  "None of the above" "None of the above" "None of the above" "None of the above" ...
##  $ surgery_dominantD               : chr  "Gastrointestinal" "Gastrointestinal" "Gastrointestinal" "Urology-renal" ...
##  $ wound_typeOP                    : chr  "Clean / contaminated" "Clean / contaminated" "Clean / contaminated" "Clean / contaminated" ...
##  $ non_study_dexameth_steriodPOSTOP: chr  "No" "No" "No" "No" ...
##  $ nonstudy_dexameth_steriodD3     : chr  "No" "No" "No" "No" ...
##  $ HbA1c                           : num  5.7 6.2 6.2 6.3 6.3 ...
##  $ bmi                             : num  24.3 27.6 26.1 17.7 28.6 ...
##  $ whodas_total_preop              : int  16 12 12 12 12 12 24 14 12 12 ...
##  $ revised_whodas_preop            : int  16 12 12 12 12 12 24 14 12 12 ...
##  $ neut_lymph_ratio_d0             : num  4.3 2.94 2.29 2.93 2.62 ...
##  $ neut_lymph_ratio_d1             : num  13 6.5 7.22 23.2 8.57 ...
##  $ neut_lymph_ratio_change_d1      : num  8.7 3.56 4.93 20.27 5.95 ...
##  $ neut_lymph_ratio_d2             : num  5.92 3.68 3.77 22 NA ...
##  $ neut_lymph_ratio_change_d2      : num  1.623 0.741 1.475 19.071 NA ...
##  $ neut_lymph_ratio_d1_2           : num  13 6.5 7.22 23.2 8.57 ...
##  $ neut_lymph_ratio_d2_2           : num  5.92 3.68 3.77 22 NA ...
##  $ ab_noninfection                 : int  1 1 0 1 1 1 1 1 1 1 ...
##  $ risk                            : int  2 2 2 2 2 5 4 1 2 1 ...
##  $ risk_cat                        : chr  "Moderate" "Moderate" "Moderate" "Moderate" ...
##  $ bmi_cat                         : chr  "Normal [18.5 to <25]" "Overweight [25 to <30]" "Overweight [25 to <30]" "Underweight [BMI<18.5]" ...
##  $ asa_cat                         : chr  "1-2" "1-2" "1-2" "1-2" ...
##  $ wound_type_cat                  : chr  "Contaminated" "Contaminated" "Contaminated" "Contaminated" ...
##  $ oxygen_quin                     : chr  "0.21-0.4" "0.21-0.4" "0.21-0.4" "0.21-0.4" ...
##  $ duration_sx                     : num  2.5 2.67 2.42 3.17 2.5 ...
##  $ duration_sx_quin                : chr  "2.18-2.82" "2.18-2.82" "2.18-2.82" "2.83-3.75" ...
##  $ anyDex                          : chr  "No" "No" "No" "No" ...
##  $ anyDex_count                    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyDexMiss                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyDex2                         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ treatment_group                 : int  1 1 2 2 1 1 2 1 2 1 ...
##  $ deltacrp                        : num  39.3 38.3 49 189.9 7.3 ...
##  $ crp_group                       : int  1 1 1 4 1 1 4 1 4 1 ...

summary(ss)

##   PG_number             sexD                ageD          weightD      
##  Length:117         Length:117         Min.   :25.00   Min.   : 41.00  
##  Class :character   Class :character   1st Qu.:54.00   1st Qu.: 68.50  
##  Mode  :character   Mode  :character   Median :62.00   Median : 82.00  
##                                        Mean   :61.03   Mean   : 84.55  
##                                        3rd Qu.:69.00   3rd Qu.: 95.40  
##                                        Max.   :86.00   Max.   :185.00  
##                                                                        
##     heightD           asaD        ethnicityD        ethnicity_otherD  
##  Min.   :133.0   Min.   :1.000   Length:117         Length:117        
##  1st Qu.:163.0   1st Qu.:2.000   Class :character   Class :character  
##  Median :171.0   Median :2.000   Mode  :character   Mode  :character  
##  Mean   :170.2   Mean   :2.308                                        
##  3rd Qu.:178.0   3rd Qu.:3.000                                        
##  Max.   :193.0   Max.   :4.000                                        
##                                                                       
##  current_smokerD    diabetes_typeD     daily_insulinD     oral_hypoglycemicsD
##  Length:117         Length:117         Length:117         Length:117         
##  Class :character   Class :character   Class :character   Class :character   
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character   
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  non_insulin_injectablesD diabetes_yrs_since_diagnosisD    DM_years     
##  Length:117               Min.   : 1.000                Min.   : 1.000  
##  Class :character         1st Qu.: 1.500                1st Qu.: 1.500  
##  Mode  :character         Median : 7.000                Median : 7.000  
##                           Mean   : 7.467                Mean   : 7.467  
##                           3rd Qu.:11.000                3rd Qu.:11.000  
##                           Max.   :18.000                Max.   :18.000  
##                           NA's   :102                   NA's   :102     
##  creatinine_preopD  crp_preopD        crp_preop_typeD    crp_preop_naD
##  Min.   : 19.0     Length:117         Length:117         Min.   :0    
##  1st Qu.: 66.0     Class :character   Class :character   1st Qu.:0    
##  Median : 76.0     Mode  :character   Mode  :character   Median :0    
##  Mean   : 80.3                                           Mean   :0    
##  3rd Qu.: 91.0                                           3rd Qu.:0    
##  Max.   :177.0                                           Max.   :0    
##  NA's   :10                                                           
##  hba1c_doneD        surgery_typeD      surgery_procedureD surgery_dominantD 
##  Length:117         Length:117         Length:117         Length:117        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  wound_typeOP       non_study_dexameth_steriodPOSTOP
##  Length:117         Length:117                      
##  Class :character   Class :character                
##  Mode  :character   Mode  :character                
##                                                     
##                                                     
##                                                     
##                                                     
##  nonstudy_dexameth_steriodD3     HbA1c             bmi       
##  Length:117                  Min.   : 4.500   Min.   :16.59  
##  Class :character            1st Qu.: 5.200   1st Qu.:24.93  
##  Mode  :character            Median : 5.600   Median :28.07  
##                              Mean   : 5.714   Mean   :29.00  
##                              3rd Qu.: 5.900   3rd Qu.:31.73  
##                              Max.   :10.000   Max.   :72.27  
##                                                              
##  whodas_total_preop revised_whodas_preop neut_lymph_ratio_d0
##  Min.   :12.00      Min.   :12.00        Min.   : 0.5312    
##  1st Qu.:12.00      1st Qu.:12.00        1st Qu.: 1.8254    
##  Median :14.00      Median :14.00        Median : 2.5737    
##  Mean   :16.74      Mean   :16.74        Mean   : 2.8745    
##  3rd Qu.:17.00      3rd Qu.:17.00        3rd Qu.: 3.3338    
##  Max.   :50.00      Max.   :50.00        Max.   :11.0000    
##                                          NA's   :9          
##  neut_lymph_ratio_d1 neut_lymph_ratio_change_d1 neut_lymph_ratio_d2
##  Min.   : 1.375      Min.   :-1.255             Min.   : 0.1235    
##  1st Qu.: 5.132      1st Qu.: 2.610             1st Qu.: 3.7692    
##  Median : 7.353      Median : 4.450             Median : 6.7273    
##  Mean   : 8.882      Mean   : 6.088             Mean   : 8.1589    
##  3rd Qu.:11.627      3rd Qu.: 8.730             3rd Qu.:10.8889    
##  Max.   :44.000      Max.   :39.299             Max.   :25.6042    
##  NA's   :13          NA's   :21                 NA's   :28         
##  neut_lymph_ratio_change_d2 neut_lymph_ratio_d1_2 neut_lymph_ratio_d2_2
##  Min.   :-6.182             Min.   : 1.375        Min.   : 0.1235      
##  1st Qu.: 1.591             1st Qu.: 5.132        1st Qu.: 3.7692      
##  Median : 4.356             Median : 7.353        Median : 6.7273      
##  Mean   : 5.356             Mean   : 8.882        Mean   : 8.1589      
##  3rd Qu.: 7.403             3rd Qu.:11.627        3rd Qu.:10.8889      
##  Max.   :22.776             Max.   :44.000        Max.   :25.6042      
##  NA's   :35                 NA's   :13            NA's   :28           
##  ab_noninfection       risk         risk_cat           bmi_cat         
##  Min.   :0.0000   Min.   :0.000   Length:117         Length:117        
##  1st Qu.:0.0000   1st Qu.:1.000   Class :character   Class :character  
##  Median :0.0000   Median :1.000   Mode  :character   Mode  :character  
##  Mean   :0.4495   Mean   :1.598                                        
##  3rd Qu.:1.0000   3rd Qu.:2.000                                        
##  Max.   :1.0000   Max.   :6.000                                        
##  NA's   :8                                                             
##    asa_cat          wound_type_cat     oxygen_quin         duration_sx     
##  Length:117         Length:117         Length:117         Min.   : 0.6833  
##  Class :character   Class :character   Class :character   1st Qu.: 2.5000  
##  Mode  :character   Mode  :character   Mode  :character   Median : 3.3333  
##                                                           Mean   : 3.9007  
##                                                           3rd Qu.: 4.7667  
##                                                           Max.   :10.6667  
##                                                                            
##  duration_sx_quin      anyDex           anyDex_count      anyDexMiss      
##  Length:117         Length:117         Min.   :0.0000   Min.   :0.000000  
##  Class :character   Class :character   1st Qu.:0.0000   1st Qu.:0.000000  
##  Mode  :character   Mode  :character   Median :0.0000   Median :0.000000  
##                                        Mean   :0.1282   Mean   :0.008547  
##                                        3rd Qu.:0.0000   3rd Qu.:0.000000  
##                                        Max.   :2.0000   Max.   :1.000000  
##                                                                           
##     anyDex2       treatment_group    deltacrp       crp_group    
##  Min.   :0.0000   Min.   :1.000   Min.   :-16.7   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:1.000   1st Qu.: 32.9   1st Qu.:1.000  
##  Median :0.0000   Median :2.000   Median : 49.5   Median :1.000  
##  Mean   :0.1111   Mean   :1.556   Mean   :130.9   Mean   :2.487  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:221.1   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :2.000   Max.   :359.0   Max.   :4.000  
##

ss1 <- ss

rownames(ss) <- paste(ss$PG_number,ss$timepoint,sep="-")

dim(ss)

## [1] 117  53

ss$ageCS <- scale(ss$ageD)
ss$sexD <- as.numeric(factor(ss$sexD))
ss$ethnicityCAT <- ss$ethnicityD
ss$ethnicityD <- as.numeric(factor(ss$ethnicityD))
ss$current_smokerD <- as.numeric(factor(ss$current_smokerD))
ss$diabetes_typeD <- as.numeric(factor(ss$diabetes_typeD))
ss$daily_insulinD <- as.numeric(factor(ss$daily_insulinD))
ss$oral_hypoglycemicsD <- as.numeric(factor(ss$oral_hypoglycemicsD))
ss$crp_preopD <- as.numeric(gsub("<5","2.5",gsub("<1","0.5",gsub("<1.0","0.5",ss$crp_preopD))))
ss$surgery_dominantD <- as.numeric(factor(ss$surgery_dominantD))
ss$wound_typeOP <- as.numeric(factor(ss$wound_typeOP))
ss$risk_cat <- as.numeric(factor(ss$risk_cat,levels=c("Low","Moderate","High")))
ss$wound_type_cat <- as.numeric(factor(ss$wound_type_cat))
ss$anyDex <- as.numeric(factor(ss$anyDex))

ss$bmi_cat <- as.numeric(factor(ss$bmi_cat,
  levels=c("Underweight [BMI<18.5]","Normal [18.5 to <25]",
  "Overweight [25 to <30]","Obese [30 to <40]","Super obese [40+]")))

ss <- ss[,c("PG_number","sexD","ageD","ageCS","weightD","asaD","heightD","ethnicityCAT","ethnicityD",
  "current_smokerD","diabetes_typeD","daily_insulinD","creatinine_preopD",
  "surgery_dominantD","wound_typeOP","HbA1c","bmi","revised_whodas_preop",
  "neut_lymph_ratio_d0","neut_lymph_ratio_d1","neut_lymph_ratio_d2","ab_noninfection",
  "risk","risk_cat","bmi_cat","wound_type_cat","duration_sx","anyDex","treatment_group",
  "deltacrp","crp_group")]

ss <- ss[order(rownames(ss)),]

ss_t0 <- ss
ss_eos <- ss
ss_pod1 <- ss

ss_t0$timepoint <- "T0"
ss_eos$timepoint <- "EOS"
ss_pod1$timepoint <- "POD1"

rownames(ss_t0) <- paste(ss_t0$PG_number,"T0",sep="-")
rownames(ss_eos) <- paste(ss_t0$PG_number,"EOS",sep="-")
rownames(ss_pod1) <- paste(ss_t0$PG_number,"POD1",sep="-")

ss <- rbind(ss_t0, ss_eos, ss_pod1)

rownames(ss) <- paste(ss$PG_number,ss$timepoint,sep="-")

xt0 <- xx[,grep("T0",colnames(xx))]
xpod1 <- xx[,grep("POD1",colnames(xx))]
xeos <- xx[,grep("EOS",colnames(xx))]

xt0f <- xt0[rowMeans(xt0)>=10,]
xpod1f <- xpod1[rowMeans(xpod1)>=10,]
xeosf <- xeos[rowMeans(xeos)>=10,]

dim(xt0f)

## [1] 21935   111

dim(xpod1f)

## [1] 21313   109

dim(xeosf)

## [1] 22067    98

ss_t0 <- ss_t0[which(rownames(ss_t0) %in% colnames(xt0)),]
ss_pod1 <- ss_pod1[which(rownames(ss_pod1) %in% colnames(xpod1)),]
ss_eos <- ss_eos[which(rownames(ss_eos) %in% colnames(xeos)),]

colnames(xt0) %in% rownames(ss_t0)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE

colnames(xpod1) %in% rownames(ss_pod1)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE

colnames(xeos) %in% rownames(ss_eos)

##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

rownames(ss_t0) %in% colnames(xt0)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE

rownames(ss_pod1) %in% colnames(xpod1)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE

rownames(ss_eos) %in% colnames(xeos)

##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

xxf <- xx[rowMeans(xx)>=10,]
xxf <- xxf[,order(colnames(xxf))]

ss1 <- ss
ss1 <- ss1[which(rownames(ss1) %in% colnames(xx)),]

MDS3

mds <- cmdscale(dist(t(xx)))

par(mar=c(5,5,3,1))
minx <- min(mds[,1])
maxx <- max(mds[,1])
miny <- min(mds[,2])
maxy <- max(mds[,2])

col <- rownames(mds)
col <- sapply(strsplit(col,"-"),"[[",2)
col <- gsub("T0","lightblue",col)
col <- gsub("POD1","orange",col)
col <- gsub("EOS","pink",col)

shp <- ss1$crp_group + 14

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) , cex=1.5 ,
  type = "p", col=col, pch=shp, cex.axis=1.3,cex.lab=1.3, bty='n')
#text(mds, labels=rownames(mds), cex=0.8) 
mtext("blue=T0,pink=EOS,orange=POD1,sq=lowCRP,di=highCRP")

Blood composition

xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100

## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm'
## failed to converge in 100 steps
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm'
## failed to converge in 100 steps

dec <- t(dec/colSums(dec)*100)
dec <- signif(dec, 3)
# remove negative values
dec2 <- t(apply(dec,2,function(x) { mymin=min(x) ; if (mymin<0) { x + (mymin * -1) } else { x } } ))
dec2 <- apply(dec2,2,function(x) {x / sum(x) *100} )
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( dec2, col=colfunc(25),scale="row",
 trace="none",margins = c(5,5), cexRow=.7, cexCol=.8,  main="cell type abundances")

heatmap.2( dec2, col=colfunc(25),scale="none",
 trace="none",margins = c(5,5), cexRow=.7, cexCol=.8,  main="cell type abundances")

par(mar=c(5,10,3,1))
boxplot(t(dec2[order(rowMeans(dec2)),]),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)")

par(mar = c(5.1, 4.1, 4.1, 2.1))
heatmap.2( cor(dec2),trace="none",scale="none")

heatmap.2( cor(t(dec2)),trace="none",scale="none", margins = c(8,8))

par(mar=c(5,10,3,1))
barplot(apply(dec2,1,sd),horiz=TRUE,las=1,xlab="SD of cell proportions (%)")

which(apply(dec2,1,sd)>4)

##    Monocytes.C             NK   T.CD8.Memory    T.CD4.Naive Neutrophils.LD 
##              1              2              3              4             10

Based on this analysis we can begin with correction of:

Monocytes.C
NK
T.CD8.Memory
T.CD4.Naive
Neutrophils.LD

According to the correlation heatmap, these are not strongly correlated.

Now look at how the cell proportions change over time.

ct0 <- dec2[,grep("-T0",colnames(dec2))]
ceos <- dec2[,grep("-EOS",colnames(dec2))]
cpod1 <- dec2[,grep("-POD1",colnames(dec2))]
par(mar=c(5,10,3,1))
boxplot(t(ct0),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)",main="T0")

boxplot(t(ceos),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)",main="EOS")

boxplot(t(cpod1),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)",main="POD1")

sscell <- as.data.frame(t(dec2))
sscell_t0 <- sscell[grep("-T0",rownames(sscell)),]
sscell_eos <- sscell[grep("-EOS",rownames(sscell)),]
sscell_pod1 <- sscell[grep("POD1",rownames(sscell)),]

Now look at how cell types associate with the PCAs.

#xt0f xeosf xpod1f
#sscell_t0 sscell_eos sscell_pod1

## T0
mx <- xt0f

ss2 <- sscell_t0

pca <- prcomp(t(mx),center = TRUE, scale = TRUE,retx=TRUE)

loadings = pca$x
par(mar = c(5.1, 4.1, 4.1, 2.1))
plot(pca,type="lines",col="blue")

nGenes <- nrow(mx)
nSamples <- ncol(mx)
datTraits <- ss2
moduleTraitCor <- cor(loadings[,1:8], datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
  signif(moduleTraitPvalue, 1), ")", sep = "")

dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor),
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-0.45,0.45),
  main = paste("PCA-cell relationships @T0: Top principal components"))

## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are below given minimum and will be truncated to
## the minimum.

## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are above given maximum and will be truncated to
## the maximum.

## EOS
mx <- xeosf

ss2 <- sscell_eos

pca <- prcomp(t(mx),center = TRUE, scale = TRUE,retx=TRUE)

loadings = pca$x

plot(pca,type="lines",col="blue")

nGenes <- nrow(mx)
nSamples <- ncol(mx)
datTraits <- ss2
moduleTraitCor <- cor(loadings[,1:8], datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
  signif(moduleTraitPvalue, 1), ")", sep = "")

dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor),
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-0.45,0.45),
  main = paste("PCA-cell relationships @EOS: Top principal components"))

## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are below given minimum and will be truncated to
## the minimum.
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are above given maximum and will be truncated to
## the maximum.

## POD1
mx <- xpod1f

ss2 <- sscell_pod1

pca <- prcomp(t(mx),center = TRUE, scale = TRUE,retx=TRUE)

loadings = pca$x

plot(pca,type="lines",col="blue")

nGenes <- nrow(mx)
nSamples <- ncol(mx)
datTraits <- ss2
moduleTraitCor <- cor(loadings[,1:8], datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
  signif(moduleTraitPvalue, 1), ")", sep = "")

dim(textMatrix) = dim(moduleTraitCor)

labeledHeatmap(Matrix = t(moduleTraitCor),
  xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
  yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
  textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
  cex.lab.y = 0.6, zlim = c(-0.45,0.45),
  main = paste("PCA-cell relationships @POD1: Top principal components"))

## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are below given minimum and will be truncated to
## the minimum.
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are above given maximum and will be truncated to
## the maximum.

The conclusion here is that the cell types correlate strongly with the principal components. The good news is that we have selected the cell types that associate the strongest, so we can correct for their contribution.

Centre and scale bloodd composition

Received this warning from DESeq2:

  the design formula contains one or more numeric variables that have mean or
  standard deviation larger than 5 (an arbitrary threshold to trigger this message).
  Including numeric variables with large mean can induce collinearity with the intercept.
  Users should center and scale numeric variables in the design to improve GLM convergence.

Indeed, the distributions are severely skewed. To remedy this, I will centre and scale the data.

par(mfrow=c(5,2))

hist(ss2$Monocytes.C)
hist(scale(ss2$Monocytes.C,center=TRUE))

hist(ss2$NK)
hist(scale(ss2$NK,center=TRUE))

hist(ss2$`T.CD8.Memory`)
hist(scale(ss2$`T.CD8.Memory`,center=TRUE))

hist(ss2$`T.CD4.Naive`)
hist(scale(ss2$`T.CD4.Naive`,center=TRUE))

hist(ss2$`Neutrophils.LD`)
hist(scale(ss2$`Neutrophils.LD`,center=TRUE))

par(mfrow=c(1,1))


ss2$Monocytes.C <- scale(ss2$Monocytes.C,center=TRUE)

ss2$NK <- scale(ss2$NK,center=TRUE)

ss2$`T.CD8.Memory` <- scale(ss2$`T.CD8.Memory`,center=TRUE)

ss2$`T.CD4.Naive` <- scale(ss2$`T.CD4.Naive`,center=TRUE)

ss2$`Neutrophils.LD` <- scale(ss2$`Neutrophils.LD`,center=TRUE)

Differential expression across time

Specific PCAs for key clinical parameters:

wound type
surg duration
ethnicity
age
sex

And blood composition:

Monocytes.C
NK
T.CD8.Memory
T.CD4.Naive
Neutrophils.LD

And ones we didn’t include:

bmi
asaD
smoker
diabetes_typeD

TODO:

age data centred and scaled
ethnicity categories unordered

Overview

We will run timecourse analysis with a simple pairwise approach. For each of the groups below, this will involve three comparisons:

T0 vs EOS
EOS vs POD1
T0 vs POD1

The groups/subgroups we will look at are:

Timecourse in low CRP group
Timecourse in high CRP group
Timecourse in low CRP group and treatment group A
Timecourse in low CRP group and treatment group B
Timecourse in high CRP group and treatment group A
Timecourse in high CRP group and treatment group B

Timecourse in low CRP group

T0 vs EOS

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    79

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 22144    79

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 234 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange     lfcSE     stat
## ENSG00000109906.15 ZBTB16  4434.62143      2.2683562 0.2561987 70.22346
## ENSG00000096060.15 FKBP5  12790.86023      1.9176227 0.2320495 62.82915
## ENSG00000155893.13 PXYLP1   815.34259      0.9238062 0.1161330 61.78145
## ENSG00000177575.13 CD163  14191.10338      1.8315389 0.2319288 57.72874
## ENSG00000196935.9 SRGAP1    245.07442      1.4964335 0.1924557 57.35463
## ENSG00000123358.20 NR4A1   1152.18623     -1.9382868 0.2534991 55.40814
## ENSG00000134780.10 DAGLA    254.95032     -1.0925964 0.1478656 53.90101
## ENSG00000183779.7 ZNF703    706.63137     -1.4253954 0.1934903 52.84257
## ENSG00000171136.7 RLN3       24.60743      1.7599987 0.2386469 51.61627
## ENSG00000185338.7 SOCS1     666.02397      1.8343657 0.2458094 51.57006
##                                 pvalue         padj
## ENSG00000109906.15 ZBTB16 5.295289e-17 1.172589e-12
## ENSG00000096060.15 FKBP5  2.254370e-15 2.496038e-11
## ENSG00000155893.13 PXYLP1 3.837732e-15 2.832758e-11
## ENSG00000177575.13 CD163  3.008748e-14 1.611666e-10
## ENSG00000196935.9 SRGAP1  3.639058e-14 1.611666e-10
## ENSG00000123358.20 NR4A1  9.792998e-14 3.614269e-10
## ENSG00000134780.10 DAGLA  2.108497e-13 6.670078e-10
## ENSG00000183779.7 ZNF703  3.613827e-13 1.000307e-09
## ENSG00000171136.7 RLN3    6.747992e-13 1.529865e-09
## ENSG00000185338.7 SOCS1   6.908711e-13 1.529865e-09

mean(abs(dge$stat))

## [1] 2.961886

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                 baseMean log2FoldChange      lfcSE     stat
## ENSG00000235706.8 DICER1-AS1   293.06741      0.4440925 0.04967513 79.42134
## ENSG00000117228.11 GBP1       2168.31028     -0.6540265 0.07409947 75.85897
## ENSG00000152207.8 CYSLTR2      348.85253     -0.8830580 0.10313140 69.78812
## ENSG00000173083.16 HPSE       1117.77303     -0.7461422 0.09117295 64.40904
## ENSG00000111252.11 SH2B3      4768.12668     -0.6567997 0.08159330 62.73289
## ENSG00000109971.14 HSPA8     14953.38389     -0.3823666 0.04815554 62.40540
## ENSG00000079215.15 SLC1A3      705.60255      3.4504354 0.33980370 61.29834
## ENSG00000187474.5 FPR3          75.27347     -1.1796702 0.14640352 60.23829
## ENSG00000134531.10 EMP1         76.63453     -1.2043247 0.14999284 59.08054
## ENSG00000183337.18 BCOR       1343.06026     -0.6580184 0.08501873 57.89173
##                                    pvalue         padj
## ENSG00000235706.8 DICER1-AS1 5.018122e-19 1.111213e-14
## ENSG00000117228.11 GBP1      3.046657e-18 3.373259e-14
## ENSG00000152207.8 CYSLTR2    6.602940e-17 4.873850e-13
## ENSG00000173083.16 HPSE      1.010931e-15 5.596515e-12
## ENSG00000111252.11 SH2B3     2.367290e-15 1.031747e-11
## ENSG00000109971.14 HSPA8     2.795558e-15 1.031747e-11
## ENSG00000079215.15 SLC1A3    4.904948e-15 1.551645e-11
## ENSG00000187474.5 FPR3       8.404153e-15 2.326270e-11
## ENSG00000134531.10 EMP1      1.513486e-14 3.723848e-11
## ENSG00000183337.18 BCOR      2.769496e-14 6.132772e-11

mean(abs(dge$stat))

## [1] 3.82831

tc_lo_t0veos <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                    baseMean log2FoldChange      lfcSE     stat
## ENSG00000259976.3 RP11-553L6.5    661.62056    -0.08956655 1.43937022 74.56869
## ENSG00000235706.8 DICER1-AS1      293.06741     0.42425149 0.05269524 64.57058
## ENSG00000267436.1 AC005786.7       22.26384     2.54184253 1.26137541 59.40823
## ENSG00000109971.14 HSPA8        14953.38389    -0.37312891 0.04866671 58.28592
## ENSG00000261093.1 CTD-3126B10.1    43.00486    -1.82030629 1.43778056 52.74200
## ENSG00000117228.11 GBP1          2168.31028    -0.62306238 0.08467305 52.34726
## ENSG00000152207.8 CYSLTR2         348.85253    -0.74645485 0.10330648 49.77869
## ENSG00000111252.11 SH2B3         4768.12668    -0.56986188 0.08264104 46.35758
## ENSG00000183337.18 BCOR          1343.06026    -0.61010894 0.09047868 44.34594
## ENSG00000242732.4 RTL5            189.97352    -0.77093222 0.11393097 43.97201
##                                       pvalue         padj
## ENSG00000259976.3 RP11-553L6.5  5.856474e-18 1.296858e-13
## ENSG00000235706.8 DICER1-AS1    9.313564e-16 1.031198e-11
## ENSG00000267436.1 AC005786.7    1.281318e-14 9.457834e-11
## ENSG00000109971.14 HSPA8        2.266610e-14 1.254795e-10
## ENSG00000261093.1 CTD-3126B10.1 3.803703e-13 1.684584e-09
## ENSG00000117228.11 GBP1         4.650468e-13 1.716333e-09
## ENSG00000152207.8 CYSLTR2       1.721032e-12 5.444362e-09
## ENSG00000111252.11 SH2B3        9.852550e-12 2.727186e-08
## ENSG00000183337.18 BCOR         2.751818e-11 6.770695e-08
## ENSG00000242732.4 RTL5          3.331056e-11 7.376291e-08

mean(abs(dge$stat))

## [1] 2.802647

tc_lo_t0veos_adj <- dge

EOS vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    80

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21793    80

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 98 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE      stat
## ENSG00000184988.8 TMEM106A   711.88397      0.9496308 0.07478479 159.42903
## ENSG00000109906.15 ZBTB16   3929.37961     -2.8555261 0.23286151 129.08414
## ENSG00000133816.18 MICAL2   2848.09796      0.9794016 0.08734690 124.08978
## ENSG00000266405.3 CBX3P2      84.50267     -0.8902742 0.08621985 105.72753
## ENSG00000010704.19 HFE       344.48974      1.1500596 0.11354191 100.77433
## ENSG00000156804.7 FBXO32     710.09299     -1.2566781 0.12379154  99.39384
## ENSG00000039523.20 RIPOR1   3289.23804      0.8759300 0.08824745  97.54057
## ENSG00000080986.13 NDC80     106.93744     -1.1650911 0.11771502  95.57876
## ENSG00000164674.17 SYTL3    3103.24422     -1.8743808 0.18469700  95.55281
## ENSG00000010327.10 STAB1   22427.91687      1.7880700 0.17865996  95.25108
##                                  pvalue         padj
## ENSG00000184988.8 TMEM106A 1.507997e-36 3.286378e-32
## ENSG00000109906.15 ZBTB16  6.500360e-30 7.083117e-26
## ENSG00000133816.18 MICAL2  8.051681e-29 5.849009e-25
## ENSG00000266405.3 CBX3P2   8.460396e-25 4.609435e-21
## ENSG00000010704.19 HFE     1.030838e-23 4.493012e-20
## ENSG00000156804.7 FBXO32   2.069651e-23 7.517316e-20
## ENSG00000039523.20 RIPOR1  5.276416e-23 1.642699e-19
## ENSG00000080986.13 NDC80   1.421232e-22 3.486847e-19
## ENSG00000164674.17 SYTL3   1.439986e-22 3.486847e-19
## ENSG00000010327.10 STAB1   1.677066e-22 3.654831e-19

mean(abs(dge$stat))

## [1] 7.104229

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE     stat
## ENSG00000133816.18 MICAL2   2848.09796      0.9342429 0.07523959 148.6185
## ENSG00000182580.3 EPHB3       84.24902      2.3470632 0.18546232 136.2735
## ENSG00000107798.18 LIPA     2784.14312      1.0846430 0.09271535 129.6517
## ENSG00000184988.8 TMEM106A   711.88397      0.8854124 0.07905767 121.0531
## ENSG00000101347.11 SAMHD1  14367.49292      0.5043510 0.04737029 111.9630
## ENSG00000173083.16 HPSE     1225.94971      1.0911422 0.10073358 110.6136
## ENSG00000184992.13 BRI3BP    615.29434      0.5765545 0.05490327 108.7805
## ENSG00000134780.10 DAGLA     271.34390      1.3996228 0.12848922 108.3603
## ENSG00000149639.15 SOGA1     918.53585      0.7639237 0.07307951 106.1779
## ENSG00000072310.18 SREBF1   2231.75608      0.9352731 0.08897432 105.5724
##                                  pvalue         padj
## ENSG00000133816.18 MICAL2  3.474735e-34 7.572491e-30
## ENSG00000182580.3 EPHB3    1.738570e-31 1.894433e-27
## ENSG00000107798.18 LIPA    4.883754e-30 3.547722e-26
## ENSG00000184988.8 TMEM106A 3.720308e-28 2.026917e-24
## ENSG00000101347.11 SAMHD1  3.640350e-26 1.586683e-22
## ENSG00000173083.16 HPSE    7.190368e-26 2.611662e-22
## ENSG00000184992.13 BRI3BP  1.812875e-25 5.643997e-22
## ENSG00000134780.10 DAGLA   2.240963e-25 6.104664e-22
## ENSG00000149639.15 SOGA1   6.740442e-25 1.632161e-21
## ENSG00000072310.18 SREBF1  9.149255e-25 1.993897e-21

mean(abs(dge$stat))

## [1] 8.360329

tc_lo_eosvpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                    baseMean log2FoldChange      lfcSE      stat
## ENSG00000240247.8 DEFA1B         1722.06173     -4.0980962 0.47378091 1779.0131
## ENSG00000281887.3 GIMAP1-GIMAP5   576.99100     -1.2141825 0.54067063  729.8270
## ENSG00000235655.3 H3P6            574.93467      0.9736707 0.83430259  389.3044
## ENSG00000279117.1 CTD-2562J17.6    61.45752     -1.9028310 0.85421563  257.8098
## ENSG00000133816.18 MICAL2        2848.09796      0.9870993 0.07503490  167.8258
## ENSG00000182580.3 EPHB3            84.24902      2.4206827 0.19949094  127.5448
## ENSG00000101347.11 SAMHD1       14367.49292      0.5382133 0.04764215  126.1909
## ENSG00000120029.13 ARMH3         1080.38040      0.3395919 0.03063002  122.5982
## ENSG00000107798.18 LIPA          2784.14312      1.1293832 0.10253820  115.5094
## ENSG00000010327.10 STAB1        22427.91687      1.7854273 0.16088322  109.7020
##                                        pvalue          padj
## ENSG00000240247.8 DEFA1B         0.000000e+00  0.000000e+00
## ENSG00000281887.3 GIMAP1-GIMAP5 9.768085e-161 1.064379e-156
## ENSG00000235655.3 H3P6           1.173018e-86  8.521195e-83
## ENSG00000279117.1 CTD-2562J17.6  5.151441e-58  2.806634e-54
## ENSG00000133816.18 MICAL2        2.208261e-38  9.624924e-35
## ENSG00000182580.3 EPHB3          1.411808e-29  5.127921e-26
## ENSG00000101347.11 SAMHD1        2.792772e-29  8.694698e-26
## ENSG00000120029.13 ARMH3         1.707479e-28  4.651386e-25
## ENSG00000107798.18 LIPA          6.087045e-27  1.473944e-23
## ENSG00000010327.10 STAB1         1.138864e-25  2.481926e-22

mean(abs(dge$stat))

## [1] 8.135987

tc_lo_eosvpod1_adj <- dge

T0 vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    77

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21672    77

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 237 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange      lfcSE     stat
## ENSG00000155659.15 VSIG4   945.42691      3.1528166 0.29004232 99.55518
## ENSG00000132170.24 PPARG    55.22429      1.9914785 0.19995750 93.10869
## ENSG00000183019.7 MCEMP1  2055.80403      1.9117621 0.19299523 91.24427
## ENSG00000108861.9 DUSP3   2085.59921      0.8511817 0.08967106 88.63389
## ENSG00000137474.22 MYO7A   638.70733      1.7214810 0.17876657 87.37989
## ENSG00000169385.3 RNASE2   838.07543      1.5563159 0.16590553 83.73570
## ENSG00000168615.13 ADAM9  1239.05833      0.9558012 0.10739737 77.61693
## ENSG00000149534.9 MS4A2    193.69066     -2.0452500 0.22614090 76.45519
## ENSG00000129538.14 RNASE1   58.70760      2.2959336 0.25361130 75.00801
## ENSG00000166033.13 HTRA1   141.57985      2.2858282 0.25198875 74.81289
##                                 pvalue         padj
## ENSG00000155659.15 VSIG4  1.907733e-23 4.134438e-19
## ENSG00000132170.24 PPARG  4.950001e-22 5.363821e-18
## ENSG00000183019.7 MCEMP1  1.269875e-21 9.173579e-18
## ENSG00000108861.9 DUSP3   4.750792e-21 2.573979e-17
## ENSG00000137474.22 MYO7A  8.955667e-21 3.881744e-17
## ENSG00000169385.3 RNASE2  5.655464e-20 2.042753e-16
## ENSG00000168615.13 ADAM9  1.250933e-18 3.872889e-15
## ENSG00000149534.9 MS4A2   2.252665e-18 6.102468e-15
## ENSG00000129538.14 RNASE1 4.688078e-18 1.121540e-14
## ENSG00000166033.13 HTRA1  5.175066e-18 1.121540e-14

mean(abs(dge$stat))

## [1] 4.544953

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 2 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                            baseMean log2FoldChange      lfcSE     stat
## ENSG00000162654.9 GBP4    1788.1711     -0.9983599 0.07479456 171.7394
## ENSG00000198848.13 CES1   1867.1327      1.6797082 0.12769128 155.5637
## ENSG00000137474.22 MYO7A   638.7073      1.7598997 0.13756378 145.1405
## ENSG00000133106.15 EPSTI1 1071.3264     -0.8493338 0.06998927 143.1952
## ENSG00000166033.13 HTRA1   141.5798      2.2471987 0.17511114 142.6717
## ENSG00000168615.13 ADAM9  1239.0583      0.9878232 0.08148845 141.1859
## ENSG00000115415.20 STAT1  6084.1208     -0.8785012 0.07431848 135.1084
## ENSG00000079215.15 SLC1A3  162.7552      1.5105682 0.12643650 132.5324
## ENSG00000169385.3 RNASE2   838.0754      1.6198242 0.13342959 132.0131
## ENSG00000108861.9 DUSP3   2085.5992      0.8500539 0.07298078 131.5089
##                                 pvalue         padj
## ENSG00000162654.9 GBP4    3.085094e-39 6.686015e-35
## ENSG00000198848.13 CES1   1.054386e-35 1.142532e-31
## ENSG00000137474.22 MYO7A  2.001034e-33 1.445547e-29
## ENSG00000133106.15 EPSTI1 5.327845e-33 2.886626e-29
## ENSG00000166033.13 HTRA1  6.934615e-33 3.005740e-29
## ENSG00000168615.13 ADAM9  1.465162e-32 5.292166e-29
## ENSG00000115415.20 STAT1  3.126319e-31 9.679083e-28
## ENSG00000079215.15 SLC1A3 1.144259e-30 3.099798e-27
## ENSG00000169385.3 RNASE2  1.486338e-30 3.579101e-27
## ENSG00000108861.9 DUSP3   1.916165e-30 4.152713e-27

mean(abs(dge$stat))

## [1] 7.43484

tc_lo_t0vpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange      lfcSE      stat
## ENSG00000240247.8 DEFA1B        1388.99148     -3.6332048 0.56059171 1145.4977
## ENSG00000285444.1 RP11-147H23.3   26.87468     -1.0535626 0.50518807  165.4591
## ENSG00000279117.1 CTD-2562J17.6   49.66612     -0.3138792 0.89085687  153.8945
## ENSG00000079215.15 SLC1A3        162.75517      1.5375608 0.12290200  148.1137
## ENSG00000198848.13 CES1         1867.13269      1.6267616 0.12949149  142.0641
## ENSG00000168615.13 ADAM9        1239.05833      0.9859404 0.08193039  139.6682
## ENSG00000162654.9 GBP4          1788.17112     -0.9827881 0.08333005  133.9204
## ENSG00000163221.9 S100A12       5108.69615      1.7331541 0.14226936  131.6449
## ENSG00000134243.12 SORT1        2296.81018      0.7564959 0.06589281  128.5670
## ENSG00000108861.9 DUSP3         2085.59921      0.8245210 0.07335456  122.5478
##                                        pvalue          padj
## ENSG00000240247.8 DEFA1B        4.269625e-251 9.253132e-247
## ENSG00000285444.1 RP11-147H23.3  7.261242e-38  7.868282e-34
## ENSG00000279117.1 CTD-2562J17.6  2.442163e-35  1.764218e-31
## ENSG00000079215.15 SLC1A3        4.479890e-34  2.427205e-30
## ENSG00000198848.13 CES1          9.416130e-33  4.081327e-29
## ENSG00000168615.13 ADAM9         3.146149e-32  1.136389e-28
## ENSG00000162654.9 GBP4           5.687095e-31  1.760725e-27
## ENSG00000163221.9 S100A12        1.789216e-30  4.846986e-27
## ENSG00000134243.12 SORT1         8.435189e-30  2.031193e-26
## ENSG00000108861.9 DUSP3          1.751485e-28  3.795819e-25

mean(abs(dge$stat))

## [1] 6.276693

tc_lo_t0vpod1_adj <- dge

Timecourse in high CRP group

T0 vs EOS

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
dim(mx)

## [1] 60649   246

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21843   246

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 21843    87

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21772    87

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 181 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange     lfcSE      stat
## ENSG00000108984.15 MAP2K6  1163.93917       1.163862 0.1098349 111.05350
## ENSG00000155307.19 SAMSN1  2307.13372       1.925360 0.1860204 102.41948
## ENSG00000163221.9 S100A12  7562.48678       2.334673 0.2239419 101.32078
## ENSG00000172985.11 SH3RF3   304.79926       1.940902 0.1888729 101.01358
## ENSG00000173744.18 AGFG1   4295.84291       1.528808 0.1544175  95.58532
## ENSG00000096060.15 FKBP5  11280.57517       2.031109 0.2036190  94.53584
## ENSG00000115271.11 GCA     8435.27055       1.998679 0.2064862  89.18612
## ENSG00000166527.8 CLEC4D   1415.52476       2.345699 0.2408279  88.40084
## ENSG00000189221.10 MAOA      32.43834       3.404068 0.3419277  87.33806
## ENSG00000121316.11 PLBD1  11891.86514       1.644136 0.1735820  87.01962
##                                 pvalue         padj
## ENSG00000108984.15 MAP2K6 5.759514e-26 1.253961e-21
## ENSG00000155307.19 SAMSN1 4.492623e-24 4.890669e-20
## ENSG00000163221.9 S100A12 7.823084e-24 4.972452e-20
## ENSG00000172985.11 SH3RF3 9.135500e-24 4.972452e-20
## ENSG00000173744.18 AGFG1  1.416531e-22 6.168142e-19
## ENSG00000096060.15 FKBP5  2.406951e-22 8.734022e-19
## ENSG00000115271.11 GCA    3.593611e-21 1.117716e-17
## ENSG00000166527.8 CLEC4D  5.344823e-21 1.454594e-17
## ENSG00000189221.10 MAOA   9.147066e-21 2.212777e-17
## ENSG00000121316.11 PLBD1  1.074501e-20 2.339405e-17

mean(abs(dge$stat))

## [1] 6.155835

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE     stat
## ENSG00000197122.12 SRC       1085.1248     -0.9521654 0.07702740 146.9439
## ENSG00000281162.2 LINC01127   706.5774      1.6433786 0.13168911 138.3527
## ENSG00000242732.4 RTL5        179.9690     -0.8499702 0.07473365 126.3306
## ENSG00000183779.7 ZNF703      826.1425     -1.5467318 0.13077776 125.1475
## ENSG00000175130.7 MARCKSL1    924.2551     -0.8200107 0.07238320 124.3971
## ENSG00000123685.9 BATF3       129.8002     -0.9862378 0.08722390 123.8836
## ENSG00000112299.8 VNN1       1496.1166      1.9071846 0.15880675 123.8330
## ENSG00000184557.4 SOCS3     12204.5628      2.8331392 0.22010854 122.9167
## ENSG00000137962.13 ARHGAP29   199.8439      1.1381381 0.10024916 122.0688
## ENSG00000160013.9 PTGIR       680.1546     -1.0424021 0.09217769 121.4304
##                                   pvalue         padj
## ENSG00000197122.12 SRC      8.072069e-34 1.757451e-29
## ENSG00000281162.2 LINC01127 6.101780e-32 6.642397e-28
## ENSG00000242732.4 RTL5      2.602953e-29 1.889050e-25
## ENSG00000183779.7 ZNF703    4.724775e-29 2.571695e-25
## ENSG00000175130.7 MARCKSL1  6.896484e-29 2.850295e-25
## ENSG00000123685.9 BATF3     8.933509e-29 2.850295e-25
## ENSG00000112299.8 VNN1      9.164093e-29 2.850295e-25
## ENSG00000184557.4 SOCS3     1.454270e-28 3.957796e-25
## ENSG00000137962.13 ARHGAP29 2.229746e-28 5.394003e-25
## ENSG00000160013.9 PTGIR     3.076095e-28 6.288020e-25

mean(abs(dge$stat))

## [1] 8.780156

tc_hi_t0veos <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                  baseMean log2FoldChange      lfcSE      stat
## ENSG00000276107.1 THBS1-IT1      46.00122      3.6403423 0.57274117 355.98805
## ENSG00000169174.11 PCSK9         34.25862      5.0868309 0.80058573 332.99354
## ENSG00000272414.6 FAM47E-STBD1   27.84294      4.2648058 0.67531450 307.89890
## ENSG00000288622.1 PDCD6-AHRR     19.82942     -1.0186557 1.00201049 109.76930
## ENSG00000175130.7 MARCKSL1      924.25513     -0.8235201 0.08230708  97.19051
## ENSG00000123685.9 BATF3         129.80023     -1.0237730 0.10255427  96.61643
## ENSG00000281162.2 LINC01127     706.57744      1.6580775 0.16264838  92.90480
## ENSG00000152503.10 TRIM36        35.85686     -1.4583994 0.15500905  86.76617
## ENSG00000112299.8 VNN1         1496.11657      1.9261455 0.19259244  85.07585
## ENSG00000197122.12 SRC         1085.12480     -0.9173149 0.09757463  84.30576
##                                      pvalue         padj
## ENSG00000276107.1 THBS1-IT1    2.104693e-79 4.582337e-75
## ENSG00000169174.11 PCSK9       2.141898e-74 2.331670e-70
## ENSG00000272414.6 FAM47E-STBD1 6.265286e-69 4.546927e-65
## ENSG00000288622.1 PDCD6-AHRR   1.100854e-25 5.991950e-22
## ENSG00000175130.7 MARCKSL1     6.296781e-23 2.741870e-19
## ENSG00000123685.9 BATF3        8.414691e-23 3.053411e-19
## ENSG00000281162.2 LINC01127    5.487138e-22 1.706657e-18
## ENSG00000152503.10 TRIM36      1.221413e-20 3.324077e-17
## ENSG00000112299.8 VNN1         2.871363e-20 6.946147e-17
## ENSG00000197122.12 SRC         4.238776e-20 9.228663e-17

mean(abs(dge$stat))

## [1] 5.765449

tc_hi_t0veos_adj <- dge

EOS vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
dim(mx)

## [1] 60649   246

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21843   246

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 21843    82

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21487    82

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 146 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE      stat
## ENSG00000072310.18 SREBF1   2193.56072      1.0062352 0.08405451 140.56128
## ENSG00000159189.12 C1QC      134.53773      3.3487699 0.27034395 129.35924
## ENSG00000103066.13 PLA2G15   534.53708      1.0162764 0.09598874 109.99668
## ENSG00000152270.9 PDE3B     2375.00243     -1.0276634 0.09732732 109.24631
## ENSG00000019169.11 MARCO    1181.02163      1.8922924 0.17696206 107.04656
## ENSG00000173369.17 C1QB      212.89358      2.7066684 0.24964580 103.97934
## ENSG00000171812.13 COL8A2    303.08276      1.7758260 0.17186187 100.87947
## ENSG00000241484.10 ARHGAP8    34.87065     -1.6795093 0.16492123 100.82972
## ENSG00000010327.10 STAB1   28728.21066      1.6292933 0.15941838  99.33521
## ENSG00000131061.14 ZNF341    427.92437      0.6988326 0.07018116  98.26931
##                                  pvalue         padj
## ENSG00000072310.18 SREBF1  2.006675e-32 4.311742e-28
## ENSG00000159189.12 C1QC    5.659068e-30 6.079820e-26
## ENSG00000103066.13 PLA2G15 9.815507e-26 7.030193e-22
## ENSG00000152270.9 PDE3B    1.433220e-25 7.698902e-22
## ENSG00000019169.11 MARCO   4.348332e-25 1.868652e-21
## ENSG00000173369.17 C1QB    2.044365e-24 7.321213e-21
## ENSG00000171812.13 COL8A2  9.775468e-24 2.692349e-20
## ENSG00000241484.10 ARHGAP8 1.002411e-23 2.692349e-20
## ENSG00000010327.10 STAB1   2.131835e-23 5.089637e-20
## ENSG00000131061.14 ZNF341  3.651817e-23 7.846660e-20

mean(abs(dge$stat))

## [1] 5.73382

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 2 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                  baseMean log2FoldChange      lfcSE     stat
## ENSG00000007968.7 E2F2          832.59727      1.4639517 0.10619661 175.7347
## ENSG00000111424.12 VDR          667.35392      0.8696344 0.06496627 174.4571
## ENSG00000108950.12 FAM20A      1594.59623      2.5871067 0.18449386 156.8780
## ENSG00000014257.16 ACP3         889.52760      0.9967503 0.07829567 155.7042
## ENSG00000165092.13 ALDH1A1      417.48794     -1.9541750 0.14581640 155.6281
## ENSG00000019169.11 MARCO       1181.02163      1.8838898 0.14123978 155.0313
## ENSG00000213557.4 RP11-240E2.2   83.17379     -2.1299402 0.16549329 145.4866
## ENSG00000072310.18 SREBF1      2193.56072      0.9968507 0.08107402 144.6692
## ENSG00000137959.17 IFI44L      1289.09238     -1.6353118 0.13034988 140.8658
## ENSG00000173083.16 HPSE        1270.09457      0.8312757 0.06928466 139.6632
##                                      pvalue         padj
## ENSG00000007968.7 E2F2         4.137715e-40 8.451006e-36
## ENSG00000111424.12 VDR         7.866157e-40 8.451006e-36
## ENSG00000108950.12 FAM20A      5.442615e-36 3.898182e-32
## ENSG00000014257.16 ACP3        9.824305e-36 4.386694e-32
## ENSG00000165092.13 ALDH1A1     1.020779e-35 4.386694e-32
## ENSG00000019169.11 MARCO       1.378308e-35 4.935950e-32
## ENSG00000213557.4 RP11-240E2.2 1.681085e-33 5.160210e-30
## ENSG00000072310.18 SREBF1      2.536798e-33 6.813522e-30
## ENSG00000137959.17 IFI44L      1.721404e-32 4.109756e-29
## ENSG00000173083.16 HPSE        3.153949e-32 6.776891e-29

mean(abs(dge$stat))

## [1] 8.460332

tc_hi_eosvpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange      lfcSE     stat
## ENSG00000281887.3 GIMAP1-GIMAP5  523.95279      0.8662230 0.53466750 591.6478
## ENSG00000007968.7 E2F2           832.59727      1.4468759 0.11994765 133.2825
## ENSG00000108950.12 FAM20A       1594.59623      2.5714050 0.20409447 125.3972
## ENSG00000165092.13 ALDH1A1       417.48794     -2.0085148 0.16533340 124.0462
## ENSG00000104972.16 LILRB1       6765.22831      1.0777758 0.09480825 123.1025
## ENSG00000137959.17 IFI44L       1289.09238     -1.6919221 0.14354278 122.8933
## ENSG00000111424.12 VDR           667.35392      0.8605349 0.07690240 121.4375
## ENSG00000213557.4 RP11-240E2.2    83.17379     -2.2318827 0.18755832 121.2037
## ENSG00000014257.16 ACP3          889.52760      0.9882662 0.08835306 119.9041
## ENSG00000072310.18 SREBF1       2193.56072      0.9947363 0.08930313 118.8457
##                                        pvalue          padj
## ENSG00000281887.3 GIMAP1-GIMAP5 1.097733e-130 2.358699e-126
## ENSG00000007968.7 E2F2           7.841946e-31  8.424995e-27
## ENSG00000108950.12 FAM20A        4.166204e-29  2.983974e-25
## ENSG00000165092.13 ALDH1A1       8.230367e-29  4.421147e-25
## ENSG00000104972.16 LILRB1        1.324267e-28  5.269831e-25
## ENSG00000137959.17 IFI44L        1.471540e-28  5.269831e-25
## ENSG00000111424.12 VDR           3.065056e-28  9.261943e-25
## ENSG00000213557.4 RP11-240E2.2   3.448389e-28  9.261943e-25
## ENSG00000014257.16 ACP3          6.639345e-28  1.585107e-24
## ENSG00000072310.18 SREBF1        1.132007e-27  2.432343e-24

mean(abs(dge$stat))

## [1] 6.62683

tc_hi_eosvpod1_adj <- dge

T0 vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
dim(mx)

## [1] 60649   246

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21843   246

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 21843    87

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21614    87

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 227 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange     lfcSE     stat
## ENSG00000108950.12 FAM20A   1375.14606       3.596755 0.1787304 343.2011
## ENSG00000132170.24 PPARG     134.87968       3.365542 0.1858488 288.3518
## ENSG00000137869.15 CYP19A1    71.37647       6.348042 0.3471859 254.4232
## ENSG00000170439.7 METTL7B    161.71505       4.777226 0.2837569 220.8029
## ENSG00000163221.9 S100A12  14931.43969       3.505882 0.2375557 185.5556
## ENSG00000168615.13 ADAM9    1576.11714       1.676385 0.1237590 177.7620
## ENSG00000121316.11 PLBD1   14524.61471       2.068602 0.1550219 168.8004
## ENSG00000099377.14 HSD3B7    161.50738       1.568690 0.1200495 166.8454
## ENSG00000156414.19 TDRD9    1025.77610       2.493816 0.1860163 165.8936
## ENSG00000163251.4 FZD5       146.40894       1.904855 0.1494406 156.2562
##                                  pvalue         padj
## ENSG00000108950.12 FAM20A  1.281574e-76 2.769995e-72
## ENSG00000132170.24 PPARG   1.136790e-64 1.228529e-60
## ENSG00000137869.15 CYP19A1 2.819554e-57 2.031395e-53
## ENSG00000170439.7 METTL7B  6.042877e-50 3.265269e-46
## ENSG00000163221.9 S100A12  2.968279e-42 1.283127e-38
## ENSG00000168615.13 ADAM9   1.493114e-40 5.378696e-37
## ENSG00000121316.11 PLBD1   1.352649e-38 4.176594e-35
## ENSG00000099377.14 HSD3B7  3.615724e-38 9.768782e-35
## ENSG00000156414.19 TDRD9   5.835849e-38 1.401512e-34
## ENSG00000163251.4 FZD5     7.441782e-36 1.608467e-32

mean(abs(dge$stat))

## [1] 10.31991

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange      lfcSE     stat
## ENSG00000108950.12 FAM20A  1375.1461       3.726124 0.15921673 413.9110
## ENSG00000132170.24 PPARG    134.8797       3.266118 0.16507759 317.5829
## ENSG00000116574.6 RHOU     1101.4150       1.166667 0.06650012 295.5881
## ENSG00000150337.14 FCGR1A  1623.7786       2.110656 0.11817070 279.2149
## ENSG00000121316.11 PLBD1  14524.6147       1.971386 0.11162567 275.8873
## ENSG00000168615.13 ADAM9   1576.1171       1.582371 0.09296611 267.4887
## ENSG00000170439.7 METTL7B   161.7150       4.840541 0.25202509 257.6699
## ENSG00000099377.14 HSD3B7   161.5074       1.595766 0.09761700 250.7694
## ENSG00000156414.19 TDRD9   1025.7761       2.252194 0.13571294 235.9535
## ENSG00000014257.16 ACP3     843.5332       1.170991 0.07628976 224.9440
##                                 pvalue         padj
## ENSG00000108950.12 FAM20A 5.161947e-92 1.115703e-87
## ENSG00000132170.24 PPARG  4.868601e-71 5.261497e-67
## ENSG00000116574.6 RHOU    3.012944e-66 2.170725e-62
## ENSG00000150337.14 FCGR1A 1.113468e-62 6.016624e-59
## ENSG00000121316.11 PLBD1  5.913443e-62 2.556263e-58
## ENSG00000168615.13 ADAM9  4.001793e-60 1.441579e-56
## ENSG00000170439.7 METTL7B 5.526382e-58 1.706389e-54
## ENSG00000099377.14 HSD3B7 1.764879e-56 4.768263e-53
## ENSG00000156414.19 TDRD9  2.999526e-53 7.203527e-50
## ENSG00000014257.16 ACP3   7.551154e-51 1.632106e-47

mean(abs(dge$stat))

## [1] 13.55803

tc_hi_t0vpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange     lfcSE      stat
## ENSG00000206047.3 DEFA1         7687.64670     -1.6559594 0.2844943 3729.3118
## ENSG00000240247.8 DEFA1B        5062.09708     -3.2509896 0.4246854 1813.0291
## ENSG00000213178.3 RPL22P1        408.41976     -0.8688631 0.4235335 1232.9388
## ENSG00000137869.15 CYP19A1        71.37647      6.6374139 0.4731625 1222.3070
## ENSG00000281887.3 GIMAP1-GIMAP5  570.58111     -1.2009703 0.4976431 1104.5615
## ENSG00000167434.10 CA4           659.20775      1.6001609 0.4563801  823.0646
## ENSG00000262160.1 RP11-96D1.11   165.47842     -0.2872640 0.3288119  785.0106
## ENSG00000288534.1 TMX2-CTNND1    162.81263     -0.7053911 0.3932053  690.5123
## ENSG00000279716.1 AC006128.2     160.03325     -0.8862056 0.4135312  645.3026
## ENSG00000108950.12 FAM20A       1375.14606      3.7358638 0.1823623  312.6231
##                                        pvalue          padj
## ENSG00000206047.3 DEFA1          0.000000e+00  0.000000e+00
## ENSG00000240247.8 DEFA1B         0.000000e+00  0.000000e+00
## ENSG00000213178.3 RPL22P1       4.234954e-270 3.051143e-266
## ENSG00000137869.15 CYP19A1      8.657860e-268 4.678275e-264
## ENSG00000281887.3 GIMAP1-GIMAP5 3.368710e-242 1.456226e-238
## ENSG00000167434.10 CA4          5.217737e-181 1.879603e-177
## ENSG00000262160.1 RP11-96D1.11  9.796199e-173 3.024786e-169
## ENSG00000288534.1 TMX2-CTNND1   3.458430e-152 9.343812e-149
## ENSG00000279716.1 AC006128.2    2.348081e-142 5.639048e-139
## ENSG00000108950.12 FAM20A        5.858915e-70  1.266346e-66

mean(abs(dge$stat))

## [1] 10.6752

tc_hi_t0vpod1_adj <- dge

Timecourse in low CRP group and treatment group A

treatment_group==1

T0 vs EOS

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & treatment_group==1 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    49

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 22167    49

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 115 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE      stat
## ENSG00000048740.18 CELF2   15777.74519      1.0373666 0.09173198 124.64370
## ENSG00000140280.14 LYSMD2    559.91067     -0.7754547 0.07428222 108.25324
## ENSG00000109906.15 ZBTB16   5895.58267      2.6342706 0.24859479  98.45988
## ENSG00000077150.20 NFKB2    3351.89937     -1.0755448 0.11115532  92.00750
## ENSG00000132514.14 CLEC10A   512.10472     -2.1334526 0.21714463  89.83678
## ENSG00000155893.13 PXYLP1    925.27596      1.2451780 0.13014513  88.45305
## ENSG00000118257.17 NRP2       51.81586     -1.2688529 0.13480861  87.90620
## ENSG00000183779.7 ZNF703     660.85703     -2.1143787 0.21888688  86.90327
## ENSG00000255833.2 TIFAB       75.30968     -1.9165575 0.20848193  80.62370
## ENSG00000096060.15 FKBP5   16302.45011      2.2318230 0.24227153  76.83678
##                                  pvalue         padj
## ENSG00000048740.18 CELF2   6.090483e-29 1.350077e-24
## ENSG00000140280.14 LYSMD2  2.365403e-25 2.621694e-21
## ENSG00000109906.15 ZBTB16  3.316763e-23 2.450756e-19
## ENSG00000077150.20 NFKB2   8.634866e-22 4.785227e-18
## ENSG00000132514.14 CLEC10A 2.586411e-21 1.146659e-17
## ENSG00000155893.13 PXYLP1  5.205588e-21 1.923204e-17
## ENSG00000118257.17 NRP2    6.863329e-21 2.173420e-17
## ENSG00000183779.7 ZNF703   1.139610e-20 3.157718e-17
## ENSG00000255833.2 TIFAB    2.730645e-19 6.725579e-16
## ENSG00000096060.15 FKBP5   1.856873e-18 4.116130e-15

mean(abs(dge$stat))

## [1] 3.868721

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange     lfcSE      stat
## ENSG00000109321.11 AREG          392.20635       4.643570 0.3383736 126.20928
## ENSG00000179593.16 ALOX15B      1008.17625       4.674486 0.3593960 110.14152
## ENSG00000079215.15 SLC1A3       1054.14734       4.689518 0.3620222 109.27814
## ENSG00000121966.7 CXCR4        13191.30609       1.450479 0.1343090 107.24888
## ENSG00000109906.15 ZBTB16       5895.58267       3.242609 0.2721306 107.10484
## ENSG00000182580.3 EPHB3           53.65242      -2.564940 0.2322367 106.40712
## ENSG00000145990.11 GFOD1        2210.37959       1.591507 0.1472425 106.06822
## ENSG00000279359.1 RP11-36D19.9    98.08427       5.450377 0.4395542 101.66753
## ENSG00000119138.5 KLF9          2394.63472       1.250030 0.1201463 101.56426
## ENSG00000060982.15 BCAT1         663.65390       1.754205 0.1658765  99.38918
##                                      pvalue         padj
## ENSG00000109321.11 AREG        2.767079e-29 6.133784e-25
## ENSG00000179593.16 ALOX15B     9.123893e-26 1.011247e-21
## ENSG00000079215.15 SLC1A3      1.410394e-25 1.042140e-21
## ENSG00000121966.7 CXCR4        3.926333e-25 1.871922e-21
## ENSG00000109906.15 ZBTB16      4.222318e-25 1.871922e-21
## ENSG00000182580.3 EPHB3        6.004176e-25 2.218243e-21
## ENSG00000145990.11 GFOD1       7.123992e-25 2.255965e-21
## ENSG00000279359.1 RP11-36D19.9 6.566789e-24 1.703957e-20
## ENSG00000119138.5 KLF9         6.918219e-24 1.703957e-20
## ENSG00000060982.15 BCAT1       2.074526e-23 4.598602e-20

mean(abs(dge$stat))

## [1] 5.50503

tc_lo_a_t0veos <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange      lfcSE      stat
## ENSG00000227097.5 RPS28P7    76.96489     -1.3611093 1.13330866 116.77334
## ENSG00000183337.18 BCOR    1285.25400     -0.8690707 0.08867716  94.46863
## ENSG00000060982.15 BCAT1    663.65390      1.6755350 0.17467110  83.94137
## ENSG00000109906.15 ZBTB16  5895.58267      2.9617587 0.29831639  80.95599
## ENSG00000182580.3 EPHB3      53.65242     -2.3884880 0.28647416  62.08225
## ENSG00000048740.18 CELF2  15777.74519      0.9584689 0.11994960  61.76490
## ENSG00000145990.11 GFOD1   2210.37959      1.4593192 0.18084505  60.74434
## ENSG00000109321.11 AREG     392.20635      4.4500668 0.46973841  60.48764
## ENSG00000141510.18 TP53    1336.80441     -0.5270916 0.06821035  59.28293
## ENSG00000215784.6 FAM72D    124.62930      1.2899093 0.16574192  58.56101
##                                 pvalue         padj
## ENSG00000227097.5 RPS28P7 3.218225e-27 6.995456e-23
## ENSG00000183337.18 BCOR   2.490080e-22 2.706343e-18
## ENSG00000060982.15 BCAT1  5.096679e-20 3.692883e-16
## ENSG00000109906.15 ZBTB16 2.308009e-19 1.254230e-15
## ENSG00000182580.3 EPHB3   3.294074e-15 1.402079e-11
## ENSG00000048740.18 CELF2  3.870118e-15 1.402079e-11
## ENSG00000145990.11 GFOD1  6.498982e-15 2.011817e-11
## ENSG00000109321.11 AREG   7.404211e-15 2.011817e-11
## ENSG00000141510.18 TP53   1.365561e-14 3.298133e-11
## ENSG00000215784.6 FAM72D  1.970840e-14 4.284015e-11

mean(abs(dge$stat))

## [1] 3.388839

tc_lo_a_t0veos_adj <- dge

EOS vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & treatment_group==1 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    48

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21814    48

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 115 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange      lfcSE     stat
## ENSG00000109906.15 ZBTB16       5320.11507     -3.4374370 0.19199768 258.6610
## ENSG00000184988.8 TMEM106A       709.79315      1.2230051 0.08960810 183.3919
## ENSG00000048740.18 CELF2       14863.79302     -1.0654712 0.08074058 168.6745
## ENSG00000077150.20 NFKB2        3283.51502      1.1787100 0.09841794 141.2055
## ENSG00000132514.14 CLEC10A       619.24307      2.5933695 0.20884016 140.4500
## ENSG00000104064.18 GABPB1        394.75447     -0.7247996 0.06089129 140.1801
## ENSG00000279359.1 RP11-36D19.9    90.71086     -4.9034740 0.36755644 134.7708
## ENSG00000168389.18 MFSD2A        155.50071      2.0758630 0.17452667 134.4048
## ENSG00000145990.11 GFOD1        1957.39890     -1.8627148 0.15847949 127.5545
## ENSG00000010704.19 HFE           323.45134      1.5984260 0.14004655 126.3633
##                                      pvalue         padj
## ENSG00000109906.15 ZBTB16      3.360382e-58 7.330338e-54
## ENSG00000184988.8 TMEM106A     8.807893e-42 9.606769e-38
## ENSG00000048740.18 CELF2       1.441019e-38 1.047813e-34
## ENSG00000077150.20 NFKB2       1.450827e-32 7.912085e-29
## ENSG00000132514.14 CLEC10A     2.122289e-32 8.839202e-29
## ENSG00000104064.18 GABPB1      2.431247e-32 8.839202e-29
## ENSG00000279359.1 RP11-36D19.9 3.705666e-31 1.154791e-27
## ENSG00000168389.18 MFSD2A      4.455912e-31 1.215016e-27
## ENSG00000145990.11 GFOD1       1.404914e-29 3.405199e-26
## ENSG00000010704.19 HFE         2.560399e-29 5.585255e-26

mean(abs(dge$stat))

## [1] 7.680155

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE     stat
## ENSG00000185338.7 SOCS1      792.26712      -2.766210 0.13190802 381.8868
## ENSG00000109906.15 ZBTB16   5320.11507      -3.611869 0.18467444 295.3007
## ENSG00000128283.7 CDC42EP1   115.99336       3.358012 0.19675715 261.6463
## ENSG00000080546.13 SESN1    1604.94130      -1.399258 0.08732870 244.7479
## ENSG00000182580.3 EPHB3       72.35903       3.182280 0.19957381 230.9785
## ENSG00000074966.11 TXK      2023.01139      -1.649997 0.10569279 227.4593
## ENSG00000121578.13 B4GALT4   930.90220      -1.399099 0.09173109 221.8354
## ENSG00000048740.18 CELF2   14863.79302      -1.090982 0.07226423 220.7818
## ENSG00000039523.20 RIPOR1   3178.13477       1.175231 0.07819646 217.8315
## ENSG00000100027.17 YPEL1    1922.57004      -2.091714 0.13650554 209.4435
##                                  pvalue         padj
## ENSG00000185338.7 SOCS1    4.832469e-85 1.054155e-80
## ENSG00000109906.15 ZBTB16  3.480270e-66 3.795930e-62
## ENSG00000128283.7 CDC42EP1 7.510384e-59 5.461050e-55
## ENSG00000080546.13 SESN1   3.626519e-55 1.977722e-51
## ENSG00000182580.3 EPHB3    3.647192e-52 1.591197e-48
## ENSG00000074966.11 TXK     2.135190e-51 7.762840e-48
## ENSG00000121578.13 B4GALT4 3.597922e-50 1.121215e-46
## ENSG00000048740.18 CELF2   6.107343e-50 1.665320e-46
## ENSG00000039523.20 RIPOR1  2.687784e-49 6.514590e-46
## ENSG00000100027.17 YPEL1   1.816652e-47 3.962845e-44

mean(abs(dge$stat))

## [1] 10.90795

tc_lo_a_eosvpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                  baseMean log2FoldChange      lfcSE      stat
## ENSG00000206047.3 DEFA1         5469.3934      -2.983730 0.43216988 2723.0074
## ENSG00000281887.3 GIMAP1-GIMAP5  590.3117      -1.127849 0.78953782  333.9923
## ENSG00000185338.7 SOCS1          792.2671      -2.791833 0.14647377  311.5634
## ENSG00000167173.19 C15orf39     3499.7107       1.461975 0.08347172  290.5943
## ENSG00000114423.23 CBLB         1972.0767      -1.362626 0.08182847  264.9611
## ENSG00000162174.12 ASRGL1        323.3099       1.301687 0.08385340  234.1363
## ENSG00000128283.7 CDC42EP1       115.9934       3.206102 0.20574680  229.7277
## ENSG00000074966.11 TXK          2023.0114      -1.669524 0.10871471  218.6595
## ENSG00000109906.15 ZBTB16       5320.1151      -3.567818 0.20986303  217.2269
## ENSG00000167600.14 CYP2S1        234.6805       2.377137 0.15582853  209.7154
##                                       pvalue         padj
## ENSG00000206047.3 DEFA1         0.000000e+00 0.000000e+00
## ENSG00000281887.3 GIMAP1-GIMAP5 1.297997e-74 1.415725e-70
## ENSG00000185338.7 SOCS1         9.969131e-70 7.248888e-66
## ENSG00000167173.19 C15orf39     3.690361e-65 2.012538e-61
## ENSG00000114423.23 CBLB         1.422841e-59 6.207571e-56
## ENSG00000162174.12 ASRGL1       7.469973e-53 2.715833e-49
## ENSG00000128283.7 CDC42EP1      6.834692e-52 2.129885e-48
## ENSG00000074966.11 TXK          1.773302e-49 4.835350e-46
## ENSG00000109906.15 ZBTB16       3.641487e-49 8.826155e-46
## ENSG00000167600.14 CYP2S1       1.584727e-47 3.456924e-44

mean(abs(dge$stat))

## [1] 10.22145

tc_lo_a_eosvpod1_adj <- dge

T0 vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & treatment_group==1 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    47

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21696    47

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 137 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange     lfcSE      stat
## ENSG00000155659.15 VSIG4    1257.61013      3.5400766 0.2811023 131.90106
## ENSG00000169385.3 RNASE2     859.84644      1.8089705 0.1805673  95.29885
## ENSG00000185339.9 TCN2       860.35014      1.8349460 0.1872936  91.00457
## ENSG00000137474.22 MYO7A     696.02656      1.8551287 0.1896387  90.63056
## ENSG00000162745.11 OLFML2B   110.94478      1.7150246 0.1913517  77.19168
## ENSG00000149534.9 MS4A2      193.46247     -2.6524703 0.2911033  73.23761
## ENSG00000154269.15 ENPP3      64.64986     -2.0608870 0.2334975  72.90212
## ENSG00000115415.20 STAT1    5608.45831     -0.9174074 0.1103843  67.65283
## ENSG00000129538.14 RNASE1     59.48653      2.4605583 0.2904876  66.14691
## ENSG00000246363.3 LINC02458   49.73716     -2.1836489 0.2592985  66.01473
##                                   pvalue         padj
## ENSG00000155659.15 VSIG4    1.572648e-30 3.411860e-26
## ENSG00000169385.3 RNASE2    1.637078e-22 1.775821e-18
## ENSG00000185339.9 TCN2      1.433407e-21 9.392050e-18
## ENSG00000137474.22 MYO7A    1.731653e-21 9.392050e-18
## ENSG00000162745.11 OLFML2B  1.551461e-18 6.731788e-15
## ENSG00000149534.9 MS4A2     1.149447e-17 4.156210e-14
## ENSG00000154269.15 ENPP3    1.362415e-17 4.222513e-14
## ENSG00000115415.20 STAT1    1.949731e-16 5.287427e-13
## ENSG00000129538.14 RNASE1   4.185360e-16 9.582847e-13
## ENSG00000246363.3 LINC02458 4.475661e-16 9.582847e-13

mean(abs(dge$stat))

## [1] 3.720168

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange      lfcSE     stat
## ENSG00000149534.9 MS4A2      193.4625     -2.9737363 0.19611976 196.8226
## ENSG00000133106.15 EPSTI1    939.8100     -0.9543914 0.07075212 178.0256
## ENSG00000137474.22 MYO7A     696.0266      1.9849438 0.14192368 175.8134
## ENSG00000162654.9 GBP4      1675.4251     -1.0949994 0.08304901 168.0892
## ENSG00000198848.13 CES1     1838.3112      1.7230700 0.12894356 163.6883
## ENSG00000169385.3 RNASE2     859.8464      1.8544681 0.13990473 159.1185
## ENSG00000115415.20 STAT1    5608.4583     -0.9340165 0.07509938 150.5753
## ENSG00000163251.4 FZD5       106.1830      1.2512637 0.10147334 150.1279
## ENSG00000166033.13 HTRA1     167.4301      2.3769995 0.19071071 136.3502
## ENSG00000163220.11 S100A9 111200.8193      1.6876920 0.14188879 127.9446
##                                 pvalue         padj
## ENSG00000149534.9 MS4A2   1.030945e-44 2.236739e-40
## ENSG00000133106.15 EPSTI1 1.307772e-40 1.418671e-36
## ENSG00000137474.22 MYO7A  3.977180e-40 2.876297e-36
## ENSG00000162654.9 GBP4    1.934245e-38 1.049134e-34
## ENSG00000198848.13 CES1   1.769523e-37 7.678316e-34
## ENSG00000169385.3 RNASE2  1.762951e-36 6.374829e-33
## ENSG00000115415.20 STAT1  1.297812e-34 4.022475e-31
## ENSG00000163251.4 FZD5    1.625583e-34 4.408581e-31
## ENSG00000166033.13 HTRA1  1.672723e-31 4.032378e-28
## ENSG00000163220.11 S100A9 1.154191e-29 2.504133e-26

mean(abs(dge$stat))

## [1] 7.192428

tc_lo_a_t0vpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                            baseMean log2FoldChange      lfcSE      stat
## ENSG00000240247.8 DEFA1B  1402.2685     -3.8913167 0.80695196 730.62042
## ENSG00000162654.9 GBP4    1675.4251     -1.2372987 0.10563805 132.35701
## ENSG00000133106.15 EPSTI1  939.8100     -0.8942295 0.08537225 107.91928
## ENSG00000198848.13 CES1   1838.3112      1.7903478 0.16503140 106.51516
## ENSG00000137474.22 MYO7A   696.0266      2.0517585 0.19683525 100.13689
## ENSG00000149534.9 MS4A2    193.4625     -2.9443652 0.27311499  98.31781
## ENSG00000166033.13 HTRA1   167.4301      2.6276258 0.25941198  93.47978
## ENSG00000169385.3 RNASE2   859.8464      1.7731262 0.18220802  87.71400
## ENSG00000115415.20 STAT1  5608.4583     -1.0046473 0.10735334  85.06667
## ENSG00000164850.15 GPER1   357.3446      2.3897547 0.24486918  83.67710
##                                  pvalue          padj
## ENSG00000240247.8 DEFA1B  6.565894e-161 1.424536e-156
## ENSG00000162654.9 GBP4     1.249928e-30  1.355922e-26
## ENSG00000133106.15 EPSTI1  2.799502e-25  2.024600e-21
## ENSG00000198848.13 CES1    5.685577e-25  3.083857e-21
## ENSG00000137474.22 MYO7A   1.422200e-23  6.171208e-20
## ENSG00000149534.9 MS4A2    3.563464e-23  1.288549e-19
## ENSG00000166033.13 HTRA1   4.103722e-22  1.271919e-18
## ENSG00000169385.3 RNASE2   7.563720e-21  2.051281e-17
## ENSG00000115415.20 STAT1   2.884730e-20  6.954123e-17
## ENSG00000164850.15 GPER1   5.825617e-20  1.263926e-16

mean(abs(dge$stat))

## [1] 4.586292

tc_lo_a_t0vpod1_adj <- dge

Timecourse in low CRP group and treatment group B

treatment_group==2

T0 vs EOS

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & treatment_group==2 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    30

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 22091    30

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 669 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                  baseMean log2FoldChange     lfcSE     stat
## ENSG00000079215.15 SLC1A3       127.16034      1.7631835 0.3491554 23.59672
## ENSG00000171236.10 LRG1         585.52351      2.4543414 0.5028459 20.75322
## ENSG00000123358.20 NR4A1        909.66286     -1.1715008 0.2685336 18.86488
## ENSG00000100985.7 MMP9         2434.01959      3.4005253 0.7049743 18.54805
## ENSG00000224505.2 AC002117.1     24.27816      0.9134542 0.2162273 17.72735
## ENSG00000286813.1 RP11-81A1.10   63.34543      0.8204037 0.1960375 17.18595
## ENSG00000102010.15 BMX           49.58972      2.8248876 0.6316847 17.05447
## ENSG00000235706.8 DICER1-AS1    278.88111      0.5037422 0.1216700 16.94258
## ENSG00000151726.15 ACSL1       7884.12632      1.5529465 0.3650851 16.90440
## ENSG00000132170.24 PPARG         25.54928      0.8734849 0.2132271 16.60090
##                                      pvalue       padj
## ENSG00000079215.15 SLC1A3      1.187898e-06 0.02420461
## ENSG00000171236.10 LRG1        5.224361e-06 0.05322579
## ENSG00000123358.20 NR4A1       1.403125e-05 0.08439458
## ENSG00000100985.7 MMP9         1.656745e-05 0.08439458
## ENSG00000224505.2 AC002117.1   2.549352e-05 0.08880536
## ENSG00000286813.1 RP11-81A1.10 3.389330e-05 0.08880536
## ENSG00000102010.15 BMX         3.632269e-05 0.08880536
## ENSG00000235706.8 DICER1-AS1   3.852764e-05 0.08880536
## ENSG00000151726.15 ACSL1       3.931026e-05 0.08880536
## ENSG00000132170.24 PPARG       4.612918e-05 0.08880536

mean(abs(dge$stat))

## [1] 0.9597284

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                     baseMean log2FoldChange      lfcSE     stat
## ENSG00000272825.1 LL21NC02-1C16.2   96.96324      0.8371518 0.13343654 38.82559
## ENSG00000260793.2 RP5-882C2.2      311.34768      0.7681852 0.13246083 32.75518
## ENSG00000183250.12 LINC01547       757.64333      0.5588114 0.10048091 30.48659
## ENSG00000173083.16 HPSE           1236.34632     -0.5355590 0.09866876 29.04849
## ENSG00000152207.8 CYSLTR2          402.51383     -0.6530792 0.12625223 26.13514
## ENSG00000235706.8 DICER1-AS1       278.88111      0.4589188 0.09310901 24.14034
## ENSG00000070961.16 ATP2B1         3219.80585     -0.4696350 0.09658894 23.32565
## ENSG00000175445.17 LPL              70.12512      1.2408807 0.25610487 21.63312
## ENSG00000231711.2 LINC00899        259.64682      0.3900730 0.08516019 20.90837
## ENSG00000136603.14 SKIL            773.41007     -0.4776508 0.10455942 20.57883
##                                         pvalue         padj
## ENSG00000272825.1 LL21NC02-1C16.2 4.634122e-10 7.259816e-06
## ENSG00000260793.2 RP5-882C2.2     1.045273e-08 8.187622e-05
## ENSG00000183250.12 LINC01547      3.361820e-08 1.755543e-04
## ENSG00000173083.16 HPSE           7.058909e-08 2.764622e-04
## ENSG00000152207.8 CYSLTR2         3.183381e-07 9.974170e-04
## ENSG00000235706.8 DICER1-AS1      8.956427e-07 2.338523e-03
## ENSG00000070961.16 ATP2B1         1.367638e-06 3.060773e-03
## ENSG00000175445.17 LPL            3.301021e-06 6.464225e-03
## ENSG00000231711.2 LINC00899       4.817837e-06 8.386248e-03
## ENSG00000136603.14 SKIL           5.722531e-06 8.964918e-03

mean(abs(dge$stat))

## [1] 1.234482

tc_lo_b_t0veos <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 3 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                     baseMean log2FoldChange     lfcSE      stat
## ENSG00000115590.14 IL1R2           924.42601      0.2434183 3.9285354 180.31055
## ENSG00000237541.4 HLA-DQA2         560.95931      0.2585584 1.1672873 179.98409
## ENSG00000278621.1 THBS1-AS1         12.80480      2.7311473 0.7776560  89.70720
## ENSG00000273420.1 CTD-2540B15.13    18.17452      1.6761641 0.6644232  78.29255
## ENSG00000079393.21 DUSP13           20.67591      1.2931592 0.7832886  64.30114
## ENSG00000204044.6 SLC12A5-AS1       29.95286      1.8079286 0.9386269  46.22535
## ENSG00000260793.2 RP5-882C2.2      311.34768      0.8991426 0.1640768  29.24636
## ENSG00000165029.17 ABCA1           844.51147     -1.2120476 0.2623119  19.87194
## ENSG00000173083.16 HPSE           1236.34632     -0.5190125 0.1259207  16.84979
## ENSG00000272825.1 LL21NC02-1C16.2   96.96324      0.8422317 0.2056801  16.39734
##                                         pvalue         padj
## ENSG00000115590.14 IL1R2          4.145857e-41 5.396103e-37
## ENSG00000237541.4 HLA-DQA2        4.885341e-41 5.396103e-37
## ENSG00000278621.1 THBS1-AS1       2.761481e-21 2.033463e-17
## ENSG00000273420.1 CTD-2540B15.13  8.885629e-19 4.907311e-15
## ENSG00000079393.21 DUSP13         1.067840e-15 4.717931e-12
## ENSG00000204044.6 SLC12A5-AS1     1.054045e-11 3.880817e-08
## ENSG00000260793.2 RP5-882C2.2     6.373590e-08 2.011414e-04
## ENSG00000165029.17 ABCA1          8.280680e-06 2.286606e-02
## ENSG00000173083.16 HPSE           4.045782e-05 9.930597e-02
## ENSG00000272825.1 LL21NC02-1C16.2 5.135736e-05 1.134535e-01

mean(abs(dge$stat))

## [1] 0.894435

tc_lo_b_t0veos_adj <- dge

EOS vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & treatment_group==2 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    32

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21736    32

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 194 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                baseMean log2FoldChange      lfcSE     stat
## ENSG00000133816.18 MICAL2    2943.85516      0.7789089 0.12157049 40.56823
## ENSG00000224505.2 AC002117.1   21.51833     -1.1393530 0.19346895 35.03737
## ENSG00000134574.12 DDB2      1402.15391     -0.4139817 0.07261334 32.40799
## ENSG00000266405.3 CBX3P2       81.87776     -0.7669847 0.13551894 31.91774
## ENSG00000262001.1 DLGAP1-AS2   87.81656     -0.5739731 0.10346740 30.83874
## ENSG00000237499.7 WAKMAR2     268.38688     -0.8043144 0.14561925 30.18730
## ENSG00000184988.8 TMEM106A    714.83016      0.5574801 0.10152308 29.97874
## ENSG00000010704.19 HFE        376.91679      0.6080182 0.11149297 29.53619
## ENSG00000182580.3 EPHB3       102.49136      1.5782482 0.28671826 29.04251
## ENSG00000161509.14 GRIN2C     112.97523     -0.9314928 0.17459569 28.12506
##                                    pvalue         padj
## ENSG00000133816.18 MICAL2    1.898684e-10 3.566678e-06
## ENSG00000224505.2 AC002117.1 3.234379e-09 3.037891e-05
## ENSG00000134574.12 DDB2      1.249719e-08 7.553533e-05
## ENSG00000266405.3 CBX3P2     1.608418e-08 7.553533e-05
## ENSG00000262001.1 DLGAP1-AS2 2.803839e-08 1.053402e-04
## ENSG00000237499.7 WAKMAR2    3.922688e-08 1.172207e-04
## ENSG00000184988.8 TMEM106A   4.368085e-08 1.172207e-04
## ENSG00000010704.19 HFE       5.488229e-08 1.288705e-04
## ENSG00000182580.3 EPHB3      7.080728e-08 1.477905e-04
## ENSG00000161509.14 GRIN2C    1.137238e-07 1.958850e-04

mean(abs(dge$stat))

## [1] 1.46779

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange      lfcSE     stat
## ENSG00000182580.3 EPHB3     102.49136      1.3009831 0.14957311 73.47731
## ENSG00000073008.15 PVR      672.25551      0.6242081 0.08368927 54.91374
## ENSG00000133816.18 MICAL2  2943.85516      0.6909542 0.10021607 46.42623
## ENSG00000104972.16 LILRB1  5970.71157      0.5656853 0.08359891 45.10027
## ENSG00000107798.18 LIPA    3067.44485      0.7162865 0.10547282 44.91821
## ENSG00000173083.16 HPSE    1354.83693      0.7280429 0.10750470 44.67086
## ENSG00000101347.11 SAMHD1 15457.43230      0.4143883 0.06202348 44.31755
## ENSG00000183087.15 GAS6     735.11294      0.5494024 0.08237006 43.98306
## ENSG00000092964.18 DPYSL2  2184.22562      0.6168219 0.09218008 43.95720
## ENSG00000251429.1 AIDAP2     99.97742      0.8806600 0.13235644 43.49904
##                                 pvalue         padj
## ENSG00000182580.3 EPHB3   1.017999e-17 1.955271e-13
## ENSG00000073008.15 PVR    1.259373e-13 1.209439e-09
## ENSG00000133816.18 MICAL2 9.513335e-12 6.090754e-08
## ENSG00000104972.16 LILRB1 1.871994e-11 7.162838e-08
## ENSG00000107798.18 LIPA   2.054382e-11 7.162838e-08
## ENSG00000173083.16 HPSE   2.331007e-11 7.162838e-08
## ENSG00000101347.11 SAMHD1 2.792018e-11 7.162838e-08
## ENSG00000183087.15 GAS6   3.312298e-11 7.162838e-08
## ENSG00000092964.18 DPYSL2 3.356357e-11 7.162838e-08
## ENSG00000251429.1 AIDAP2  4.241669e-11 8.146974e-08

mean(abs(dge$stat))

## [1] 2.019014

tc_lo_b_eosvpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 3 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange      lfcSE      stat
## ENSG00000108950.12 FAM20A     493.1955      2.2607994 0.13667214 247.95834
## ENSG00000010327.10 STAB1    23946.7666      1.1433841 0.08971768 155.92752
## ENSG00000133816.18 MICAL2    2943.8552      0.8399458 0.07815621 112.92123
## ENSG00000174705.13 SH3PXD2B   260.1279      2.1205055 0.20375145  95.95791
## ENSG00000104972.16 LILRB1    5970.7116      0.6981740 0.07124726  94.74369
## ENSG00000123384.14 LRP1     26659.9755      0.7742280 0.07921944  93.71220
## ENSG00000101347.11 SAMHD1   15457.4323      0.5008277 0.05275850  89.48179
## ENSG00000119686.10 FLVCR2     890.2308      0.9376435 0.10086558  84.41021
## ENSG00000196576.16 PLXNB2   15082.8898      0.5513973 0.06172860  79.02920
## ENSG00000132205.11 EMILIN2   6960.7954      0.7329647 0.08178416  78.75009
##                                   pvalue         padj
## ENSG00000108950.12 FAM20A   7.236807e-56 1.237494e-51
## ENSG00000010327.10 STAB1    8.780236e-36 7.507102e-32
## ENSG00000133816.18 MICAL2   2.245193e-26 1.279760e-22
## ENSG00000174705.13 SH3PXD2B 1.173523e-22 5.016809e-19
## ENSG00000104972.16 LILRB1   2.167027e-22 7.411232e-19
## ENSG00000123384.14 LRP1     3.649045e-22 1.039978e-18
## ENSG00000101347.11 SAMHD1   3.094733e-21 7.559992e-18
## ENSG00000119686.10 FLVCR2   4.020671e-20 8.594185e-17
## ENSG00000196576.16 PLXNB2   6.119868e-19 1.162775e-15
## ENSG00000132205.11 EMILIN2  7.048551e-19 1.205302e-15

mean(abs(dge$stat))

## [1] 3.068885

tc_lo_b_eosvpod1_adj <- dge

T0 vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==1 & treatment_group==2 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    30

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21715    30

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 625 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange     lfcSE     stat
## ENSG00000132170.24 PPARG          48.20760      2.1505950 0.2662258 60.46573
## ENSG00000108950.12 FAM20A        494.07416      2.2063718 0.3165917 43.28003
## ENSG00000079215.15 SLC1A3        106.72679      1.5115198 0.2330986 39.81901
## ENSG00000198019.13 FCGR1B        513.61162      1.2300391 0.2054735 34.23319
## ENSG00000183019.7 MCEMP1        1806.85176      1.8138844 0.2975967 34.08075
## ENSG00000167680.17 SEMA6B        155.08979      2.1383801 0.3499958 33.56789
## ENSG00000251429.1 AIDAP2          94.95369      1.2347545 0.2093972 33.51177
## ENSG00000154856.13 APCDD1        154.49487      1.5265850 0.2578314 33.01377
## ENSG00000170439.7 METTL7B         35.76914      3.1686336 0.5109607 32.62022
## ENSG00000244115.1 DNAJC25-GNG10  198.47885      0.9388774 0.1633077 32.15711
##                                       pvalue         padj
## ENSG00000132170.24 PPARG        7.487048e-15 1.625738e-10
## ENSG00000108950.12 FAM20A       4.744017e-11 5.150580e-07
## ENSG00000079215.15 SLC1A3       2.786196e-10 2.016648e-06
## ENSG00000198019.13 FCGR1B       4.888800e-09 2.197275e-05
## ENSG00000183019.7 MCEMP1        5.287178e-09 2.197275e-05
## ENSG00000167680.17 SEMA6B       6.881947e-09 2.197275e-05
## ENSG00000251429.1 AIDAP2        7.083414e-09 2.197275e-05
## ENSG00000154856.13 APCDD1       9.150855e-09 2.483771e-05
## ENSG00000170439.7 METTL7B       1.120430e-08 2.703225e-05
## ENSG00000244115.1 DNAJC25-GNG10 1.421951e-08 3.087624e-05

mean(abs(dge$stat))

## [1] 1.717923

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange      lfcSE     stat
## ENSG00000271605.6 MILR1     793.1250      0.8555749 0.10416592 65.89361
## ENSG00000079215.15 SLC1A3   106.7268      1.5964974 0.19113598 65.17343
## ENSG00000078124.13 ACER3   1426.9688      0.5820186 0.07244318 63.92028
## ENSG00000154856.13 APCDD1   154.4949      1.5092698 0.18329589 63.59309
## ENSG00000132170.24 PPARG     48.2076      1.9670080 0.24227909 61.41409
## ENSG00000177706.9 FAM20C    750.0239      1.1609098 0.15812472 50.88953
## ENSG00000164821.5 DEFA4     115.4556     -1.7434251 0.23593328 50.83419
## ENSG00000164850.15 GPER1    272.1023      1.9465680 0.25610614 49.85400
## ENSG00000168615.13 ADAM9   1184.7640      0.9129877 0.12721043 49.67899
## ENSG00000179542.16 SLITRK4  168.9429      1.2218693 0.16988399 49.13772
##                                  pvalue         padj
## ENSG00000271605.6 MILR1    4.759321e-16 6.291942e-12
## ENSG00000079215.15 SLC1A3  6.858824e-16 6.291942e-12
## ENSG00000078124.13 ACER3   1.295571e-15 7.016127e-12
## ENSG00000154856.13 APCDD1  1.529651e-15 7.016127e-12
## ENSG00000132170.24 PPARG   4.624891e-15 1.697058e-11
## ENSG00000177706.9 FAM20C   9.771359e-13 2.634311e-09
## ENSG00000164821.5 DEFA4    1.005079e-12 2.634311e-09
## ENSG00000164850.15 GPER1   1.656222e-12 3.691311e-09
## ENSG00000168615.13 ADAM9   1.810749e-12 3.691311e-09
## ENSG00000179542.16 SLITRK4 2.386071e-12 4.377725e-09

mean(abs(dge$stat))

## [1] 2.307963

tc_lo_b_t0vpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange     lfcSE     stat
## ENSG00000108950.12 FAM20A   494.07416      2.0087618 0.2854317 44.46021
## ENSG00000172232.10 AZU1     208.72804     -1.0821626 0.1655359 41.89490
## ENSG00000142733.17 MAP3K6   876.43730      0.9653672 0.1540806 38.11734
## ENSG00000134243.12 SORT1   2233.24420      0.7976340 0.1301463 36.96559
## ENSG00000177706.9 FAM20C    750.02385      1.2542262 0.2017255 36.87785
## ENSG00000206047.3 DEFA1    2783.53260     -1.9948458 0.3269226 36.07951
## ENSG00000150337.14 FCGR1A  1120.27775      1.4095387 0.2281360 35.96390
## ENSG00000183307.4 TMEM121B  522.66955      0.9532160 0.1591333 34.87441
## ENSG00000164821.5 DEFA4     115.45557     -2.0163450 0.3496044 32.24600
## ENSG00000256713.8 PGA5       24.18784      2.7735595 0.5143974 31.96692
##                                  pvalue         padj
## ENSG00000108950.12 FAM20A  2.595793e-11 4.981066e-07
## ENSG00000172232.10 AZU1    9.631362e-11 9.240810e-07
## ENSG00000142733.17 MAP3K6  6.661543e-10 4.260945e-06
## ENSG00000134243.12 SORT1   1.202325e-09 4.826678e-06
## ENSG00000177706.9 FAM20C   1.257668e-09 4.826678e-06
## ENSG00000206047.3 DEFA1    1.894287e-09 5.510193e-06
## ENSG00000150337.14 FCGR1A  2.010076e-09 5.510193e-06
## ENSG00000183307.4 TMEM121B 3.516731e-09 8.435319e-06
## ENSG00000164821.5 DEFA4    1.358364e-08 2.896183e-05
## ENSG00000256713.8 PGA5     1.568207e-08 3.009232e-05

mean(abs(dge$stat))

## [1] 1.107873

tc_lo_b_t0vpod1_adj <- dge

Timecourse in high CRP group and treatment group A

treatment_group==1

T0 vs EOS

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & treatment_group==1 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    25

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 22227    25

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 167 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                    baseMean log2FoldChange     lfcSE     stat
## ENSG00000137801.11 THBS1        18797.18309       4.030477 0.3963519 83.13806
## ENSG00000064270.13 ATP2C2         402.83240       4.631437 0.4486478 82.12003
## ENSG00000265527.1 MIR5690          14.62137       3.550693 0.4052435 79.61081
## ENSG00000155307.19 SAMSN1        3520.51278       2.536829 0.2835327 72.97227
## ENSG00000198363.18 ASPH          2613.51210       2.670401 0.3125150 65.90632
## ENSG00000273812.3 WI2-87327B8.2   106.74736       3.062129 0.3610669 64.13354
## ENSG00000169902.15 TPST1          550.52177       3.259951 0.3818049 62.95361
## ENSG00000172985.11 SH3RF3         375.84195       2.657477 0.3221656 61.70452
## ENSG00000101187.16 SLCO4A1        275.69907       3.598141 0.4241167 60.66574
## ENSG00000250608.2 RP11-933H2.4    212.69083       1.568772 0.1988214 60.56301
##                                       pvalue         padj
## ENSG00000137801.11 THBS1        7.651834e-20 1.423219e-15
## ENSG00000064270.13 ATP2C2       1.280679e-19 1.423219e-15
## ENSG00000265527.1 MIR5690       4.559246e-19 3.377793e-15
## ENSG00000155307.19 SAMSN1       1.314846e-17 7.305941e-14
## ENSG00000198363.18 ASPH         4.728735e-16 2.102017e-12
## ENSG00000273812.3 WI2-87327B8.2 1.162655e-15 4.306863e-12
## ENSG00000169902.15 TPST1        2.116324e-15 6.719630e-12
## ENSG00000172985.11 SH3RF3       3.990636e-15 1.108698e-11
## ENSG00000101187.16 SLCO4A1      6.763709e-15 1.583838e-11
## ENSG00000250608.2 RP11-933H2.4  7.126060e-15 1.583838e-11

mean(abs(dge$stat))

## [1] 3.254617

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                  baseMean log2FoldChange      lfcSE     stat
## ENSG00000079215.15 SLC1A3       1113.8142       4.951827 0.18709941 521.9372
## ENSG00000179593.16 ALOX15B       870.0898       5.245785 0.20644794 473.6905
## ENSG00000164105.4 SAP30         1512.9030       2.736357 0.12704480 408.9775
## ENSG00000119138.5 KLF9          2326.1339       1.405745 0.07995513 297.8874
## ENSG00000140044.13 JDP2         2561.3681       2.077276 0.12138571 268.4997
## ENSG00000096060.15 FKBP5       18192.7606       3.242083 0.19611138 220.8318
## ENSG00000121933.19 TMIGD3        198.9827       4.037352 0.24706584 218.8149
## ENSG00000279359.1 RP11-36D19.9   164.2215       5.321338 0.33736811 216.5070
## ENSG00000134463.15 ECHDC3        705.0188       3.381691 0.20792654 215.7673
## ENSG00000164056.11 SPRY1         176.8216       2.775841 0.18288624 208.4013
##                                       pvalue          padj
## ENSG00000079215.15 SLC1A3      1.603543e-115 3.564195e-111
## ENSG00000179593.16 ALOX15B     5.042984e-105 5.604520e-101
## ENSG00000164105.4 SAP30         6.119167e-91  4.533691e-87
## ENSG00000119138.5 KLF9          9.506737e-67  5.282656e-63
## ENSG00000140044.13 JDP2         2.409385e-60  1.071068e-56
## ENSG00000096060.15 FKBP5        5.956032e-50  2.206412e-46
## ENSG00000121933.19 TMIGD3       1.640155e-49  5.207960e-46
## ENSG00000279359.1 RP11-36D19.9  5.227812e-49  1.452482e-45
## ENSG00000134463.15 ECHDC3       7.580092e-49  1.872030e-45
## ENSG00000164056.11 SPRY1        3.066653e-47  6.816250e-44

mean(abs(dge$stat))

## [1] 5.141268

tc_hi_a_t0veos <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 5 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                    baseMean log2FoldChange     lfcSE     stat
## ENSG00000169174.11 PCSK9           89.34576       7.212028 0.9670302 608.1842
## ENSG00000179593.16 ALOX15B        870.08982       4.929816 0.2099199 392.1615
## ENSG00000079215.15 SLC1A3        1113.81425       4.721836 0.2077981 362.4938
## ENSG00000079393.21 DUSP13         144.49220       5.927693 0.8491969 346.8927
## ENSG00000118520.15 ARG1          2407.98739       4.886759 0.2217240 333.8596
## ENSG00000279174.1 RP11-1099M24.9   24.00901       5.203995 0.8116484 308.4292
## ENSG00000278621.1 THBS1-AS1        85.37794       5.367301 0.9265756 267.1314
## ENSG00000276107.1 THBS1-IT1        79.67447       5.480677 0.9943344 262.0736
## ENSG00000258476.6 LINC02207       639.25327       3.405272 0.1923522 250.8748
## ENSG00000102010.15 BMX            435.13935       4.687378 0.2741784 230.1977
##                                         pvalue          padj
## ENSG00000169174.11 PCSK9         2.777726e-134 6.054332e-130
## ENSG00000179593.16 ALOX15B        2.801057e-87  3.052592e-83
## ENSG00000079215.15 SLC1A3         8.064402e-81  5.859057e-77
## ENSG00000079393.21 DUSP13         2.012841e-77  1.096797e-73
## ENSG00000118520.15 ARG1           1.387337e-74  6.047682e-71
## ENSG00000279174.1 RP11-1099M24.9  4.801869e-69  1.744359e-65
## ENSG00000278621.1 THBS1-AS1       4.787679e-60  1.490746e-56
## ENSG00000276107.1 THBS1-IT1       6.060797e-59  1.651264e-55
## ENSG00000258476.6 LINC02207       1.673893e-56  4.053797e-53
## ENSG00000102010.15 BMX            5.398020e-52  1.176552e-48

mean(abs(dge$stat))

## [1] 4.122525

tc_hi_a_t0veos_adj <- dge

EOS vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & treatment_group==1 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    23

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21654    23

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 197 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange     lfcSE     stat
## ENSG00000072310.18 SREBF1  2084.89569       1.374119 0.1490116 83.15173
## ENSG00000132514.14 CLEC10A  562.93900       2.849057 0.3058331 77.72400
## ENSG00000100453.13 GZMB    2459.77305      -2.167593 0.2566876 64.29149
## ENSG00000141744.4 PNMT       71.32042      -3.560621 0.4105019 63.17609
## ENSG00000039523.20 RIPOR1  2980.66339       1.027136 0.1317769 60.13118
## ENSG00000173372.17 C1QA     243.72605       2.839128 0.3490706 59.42622
## ENSG00000079215.15 SLC1A3  1141.83012      -2.763823 0.3345625 58.58345
## ENSG00000164056.11 SPRY1    167.30887      -2.551936 0.3141082 58.50142
## ENSG00000109906.15 ZBTB16  4969.85246      -3.160372 0.3775330 57.93555
## ENSG00000134780.10 DAGLA    188.22694       2.275761 0.2941307 56.08840
##                                  pvalue         padj
## ENSG00000072310.18 SREBF1  7.599082e-20 1.645353e-15
## ENSG00000132514.14 CLEC10A 1.184925e-18 1.282800e-14
## ENSG00000100453.13 GZMB    1.073080e-15 7.744779e-12
## ENSG00000141744.4 PNMT     1.890288e-15 1.023213e-11
## ENSG00000039523.20 RIPOR1  8.874140e-15 3.842858e-11
## ENSG00000173372.17 C1QA    1.269660e-14 4.581779e-11
## ENSG00000079215.15 SLC1A3  1.948487e-14 5.498098e-11
## ENSG00000164056.11 SPRY1   2.031442e-14 5.498098e-11
## ENSG00000109906.15 ZBTB16  2.708476e-14 6.515991e-11
## ENSG00000134780.10 DAGLA   6.928475e-14 1.500153e-10

mean(abs(dge$stat))

## [1] 2.710607

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange     lfcSE      stat
## ENSG00000164056.11 SPRY1    167.3089      -2.617604 0.1771423 204.44134
## ENSG00000172232.10 AZU1     256.6018      -1.932919 0.1889567  97.26575
## ENSG00000079215.15 SLC1A3  1141.8301      -2.760312 0.2632785  90.30577
## ENSG00000185338.7 SOCS1     596.4511      -1.871254 0.1940857  84.44088
## ENSG00000072310.18 SREBF1  2084.8957       1.412415 0.1507135  83.00525
## ENSG00000183542.5 KLRC4     354.2118      -1.386445 0.1535955  77.83535
## ENSG00000132514.14 CLEC10A  562.9390       3.079505 0.3089449  76.49791
## ENSG00000134539.17 KLRD1   4604.7957      -1.629675 0.1808981  74.64582
## ENSG00000146232.17 NFKBIE   631.9206       1.443095 0.1623899  74.42134
## ENSG00000152766.6 ANKRD22   458.2366      -2.338834 0.2536226  72.48926
##                                  pvalue         padj
## ENSG00000164056.11 SPRY1   2.242253e-46 4.855376e-42
## ENSG00000172232.10 AZU1    6.061975e-23 6.563300e-19
## ENSG00000079215.15 SLC1A3  2.040563e-21 1.472878e-17
## ENSG00000185338.7 SOCS1    3.958768e-20 2.143079e-16
## ENSG00000072310.18 SREBF1  8.183585e-20 3.544147e-16
## ENSG00000183542.5 KLRC4    1.119980e-18 4.042008e-15
## ENSG00000132514.14 CLEC10A 2.204458e-18 6.819333e-15
## ENSG00000134539.17 KLRD1   5.632088e-18 1.518263e-14
## ENSG00000146232.17 NFKBIE  6.310319e-18 1.518263e-14
## ENSG00000152766.6 ANKRD22  1.679436e-17 3.636650e-14

mean(abs(dge$stat))

## [1] 3.741003

tc_hi_a_eosvpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange     lfcSE      stat
## ENSG00000235655.3 H3P6       666.4614       4.746055 2.4665593 167.16278
## ENSG00000129538.14 RNASE1    143.7566       4.466465 0.3558303 141.44739
## ENSG00000173372.17 C1QA      243.7261       4.363465 0.3414017 137.48828
## ENSG00000162745.11 OLFML2B   164.2976       3.547194 0.2993985 130.27448
## ENSG00000164047.6 CAMP       508.5888      -3.214854 0.2771307 120.97930
## ENSG00000179921.15 GPBAR1    699.7191       2.243816 0.2141482 104.50115
## ENSG00000010327.10 STAB1   28597.5524       3.172264 0.2939730 101.16186
## ENSG00000092964.18 DPYSL2   1885.5321       2.307491 0.2252137  98.96122
## ENSG00000121966.7 CXCR4     9874.0219      -2.090096 0.2107991  92.51866
## ENSG00000107798.18 LIPA     2304.6076       2.487076 0.2490610  92.15894
##                                  pvalue         padj
## ENSG00000235655.3 H3P6     3.082290e-38 6.156567e-34
## ENSG00000129538.14 RNASE1  1.284441e-32 1.282771e-28
## ENSG00000173372.17 C1QA    9.429832e-32 6.278382e-28
## ENSG00000162745.11 OLFML2B 3.568567e-30 1.781964e-26
## ENSG00000164047.6 CAMP     3.861409e-28 1.542556e-24
## ENSG00000179921.15 GPBAR1  1.571024e-24 5.229938e-21
## ENSG00000010327.10 STAB1   8.476605e-24 2.418739e-20
## ENSG00000092964.18 DPYSL2  2.574955e-23 6.429019e-20
## ENSG00000121966.7 CXCR4    6.669304e-22 1.480141e-18
## ENSG00000107798.18 LIPA    7.998711e-22 1.597663e-18

mean(abs(dge$stat))

## [1] 4.128341

tc_hi_a_eosvpod1_adj <- dge

T0 vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & treatment_group==1 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    24

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21860    24

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 202 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange     lfcSE     stat
## ENSG00000132170.24 PPARG     140.52036       3.532818 0.3524650 88.66234
## ENSG00000137869.15 CYP19A1    64.76442       5.883919 0.5696610 85.85100
## ENSG00000108950.12 FAM20A   1340.46068       3.336057 0.3491410 80.34614
## ENSG00000167680.17 SEMA6B    402.11326       3.176831 0.3464517 75.20631
## ENSG00000129538.14 RNASE1    145.60592       3.604216 0.3952207 72.68911
## ENSG00000163221.9 S100A12  10264.16050       2.856398 0.3354250 66.09817
## ENSG00000183019.7 MCEMP1    3932.57736       2.760099 0.3325777 63.22748
## ENSG00000136160.17 EDNRB      50.50611       3.689244 0.4606377 56.98972
## ENSG00000139572.4 GPR84      178.40373       2.448494 0.3182039 55.85630
## ENSG00000143546.10 S100A8  54619.02839       2.359509 0.3129705 53.57704
##                                  pvalue         padj
## ENSG00000132170.24 PPARG   4.682973e-21 1.023651e-16
## ENSG00000137869.15 CYP19A1 1.940174e-20 2.120513e-16
## ENSG00000108950.12 FAM20A  3.142454e-19 2.289697e-15
## ENSG00000167680.17 SEMA6B  4.240090e-18 2.317103e-14
## ENSG00000129538.14 RNASE1  1.517696e-17 6.635063e-14
## ENSG00000163221.9 S100A12  4.290137e-16 1.562968e-12
## ENSG00000183019.7 MCEMP1   1.841610e-15 5.750822e-12
## ENSG00000136160.17 EDNRB   4.380966e-14 1.197044e-10
## ENSG00000139572.4 GPR84    7.796654e-14 1.893634e-10
## ENSG00000143546.10 S100A8  2.486479e-13 5.372700e-10

mean(abs(dge$stat))

## [1] 3.270127

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                            baseMean log2FoldChange     lfcSE      stat
## ENSG00000108950.12 FAM20A 1340.4607       3.366682 0.2306195 168.28922
## ENSG00000167680.17 SEMA6B  402.1133       3.146322 0.2374286 142.95644
## ENSG00000060558.4 GNA15   1341.5683       1.264053 0.1097692 127.73054
## ENSG00000161944.16 ASGR2  3254.7476       1.746071 0.1528648 120.24374
## ENSG00000170439.7 METTL7B  116.3331       3.983329 0.3421774 107.34490
## ENSG00000198848.13 CES1   2272.4987       2.167707 0.1974807 105.68611
## ENSG00000150337.14 FCGR1A 1469.0407       1.901307 0.1768162 104.46977
## ENSG00000149534.9 MS4A2    182.2406      -3.426363 0.3062217 100.02166
## ENSG00000104918.8 RETN    1306.2868       2.757746 0.2505300  98.72815
## ENSG00000136830.12 NIBAN2 3298.4497       1.407715 0.1388694  97.07961
##                                 pvalue         padj
## ENSG00000108950.12 FAM20A 1.749159e-38 3.823661e-34
## ENSG00000167680.17 SEMA6B 6.008352e-33 6.567129e-29
## ENSG00000060558.4 GNA15   1.285650e-29 9.368100e-26
## ENSG00000161944.16 ASGR2  5.594637e-28 3.057469e-24
## ENSG00000170439.7 METTL7B 3.740640e-25 1.635408e-21
## ENSG00000198848.13 CES1   8.639085e-25 3.147507e-21
## ENSG00000150337.14 FCGR1A 1.596104e-24 4.984405e-21
## ENSG00000149534.9 MS4A2   1.507397e-23 4.118963e-20
## ENSG00000104918.8 RETN    2.896550e-23 7.035398e-20
## ENSG00000136830.12 NIBAN2 6.659500e-23 1.455767e-19

mean(abs(dge$stat))

## [1] 4.576891

tc_hi_a_t0vpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 6 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange     lfcSE      stat
## ENSG00000139675.13 HNRNPA1L2      87.17300       2.246248 2.7937605 121.66326
## ENSG00000273047.1 RP4-583P15.14   68.10403       6.845862 3.3456051  79.02762
## ENSG00000079215.15 SLC1A3        243.72289      -3.931421 0.6576895  30.34254
## ENSG00000152766.6 ANKRD22        148.74159      -3.029033 0.5699001  27.08705
## ENSG00000123836.15 PFKFB2       1008.07343      -3.231340 0.6102561  26.01151
## ENSG00000112299.8 VNN1          1054.88471      -2.752552 0.5316643  25.54896
## ENSG00000096006.12 CRISP3        128.60621      -3.894003 0.7800306  23.30689
## ENSG00000225313.5 RP11-415J8.3   244.61892      -2.704253 0.5775340  21.27013
## ENSG00000166523.8 CLEC4E        1859.24532      -2.831758 0.6123570  19.79771
## ENSG00000132514.14 CLEC10A       867.89144       2.970602 0.6374807  19.77513
##                                       pvalue         padj
## ENSG00000139675.13 HNRNPA1L2    2.735395e-28 5.283963e-24
## ENSG00000273047.1 RP4-583P15.14 6.124756e-19 5.915595e-15
## ENSG00000079215.15 SLC1A3       3.620961e-08 2.331537e-04
## ENSG00000152766.6 ANKRD22       1.944961e-07 9.392701e-04
## ENSG00000123836.15 PFKFB2       3.393876e-07 1.311190e-03
## ENSG00000112299.8 VNN1          4.312986e-07 1.388566e-03
## ENSG00000096006.12 CRISP3       1.381042e-06 3.811083e-03
## ENSG00000225313.5 RP11-415J8.3  3.988988e-06 9.631909e-03
## ENSG00000166523.8 CLEC4E        8.608559e-06 1.682684e-02
## ENSG00000132514.14 CLEC10A      8.710897e-06 1.682684e-02

mean(abs(dge$stat))

## [1] 0.6226131

tc_hi_a_t0vpod1_adj <- dge

Timecourse in high CRP group and treatment group B

treatment_group==2

T0 vs EOS

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & treatment_group==2 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    62

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21828    62

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 206 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange     lfcSE     stat
## ENSG00000108984.15 MAP2K6    1061.6632      1.0618905 0.1237009 73.22654
## ENSG00000197632.9 SERPINB2    236.4370      1.8522465 0.2315241 61.70258
## ENSG00000163221.9 S100A12    6460.5551      2.0941184 0.2625110 60.42488
## ENSG00000096060.15 FKBP5     8486.6198      1.6524287 0.2203608 54.66055
## ENSG00000173744.18 AGFG1     3803.7375      1.3149106 0.1766162 54.63706
## ENSG00000137962.13 ARHGAP29   171.0255      1.0350793 0.1397988 54.54718
## ENSG00000160223.17 ICOSLG     535.8792     -0.7789236 0.1045706 54.29724
## ENSG00000121316.11 PLBD1    10701.9018      1.5491562 0.2089442 53.68398
## ENSG00000172985.11 SH3RF3     275.9671      1.6227396 0.2198581 53.08054
## ENSG00000155307.19 SAMSN1    1816.3282      1.5258123 0.2085799 52.31831
##                                   pvalue         padj
## ENSG00000108984.15 MAP2K6   1.155909e-17 2.523119e-13
## ENSG00000197632.9 SERPINB2  3.994572e-15 4.359676e-11
## ENSG00000163221.9 S100A12   7.644064e-15 5.561821e-11
## ENSG00000096060.15 FKBP5    1.432536e-13 5.374181e-10
## ENSG00000173744.18 AGFG1    1.449761e-13 5.374181e-10
## ENSG00000137962.13 ARHGAP29 1.517604e-13 5.374181e-10
## ENSG00000160223.17 ICOSLG   1.723441e-13 5.374181e-10
## ENSG00000121316.11 PLBD1    2.354758e-13 6.424957e-10
## ENSG00000172985.11 SH3RF3   3.201484e-13 7.764666e-10
## ENSG00000155307.19 SAMSN1   4.719543e-13 1.030182e-09

mean(abs(dge$stat))

## [1] 3.847355

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange      lfcSE      stat
## ENSG00000160223.17 ICOSLG    535.8792     -0.7467822 0.06673933 123.10529
## ENSG00000175130.7 MARCKSL1   967.7293     -0.7435331 0.07273058 102.20699
## ENSG00000197122.12 SRC      1158.8170     -0.8509687 0.08390001  99.68092
## ENSG00000012124.17 CD22     2387.7642     -0.8408112 0.08328323  98.72146
## ENSG00000281162.2 LINC01127  620.5166      1.3982997 0.13632090  95.97391
## ENSG00000072310.18 SREBF1   2015.9050     -0.5726827 0.05889950  93.21485
## ENSG00000183779.7 ZNF703     907.1750     -1.2034120 0.12064105  93.00314
## ENSG00000141682.12 PMAIP1    380.1075     -0.8925346 0.09144513  92.27686
## ENSG00000159958.7 TNFRSF13C  956.9752     -0.5825913 0.06055005  91.45275
## ENSG00000123685.9 BATF3      136.9244     -0.9271236 0.09649069  90.04453
##                                   pvalue         padj
## ENSG00000160223.17 ICOSLG   1.322420e-28 2.886578e-24
## ENSG00000175130.7 MARCKSL1  5.001317e-24 5.458438e-20
## ENSG00000197122.12 SRC      1.790381e-23 1.302681e-19
## ENSG00000012124.17 CD22     2.906349e-23 1.585995e-19
## ENSG00000281162.2 LINC01127 1.164075e-22 5.081885e-19
## ENSG00000072310.18 SREBF1   4.691478e-22 1.628100e-18
## ENSG00000183779.7 ZNF703    5.221137e-22 1.628100e-18
## ENSG00000141682.12 PMAIP1   7.536034e-22 2.056207e-18
## ENSG00000159958.7 TNFRSF13C 1.142891e-21 2.771892e-18
## ENSG00000123685.9 BATF3     2.328599e-21 5.082865e-18

mean(abs(dge$stat))

## [1] 5.859661

tc_hi_b_t0veos <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange      lfcSE      stat
## ENSG00000235655.3 H3P6       485.6623     -1.5372041 1.08740980 386.41247
## ENSG00000141682.12 PMAIP1    380.1075     -0.9773438 0.11827482  66.33028
## ENSG00000160223.17 ICOSLG    535.8792     -0.7106591 0.09165973  59.06123
## ENSG00000072310.18 SREBF1   2015.9050     -0.5405884 0.07334376  53.71543
## ENSG00000175130.7 MARCKSL1   967.7293     -0.7104446 0.09662604  53.07312
## ENSG00000169122.11 FAM110B   126.2614      1.3481101 0.17989510  52.57778
## ENSG00000197122.12 SRC      1158.8170     -0.8436746 0.11482131  52.23331
## ENSG00000242472.1 IGHJ5      134.7030     -0.7059525 0.09835176  51.17250
## ENSG00000281162.2 LINC01127  620.5166      1.4041647 0.18898671  51.03911
## ENSG00000159958.7 TNFRSF13C  956.9752     -0.5526402 0.07709455  50.82674
##                                   pvalue         padj
## ENSG00000235655.3 H3P6      4.999158e-86 1.091216e-81
## ENSG00000141682.12 PMAIP1   3.813556e-16 4.162115e-12
## ENSG00000160223.17 ICOSLG   1.528415e-14 1.112074e-10
## ENSG00000072310.18 SREBF1   2.317362e-13 1.264585e-09
## ENSG00000175130.7 MARCKSL1  3.213598e-13 1.402929e-09
## ENSG00000169122.11 FAM110B  4.135421e-13 1.504466e-09
## ENSG00000197122.12 SRC      4.928311e-13 1.536788e-09
## ENSG00000242472.1 IGHJ5     8.459552e-13 2.195993e-09
## ENSG00000281162.2 LINC01127 9.054396e-13 2.195993e-09
## ENSG00000159958.7 TNFRSF13C 1.008897e-12 2.202220e-09

mean(abs(dge$stat))

## [1] 3.170145

tc_hi_b_t0veos_adj <- dge

EOS vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & treatment_group==2 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    59

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21618    59

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 140 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange     lfcSE      stat
## ENSG00000019169.11 MARCO   1146.0967       1.875059 0.1759798 105.73472
## ENSG00000108950.12 FAM20A  1568.7848       2.571190 0.2365273 104.40275
## ENSG00000152270.9 PDE3B    2421.7636      -1.050534 0.1056526  97.18047
## ENSG00000159189.12 C1QC     137.9313       3.169276 0.3023666  93.17740
## ENSG00000171812.13 COL8A2   319.6901       1.639214 0.1672085  91.08459
## ENSG00000099377.14 HSD3B7   172.1246       1.379791 0.1447231  87.60091
## ENSG00000182580.3 EPHB3     106.3407       1.795891 0.1933337  81.54711
## ENSG00000010327.10 STAB1  28836.3295       1.534081 0.1656748  81.52376
## ENSG00000007968.7 E2F2      889.5286       1.487165 0.1627249  79.69012
## ENSG00000145287.11 PLAC8   4921.3813       1.407633 0.1544193  79.58844
##                                 pvalue         padj
## ENSG00000019169.11 MARCO  8.429723e-25 1.784575e-20
## ENSG00000108950.12 FAM20A 1.651009e-24 1.784575e-20
## ENSG00000152270.9 PDE3B   6.328795e-23 4.560530e-19
## ENSG00000159189.12 C1QC   4.781096e-22 2.583943e-18
## ENSG00000171812.13 COL8A2 1.376597e-21 5.951856e-18
## ENSG00000099377.14 HSD3B7 8.008774e-21 2.885561e-17
## ENSG00000182580.3 EPHB3   1.711329e-19 4.679420e-16
## ENSG00000010327.10 STAB1  1.731675e-19 4.679420e-16
## ENSG00000007968.7 E2F2    4.379855e-19 9.968419e-16
## ENSG00000145287.11 PLAC8  4.611166e-19 9.968419e-16

mean(abs(dge$stat))

## [1] 4.299517

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange      lfcSE     stat
## ENSG00000108950.12 FAM20A  1568.7848      2.7614099 0.16317570 235.5619
## ENSG00000165092.13 ALDH1A1  446.1609     -2.2654686 0.13985338 231.1344
## ENSG00000007968.7 E2F2      889.5286      1.4514824 0.10031487 196.5105
## ENSG00000135424.18 ITGA7    449.9249      1.6280069 0.11393975 189.1996
## ENSG00000019169.11 MARCO   1146.0967      1.8405815 0.13607686 164.1851
## ENSG00000111424.12 VDR      653.8612      0.8782710 0.07143206 147.5929
## ENSG00000137959.17 IFI44L  1364.0428     -1.5524693 0.12310558 146.0327
## ENSG00000014257.16 ACP3     906.4581      0.9777736 0.07970893 145.7403
## ENSG00000161944.16 ASGR2   2879.4472      1.2950035 0.10523282 142.3192
## ENSG00000099377.14 HSD3B7   172.1246      1.3976301 0.11829039 131.7320
##                                  pvalue         padj
## ENSG00000108950.12 FAM20A  3.651248e-53 7.893267e-49
## ENSG00000165092.13 ALDH1A1 3.372458e-52 3.645290e-48
## ENSG00000007968.7 E2F2     1.206031e-44 8.690661e-41
## ENSG00000135424.18 ITGA7   4.753743e-43 2.569160e-39
## ENSG00000019169.11 MARCO   1.378210e-37 5.958828e-34
## ENSG00000111424.12 VDR     5.822705e-34 2.097921e-30
## ENSG00000137959.17 IFI44L  1.276997e-33 3.943731e-30
## ENSG00000014257.16 ACP3    1.479515e-33 3.998018e-30
## ENSG00000161944.16 ASGR2   8.280973e-33 1.989090e-29
## ENSG00000099377.14 HSD3B7  1.712438e-30 3.701948e-27

mean(abs(dge$stat))

## [1] 6.712274

tc_hi_b_eosvpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                              baseMean log2FoldChange      lfcSE     stat
## ENSG00000235655.3 H3P6      608.09437       3.904034 0.98480695 488.6684
## ENSG00000165092.13 ALDH1A1  446.16087      -2.362199 0.15699364 194.3134
## ENSG00000108950.12 FAM20A  1568.78481       2.648635 0.17706493 185.9421
## ENSG00000135424.18 ITGA7    449.92494       1.462964 0.10863910 170.9103
## ENSG00000007968.7 E2F2      889.52858       1.389067 0.11141029 146.0457
## ENSG00000137959.17 IFI44L  1364.04275      -1.542075 0.12864932 131.4918
## ENSG00000019169.11 MARCO   1146.09674       1.789369 0.15470308 120.1663
## ENSG00000137869.15 CYP19A1   85.05678       2.864858 0.24740781 117.7876
## ENSG00000170439.7 METTL7B   212.43471       2.479187 0.21183023 116.6478
## ENSG00000174837.15 ADGRE1  1777.49986       1.059254 0.09711126 113.4690
##                                   pvalue          padj
## ENSG00000235655.3 H3P6     2.776805e-108 6.002896e-104
## ENSG00000165092.13 ALDH1A1  3.638032e-44  3.932349e-40
## ENSG00000108950.12 FAM20A   2.444196e-42  1.761288e-38
## ENSG00000135424.18 ITGA7    4.681249e-39  2.529981e-35
## ENSG00000007968.7 E2F2      1.268673e-33  5.485234e-30
## ENSG00000137959.17 IFI44L   1.932714e-30  6.963570e-27
## ENSG00000019169.11 MARCO    5.817281e-28  1.796542e-24
## ENSG00000137869.15 CYP19A1  1.929860e-27  5.214965e-24
## ENSG00000170439.7 METTL7B   3.428459e-27  8.235159e-24
## ENSG00000174837.15 ADGRE1   1.703266e-26  3.682120e-23

mean(abs(dge$stat))

## [1] 4.977048

tc_hi_b_eosvpod1_adj <- dge

T0 vs POD1

ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints

mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]

rownames(ss2) == colnames(mx)

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE

ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)

str(ss2)

## 'data.frame':    246 obs. of  49 variables:
##  $ PG_number           : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
##  $ sexD                : num  1 1 2 2 2 1 1 1 1 1 ...
##  $ ageD                : int  84 54 70 70 62 58 58 61 61 68 ...
##  $ ageCS               : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : NULL
##  $ weightD             : num  60 63.6 70.1 70.1 78.7 ...
##  $ asaD                : int  3 2 2 2 2 1 1 1 1 2 ...
##  $ heightD             : num  133 155 170 170 175 158 158 149 149 155 ...
##  $ ethnicityCAT        : chr  "Asian" "Asian" "Asian" "Asian" ...
##  $ ethnicityD          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ current_smokerD     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ diabetes_typeD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ daily_insulinD      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ creatinine_preopD   : int  54 47 109 109 98 50 50 49 49 61 ...
##  $ surgery_dominantD   : num  1 2 6 6 4 2 2 2 2 4 ...
##  $ wound_typeOP        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ HbA1c               : num  5.7 4.9 5.2 5.2 5.4 ...
##  $ bmi                 : num  33.9 26.5 24.3 24.3 25.7 ...
##  $ revised_whodas_preop: int  24 14 12 12 12 12 12 18 18 12 ...
##  $ neut_lymph_ratio_d0 : num  1.31 6 1.83 1.83 6.88 ...
##  $ neut_lymph_ratio_d1 : num  14 15.62 6.27 6.27 16.57 ...
##  $ neut_lymph_ratio_d2 : num  14 9.5 7.67 7.67 12.17 ...
##  $ ab_noninfection     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ risk                : int  4 1 2 2 1 1 1 1 1 1 ...
##  $ risk_cat            : num  3 1 2 2 1 1 1 1 1 1 ...
##  $ bmi_cat             : num  4 3 2 2 3 3 3 1 1 2 ...
##  $ wound_type_cat      : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ duration_sx         : num  3.067 1.333 5.167 5.167 0.683 ...
##  $ anyDex              : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ treatment_group     : int  2 1 2 2 1 1 1 2 2 2 ...
##  $ deltacrp            : num  277.9 32.7 202.9 202.9 24.8 ...
##  $ crp_group           : int  4 1 4 4 1 4 4 1 1 1 ...
##  $ timepoint           : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
##  $ Monocytes.C         : num  48.2 20.1 48.7 36.4 15.2 ...
##  $ NK                  : num  0.421 2.007 3.586 2.176 8.347 ...
##  $ T.CD8.Memory        : num  2.59 10.74 1.81 5.96 14.67 ...
##  $ T.CD4.Naive         : num  1.57 9.23 2.42 4.61 16.31 ...
##  $ T.CD8.Naive         : num  11.65 11.69 13.91 12.5 6.73 ...
##  $ B.Naive             : num  2.158 5.499 0.849 5.065 2.638 ...
##  $ T.CD4.Memory        : num  15.8 12.9 14.4 16.2 10.1 ...
##  $ MAIT                : num  0.398 1.474 2.769 1.372 0.525 ...
##  $ T.gd.Vd2            : num  1.93 2.05 1.86 2.17 1.85 ...
##  $ Neutrophils.LD      : num  2.808 3.663 5.722 0.739 14.631 ...
##  $ T.gd.non.Vd2        : num  0.473 0.304 0.338 0.519 0.337 ...
##  $ Basophils.LD        : num  0.74 1.188 0.779 0.343 1.778 ...
##  $ Monocytes.NC.I      : num  9.98 13.41 2.07 10.35 4.13 ...
##  $ B.Memory            : num  0.561 4.538 0.205 0.549 1.921 ...
##  $ mDCs                : num  0.529 0.766 0.45 0.819 0.634 ...
##  $ pDCs                : num  0.0712 0.3356 0.0858 0.0405 0.075 ...
##  $ Plasmablasts        : num  0.1371 0.0945 0.1127 0.1229 0.1151 ...

ss3 <- subset(ss2,crp_group==4 & treatment_group==2 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)

## [1] 60649    63

mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)

## [1] 21635    63

# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ case )

## converting counts to integer mode

res <- DESeq(dds,test="LRT",reduced=~1)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 251 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                               baseMean log2FoldChange     lfcSE     stat
## ENSG00000108950.12 FAM20A   1391.30563       3.715293 0.2045583 275.0285
## ENSG00000132170.24 PPARG     133.12355       3.310490 0.2179034 202.6237
## ENSG00000137869.15 CYP19A1    74.17400       6.571239 0.4247489 179.5044
## ENSG00000170439.7 METTL7B    179.48492       4.893747 0.3280872 170.4416
## ENSG00000138413.14 IDH1     1551.87780       1.177437 0.1009528 134.1505
## ENSG00000163221.9 S100A12  16730.91613       3.703320 0.2946629 131.4332
## ENSG00000109472.14 CPE        11.52067       3.245869 0.2853002 130.2922
## ENSG00000168615.13 ADAM9    1564.93119       1.673417 0.1441738 130.0716
## ENSG00000121316.11 PLBD1   14427.36193       2.145816 0.1827881 129.4533
## ENSG00000135424.18 ITGA7     409.50153       2.180367 0.1895864 124.2631
##                                  pvalue         padj
## ENSG00000108950.12 FAM20A  9.099378e-62 1.968650e-57
## ENSG00000132170.24 PPARG   5.588559e-46 6.045424e-42
## ENSG00000137869.15 CYP19A1 6.217852e-41 4.484107e-37
## ENSG00000170439.7 METTL7B  5.925253e-39 3.204821e-35
## ENSG00000138413.14 IDH1    5.064771e-31 2.191526e-27
## ENSG00000163221.9 S100A12  1.990593e-30 7.177748e-27
## ENSG00000109472.14 CPE     3.536855e-30 1.068957e-26
## ENSG00000168615.13 ADAM9   3.952697e-30 1.068957e-26
## ENSG00000121316.11 PLBD1   5.397062e-30 1.297394e-26
## ENSG00000135424.18 ITGA7   7.378083e-29 1.596248e-25

mean(abs(dge$stat))

## [1] 7.737621

# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + case )

## converting counts to integer mode

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced=~PG_number)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                             baseMean log2FoldChange      lfcSE     stat
## ENSG00000108950.12 FAM20A  1391.3056       3.886896 0.17556813 365.8679
## ENSG00000116574.6 RHOU     1103.0820       1.239058 0.07541680 259.1874
## ENSG00000132170.24 PPARG    133.1236       3.322844 0.18792148 257.4870
## ENSG00000121316.11 PLBD1  14427.3619       2.038051 0.12049015 254.8199
## ENSG00000170439.7 METTL7B   179.4849       5.247994 0.30024921 210.9196
## ENSG00000014257.16 ACP3     835.6431       1.195747 0.08068451 210.9061
## ENSG00000150337.14 FCGR1A  1684.8047       2.207690 0.14485575 200.9897
## ENSG00000168615.13 ADAM9   1564.9312       1.574381 0.10755937 198.4670
## ENSG00000198019.13 FCGR1B   824.1988       2.080885 0.13950435 195.8961
## ENSG00000271605.6 MILR1     880.7529       1.144319 0.08363852 179.7345
##                                 pvalue         padj
## ENSG00000108950.12 FAM20A 1.485539e-81 3.213963e-77
## ENSG00000116574.6 RHOU    2.580121e-58 2.791046e-54
## ENSG00000132170.24 PPARG  6.057466e-58 4.368442e-54
## ENSG00000121316.11 PLBD1  2.310405e-57 1.249640e-53
## ENSG00000170439.7 METTL7B 8.654199e-48 3.141813e-44
## ENSG00000014257.16 ACP3   8.713139e-48 3.141813e-44
## ENSG00000150337.14 FCGR1A 1.270175e-45 3.925747e-42
## ENSG00000168615.13 ADAM9  4.511964e-45 1.220204e-41
## ENSG00000198019.13 FCGR1B 1.642260e-44 3.947810e-41
## ENSG00000271605.6 MILR1   5.538548e-41 1.198265e-37

mean(abs(dge$stat))

## [1] 10.07069

tc_hi_b_t0vpod1 <- dge

# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
  design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )

## converting counts to integer mode

##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.

## factor levels were dropped which had no samples

res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)

##                                   baseMean log2FoldChange     lfcSE      stat
## ENSG00000240247.8 DEFA1B        6706.43307     -3.1402729 0.4696726 1473.7910
## ENSG00000137869.15 CYP19A1        74.17400      6.7234655 0.5637628  904.5017
## ENSG00000281887.3 GIMAP1-GIMAP5  629.98075     -2.4194832 0.6296709  839.7515
## ENSG00000213178.3 RPL22P1        430.37217     -1.5439699 0.5340927  831.2569
## ENSG00000288534.1 TMX2-CTNND1    168.91301     -1.2105872 0.4249622  584.1599
## ENSG00000167434.10 CA4           755.09367      1.6486680 0.5537508  572.9911
## ENSG00000279716.1 AC006128.2     159.11752     -0.9733946 0.4923367  445.3139
## ENSG00000108950.12 FAM20A       1391.30563      3.9369456 0.1853313  316.4386
## ENSG00000132170.24 PPARG         133.12355      3.3464063 0.1936334  235.6933
## ENSG00000227097.5 RPS28P7         77.96304     -5.0451526 1.0456026  214.3986
##                                        pvalue          padj
## ENSG00000240247.8 DEFA1B        1.926856e-322 4.168753e-318
## ENSG00000137869.15 CYP19A1      1.030869e-198 1.115142e-194
## ENSG00000281887.3 GIMAP1-GIMAP5 1.229225e-184 8.864760e-181
## ENSG00000213178.3 RPL22P1       8.637833e-183 4.671988e-179
## ENSG00000288534.1 TMX2-CTNND1   4.669107e-129 2.020323e-125
## ENSG00000167434.10 CA4          1.255065e-126 4.525557e-123
## ENSG00000279716.1 AC006128.2     7.550148e-99  2.333535e-95
## ENSG00000108950.12 FAM20A        8.643089e-71  2.337415e-67
## ENSG00000132170.24 PPARG         3.418169e-53  8.216898e-50
## ENSG00000227097.5 RPS28P7        1.507504e-48  3.261485e-45

mean(abs(dge$stat))

## [1] 8.504849

tc_hi_b_t0vpod1_adj <- dge

STOP HERE

This one is getting long, so I will continue downstream analysis in a separate script.

Session information

For reproducibility

save.image("tca_pairwise.Rdata")

sessionInfo()

## R version 4.4.3 (2025-02-28)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] eulerr_7.0.2                MASS_7.3-65                
##  [3] mitch_1.19.3                DESeq2_1.44.0              
##  [5] SummarizedExperiment_1.34.0 Biobase_2.64.0             
##  [7] MatrixGenerics_1.16.0       matrixStats_1.5.0          
##  [9] GenomicRanges_1.56.2        GenomeInfoDb_1.40.1        
## [11] IRanges_2.38.1              S4Vectors_0.42.1           
## [13] BiocGenerics_0.50.0         dplyr_1.1.4                
## [15] WGCNA_1.73                  fastcluster_1.2.6          
## [17] dynamicTreeCut_1.63-1       reshape2_1.4.4             
## [19] gplots_3.2.0               
## 
## loaded via a namespace (and not attached):
##   [1] DBI_1.2.3               bitops_1.0-9            gridExtra_2.3          
##   [4] echarts4r_0.4.5         rlang_1.1.5             magrittr_2.0.3         
##   [7] compiler_4.4.3          RSQLite_2.3.9           systemfonts_1.2.1      
##  [10] png_0.1-8               vctrs_0.6.5             stringr_1.5.1          
##  [13] pkgconfig_2.0.3         crayon_1.5.3            fastmap_1.2.0          
##  [16] backports_1.5.0         XVector_0.44.0          caTools_1.18.3         
##  [19] promises_1.3.2          rmarkdown_2.29          UCSC.utils_1.0.0       
##  [22] preprocessCore_1.66.0   purrr_1.0.4             bit_4.6.0              
##  [25] xfun_0.51               zlibbioc_1.50.0         cachem_1.1.0           
##  [28] jsonlite_1.9.1          blob_1.2.4              later_1.4.1            
##  [31] DelayedArray_0.30.1     BiocParallel_1.38.0     parallel_4.4.3         
##  [34] cluster_2.1.8.1         R6_2.6.1                RColorBrewer_1.1-3     
##  [37] bslib_0.9.0             stringi_1.8.4           GGally_2.2.1           
##  [40] rpart_4.1.24            jquerylib_0.1.4         Rcpp_1.0.14            
##  [43] iterators_1.0.14        knitr_1.50              base64enc_0.1-3        
##  [46] httpuv_1.6.15           Matrix_1.7-3            splines_4.4.3          
##  [49] nnet_7.3-20             tidyselect_1.2.1        rstudioapi_0.17.1      
##  [52] abind_1.4-8             yaml_2.3.10             doParallel_1.0.17      
##  [55] codetools_0.2-20        lattice_0.22-6          tibble_3.2.1           
##  [58] plyr_1.8.9              shiny_1.10.0            KEGGREST_1.44.1        
##  [61] evaluate_1.0.3          foreign_0.8-89          survival_3.8-3         
##  [64] ggstats_0.9.0           xml2_1.3.8              Biostrings_2.72.1      
##  [67] pillar_1.10.1           KernSmooth_2.23-26      checkmate_2.3.2        
##  [70] foreach_1.5.2           generics_0.1.3          ggplot2_3.5.1          
##  [73] munsell_0.5.1           scales_1.3.0            xtable_1.8-4           
##  [76] gtools_3.9.5            glue_1.8.0              Hmisc_5.2-3            
##  [79] tools_4.4.3             data.table_1.17.0       locfit_1.5-9.12        
##  [82] grid_4.4.3              impute_1.80.0           tidyr_1.3.1            
##  [85] AnnotationDbi_1.66.0    colorspace_2.1-1        GenomeInfoDbData_1.2.12
##  [88] beeswarm_0.4.0          htmlTable_2.4.3         Formula_1.2-5          
##  [91] cli_3.6.4               kableExtra_1.4.0        viridisLite_0.4.2      
##  [94] S4Arrays_1.4.1          svglite_2.1.3           gtable_0.3.6           
##  [97] sass_0.4.9              digest_0.6.37           SparseArray_1.4.8      
## [100] htmlwidgets_1.6.4       memoise_2.0.1.9000      htmltools_0.5.8.1      
## [103] lifecycle_1.0.4         httr_1.4.7              mime_0.13              
## [106] GO.db_3.19.1            bit64_4.6.0-1

PADDI RNA expression analysis - Timecourse Analysis

Mark Ziemann

2025-04-16

Introduction

Load the data

Number of reads per sample

MDS

Load patient info

MDS3

Blood composition

Centre and scale bloodd composition

Differential expression across time

Overview

Timecourse in low CRP group

T0 vs EOS

EOS vs POD1

T0 vs POD1

Timecourse in high CRP group

T0 vs EOS

EOS vs POD1

T0 vs POD1

Timecourse in low CRP group and treatment group A

T0 vs EOS

EOS vs POD1

T0 vs POD1

Timecourse in low CRP group and treatment group B

T0 vs EOS

EOS vs POD1

T0 vs POD1

Timecourse in high CRP group and treatment group A

T0 vs EOS

EOS vs POD1

T0 vs POD1

Timecourse in high CRP group and treatment group B

T0 vs EOS

EOS vs POD1

T0 vs POD1

STOP HERE

Session information