Time course analysis of PADDI genomics data.
suppressPackageStartupMessages({
library("gplots")
library("reshape2")
library("WGCNA")
library("dplyr")
library("DESeq2")
library("mitch")
library("MASS")
library("eulerr")
})
tmp <- read.table("3col.tsv.gz",header=FALSE)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
colnames <- gsub("T0R","T0",colnames(xx))
xx$geneid = NULL
xx <- round(xx)
xx[1:10,1:6]
## 3166-POD1 3166-T0 3167-POD1 3167-T0 3171-POD1
## ENSG00000000003.15 TSPAN6 3 1 5 5 23
## ENSG00000000005.6 TNMD 0 0 0 0 0
## ENSG00000000419.14 DPM1 685 577 521 735 811
## ENSG00000000457.14 SCYL3 622 611 550 777 789
## ENSG00000000460.17 C1orf112 181 171 232 263 215
## ENSG00000000938.13 FGR 33797 44344 31524 38959 26402
## ENSG00000000971.16 CFH 106 40 98 183 195
## ENSG00000001036.14 FUCA2 1229 769 1150 868 978
## ENSG00000001084.13 GCLC 944 1085 577 961 908
## ENSG00000001167.15 NFYA 1243 1277 1295 1605 1166
## 3171-T0
## ENSG00000000003.15 TSPAN6 4
## ENSG00000000005.6 TNMD 1
## ENSG00000000419.14 DPM1 494
## ENSG00000000457.14 SCYL3 575
## ENSG00000000460.17 C1orf112 196
## ENSG00000000938.13 FGR 33751
## ENSG00000000971.16 CFH 130
## ENSG00000001036.14 FUCA2 805
## ENSG00000001084.13 GCLC 798
## ENSG00000001167.15 NFYA 1251
Let’s look at the number of reads per sample
Most samples were in the range of 25-30 million assigned reads. Just 2 samples had less than 20 million reads: PG1452-EOS and PG1423-EOS. The maximum read count was about 40 million for PG7072-EOS.
xxcs <- colSums(xx)
par(mar=c(5,8,3,1))
barplot(xxcs,horiz=TRUE,las=1,main="no. reads per sample")
barplot(head(xxcs[order(xxcs)],20),horiz=TRUE,las=1,main="lowest no. reads per sample")
barplot(head(xxcs[order(-xxcs)],20),horiz=TRUE,las=1,main="highest no. reads per sample")
Some outliers are apparent.
PG2090-EOS to the left of the chart - this is clearly the effect of rRNA carryover. Other samples over to the left of the chart include PG815-EOS, PG145-EOS and PG702-POD1 which all have elevated rRNA.
heatmap.2( cor(xx),trace="none",scale="none")
mds <- cmdscale(dist(t(xx)))
par(mar=c(5,5,3,1))
minx <- min(mds[,1])
maxx <- max(mds[,1])
miny <- min(mds[,2])
maxy <- max(mds[,2])
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) ,
type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
text(mds, labels=rownames(mds), cex=0.8)
col <- rownames(mds)
col <- sapply(strsplit(col,"-"),"[[",2)
col <- gsub("T0","lightblue",col)
col <- gsub("POD1","orange",col)
col <- gsub("EOS","pink",col)
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) , cex=1.5 ,
type = "p", col=col, pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
#text(mds, labels=rownames(mds), cex=0.8)
mtext("blue=T0, orange=POD1, pink=EOS")
Exclude PG2090-EOS and repeat the analysis.
xx <- xx[,grep("PG2090-EOS",colnames(xx),invert=TRUE)]
mds <- cmdscale(dist(t(xx)))
par(mar=c(5,5,3,1))
minx <- min(mds[,1])
maxx <- max(mds[,1])
miny <- min(mds[,2])
maxy <- max(mds[,2])
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) ,
type = "p", col="gray", pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
text(mds, labels=rownames(mds), cex=0.8)
col <- rownames(mds)
col <- sapply(strsplit(col,"-"),"[[",2)
col <- gsub("T0","lightblue",col)
col <- gsub("POD1","orange",col)
col <- gsub("EOS","pink",col)
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) , cex=1.5 ,
type = "p", col=col, pch=19, cex.axis=1.3,cex.lab=1.3, bty='n')
#text(mds, labels=rownames(mds), cex=0.8)
mtext("blue=T0, orange=POD1, pink=EOS")
In the MDS plot with PG2090-EOS removed, there appears to be some separation of T0, POD1 and EOS samples. POD1 (orange) are more towards the upper side of the chart and T0 (blue) are toward the bottom right. EOS (pink) are quite spread out.
xx <- xx[,order(colnames(xx))]
ss <- read.csv("PADDIgenomicsData.csv")
ss <- ss[order(ss$PG_number),]
colnames(ss)
## [1] "PG_number" "sexD"
## [3] "ageD" "weightD"
## [5] "heightD" "asaD"
## [7] "ethnicityD" "ethnicity_otherD"
## [9] "current_smokerD" "diabetes_typeD"
## [11] "daily_insulinD" "oral_hypoglycemicsD"
## [13] "non_insulin_injectablesD" "diabetes_yrs_since_diagnosisD"
## [15] "DM_years" "creatinine_preopD"
## [17] "crp_preopD" "crp_preop_typeD"
## [19] "crp_preop_naD" "hba1c_doneD"
## [21] "surgery_typeD" "surgery_procedureD"
## [23] "surgery_dominantD" "wound_typeOP"
## [25] "non_study_dexameth_steriodPOSTOP" "nonstudy_dexameth_steriodD3"
## [27] "HbA1c" "bmi"
## [29] "whodas_total_preop" "revised_whodas_preop"
## [31] "neut_lymph_ratio_d0" "neut_lymph_ratio_d1"
## [33] "neut_lymph_ratio_change_d1" "neut_lymph_ratio_d2"
## [35] "neut_lymph_ratio_change_d2" "neut_lymph_ratio_d1_2"
## [37] "neut_lymph_ratio_d2_2" "ab_noninfection"
## [39] "risk" "risk_cat"
## [41] "bmi_cat" "asa_cat"
## [43] "wound_type_cat" "oxygen_quin"
## [45] "duration_sx" "duration_sx_quin"
## [47] "anyDex" "anyDex_count"
## [49] "anyDexMiss" "anyDex2"
## [51] "treatment_group" "deltacrp"
## [53] "crp_group"
str(ss)
## 'data.frame': 117 obs. of 53 variables:
## $ PG_number : chr "3166" "3167" "3171" "3172" ...
## $ sexD : chr "Male" "Male" "Male" "Male" ...
## $ ageD : int 62 67 61 78 73 77 84 54 70 62 ...
## $ weightD : num 64.5 78.8 71.1 43 83.6 ...
## $ heightD : num 163 169 165 156 171 167 133 155 170 175 ...
## $ asaD : int 2 2 2 2 2 3 3 2 2 2 ...
## $ ethnicityD : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicity_otherD : chr "" "" "" "" ...
## $ current_smokerD : chr "No" "No" "No" "No" ...
## $ diabetes_typeD : chr "" "" "" "" ...
## $ daily_insulinD : chr "" "" "" "" ...
## $ oral_hypoglycemicsD : chr "" "" "" "" ...
## $ non_insulin_injectablesD : chr "" "" "" "" ...
## $ diabetes_yrs_since_diagnosisD : int NA NA NA NA NA 1 NA NA NA NA ...
## $ DM_years : int NA NA NA NA NA 1 NA NA NA NA ...
## $ creatinine_preopD : int 68 82 82 96 105 90 54 47 109 98 ...
## $ crp_preopD : chr "2.1" "0.6" "2.7" "1.2" ...
## $ crp_preop_typeD : chr "CRP" "CRP" "CRP" "CRP" ...
## $ crp_preop_naD : int 0 0 0 0 0 0 0 0 0 0 ...
## $ hba1c_doneD : chr "Yes" "Yes" "Yes" "Yes" ...
## $ surgery_typeD : chr "Laparoscopic assisted low anterior resection of rectum" "Laparoscopic sigmoidectomy" "Laparoscopic assisted anterior resection of rectum" "Robotic assisted laparoscopic radical prostatectomy, pelvic lymph node dissection" ...
## $ surgery_procedureD : chr "None of the above" "None of the above" "None of the above" "None of the above" ...
## $ surgery_dominantD : chr "Gastrointestinal" "Gastrointestinal" "Gastrointestinal" "Urology-renal" ...
## $ wound_typeOP : chr "Clean / contaminated" "Clean / contaminated" "Clean / contaminated" "Clean / contaminated" ...
## $ non_study_dexameth_steriodPOSTOP: chr "No" "No" "No" "No" ...
## $ nonstudy_dexameth_steriodD3 : chr "No" "No" "No" "No" ...
## $ HbA1c : num 5.7 6.2 6.2 6.3 6.3 ...
## $ bmi : num 24.3 27.6 26.1 17.7 28.6 ...
## $ whodas_total_preop : int 16 12 12 12 12 12 24 14 12 12 ...
## $ revised_whodas_preop : int 16 12 12 12 12 12 24 14 12 12 ...
## $ neut_lymph_ratio_d0 : num 4.3 2.94 2.29 2.93 2.62 ...
## $ neut_lymph_ratio_d1 : num 13 6.5 7.22 23.2 8.57 ...
## $ neut_lymph_ratio_change_d1 : num 8.7 3.56 4.93 20.27 5.95 ...
## $ neut_lymph_ratio_d2 : num 5.92 3.68 3.77 22 NA ...
## $ neut_lymph_ratio_change_d2 : num 1.623 0.741 1.475 19.071 NA ...
## $ neut_lymph_ratio_d1_2 : num 13 6.5 7.22 23.2 8.57 ...
## $ neut_lymph_ratio_d2_2 : num 5.92 3.68 3.77 22 NA ...
## $ ab_noninfection : int 1 1 0 1 1 1 1 1 1 1 ...
## $ risk : int 2 2 2 2 2 5 4 1 2 1 ...
## $ risk_cat : chr "Moderate" "Moderate" "Moderate" "Moderate" ...
## $ bmi_cat : chr "Normal [18.5 to <25]" "Overweight [25 to <30]" "Overweight [25 to <30]" "Underweight [BMI<18.5]" ...
## $ asa_cat : chr "1-2" "1-2" "1-2" "1-2" ...
## $ wound_type_cat : chr "Contaminated" "Contaminated" "Contaminated" "Contaminated" ...
## $ oxygen_quin : chr "0.21-0.4" "0.21-0.4" "0.21-0.4" "0.21-0.4" ...
## $ duration_sx : num 2.5 2.67 2.42 3.17 2.5 ...
## $ duration_sx_quin : chr "2.18-2.82" "2.18-2.82" "2.18-2.82" "2.83-3.75" ...
## $ anyDex : chr "No" "No" "No" "No" ...
## $ anyDex_count : int 0 0 0 0 0 0 0 0 0 0 ...
## $ anyDexMiss : int 0 0 0 0 0 0 0 0 0 0 ...
## $ anyDex2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ treatment_group : int 1 1 2 2 1 1 2 1 2 1 ...
## $ deltacrp : num 39.3 38.3 49 189.9 7.3 ...
## $ crp_group : int 1 1 1 4 1 1 4 1 4 1 ...
summary(ss)
## PG_number sexD ageD weightD
## Length:117 Length:117 Min. :25.00 Min. : 41.00
## Class :character Class :character 1st Qu.:54.00 1st Qu.: 68.50
## Mode :character Mode :character Median :62.00 Median : 82.00
## Mean :61.03 Mean : 84.55
## 3rd Qu.:69.00 3rd Qu.: 95.40
## Max. :86.00 Max. :185.00
##
## heightD asaD ethnicityD ethnicity_otherD
## Min. :133.0 Min. :1.000 Length:117 Length:117
## 1st Qu.:163.0 1st Qu.:2.000 Class :character Class :character
## Median :171.0 Median :2.000 Mode :character Mode :character
## Mean :170.2 Mean :2.308
## 3rd Qu.:178.0 3rd Qu.:3.000
## Max. :193.0 Max. :4.000
##
## current_smokerD diabetes_typeD daily_insulinD oral_hypoglycemicsD
## Length:117 Length:117 Length:117 Length:117
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## non_insulin_injectablesD diabetes_yrs_since_diagnosisD DM_years
## Length:117 Min. : 1.000 Min. : 1.000
## Class :character 1st Qu.: 1.500 1st Qu.: 1.500
## Mode :character Median : 7.000 Median : 7.000
## Mean : 7.467 Mean : 7.467
## 3rd Qu.:11.000 3rd Qu.:11.000
## Max. :18.000 Max. :18.000
## NA's :102 NA's :102
## creatinine_preopD crp_preopD crp_preop_typeD crp_preop_naD
## Min. : 19.0 Length:117 Length:117 Min. :0
## 1st Qu.: 66.0 Class :character Class :character 1st Qu.:0
## Median : 76.0 Mode :character Mode :character Median :0
## Mean : 80.3 Mean :0
## 3rd Qu.: 91.0 3rd Qu.:0
## Max. :177.0 Max. :0
## NA's :10
## hba1c_doneD surgery_typeD surgery_procedureD surgery_dominantD
## Length:117 Length:117 Length:117 Length:117
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## wound_typeOP non_study_dexameth_steriodPOSTOP
## Length:117 Length:117
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## nonstudy_dexameth_steriodD3 HbA1c bmi
## Length:117 Min. : 4.500 Min. :16.59
## Class :character 1st Qu.: 5.200 1st Qu.:24.93
## Mode :character Median : 5.600 Median :28.07
## Mean : 5.714 Mean :29.00
## 3rd Qu.: 5.900 3rd Qu.:31.73
## Max. :10.000 Max. :72.27
##
## whodas_total_preop revised_whodas_preop neut_lymph_ratio_d0
## Min. :12.00 Min. :12.00 Min. : 0.5312
## 1st Qu.:12.00 1st Qu.:12.00 1st Qu.: 1.8254
## Median :14.00 Median :14.00 Median : 2.5737
## Mean :16.74 Mean :16.74 Mean : 2.8745
## 3rd Qu.:17.00 3rd Qu.:17.00 3rd Qu.: 3.3338
## Max. :50.00 Max. :50.00 Max. :11.0000
## NA's :9
## neut_lymph_ratio_d1 neut_lymph_ratio_change_d1 neut_lymph_ratio_d2
## Min. : 1.375 Min. :-1.255 Min. : 0.1235
## 1st Qu.: 5.132 1st Qu.: 2.610 1st Qu.: 3.7692
## Median : 7.353 Median : 4.450 Median : 6.7273
## Mean : 8.882 Mean : 6.088 Mean : 8.1589
## 3rd Qu.:11.627 3rd Qu.: 8.730 3rd Qu.:10.8889
## Max. :44.000 Max. :39.299 Max. :25.6042
## NA's :13 NA's :21 NA's :28
## neut_lymph_ratio_change_d2 neut_lymph_ratio_d1_2 neut_lymph_ratio_d2_2
## Min. :-6.182 Min. : 1.375 Min. : 0.1235
## 1st Qu.: 1.591 1st Qu.: 5.132 1st Qu.: 3.7692
## Median : 4.356 Median : 7.353 Median : 6.7273
## Mean : 5.356 Mean : 8.882 Mean : 8.1589
## 3rd Qu.: 7.403 3rd Qu.:11.627 3rd Qu.:10.8889
## Max. :22.776 Max. :44.000 Max. :25.6042
## NA's :35 NA's :13 NA's :28
## ab_noninfection risk risk_cat bmi_cat
## Min. :0.0000 Min. :0.000 Length:117 Length:117
## 1st Qu.:0.0000 1st Qu.:1.000 Class :character Class :character
## Median :0.0000 Median :1.000 Mode :character Mode :character
## Mean :0.4495 Mean :1.598
## 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :1.0000 Max. :6.000
## NA's :8
## asa_cat wound_type_cat oxygen_quin duration_sx
## Length:117 Length:117 Length:117 Min. : 0.6833
## Class :character Class :character Class :character 1st Qu.: 2.5000
## Mode :character Mode :character Mode :character Median : 3.3333
## Mean : 3.9007
## 3rd Qu.: 4.7667
## Max. :10.6667
##
## duration_sx_quin anyDex anyDex_count anyDexMiss
## Length:117 Length:117 Min. :0.0000 Min. :0.000000
## Class :character Class :character 1st Qu.:0.0000 1st Qu.:0.000000
## Mode :character Mode :character Median :0.0000 Median :0.000000
## Mean :0.1282 Mean :0.008547
## 3rd Qu.:0.0000 3rd Qu.:0.000000
## Max. :2.0000 Max. :1.000000
##
## anyDex2 treatment_group deltacrp crp_group
## Min. :0.0000 Min. :1.000 Min. :-16.7 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.: 32.9 1st Qu.:1.000
## Median :0.0000 Median :2.000 Median : 49.5 Median :1.000
## Mean :0.1111 Mean :1.556 Mean :130.9 Mean :2.487
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:221.1 3rd Qu.:4.000
## Max. :1.0000 Max. :2.000 Max. :359.0 Max. :4.000
##
ss1 <- ss
rownames(ss) <- paste(ss$PG_number,ss$timepoint,sep="-")
dim(ss)
## [1] 117 53
ss$ageCS <- scale(ss$ageD)
ss$sexD <- as.numeric(factor(ss$sexD))
ss$ethnicityCAT <- ss$ethnicityD
ss$ethnicityD <- as.numeric(factor(ss$ethnicityD))
ss$current_smokerD <- as.numeric(factor(ss$current_smokerD))
ss$diabetes_typeD <- as.numeric(factor(ss$diabetes_typeD))
ss$daily_insulinD <- as.numeric(factor(ss$daily_insulinD))
ss$oral_hypoglycemicsD <- as.numeric(factor(ss$oral_hypoglycemicsD))
ss$crp_preopD <- as.numeric(gsub("<5","2.5",gsub("<1","0.5",gsub("<1.0","0.5",ss$crp_preopD))))
ss$surgery_dominantD <- as.numeric(factor(ss$surgery_dominantD))
ss$wound_typeOP <- as.numeric(factor(ss$wound_typeOP))
ss$risk_cat <- as.numeric(factor(ss$risk_cat,levels=c("Low","Moderate","High")))
ss$wound_type_cat <- as.numeric(factor(ss$wound_type_cat))
ss$anyDex <- as.numeric(factor(ss$anyDex))
ss$bmi_cat <- as.numeric(factor(ss$bmi_cat,
levels=c("Underweight [BMI<18.5]","Normal [18.5 to <25]",
"Overweight [25 to <30]","Obese [30 to <40]","Super obese [40+]")))
ss <- ss[,c("PG_number","sexD","ageD","ageCS","weightD","asaD","heightD","ethnicityCAT","ethnicityD",
"current_smokerD","diabetes_typeD","daily_insulinD","creatinine_preopD",
"surgery_dominantD","wound_typeOP","HbA1c","bmi","revised_whodas_preop",
"neut_lymph_ratio_d0","neut_lymph_ratio_d1","neut_lymph_ratio_d2","ab_noninfection",
"risk","risk_cat","bmi_cat","wound_type_cat","duration_sx","anyDex","treatment_group",
"deltacrp","crp_group")]
ss <- ss[order(rownames(ss)),]
ss_t0 <- ss
ss_eos <- ss
ss_pod1 <- ss
ss_t0$timepoint <- "T0"
ss_eos$timepoint <- "EOS"
ss_pod1$timepoint <- "POD1"
rownames(ss_t0) <- paste(ss_t0$PG_number,"T0",sep="-")
rownames(ss_eos) <- paste(ss_t0$PG_number,"EOS",sep="-")
rownames(ss_pod1) <- paste(ss_t0$PG_number,"POD1",sep="-")
ss <- rbind(ss_t0, ss_eos, ss_pod1)
rownames(ss) <- paste(ss$PG_number,ss$timepoint,sep="-")
xt0 <- xx[,grep("T0",colnames(xx))]
xpod1 <- xx[,grep("POD1",colnames(xx))]
xeos <- xx[,grep("EOS",colnames(xx))]
xt0f <- xt0[rowMeans(xt0)>=10,]
xpod1f <- xpod1[rowMeans(xpod1)>=10,]
xeosf <- xeos[rowMeans(xeos)>=10,]
dim(xt0f)
## [1] 21935 111
dim(xpod1f)
## [1] 21313 109
dim(xeosf)
## [1] 22067 98
ss_t0 <- ss_t0[which(rownames(ss_t0) %in% colnames(xt0)),]
ss_pod1 <- ss_pod1[which(rownames(ss_pod1) %in% colnames(xpod1)),]
ss_eos <- ss_eos[which(rownames(ss_eos) %in% colnames(xeos)),]
colnames(xt0) %in% rownames(ss_t0)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE
colnames(xpod1) %in% rownames(ss_pod1)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE
colnames(xeos) %in% rownames(ss_eos)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
rownames(ss_t0) %in% colnames(xt0)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE
rownames(ss_pod1) %in% colnames(xpod1)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE
rownames(ss_eos) %in% colnames(xeos)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
xxf <- xx[rowMeans(xx)>=10,]
xxf <- xxf[,order(colnames(xxf))]
ss1 <- ss
ss1 <- ss1[which(rownames(ss1) %in% colnames(xx)),]
mds <- cmdscale(dist(t(xx)))
par(mar=c(5,5,3,1))
minx <- min(mds[,1])
maxx <- max(mds[,1])
miny <- min(mds[,2])
maxy <- max(mds[,2])
col <- rownames(mds)
col <- sapply(strsplit(col,"-"),"[[",2)
col <- gsub("T0","lightblue",col)
col <- gsub("POD1","orange",col)
col <- gsub("EOS","pink",col)
shp <- ss1$crp_group + 14
plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
xlim=c(minx*1.1,maxx*1.1), ylim = c(miny*1.1,maxy*1.1) , cex=1.5 ,
type = "p", col=col, pch=shp, cex.axis=1.3,cex.lab=1.3, bty='n')
#text(mds, labels=rownames(mds), cex=0.8)
mtext("blue=T0,pink=EOS,orange=POD1,sq=lowCRP,di=highCRP")
xn <- xx
gt <- as.data.frame(sapply(strsplit(rownames(xn)," "),"[[",2) )
rownames(gt) <- rownames(xx)
colnames(gt) = "genesymbol"
gt$geneID <- rownames(xx)
blood <- read.table("https://raw.githubusercontent.com/giannimonaco/ABIS/master/data/sigmatrixRNAseq.txt")
blood2 <- merge(gt,blood,by.x="genesymbol",by.y=0)
blood2 <- blood2[which(!duplicated(blood2$genesymbol)),]
rownames(blood2) <- blood2$geneID
blood2 <- blood2[,c(3:ncol(blood2))]
genes <- intersect(rownames(xx), rownames(blood2))
dec <- apply(xx[genes, , drop=F], 2, function(x) coef(rlm( as.matrix(blood2[genes,]), x, maxit =100 ))) *100
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm'
## failed to converge in 100 steps
## Warning in rlm.default(as.matrix(blood2[genes, ]), x, maxit = 100): 'rlm'
## failed to converge in 100 steps
dec <- t(dec/colSums(dec)*100)
dec <- signif(dec, 3)
# remove negative values
dec2 <- t(apply(dec,2,function(x) { mymin=min(x) ; if (mymin<0) { x + (mymin * -1) } else { x } } ))
dec2 <- apply(dec2,2,function(x) {x / sum(x) *100} )
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( dec2, col=colfunc(25),scale="row",
trace="none",margins = c(5,5), cexRow=.7, cexCol=.8, main="cell type abundances")
heatmap.2( dec2, col=colfunc(25),scale="none",
trace="none",margins = c(5,5), cexRow=.7, cexCol=.8, main="cell type abundances")
par(mar=c(5,10,3,1))
boxplot(t(dec2[order(rowMeans(dec2)),]),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)")
par(mar = c(5.1, 4.1, 4.1, 2.1))
heatmap.2( cor(dec2),trace="none",scale="none")
heatmap.2( cor(t(dec2)),trace="none",scale="none", margins = c(8,8))
par(mar=c(5,10,3,1))
barplot(apply(dec2,1,sd),horiz=TRUE,las=1,xlab="SD of cell proportions (%)")
which(apply(dec2,1,sd)>4)
## Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
## 1 2 3 4 10
Based on this analysis we can begin with correction of:
According to the correlation heatmap, these are not strongly correlated.
Now look at how the cell proportions change over time.
ct0 <- dec2[,grep("-T0",colnames(dec2))]
ceos <- dec2[,grep("-EOS",colnames(dec2))]
cpod1 <- dec2[,grep("-POD1",colnames(dec2))]
par(mar=c(5,10,3,1))
boxplot(t(ct0),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)",main="T0")
boxplot(t(ceos),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)",main="EOS")
boxplot(t(cpod1),horizontal=TRUE,las=1, xlab="estimated cell proportion (%)",main="POD1")
sscell <- as.data.frame(t(dec2))
sscell_t0 <- sscell[grep("-T0",rownames(sscell)),]
sscell_eos <- sscell[grep("-EOS",rownames(sscell)),]
sscell_pod1 <- sscell[grep("POD1",rownames(sscell)),]
Now look at how cell types associate with the PCAs.
#xt0f xeosf xpod1f
#sscell_t0 sscell_eos sscell_pod1
## T0
mx <- xt0f
ss2 <- sscell_t0
pca <- prcomp(t(mx),center = TRUE, scale = TRUE,retx=TRUE)
loadings = pca$x
par(mar = c(5.1, 4.1, 4.1, 2.1))
plot(pca,type="lines",col="blue")
nGenes <- nrow(mx)
nSamples <- ncol(mx)
datTraits <- ss2
moduleTraitCor <- cor(loadings[,1:8], datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = t(moduleTraitCor),
xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
cex.lab.y = 0.6, zlim = c(-0.45,0.45),
main = paste("PCA-cell relationships @T0: Top principal components"))
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are below given minimum and will be truncated to
## the minimum.
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are above given maximum and will be truncated to
## the maximum.
## EOS
mx <- xeosf
ss2 <- sscell_eos
pca <- prcomp(t(mx),center = TRUE, scale = TRUE,retx=TRUE)
loadings = pca$x
plot(pca,type="lines",col="blue")
nGenes <- nrow(mx)
nSamples <- ncol(mx)
datTraits <- ss2
moduleTraitCor <- cor(loadings[,1:8], datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = t(moduleTraitCor),
xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
cex.lab.y = 0.6, zlim = c(-0.45,0.45),
main = paste("PCA-cell relationships @EOS: Top principal components"))
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are below given minimum and will be truncated to
## the minimum.
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are above given maximum and will be truncated to
## the maximum.
## POD1
mx <- xpod1f
ss2 <- sscell_pod1
pca <- prcomp(t(mx),center = TRUE, scale = TRUE,retx=TRUE)
loadings = pca$x
plot(pca,type="lines",col="blue")
nGenes <- nrow(mx)
nSamples <- ncol(mx)
datTraits <- ss2
moduleTraitCor <- cor(loadings[,1:8], datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = t(moduleTraitCor),
xLabels = colnames(loadings)[1:ncol(t(moduleTraitCor))],
yLabels = names(datTraits), colorLabels = FALSE, colors = blueWhiteRed(6),
textMatrix = t(textMatrix), setStdMargins = FALSE, cex.text = 0.5,
cex.lab.y = 0.6, zlim = c(-0.45,0.45),
main = paste("PCA-cell relationships @POD1: Top principal components"))
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are below given minimum and will be truncated to
## the minimum.
## Warning in numbers2colors(data, signed, colors = colors, lim = zlim, naColor =
## naColor): Some values of 'x' are above given maximum and will be truncated to
## the maximum.
The conclusion here is that the cell types correlate strongly with the principal components. The good news is that we have selected the cell types that associate the strongest, so we can correct for their contribution.
Received this warning from DESeq2:
the design formula contains one or more numeric variables that have mean or
standard deviation larger than 5 (an arbitrary threshold to trigger this message).
Including numeric variables with large mean can induce collinearity with the intercept.
Users should center and scale numeric variables in the design to improve GLM convergence.
Indeed, the distributions are severely skewed. To remedy this, I will centre and scale the data.
par(mfrow=c(5,2))
hist(ss2$Monocytes.C)
hist(scale(ss2$Monocytes.C,center=TRUE))
hist(ss2$NK)
hist(scale(ss2$NK,center=TRUE))
hist(ss2$`T.CD8.Memory`)
hist(scale(ss2$`T.CD8.Memory`,center=TRUE))
hist(ss2$`T.CD4.Naive`)
hist(scale(ss2$`T.CD4.Naive`,center=TRUE))
hist(ss2$`Neutrophils.LD`)
hist(scale(ss2$`Neutrophils.LD`,center=TRUE))
par(mfrow=c(1,1))
ss2$Monocytes.C <- scale(ss2$Monocytes.C,center=TRUE)
ss2$NK <- scale(ss2$NK,center=TRUE)
ss2$`T.CD8.Memory` <- scale(ss2$`T.CD8.Memory`,center=TRUE)
ss2$`T.CD4.Naive` <- scale(ss2$`T.CD4.Naive`,center=TRUE)
ss2$`Neutrophils.LD` <- scale(ss2$`Neutrophils.LD`,center=TRUE)
Specific PCAs for key clinical parameters:
And blood composition:
And ones we didn’t include:
TODO:
age data centred and scaled
ethnicity categories unordered
We will run timecourse analysis with a simple pairwise approach. For each of the groups below, this will involve three comparisons:
T0 vs EOS
EOS vs POD1
T0 vs POD1
The groups/subgroups we will look at are:
Timecourse in low CRP group
Timecourse in high CRP group
Timecourse in low CRP group and treatment group A
Timecourse in low CRP group and treatment group B
Timecourse in high CRP group and treatment group A
Timecourse in high CRP group and treatment group B
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 79
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 22144 79
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 234 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000109906.15 ZBTB16 4434.62143 2.2683562 0.2561987 70.22346
## ENSG00000096060.15 FKBP5 12790.86023 1.9176227 0.2320495 62.82915
## ENSG00000155893.13 PXYLP1 815.34259 0.9238062 0.1161330 61.78145
## ENSG00000177575.13 CD163 14191.10338 1.8315389 0.2319288 57.72874
## ENSG00000196935.9 SRGAP1 245.07442 1.4964335 0.1924557 57.35463
## ENSG00000123358.20 NR4A1 1152.18623 -1.9382868 0.2534991 55.40814
## ENSG00000134780.10 DAGLA 254.95032 -1.0925964 0.1478656 53.90101
## ENSG00000183779.7 ZNF703 706.63137 -1.4253954 0.1934903 52.84257
## ENSG00000171136.7 RLN3 24.60743 1.7599987 0.2386469 51.61627
## ENSG00000185338.7 SOCS1 666.02397 1.8343657 0.2458094 51.57006
## pvalue padj
## ENSG00000109906.15 ZBTB16 5.295289e-17 1.172589e-12
## ENSG00000096060.15 FKBP5 2.254370e-15 2.496038e-11
## ENSG00000155893.13 PXYLP1 3.837732e-15 2.832758e-11
## ENSG00000177575.13 CD163 3.008748e-14 1.611666e-10
## ENSG00000196935.9 SRGAP1 3.639058e-14 1.611666e-10
## ENSG00000123358.20 NR4A1 9.792998e-14 3.614269e-10
## ENSG00000134780.10 DAGLA 2.108497e-13 6.670078e-10
## ENSG00000183779.7 ZNF703 3.613827e-13 1.000307e-09
## ENSG00000171136.7 RLN3 6.747992e-13 1.529865e-09
## ENSG00000185338.7 SOCS1 6.908711e-13 1.529865e-09
mean(abs(dge$stat))
## [1] 2.961886
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000235706.8 DICER1-AS1 293.06741 0.4440925 0.04967513 79.42134
## ENSG00000117228.11 GBP1 2168.31028 -0.6540265 0.07409947 75.85897
## ENSG00000152207.8 CYSLTR2 348.85253 -0.8830580 0.10313140 69.78812
## ENSG00000173083.16 HPSE 1117.77303 -0.7461422 0.09117295 64.40904
## ENSG00000111252.11 SH2B3 4768.12668 -0.6567997 0.08159330 62.73289
## ENSG00000109971.14 HSPA8 14953.38389 -0.3823666 0.04815554 62.40540
## ENSG00000079215.15 SLC1A3 705.60255 3.4504354 0.33980370 61.29834
## ENSG00000187474.5 FPR3 75.27347 -1.1796702 0.14640352 60.23829
## ENSG00000134531.10 EMP1 76.63453 -1.2043247 0.14999284 59.08054
## ENSG00000183337.18 BCOR 1343.06026 -0.6580184 0.08501873 57.89173
## pvalue padj
## ENSG00000235706.8 DICER1-AS1 5.018122e-19 1.111213e-14
## ENSG00000117228.11 GBP1 3.046657e-18 3.373259e-14
## ENSG00000152207.8 CYSLTR2 6.602940e-17 4.873850e-13
## ENSG00000173083.16 HPSE 1.010931e-15 5.596515e-12
## ENSG00000111252.11 SH2B3 2.367290e-15 1.031747e-11
## ENSG00000109971.14 HSPA8 2.795558e-15 1.031747e-11
## ENSG00000079215.15 SLC1A3 4.904948e-15 1.551645e-11
## ENSG00000187474.5 FPR3 8.404153e-15 2.326270e-11
## ENSG00000134531.10 EMP1 1.513486e-14 3.723848e-11
## ENSG00000183337.18 BCOR 2.769496e-14 6.132772e-11
mean(abs(dge$stat))
## [1] 3.82831
tc_lo_t0veos <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000259976.3 RP11-553L6.5 661.62056 -0.08956655 1.43937022 74.56869
## ENSG00000235706.8 DICER1-AS1 293.06741 0.42425149 0.05269524 64.57058
## ENSG00000267436.1 AC005786.7 22.26384 2.54184253 1.26137541 59.40823
## ENSG00000109971.14 HSPA8 14953.38389 -0.37312891 0.04866671 58.28592
## ENSG00000261093.1 CTD-3126B10.1 43.00486 -1.82030629 1.43778056 52.74200
## ENSG00000117228.11 GBP1 2168.31028 -0.62306238 0.08467305 52.34726
## ENSG00000152207.8 CYSLTR2 348.85253 -0.74645485 0.10330648 49.77869
## ENSG00000111252.11 SH2B3 4768.12668 -0.56986188 0.08264104 46.35758
## ENSG00000183337.18 BCOR 1343.06026 -0.61010894 0.09047868 44.34594
## ENSG00000242732.4 RTL5 189.97352 -0.77093222 0.11393097 43.97201
## pvalue padj
## ENSG00000259976.3 RP11-553L6.5 5.856474e-18 1.296858e-13
## ENSG00000235706.8 DICER1-AS1 9.313564e-16 1.031198e-11
## ENSG00000267436.1 AC005786.7 1.281318e-14 9.457834e-11
## ENSG00000109971.14 HSPA8 2.266610e-14 1.254795e-10
## ENSG00000261093.1 CTD-3126B10.1 3.803703e-13 1.684584e-09
## ENSG00000117228.11 GBP1 4.650468e-13 1.716333e-09
## ENSG00000152207.8 CYSLTR2 1.721032e-12 5.444362e-09
## ENSG00000111252.11 SH2B3 9.852550e-12 2.727186e-08
## ENSG00000183337.18 BCOR 2.751818e-11 6.770695e-08
## ENSG00000242732.4 RTL5 3.331056e-11 7.376291e-08
mean(abs(dge$stat))
## [1] 2.802647
tc_lo_t0veos_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 80
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21793 80
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 98 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000184988.8 TMEM106A 711.88397 0.9496308 0.07478479 159.42903
## ENSG00000109906.15 ZBTB16 3929.37961 -2.8555261 0.23286151 129.08414
## ENSG00000133816.18 MICAL2 2848.09796 0.9794016 0.08734690 124.08978
## ENSG00000266405.3 CBX3P2 84.50267 -0.8902742 0.08621985 105.72753
## ENSG00000010704.19 HFE 344.48974 1.1500596 0.11354191 100.77433
## ENSG00000156804.7 FBXO32 710.09299 -1.2566781 0.12379154 99.39384
## ENSG00000039523.20 RIPOR1 3289.23804 0.8759300 0.08824745 97.54057
## ENSG00000080986.13 NDC80 106.93744 -1.1650911 0.11771502 95.57876
## ENSG00000164674.17 SYTL3 3103.24422 -1.8743808 0.18469700 95.55281
## ENSG00000010327.10 STAB1 22427.91687 1.7880700 0.17865996 95.25108
## pvalue padj
## ENSG00000184988.8 TMEM106A 1.507997e-36 3.286378e-32
## ENSG00000109906.15 ZBTB16 6.500360e-30 7.083117e-26
## ENSG00000133816.18 MICAL2 8.051681e-29 5.849009e-25
## ENSG00000266405.3 CBX3P2 8.460396e-25 4.609435e-21
## ENSG00000010704.19 HFE 1.030838e-23 4.493012e-20
## ENSG00000156804.7 FBXO32 2.069651e-23 7.517316e-20
## ENSG00000039523.20 RIPOR1 5.276416e-23 1.642699e-19
## ENSG00000080986.13 NDC80 1.421232e-22 3.486847e-19
## ENSG00000164674.17 SYTL3 1.439986e-22 3.486847e-19
## ENSG00000010327.10 STAB1 1.677066e-22 3.654831e-19
mean(abs(dge$stat))
## [1] 7.104229
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000133816.18 MICAL2 2848.09796 0.9342429 0.07523959 148.6185
## ENSG00000182580.3 EPHB3 84.24902 2.3470632 0.18546232 136.2735
## ENSG00000107798.18 LIPA 2784.14312 1.0846430 0.09271535 129.6517
## ENSG00000184988.8 TMEM106A 711.88397 0.8854124 0.07905767 121.0531
## ENSG00000101347.11 SAMHD1 14367.49292 0.5043510 0.04737029 111.9630
## ENSG00000173083.16 HPSE 1225.94971 1.0911422 0.10073358 110.6136
## ENSG00000184992.13 BRI3BP 615.29434 0.5765545 0.05490327 108.7805
## ENSG00000134780.10 DAGLA 271.34390 1.3996228 0.12848922 108.3603
## ENSG00000149639.15 SOGA1 918.53585 0.7639237 0.07307951 106.1779
## ENSG00000072310.18 SREBF1 2231.75608 0.9352731 0.08897432 105.5724
## pvalue padj
## ENSG00000133816.18 MICAL2 3.474735e-34 7.572491e-30
## ENSG00000182580.3 EPHB3 1.738570e-31 1.894433e-27
## ENSG00000107798.18 LIPA 4.883754e-30 3.547722e-26
## ENSG00000184988.8 TMEM106A 3.720308e-28 2.026917e-24
## ENSG00000101347.11 SAMHD1 3.640350e-26 1.586683e-22
## ENSG00000173083.16 HPSE 7.190368e-26 2.611662e-22
## ENSG00000184992.13 BRI3BP 1.812875e-25 5.643997e-22
## ENSG00000134780.10 DAGLA 2.240963e-25 6.104664e-22
## ENSG00000149639.15 SOGA1 6.740442e-25 1.632161e-21
## ENSG00000072310.18 SREBF1 9.149255e-25 1.993897e-21
mean(abs(dge$stat))
## [1] 8.360329
tc_lo_eosvpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000240247.8 DEFA1B 1722.06173 -4.0980962 0.47378091 1779.0131
## ENSG00000281887.3 GIMAP1-GIMAP5 576.99100 -1.2141825 0.54067063 729.8270
## ENSG00000235655.3 H3P6 574.93467 0.9736707 0.83430259 389.3044
## ENSG00000279117.1 CTD-2562J17.6 61.45752 -1.9028310 0.85421563 257.8098
## ENSG00000133816.18 MICAL2 2848.09796 0.9870993 0.07503490 167.8258
## ENSG00000182580.3 EPHB3 84.24902 2.4206827 0.19949094 127.5448
## ENSG00000101347.11 SAMHD1 14367.49292 0.5382133 0.04764215 126.1909
## ENSG00000120029.13 ARMH3 1080.38040 0.3395919 0.03063002 122.5982
## ENSG00000107798.18 LIPA 2784.14312 1.1293832 0.10253820 115.5094
## ENSG00000010327.10 STAB1 22427.91687 1.7854273 0.16088322 109.7020
## pvalue padj
## ENSG00000240247.8 DEFA1B 0.000000e+00 0.000000e+00
## ENSG00000281887.3 GIMAP1-GIMAP5 9.768085e-161 1.064379e-156
## ENSG00000235655.3 H3P6 1.173018e-86 8.521195e-83
## ENSG00000279117.1 CTD-2562J17.6 5.151441e-58 2.806634e-54
## ENSG00000133816.18 MICAL2 2.208261e-38 9.624924e-35
## ENSG00000182580.3 EPHB3 1.411808e-29 5.127921e-26
## ENSG00000101347.11 SAMHD1 2.792772e-29 8.694698e-26
## ENSG00000120029.13 ARMH3 1.707479e-28 4.651386e-25
## ENSG00000107798.18 LIPA 6.087045e-27 1.473944e-23
## ENSG00000010327.10 STAB1 1.138864e-25 2.481926e-22
mean(abs(dge$stat))
## [1] 8.135987
tc_lo_eosvpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 77
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21672 77
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 237 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000155659.15 VSIG4 945.42691 3.1528166 0.29004232 99.55518
## ENSG00000132170.24 PPARG 55.22429 1.9914785 0.19995750 93.10869
## ENSG00000183019.7 MCEMP1 2055.80403 1.9117621 0.19299523 91.24427
## ENSG00000108861.9 DUSP3 2085.59921 0.8511817 0.08967106 88.63389
## ENSG00000137474.22 MYO7A 638.70733 1.7214810 0.17876657 87.37989
## ENSG00000169385.3 RNASE2 838.07543 1.5563159 0.16590553 83.73570
## ENSG00000168615.13 ADAM9 1239.05833 0.9558012 0.10739737 77.61693
## ENSG00000149534.9 MS4A2 193.69066 -2.0452500 0.22614090 76.45519
## ENSG00000129538.14 RNASE1 58.70760 2.2959336 0.25361130 75.00801
## ENSG00000166033.13 HTRA1 141.57985 2.2858282 0.25198875 74.81289
## pvalue padj
## ENSG00000155659.15 VSIG4 1.907733e-23 4.134438e-19
## ENSG00000132170.24 PPARG 4.950001e-22 5.363821e-18
## ENSG00000183019.7 MCEMP1 1.269875e-21 9.173579e-18
## ENSG00000108861.9 DUSP3 4.750792e-21 2.573979e-17
## ENSG00000137474.22 MYO7A 8.955667e-21 3.881744e-17
## ENSG00000169385.3 RNASE2 5.655464e-20 2.042753e-16
## ENSG00000168615.13 ADAM9 1.250933e-18 3.872889e-15
## ENSG00000149534.9 MS4A2 2.252665e-18 6.102468e-15
## ENSG00000129538.14 RNASE1 4.688078e-18 1.121540e-14
## ENSG00000166033.13 HTRA1 5.175066e-18 1.121540e-14
mean(abs(dge$stat))
## [1] 4.544953
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 2 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000162654.9 GBP4 1788.1711 -0.9983599 0.07479456 171.7394
## ENSG00000198848.13 CES1 1867.1327 1.6797082 0.12769128 155.5637
## ENSG00000137474.22 MYO7A 638.7073 1.7598997 0.13756378 145.1405
## ENSG00000133106.15 EPSTI1 1071.3264 -0.8493338 0.06998927 143.1952
## ENSG00000166033.13 HTRA1 141.5798 2.2471987 0.17511114 142.6717
## ENSG00000168615.13 ADAM9 1239.0583 0.9878232 0.08148845 141.1859
## ENSG00000115415.20 STAT1 6084.1208 -0.8785012 0.07431848 135.1084
## ENSG00000079215.15 SLC1A3 162.7552 1.5105682 0.12643650 132.5324
## ENSG00000169385.3 RNASE2 838.0754 1.6198242 0.13342959 132.0131
## ENSG00000108861.9 DUSP3 2085.5992 0.8500539 0.07298078 131.5089
## pvalue padj
## ENSG00000162654.9 GBP4 3.085094e-39 6.686015e-35
## ENSG00000198848.13 CES1 1.054386e-35 1.142532e-31
## ENSG00000137474.22 MYO7A 2.001034e-33 1.445547e-29
## ENSG00000133106.15 EPSTI1 5.327845e-33 2.886626e-29
## ENSG00000166033.13 HTRA1 6.934615e-33 3.005740e-29
## ENSG00000168615.13 ADAM9 1.465162e-32 5.292166e-29
## ENSG00000115415.20 STAT1 3.126319e-31 9.679083e-28
## ENSG00000079215.15 SLC1A3 1.144259e-30 3.099798e-27
## ENSG00000169385.3 RNASE2 1.486338e-30 3.579101e-27
## ENSG00000108861.9 DUSP3 1.916165e-30 4.152713e-27
mean(abs(dge$stat))
## [1] 7.43484
tc_lo_t0vpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000240247.8 DEFA1B 1388.99148 -3.6332048 0.56059171 1145.4977
## ENSG00000285444.1 RP11-147H23.3 26.87468 -1.0535626 0.50518807 165.4591
## ENSG00000279117.1 CTD-2562J17.6 49.66612 -0.3138792 0.89085687 153.8945
## ENSG00000079215.15 SLC1A3 162.75517 1.5375608 0.12290200 148.1137
## ENSG00000198848.13 CES1 1867.13269 1.6267616 0.12949149 142.0641
## ENSG00000168615.13 ADAM9 1239.05833 0.9859404 0.08193039 139.6682
## ENSG00000162654.9 GBP4 1788.17112 -0.9827881 0.08333005 133.9204
## ENSG00000163221.9 S100A12 5108.69615 1.7331541 0.14226936 131.6449
## ENSG00000134243.12 SORT1 2296.81018 0.7564959 0.06589281 128.5670
## ENSG00000108861.9 DUSP3 2085.59921 0.8245210 0.07335456 122.5478
## pvalue padj
## ENSG00000240247.8 DEFA1B 4.269625e-251 9.253132e-247
## ENSG00000285444.1 RP11-147H23.3 7.261242e-38 7.868282e-34
## ENSG00000279117.1 CTD-2562J17.6 2.442163e-35 1.764218e-31
## ENSG00000079215.15 SLC1A3 4.479890e-34 2.427205e-30
## ENSG00000198848.13 CES1 9.416130e-33 4.081327e-29
## ENSG00000168615.13 ADAM9 3.146149e-32 1.136389e-28
## ENSG00000162654.9 GBP4 5.687095e-31 1.760725e-27
## ENSG00000163221.9 S100A12 1.789216e-30 4.846986e-27
## ENSG00000134243.12 SORT1 8.435189e-30 2.031193e-26
## ENSG00000108861.9 DUSP3 1.751485e-28 3.795819e-25
mean(abs(dge$stat))
## [1] 6.276693
tc_lo_t0vpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
dim(mx)
## [1] 60649 246
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21843 246
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 21843 87
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21772 87
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 181 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108984.15 MAP2K6 1163.93917 1.163862 0.1098349 111.05350
## ENSG00000155307.19 SAMSN1 2307.13372 1.925360 0.1860204 102.41948
## ENSG00000163221.9 S100A12 7562.48678 2.334673 0.2239419 101.32078
## ENSG00000172985.11 SH3RF3 304.79926 1.940902 0.1888729 101.01358
## ENSG00000173744.18 AGFG1 4295.84291 1.528808 0.1544175 95.58532
## ENSG00000096060.15 FKBP5 11280.57517 2.031109 0.2036190 94.53584
## ENSG00000115271.11 GCA 8435.27055 1.998679 0.2064862 89.18612
## ENSG00000166527.8 CLEC4D 1415.52476 2.345699 0.2408279 88.40084
## ENSG00000189221.10 MAOA 32.43834 3.404068 0.3419277 87.33806
## ENSG00000121316.11 PLBD1 11891.86514 1.644136 0.1735820 87.01962
## pvalue padj
## ENSG00000108984.15 MAP2K6 5.759514e-26 1.253961e-21
## ENSG00000155307.19 SAMSN1 4.492623e-24 4.890669e-20
## ENSG00000163221.9 S100A12 7.823084e-24 4.972452e-20
## ENSG00000172985.11 SH3RF3 9.135500e-24 4.972452e-20
## ENSG00000173744.18 AGFG1 1.416531e-22 6.168142e-19
## ENSG00000096060.15 FKBP5 2.406951e-22 8.734022e-19
## ENSG00000115271.11 GCA 3.593611e-21 1.117716e-17
## ENSG00000166527.8 CLEC4D 5.344823e-21 1.454594e-17
## ENSG00000189221.10 MAOA 9.147066e-21 2.212777e-17
## ENSG00000121316.11 PLBD1 1.074501e-20 2.339405e-17
mean(abs(dge$stat))
## [1] 6.155835
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000197122.12 SRC 1085.1248 -0.9521654 0.07702740 146.9439
## ENSG00000281162.2 LINC01127 706.5774 1.6433786 0.13168911 138.3527
## ENSG00000242732.4 RTL5 179.9690 -0.8499702 0.07473365 126.3306
## ENSG00000183779.7 ZNF703 826.1425 -1.5467318 0.13077776 125.1475
## ENSG00000175130.7 MARCKSL1 924.2551 -0.8200107 0.07238320 124.3971
## ENSG00000123685.9 BATF3 129.8002 -0.9862378 0.08722390 123.8836
## ENSG00000112299.8 VNN1 1496.1166 1.9071846 0.15880675 123.8330
## ENSG00000184557.4 SOCS3 12204.5628 2.8331392 0.22010854 122.9167
## ENSG00000137962.13 ARHGAP29 199.8439 1.1381381 0.10024916 122.0688
## ENSG00000160013.9 PTGIR 680.1546 -1.0424021 0.09217769 121.4304
## pvalue padj
## ENSG00000197122.12 SRC 8.072069e-34 1.757451e-29
## ENSG00000281162.2 LINC01127 6.101780e-32 6.642397e-28
## ENSG00000242732.4 RTL5 2.602953e-29 1.889050e-25
## ENSG00000183779.7 ZNF703 4.724775e-29 2.571695e-25
## ENSG00000175130.7 MARCKSL1 6.896484e-29 2.850295e-25
## ENSG00000123685.9 BATF3 8.933509e-29 2.850295e-25
## ENSG00000112299.8 VNN1 9.164093e-29 2.850295e-25
## ENSG00000184557.4 SOCS3 1.454270e-28 3.957796e-25
## ENSG00000137962.13 ARHGAP29 2.229746e-28 5.394003e-25
## ENSG00000160013.9 PTGIR 3.076095e-28 6.288020e-25
mean(abs(dge$stat))
## [1] 8.780156
tc_hi_t0veos <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000276107.1 THBS1-IT1 46.00122 3.6403423 0.57274117 355.98805
## ENSG00000169174.11 PCSK9 34.25862 5.0868309 0.80058573 332.99354
## ENSG00000272414.6 FAM47E-STBD1 27.84294 4.2648058 0.67531450 307.89890
## ENSG00000288622.1 PDCD6-AHRR 19.82942 -1.0186557 1.00201049 109.76930
## ENSG00000175130.7 MARCKSL1 924.25513 -0.8235201 0.08230708 97.19051
## ENSG00000123685.9 BATF3 129.80023 -1.0237730 0.10255427 96.61643
## ENSG00000281162.2 LINC01127 706.57744 1.6580775 0.16264838 92.90480
## ENSG00000152503.10 TRIM36 35.85686 -1.4583994 0.15500905 86.76617
## ENSG00000112299.8 VNN1 1496.11657 1.9261455 0.19259244 85.07585
## ENSG00000197122.12 SRC 1085.12480 -0.9173149 0.09757463 84.30576
## pvalue padj
## ENSG00000276107.1 THBS1-IT1 2.104693e-79 4.582337e-75
## ENSG00000169174.11 PCSK9 2.141898e-74 2.331670e-70
## ENSG00000272414.6 FAM47E-STBD1 6.265286e-69 4.546927e-65
## ENSG00000288622.1 PDCD6-AHRR 1.100854e-25 5.991950e-22
## ENSG00000175130.7 MARCKSL1 6.296781e-23 2.741870e-19
## ENSG00000123685.9 BATF3 8.414691e-23 3.053411e-19
## ENSG00000281162.2 LINC01127 5.487138e-22 1.706657e-18
## ENSG00000152503.10 TRIM36 1.221413e-20 3.324077e-17
## ENSG00000112299.8 VNN1 2.871363e-20 6.946147e-17
## ENSG00000197122.12 SRC 4.238776e-20 9.228663e-17
mean(abs(dge$stat))
## [1] 5.765449
tc_hi_t0veos_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
dim(mx)
## [1] 60649 246
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21843 246
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 21843 82
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21487 82
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 146 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000072310.18 SREBF1 2193.56072 1.0062352 0.08405451 140.56128
## ENSG00000159189.12 C1QC 134.53773 3.3487699 0.27034395 129.35924
## ENSG00000103066.13 PLA2G15 534.53708 1.0162764 0.09598874 109.99668
## ENSG00000152270.9 PDE3B 2375.00243 -1.0276634 0.09732732 109.24631
## ENSG00000019169.11 MARCO 1181.02163 1.8922924 0.17696206 107.04656
## ENSG00000173369.17 C1QB 212.89358 2.7066684 0.24964580 103.97934
## ENSG00000171812.13 COL8A2 303.08276 1.7758260 0.17186187 100.87947
## ENSG00000241484.10 ARHGAP8 34.87065 -1.6795093 0.16492123 100.82972
## ENSG00000010327.10 STAB1 28728.21066 1.6292933 0.15941838 99.33521
## ENSG00000131061.14 ZNF341 427.92437 0.6988326 0.07018116 98.26931
## pvalue padj
## ENSG00000072310.18 SREBF1 2.006675e-32 4.311742e-28
## ENSG00000159189.12 C1QC 5.659068e-30 6.079820e-26
## ENSG00000103066.13 PLA2G15 9.815507e-26 7.030193e-22
## ENSG00000152270.9 PDE3B 1.433220e-25 7.698902e-22
## ENSG00000019169.11 MARCO 4.348332e-25 1.868652e-21
## ENSG00000173369.17 C1QB 2.044365e-24 7.321213e-21
## ENSG00000171812.13 COL8A2 9.775468e-24 2.692349e-20
## ENSG00000241484.10 ARHGAP8 1.002411e-23 2.692349e-20
## ENSG00000010327.10 STAB1 2.131835e-23 5.089637e-20
## ENSG00000131061.14 ZNF341 3.651817e-23 7.846660e-20
mean(abs(dge$stat))
## [1] 5.73382
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 2 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000007968.7 E2F2 832.59727 1.4639517 0.10619661 175.7347
## ENSG00000111424.12 VDR 667.35392 0.8696344 0.06496627 174.4571
## ENSG00000108950.12 FAM20A 1594.59623 2.5871067 0.18449386 156.8780
## ENSG00000014257.16 ACP3 889.52760 0.9967503 0.07829567 155.7042
## ENSG00000165092.13 ALDH1A1 417.48794 -1.9541750 0.14581640 155.6281
## ENSG00000019169.11 MARCO 1181.02163 1.8838898 0.14123978 155.0313
## ENSG00000213557.4 RP11-240E2.2 83.17379 -2.1299402 0.16549329 145.4866
## ENSG00000072310.18 SREBF1 2193.56072 0.9968507 0.08107402 144.6692
## ENSG00000137959.17 IFI44L 1289.09238 -1.6353118 0.13034988 140.8658
## ENSG00000173083.16 HPSE 1270.09457 0.8312757 0.06928466 139.6632
## pvalue padj
## ENSG00000007968.7 E2F2 4.137715e-40 8.451006e-36
## ENSG00000111424.12 VDR 7.866157e-40 8.451006e-36
## ENSG00000108950.12 FAM20A 5.442615e-36 3.898182e-32
## ENSG00000014257.16 ACP3 9.824305e-36 4.386694e-32
## ENSG00000165092.13 ALDH1A1 1.020779e-35 4.386694e-32
## ENSG00000019169.11 MARCO 1.378308e-35 4.935950e-32
## ENSG00000213557.4 RP11-240E2.2 1.681085e-33 5.160210e-30
## ENSG00000072310.18 SREBF1 2.536798e-33 6.813522e-30
## ENSG00000137959.17 IFI44L 1.721404e-32 4.109756e-29
## ENSG00000173083.16 HPSE 3.153949e-32 6.776891e-29
mean(abs(dge$stat))
## [1] 8.460332
tc_hi_eosvpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000281887.3 GIMAP1-GIMAP5 523.95279 0.8662230 0.53466750 591.6478
## ENSG00000007968.7 E2F2 832.59727 1.4468759 0.11994765 133.2825
## ENSG00000108950.12 FAM20A 1594.59623 2.5714050 0.20409447 125.3972
## ENSG00000165092.13 ALDH1A1 417.48794 -2.0085148 0.16533340 124.0462
## ENSG00000104972.16 LILRB1 6765.22831 1.0777758 0.09480825 123.1025
## ENSG00000137959.17 IFI44L 1289.09238 -1.6919221 0.14354278 122.8933
## ENSG00000111424.12 VDR 667.35392 0.8605349 0.07690240 121.4375
## ENSG00000213557.4 RP11-240E2.2 83.17379 -2.2318827 0.18755832 121.2037
## ENSG00000014257.16 ACP3 889.52760 0.9882662 0.08835306 119.9041
## ENSG00000072310.18 SREBF1 2193.56072 0.9947363 0.08930313 118.8457
## pvalue padj
## ENSG00000281887.3 GIMAP1-GIMAP5 1.097733e-130 2.358699e-126
## ENSG00000007968.7 E2F2 7.841946e-31 8.424995e-27
## ENSG00000108950.12 FAM20A 4.166204e-29 2.983974e-25
## ENSG00000165092.13 ALDH1A1 8.230367e-29 4.421147e-25
## ENSG00000104972.16 LILRB1 1.324267e-28 5.269831e-25
## ENSG00000137959.17 IFI44L 1.471540e-28 5.269831e-25
## ENSG00000111424.12 VDR 3.065056e-28 9.261943e-25
## ENSG00000213557.4 RP11-240E2.2 3.448389e-28 9.261943e-25
## ENSG00000014257.16 ACP3 6.639345e-28 1.585107e-24
## ENSG00000072310.18 SREBF1 1.132007e-27 2.432343e-24
mean(abs(dge$stat))
## [1] 6.62683
tc_hi_eosvpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
dim(mx)
## [1] 60649 246
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21843 246
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 21843 87
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21614 87
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 227 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1375.14606 3.596755 0.1787304 343.2011
## ENSG00000132170.24 PPARG 134.87968 3.365542 0.1858488 288.3518
## ENSG00000137869.15 CYP19A1 71.37647 6.348042 0.3471859 254.4232
## ENSG00000170439.7 METTL7B 161.71505 4.777226 0.2837569 220.8029
## ENSG00000163221.9 S100A12 14931.43969 3.505882 0.2375557 185.5556
## ENSG00000168615.13 ADAM9 1576.11714 1.676385 0.1237590 177.7620
## ENSG00000121316.11 PLBD1 14524.61471 2.068602 0.1550219 168.8004
## ENSG00000099377.14 HSD3B7 161.50738 1.568690 0.1200495 166.8454
## ENSG00000156414.19 TDRD9 1025.77610 2.493816 0.1860163 165.8936
## ENSG00000163251.4 FZD5 146.40894 1.904855 0.1494406 156.2562
## pvalue padj
## ENSG00000108950.12 FAM20A 1.281574e-76 2.769995e-72
## ENSG00000132170.24 PPARG 1.136790e-64 1.228529e-60
## ENSG00000137869.15 CYP19A1 2.819554e-57 2.031395e-53
## ENSG00000170439.7 METTL7B 6.042877e-50 3.265269e-46
## ENSG00000163221.9 S100A12 2.968279e-42 1.283127e-38
## ENSG00000168615.13 ADAM9 1.493114e-40 5.378696e-37
## ENSG00000121316.11 PLBD1 1.352649e-38 4.176594e-35
## ENSG00000099377.14 HSD3B7 3.615724e-38 9.768782e-35
## ENSG00000156414.19 TDRD9 5.835849e-38 1.401512e-34
## ENSG00000163251.4 FZD5 7.441782e-36 1.608467e-32
mean(abs(dge$stat))
## [1] 10.31991
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1375.1461 3.726124 0.15921673 413.9110
## ENSG00000132170.24 PPARG 134.8797 3.266118 0.16507759 317.5829
## ENSG00000116574.6 RHOU 1101.4150 1.166667 0.06650012 295.5881
## ENSG00000150337.14 FCGR1A 1623.7786 2.110656 0.11817070 279.2149
## ENSG00000121316.11 PLBD1 14524.6147 1.971386 0.11162567 275.8873
## ENSG00000168615.13 ADAM9 1576.1171 1.582371 0.09296611 267.4887
## ENSG00000170439.7 METTL7B 161.7150 4.840541 0.25202509 257.6699
## ENSG00000099377.14 HSD3B7 161.5074 1.595766 0.09761700 250.7694
## ENSG00000156414.19 TDRD9 1025.7761 2.252194 0.13571294 235.9535
## ENSG00000014257.16 ACP3 843.5332 1.170991 0.07628976 224.9440
## pvalue padj
## ENSG00000108950.12 FAM20A 5.161947e-92 1.115703e-87
## ENSG00000132170.24 PPARG 4.868601e-71 5.261497e-67
## ENSG00000116574.6 RHOU 3.012944e-66 2.170725e-62
## ENSG00000150337.14 FCGR1A 1.113468e-62 6.016624e-59
## ENSG00000121316.11 PLBD1 5.913443e-62 2.556263e-58
## ENSG00000168615.13 ADAM9 4.001793e-60 1.441579e-56
## ENSG00000170439.7 METTL7B 5.526382e-58 1.706389e-54
## ENSG00000099377.14 HSD3B7 1.764879e-56 4.768263e-53
## ENSG00000156414.19 TDRD9 2.999526e-53 7.203527e-50
## ENSG00000014257.16 ACP3 7.551154e-51 1.632106e-47
mean(abs(dge$stat))
## [1] 13.55803
tc_hi_t0vpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000206047.3 DEFA1 7687.64670 -1.6559594 0.2844943 3729.3118
## ENSG00000240247.8 DEFA1B 5062.09708 -3.2509896 0.4246854 1813.0291
## ENSG00000213178.3 RPL22P1 408.41976 -0.8688631 0.4235335 1232.9388
## ENSG00000137869.15 CYP19A1 71.37647 6.6374139 0.4731625 1222.3070
## ENSG00000281887.3 GIMAP1-GIMAP5 570.58111 -1.2009703 0.4976431 1104.5615
## ENSG00000167434.10 CA4 659.20775 1.6001609 0.4563801 823.0646
## ENSG00000262160.1 RP11-96D1.11 165.47842 -0.2872640 0.3288119 785.0106
## ENSG00000288534.1 TMX2-CTNND1 162.81263 -0.7053911 0.3932053 690.5123
## ENSG00000279716.1 AC006128.2 160.03325 -0.8862056 0.4135312 645.3026
## ENSG00000108950.12 FAM20A 1375.14606 3.7358638 0.1823623 312.6231
## pvalue padj
## ENSG00000206047.3 DEFA1 0.000000e+00 0.000000e+00
## ENSG00000240247.8 DEFA1B 0.000000e+00 0.000000e+00
## ENSG00000213178.3 RPL22P1 4.234954e-270 3.051143e-266
## ENSG00000137869.15 CYP19A1 8.657860e-268 4.678275e-264
## ENSG00000281887.3 GIMAP1-GIMAP5 3.368710e-242 1.456226e-238
## ENSG00000167434.10 CA4 5.217737e-181 1.879603e-177
## ENSG00000262160.1 RP11-96D1.11 9.796199e-173 3.024786e-169
## ENSG00000288534.1 TMX2-CTNND1 3.458430e-152 9.343812e-149
## ENSG00000279716.1 AC006128.2 2.348081e-142 5.639048e-139
## ENSG00000108950.12 FAM20A 5.858915e-70 1.266346e-66
mean(abs(dge$stat))
## [1] 10.6752
tc_hi_t0vpod1_adj <- dge
treatment_group==1
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & treatment_group==1 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 49
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 22167 49
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 115 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000048740.18 CELF2 15777.74519 1.0373666 0.09173198 124.64370
## ENSG00000140280.14 LYSMD2 559.91067 -0.7754547 0.07428222 108.25324
## ENSG00000109906.15 ZBTB16 5895.58267 2.6342706 0.24859479 98.45988
## ENSG00000077150.20 NFKB2 3351.89937 -1.0755448 0.11115532 92.00750
## ENSG00000132514.14 CLEC10A 512.10472 -2.1334526 0.21714463 89.83678
## ENSG00000155893.13 PXYLP1 925.27596 1.2451780 0.13014513 88.45305
## ENSG00000118257.17 NRP2 51.81586 -1.2688529 0.13480861 87.90620
## ENSG00000183779.7 ZNF703 660.85703 -2.1143787 0.21888688 86.90327
## ENSG00000255833.2 TIFAB 75.30968 -1.9165575 0.20848193 80.62370
## ENSG00000096060.15 FKBP5 16302.45011 2.2318230 0.24227153 76.83678
## pvalue padj
## ENSG00000048740.18 CELF2 6.090483e-29 1.350077e-24
## ENSG00000140280.14 LYSMD2 2.365403e-25 2.621694e-21
## ENSG00000109906.15 ZBTB16 3.316763e-23 2.450756e-19
## ENSG00000077150.20 NFKB2 8.634866e-22 4.785227e-18
## ENSG00000132514.14 CLEC10A 2.586411e-21 1.146659e-17
## ENSG00000155893.13 PXYLP1 5.205588e-21 1.923204e-17
## ENSG00000118257.17 NRP2 6.863329e-21 2.173420e-17
## ENSG00000183779.7 ZNF703 1.139610e-20 3.157718e-17
## ENSG00000255833.2 TIFAB 2.730645e-19 6.725579e-16
## ENSG00000096060.15 FKBP5 1.856873e-18 4.116130e-15
mean(abs(dge$stat))
## [1] 3.868721
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000109321.11 AREG 392.20635 4.643570 0.3383736 126.20928
## ENSG00000179593.16 ALOX15B 1008.17625 4.674486 0.3593960 110.14152
## ENSG00000079215.15 SLC1A3 1054.14734 4.689518 0.3620222 109.27814
## ENSG00000121966.7 CXCR4 13191.30609 1.450479 0.1343090 107.24888
## ENSG00000109906.15 ZBTB16 5895.58267 3.242609 0.2721306 107.10484
## ENSG00000182580.3 EPHB3 53.65242 -2.564940 0.2322367 106.40712
## ENSG00000145990.11 GFOD1 2210.37959 1.591507 0.1472425 106.06822
## ENSG00000279359.1 RP11-36D19.9 98.08427 5.450377 0.4395542 101.66753
## ENSG00000119138.5 KLF9 2394.63472 1.250030 0.1201463 101.56426
## ENSG00000060982.15 BCAT1 663.65390 1.754205 0.1658765 99.38918
## pvalue padj
## ENSG00000109321.11 AREG 2.767079e-29 6.133784e-25
## ENSG00000179593.16 ALOX15B 9.123893e-26 1.011247e-21
## ENSG00000079215.15 SLC1A3 1.410394e-25 1.042140e-21
## ENSG00000121966.7 CXCR4 3.926333e-25 1.871922e-21
## ENSG00000109906.15 ZBTB16 4.222318e-25 1.871922e-21
## ENSG00000182580.3 EPHB3 6.004176e-25 2.218243e-21
## ENSG00000145990.11 GFOD1 7.123992e-25 2.255965e-21
## ENSG00000279359.1 RP11-36D19.9 6.566789e-24 1.703957e-20
## ENSG00000119138.5 KLF9 6.918219e-24 1.703957e-20
## ENSG00000060982.15 BCAT1 2.074526e-23 4.598602e-20
mean(abs(dge$stat))
## [1] 5.50503
tc_lo_a_t0veos <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000227097.5 RPS28P7 76.96489 -1.3611093 1.13330866 116.77334
## ENSG00000183337.18 BCOR 1285.25400 -0.8690707 0.08867716 94.46863
## ENSG00000060982.15 BCAT1 663.65390 1.6755350 0.17467110 83.94137
## ENSG00000109906.15 ZBTB16 5895.58267 2.9617587 0.29831639 80.95599
## ENSG00000182580.3 EPHB3 53.65242 -2.3884880 0.28647416 62.08225
## ENSG00000048740.18 CELF2 15777.74519 0.9584689 0.11994960 61.76490
## ENSG00000145990.11 GFOD1 2210.37959 1.4593192 0.18084505 60.74434
## ENSG00000109321.11 AREG 392.20635 4.4500668 0.46973841 60.48764
## ENSG00000141510.18 TP53 1336.80441 -0.5270916 0.06821035 59.28293
## ENSG00000215784.6 FAM72D 124.62930 1.2899093 0.16574192 58.56101
## pvalue padj
## ENSG00000227097.5 RPS28P7 3.218225e-27 6.995456e-23
## ENSG00000183337.18 BCOR 2.490080e-22 2.706343e-18
## ENSG00000060982.15 BCAT1 5.096679e-20 3.692883e-16
## ENSG00000109906.15 ZBTB16 2.308009e-19 1.254230e-15
## ENSG00000182580.3 EPHB3 3.294074e-15 1.402079e-11
## ENSG00000048740.18 CELF2 3.870118e-15 1.402079e-11
## ENSG00000145990.11 GFOD1 6.498982e-15 2.011817e-11
## ENSG00000109321.11 AREG 7.404211e-15 2.011817e-11
## ENSG00000141510.18 TP53 1.365561e-14 3.298133e-11
## ENSG00000215784.6 FAM72D 1.970840e-14 4.284015e-11
mean(abs(dge$stat))
## [1] 3.388839
tc_lo_a_t0veos_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & treatment_group==1 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 48
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21814 48
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 115 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000109906.15 ZBTB16 5320.11507 -3.4374370 0.19199768 258.6610
## ENSG00000184988.8 TMEM106A 709.79315 1.2230051 0.08960810 183.3919
## ENSG00000048740.18 CELF2 14863.79302 -1.0654712 0.08074058 168.6745
## ENSG00000077150.20 NFKB2 3283.51502 1.1787100 0.09841794 141.2055
## ENSG00000132514.14 CLEC10A 619.24307 2.5933695 0.20884016 140.4500
## ENSG00000104064.18 GABPB1 394.75447 -0.7247996 0.06089129 140.1801
## ENSG00000279359.1 RP11-36D19.9 90.71086 -4.9034740 0.36755644 134.7708
## ENSG00000168389.18 MFSD2A 155.50071 2.0758630 0.17452667 134.4048
## ENSG00000145990.11 GFOD1 1957.39890 -1.8627148 0.15847949 127.5545
## ENSG00000010704.19 HFE 323.45134 1.5984260 0.14004655 126.3633
## pvalue padj
## ENSG00000109906.15 ZBTB16 3.360382e-58 7.330338e-54
## ENSG00000184988.8 TMEM106A 8.807893e-42 9.606769e-38
## ENSG00000048740.18 CELF2 1.441019e-38 1.047813e-34
## ENSG00000077150.20 NFKB2 1.450827e-32 7.912085e-29
## ENSG00000132514.14 CLEC10A 2.122289e-32 8.839202e-29
## ENSG00000104064.18 GABPB1 2.431247e-32 8.839202e-29
## ENSG00000279359.1 RP11-36D19.9 3.705666e-31 1.154791e-27
## ENSG00000168389.18 MFSD2A 4.455912e-31 1.215016e-27
## ENSG00000145990.11 GFOD1 1.404914e-29 3.405199e-26
## ENSG00000010704.19 HFE 2.560399e-29 5.585255e-26
mean(abs(dge$stat))
## [1] 7.680155
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000185338.7 SOCS1 792.26712 -2.766210 0.13190802 381.8868
## ENSG00000109906.15 ZBTB16 5320.11507 -3.611869 0.18467444 295.3007
## ENSG00000128283.7 CDC42EP1 115.99336 3.358012 0.19675715 261.6463
## ENSG00000080546.13 SESN1 1604.94130 -1.399258 0.08732870 244.7479
## ENSG00000182580.3 EPHB3 72.35903 3.182280 0.19957381 230.9785
## ENSG00000074966.11 TXK 2023.01139 -1.649997 0.10569279 227.4593
## ENSG00000121578.13 B4GALT4 930.90220 -1.399099 0.09173109 221.8354
## ENSG00000048740.18 CELF2 14863.79302 -1.090982 0.07226423 220.7818
## ENSG00000039523.20 RIPOR1 3178.13477 1.175231 0.07819646 217.8315
## ENSG00000100027.17 YPEL1 1922.57004 -2.091714 0.13650554 209.4435
## pvalue padj
## ENSG00000185338.7 SOCS1 4.832469e-85 1.054155e-80
## ENSG00000109906.15 ZBTB16 3.480270e-66 3.795930e-62
## ENSG00000128283.7 CDC42EP1 7.510384e-59 5.461050e-55
## ENSG00000080546.13 SESN1 3.626519e-55 1.977722e-51
## ENSG00000182580.3 EPHB3 3.647192e-52 1.591197e-48
## ENSG00000074966.11 TXK 2.135190e-51 7.762840e-48
## ENSG00000121578.13 B4GALT4 3.597922e-50 1.121215e-46
## ENSG00000048740.18 CELF2 6.107343e-50 1.665320e-46
## ENSG00000039523.20 RIPOR1 2.687784e-49 6.514590e-46
## ENSG00000100027.17 YPEL1 1.816652e-47 3.962845e-44
mean(abs(dge$stat))
## [1] 10.90795
tc_lo_a_eosvpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000206047.3 DEFA1 5469.3934 -2.983730 0.43216988 2723.0074
## ENSG00000281887.3 GIMAP1-GIMAP5 590.3117 -1.127849 0.78953782 333.9923
## ENSG00000185338.7 SOCS1 792.2671 -2.791833 0.14647377 311.5634
## ENSG00000167173.19 C15orf39 3499.7107 1.461975 0.08347172 290.5943
## ENSG00000114423.23 CBLB 1972.0767 -1.362626 0.08182847 264.9611
## ENSG00000162174.12 ASRGL1 323.3099 1.301687 0.08385340 234.1363
## ENSG00000128283.7 CDC42EP1 115.9934 3.206102 0.20574680 229.7277
## ENSG00000074966.11 TXK 2023.0114 -1.669524 0.10871471 218.6595
## ENSG00000109906.15 ZBTB16 5320.1151 -3.567818 0.20986303 217.2269
## ENSG00000167600.14 CYP2S1 234.6805 2.377137 0.15582853 209.7154
## pvalue padj
## ENSG00000206047.3 DEFA1 0.000000e+00 0.000000e+00
## ENSG00000281887.3 GIMAP1-GIMAP5 1.297997e-74 1.415725e-70
## ENSG00000185338.7 SOCS1 9.969131e-70 7.248888e-66
## ENSG00000167173.19 C15orf39 3.690361e-65 2.012538e-61
## ENSG00000114423.23 CBLB 1.422841e-59 6.207571e-56
## ENSG00000162174.12 ASRGL1 7.469973e-53 2.715833e-49
## ENSG00000128283.7 CDC42EP1 6.834692e-52 2.129885e-48
## ENSG00000074966.11 TXK 1.773302e-49 4.835350e-46
## ENSG00000109906.15 ZBTB16 3.641487e-49 8.826155e-46
## ENSG00000167600.14 CYP2S1 1.584727e-47 3.456924e-44
mean(abs(dge$stat))
## [1] 10.22145
tc_lo_a_eosvpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & treatment_group==1 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 47
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21696 47
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 137 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000155659.15 VSIG4 1257.61013 3.5400766 0.2811023 131.90106
## ENSG00000169385.3 RNASE2 859.84644 1.8089705 0.1805673 95.29885
## ENSG00000185339.9 TCN2 860.35014 1.8349460 0.1872936 91.00457
## ENSG00000137474.22 MYO7A 696.02656 1.8551287 0.1896387 90.63056
## ENSG00000162745.11 OLFML2B 110.94478 1.7150246 0.1913517 77.19168
## ENSG00000149534.9 MS4A2 193.46247 -2.6524703 0.2911033 73.23761
## ENSG00000154269.15 ENPP3 64.64986 -2.0608870 0.2334975 72.90212
## ENSG00000115415.20 STAT1 5608.45831 -0.9174074 0.1103843 67.65283
## ENSG00000129538.14 RNASE1 59.48653 2.4605583 0.2904876 66.14691
## ENSG00000246363.3 LINC02458 49.73716 -2.1836489 0.2592985 66.01473
## pvalue padj
## ENSG00000155659.15 VSIG4 1.572648e-30 3.411860e-26
## ENSG00000169385.3 RNASE2 1.637078e-22 1.775821e-18
## ENSG00000185339.9 TCN2 1.433407e-21 9.392050e-18
## ENSG00000137474.22 MYO7A 1.731653e-21 9.392050e-18
## ENSG00000162745.11 OLFML2B 1.551461e-18 6.731788e-15
## ENSG00000149534.9 MS4A2 1.149447e-17 4.156210e-14
## ENSG00000154269.15 ENPP3 1.362415e-17 4.222513e-14
## ENSG00000115415.20 STAT1 1.949731e-16 5.287427e-13
## ENSG00000129538.14 RNASE1 4.185360e-16 9.582847e-13
## ENSG00000246363.3 LINC02458 4.475661e-16 9.582847e-13
mean(abs(dge$stat))
## [1] 3.720168
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000149534.9 MS4A2 193.4625 -2.9737363 0.19611976 196.8226
## ENSG00000133106.15 EPSTI1 939.8100 -0.9543914 0.07075212 178.0256
## ENSG00000137474.22 MYO7A 696.0266 1.9849438 0.14192368 175.8134
## ENSG00000162654.9 GBP4 1675.4251 -1.0949994 0.08304901 168.0892
## ENSG00000198848.13 CES1 1838.3112 1.7230700 0.12894356 163.6883
## ENSG00000169385.3 RNASE2 859.8464 1.8544681 0.13990473 159.1185
## ENSG00000115415.20 STAT1 5608.4583 -0.9340165 0.07509938 150.5753
## ENSG00000163251.4 FZD5 106.1830 1.2512637 0.10147334 150.1279
## ENSG00000166033.13 HTRA1 167.4301 2.3769995 0.19071071 136.3502
## ENSG00000163220.11 S100A9 111200.8193 1.6876920 0.14188879 127.9446
## pvalue padj
## ENSG00000149534.9 MS4A2 1.030945e-44 2.236739e-40
## ENSG00000133106.15 EPSTI1 1.307772e-40 1.418671e-36
## ENSG00000137474.22 MYO7A 3.977180e-40 2.876297e-36
## ENSG00000162654.9 GBP4 1.934245e-38 1.049134e-34
## ENSG00000198848.13 CES1 1.769523e-37 7.678316e-34
## ENSG00000169385.3 RNASE2 1.762951e-36 6.374829e-33
## ENSG00000115415.20 STAT1 1.297812e-34 4.022475e-31
## ENSG00000163251.4 FZD5 1.625583e-34 4.408581e-31
## ENSG00000166033.13 HTRA1 1.672723e-31 4.032378e-28
## ENSG00000163220.11 S100A9 1.154191e-29 2.504133e-26
mean(abs(dge$stat))
## [1] 7.192428
tc_lo_a_t0vpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000240247.8 DEFA1B 1402.2685 -3.8913167 0.80695196 730.62042
## ENSG00000162654.9 GBP4 1675.4251 -1.2372987 0.10563805 132.35701
## ENSG00000133106.15 EPSTI1 939.8100 -0.8942295 0.08537225 107.91928
## ENSG00000198848.13 CES1 1838.3112 1.7903478 0.16503140 106.51516
## ENSG00000137474.22 MYO7A 696.0266 2.0517585 0.19683525 100.13689
## ENSG00000149534.9 MS4A2 193.4625 -2.9443652 0.27311499 98.31781
## ENSG00000166033.13 HTRA1 167.4301 2.6276258 0.25941198 93.47978
## ENSG00000169385.3 RNASE2 859.8464 1.7731262 0.18220802 87.71400
## ENSG00000115415.20 STAT1 5608.4583 -1.0046473 0.10735334 85.06667
## ENSG00000164850.15 GPER1 357.3446 2.3897547 0.24486918 83.67710
## pvalue padj
## ENSG00000240247.8 DEFA1B 6.565894e-161 1.424536e-156
## ENSG00000162654.9 GBP4 1.249928e-30 1.355922e-26
## ENSG00000133106.15 EPSTI1 2.799502e-25 2.024600e-21
## ENSG00000198848.13 CES1 5.685577e-25 3.083857e-21
## ENSG00000137474.22 MYO7A 1.422200e-23 6.171208e-20
## ENSG00000149534.9 MS4A2 3.563464e-23 1.288549e-19
## ENSG00000166033.13 HTRA1 4.103722e-22 1.271919e-18
## ENSG00000169385.3 RNASE2 7.563720e-21 2.051281e-17
## ENSG00000115415.20 STAT1 2.884730e-20 6.954123e-17
## ENSG00000164850.15 GPER1 5.825617e-20 1.263926e-16
mean(abs(dge$stat))
## [1] 4.586292
tc_lo_a_t0vpod1_adj <- dge
treatment_group==2
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & treatment_group==2 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 30
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 22091 30
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 669 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000079215.15 SLC1A3 127.16034 1.7631835 0.3491554 23.59672
## ENSG00000171236.10 LRG1 585.52351 2.4543414 0.5028459 20.75322
## ENSG00000123358.20 NR4A1 909.66286 -1.1715008 0.2685336 18.86488
## ENSG00000100985.7 MMP9 2434.01959 3.4005253 0.7049743 18.54805
## ENSG00000224505.2 AC002117.1 24.27816 0.9134542 0.2162273 17.72735
## ENSG00000286813.1 RP11-81A1.10 63.34543 0.8204037 0.1960375 17.18595
## ENSG00000102010.15 BMX 49.58972 2.8248876 0.6316847 17.05447
## ENSG00000235706.8 DICER1-AS1 278.88111 0.5037422 0.1216700 16.94258
## ENSG00000151726.15 ACSL1 7884.12632 1.5529465 0.3650851 16.90440
## ENSG00000132170.24 PPARG 25.54928 0.8734849 0.2132271 16.60090
## pvalue padj
## ENSG00000079215.15 SLC1A3 1.187898e-06 0.02420461
## ENSG00000171236.10 LRG1 5.224361e-06 0.05322579
## ENSG00000123358.20 NR4A1 1.403125e-05 0.08439458
## ENSG00000100985.7 MMP9 1.656745e-05 0.08439458
## ENSG00000224505.2 AC002117.1 2.549352e-05 0.08880536
## ENSG00000286813.1 RP11-81A1.10 3.389330e-05 0.08880536
## ENSG00000102010.15 BMX 3.632269e-05 0.08880536
## ENSG00000235706.8 DICER1-AS1 3.852764e-05 0.08880536
## ENSG00000151726.15 ACSL1 3.931026e-05 0.08880536
## ENSG00000132170.24 PPARG 4.612918e-05 0.08880536
mean(abs(dge$stat))
## [1] 0.9597284
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000272825.1 LL21NC02-1C16.2 96.96324 0.8371518 0.13343654 38.82559
## ENSG00000260793.2 RP5-882C2.2 311.34768 0.7681852 0.13246083 32.75518
## ENSG00000183250.12 LINC01547 757.64333 0.5588114 0.10048091 30.48659
## ENSG00000173083.16 HPSE 1236.34632 -0.5355590 0.09866876 29.04849
## ENSG00000152207.8 CYSLTR2 402.51383 -0.6530792 0.12625223 26.13514
## ENSG00000235706.8 DICER1-AS1 278.88111 0.4589188 0.09310901 24.14034
## ENSG00000070961.16 ATP2B1 3219.80585 -0.4696350 0.09658894 23.32565
## ENSG00000175445.17 LPL 70.12512 1.2408807 0.25610487 21.63312
## ENSG00000231711.2 LINC00899 259.64682 0.3900730 0.08516019 20.90837
## ENSG00000136603.14 SKIL 773.41007 -0.4776508 0.10455942 20.57883
## pvalue padj
## ENSG00000272825.1 LL21NC02-1C16.2 4.634122e-10 7.259816e-06
## ENSG00000260793.2 RP5-882C2.2 1.045273e-08 8.187622e-05
## ENSG00000183250.12 LINC01547 3.361820e-08 1.755543e-04
## ENSG00000173083.16 HPSE 7.058909e-08 2.764622e-04
## ENSG00000152207.8 CYSLTR2 3.183381e-07 9.974170e-04
## ENSG00000235706.8 DICER1-AS1 8.956427e-07 2.338523e-03
## ENSG00000070961.16 ATP2B1 1.367638e-06 3.060773e-03
## ENSG00000175445.17 LPL 3.301021e-06 6.464225e-03
## ENSG00000231711.2 LINC00899 4.817837e-06 8.386248e-03
## ENSG00000136603.14 SKIL 5.722531e-06 8.964918e-03
mean(abs(dge$stat))
## [1] 1.234482
tc_lo_b_t0veos <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 3 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000115590.14 IL1R2 924.42601 0.2434183 3.9285354 180.31055
## ENSG00000237541.4 HLA-DQA2 560.95931 0.2585584 1.1672873 179.98409
## ENSG00000278621.1 THBS1-AS1 12.80480 2.7311473 0.7776560 89.70720
## ENSG00000273420.1 CTD-2540B15.13 18.17452 1.6761641 0.6644232 78.29255
## ENSG00000079393.21 DUSP13 20.67591 1.2931592 0.7832886 64.30114
## ENSG00000204044.6 SLC12A5-AS1 29.95286 1.8079286 0.9386269 46.22535
## ENSG00000260793.2 RP5-882C2.2 311.34768 0.8991426 0.1640768 29.24636
## ENSG00000165029.17 ABCA1 844.51147 -1.2120476 0.2623119 19.87194
## ENSG00000173083.16 HPSE 1236.34632 -0.5190125 0.1259207 16.84979
## ENSG00000272825.1 LL21NC02-1C16.2 96.96324 0.8422317 0.2056801 16.39734
## pvalue padj
## ENSG00000115590.14 IL1R2 4.145857e-41 5.396103e-37
## ENSG00000237541.4 HLA-DQA2 4.885341e-41 5.396103e-37
## ENSG00000278621.1 THBS1-AS1 2.761481e-21 2.033463e-17
## ENSG00000273420.1 CTD-2540B15.13 8.885629e-19 4.907311e-15
## ENSG00000079393.21 DUSP13 1.067840e-15 4.717931e-12
## ENSG00000204044.6 SLC12A5-AS1 1.054045e-11 3.880817e-08
## ENSG00000260793.2 RP5-882C2.2 6.373590e-08 2.011414e-04
## ENSG00000165029.17 ABCA1 8.280680e-06 2.286606e-02
## ENSG00000173083.16 HPSE 4.045782e-05 9.930597e-02
## ENSG00000272825.1 LL21NC02-1C16.2 5.135736e-05 1.134535e-01
mean(abs(dge$stat))
## [1] 0.894435
tc_lo_b_t0veos_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & treatment_group==2 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 32
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21736 32
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 194 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000133816.18 MICAL2 2943.85516 0.7789089 0.12157049 40.56823
## ENSG00000224505.2 AC002117.1 21.51833 -1.1393530 0.19346895 35.03737
## ENSG00000134574.12 DDB2 1402.15391 -0.4139817 0.07261334 32.40799
## ENSG00000266405.3 CBX3P2 81.87776 -0.7669847 0.13551894 31.91774
## ENSG00000262001.1 DLGAP1-AS2 87.81656 -0.5739731 0.10346740 30.83874
## ENSG00000237499.7 WAKMAR2 268.38688 -0.8043144 0.14561925 30.18730
## ENSG00000184988.8 TMEM106A 714.83016 0.5574801 0.10152308 29.97874
## ENSG00000010704.19 HFE 376.91679 0.6080182 0.11149297 29.53619
## ENSG00000182580.3 EPHB3 102.49136 1.5782482 0.28671826 29.04251
## ENSG00000161509.14 GRIN2C 112.97523 -0.9314928 0.17459569 28.12506
## pvalue padj
## ENSG00000133816.18 MICAL2 1.898684e-10 3.566678e-06
## ENSG00000224505.2 AC002117.1 3.234379e-09 3.037891e-05
## ENSG00000134574.12 DDB2 1.249719e-08 7.553533e-05
## ENSG00000266405.3 CBX3P2 1.608418e-08 7.553533e-05
## ENSG00000262001.1 DLGAP1-AS2 2.803839e-08 1.053402e-04
## ENSG00000237499.7 WAKMAR2 3.922688e-08 1.172207e-04
## ENSG00000184988.8 TMEM106A 4.368085e-08 1.172207e-04
## ENSG00000010704.19 HFE 5.488229e-08 1.288705e-04
## ENSG00000182580.3 EPHB3 7.080728e-08 1.477905e-04
## ENSG00000161509.14 GRIN2C 1.137238e-07 1.958850e-04
mean(abs(dge$stat))
## [1] 1.46779
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000182580.3 EPHB3 102.49136 1.3009831 0.14957311 73.47731
## ENSG00000073008.15 PVR 672.25551 0.6242081 0.08368927 54.91374
## ENSG00000133816.18 MICAL2 2943.85516 0.6909542 0.10021607 46.42623
## ENSG00000104972.16 LILRB1 5970.71157 0.5656853 0.08359891 45.10027
## ENSG00000107798.18 LIPA 3067.44485 0.7162865 0.10547282 44.91821
## ENSG00000173083.16 HPSE 1354.83693 0.7280429 0.10750470 44.67086
## ENSG00000101347.11 SAMHD1 15457.43230 0.4143883 0.06202348 44.31755
## ENSG00000183087.15 GAS6 735.11294 0.5494024 0.08237006 43.98306
## ENSG00000092964.18 DPYSL2 2184.22562 0.6168219 0.09218008 43.95720
## ENSG00000251429.1 AIDAP2 99.97742 0.8806600 0.13235644 43.49904
## pvalue padj
## ENSG00000182580.3 EPHB3 1.017999e-17 1.955271e-13
## ENSG00000073008.15 PVR 1.259373e-13 1.209439e-09
## ENSG00000133816.18 MICAL2 9.513335e-12 6.090754e-08
## ENSG00000104972.16 LILRB1 1.871994e-11 7.162838e-08
## ENSG00000107798.18 LIPA 2.054382e-11 7.162838e-08
## ENSG00000173083.16 HPSE 2.331007e-11 7.162838e-08
## ENSG00000101347.11 SAMHD1 2.792018e-11 7.162838e-08
## ENSG00000183087.15 GAS6 3.312298e-11 7.162838e-08
## ENSG00000092964.18 DPYSL2 3.356357e-11 7.162838e-08
## ENSG00000251429.1 AIDAP2 4.241669e-11 8.146974e-08
mean(abs(dge$stat))
## [1] 2.019014
tc_lo_b_eosvpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 3 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 493.1955 2.2607994 0.13667214 247.95834
## ENSG00000010327.10 STAB1 23946.7666 1.1433841 0.08971768 155.92752
## ENSG00000133816.18 MICAL2 2943.8552 0.8399458 0.07815621 112.92123
## ENSG00000174705.13 SH3PXD2B 260.1279 2.1205055 0.20375145 95.95791
## ENSG00000104972.16 LILRB1 5970.7116 0.6981740 0.07124726 94.74369
## ENSG00000123384.14 LRP1 26659.9755 0.7742280 0.07921944 93.71220
## ENSG00000101347.11 SAMHD1 15457.4323 0.5008277 0.05275850 89.48179
## ENSG00000119686.10 FLVCR2 890.2308 0.9376435 0.10086558 84.41021
## ENSG00000196576.16 PLXNB2 15082.8898 0.5513973 0.06172860 79.02920
## ENSG00000132205.11 EMILIN2 6960.7954 0.7329647 0.08178416 78.75009
## pvalue padj
## ENSG00000108950.12 FAM20A 7.236807e-56 1.237494e-51
## ENSG00000010327.10 STAB1 8.780236e-36 7.507102e-32
## ENSG00000133816.18 MICAL2 2.245193e-26 1.279760e-22
## ENSG00000174705.13 SH3PXD2B 1.173523e-22 5.016809e-19
## ENSG00000104972.16 LILRB1 2.167027e-22 7.411232e-19
## ENSG00000123384.14 LRP1 3.649045e-22 1.039978e-18
## ENSG00000101347.11 SAMHD1 3.094733e-21 7.559992e-18
## ENSG00000119686.10 FLVCR2 4.020671e-20 8.594185e-17
## ENSG00000196576.16 PLXNB2 6.119868e-19 1.162775e-15
## ENSG00000132205.11 EMILIN2 7.048551e-19 1.205302e-15
mean(abs(dge$stat))
## [1] 3.068885
tc_lo_b_eosvpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==1 & treatment_group==2 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 30
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21715 30
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 625 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000132170.24 PPARG 48.20760 2.1505950 0.2662258 60.46573
## ENSG00000108950.12 FAM20A 494.07416 2.2063718 0.3165917 43.28003
## ENSG00000079215.15 SLC1A3 106.72679 1.5115198 0.2330986 39.81901
## ENSG00000198019.13 FCGR1B 513.61162 1.2300391 0.2054735 34.23319
## ENSG00000183019.7 MCEMP1 1806.85176 1.8138844 0.2975967 34.08075
## ENSG00000167680.17 SEMA6B 155.08979 2.1383801 0.3499958 33.56789
## ENSG00000251429.1 AIDAP2 94.95369 1.2347545 0.2093972 33.51177
## ENSG00000154856.13 APCDD1 154.49487 1.5265850 0.2578314 33.01377
## ENSG00000170439.7 METTL7B 35.76914 3.1686336 0.5109607 32.62022
## ENSG00000244115.1 DNAJC25-GNG10 198.47885 0.9388774 0.1633077 32.15711
## pvalue padj
## ENSG00000132170.24 PPARG 7.487048e-15 1.625738e-10
## ENSG00000108950.12 FAM20A 4.744017e-11 5.150580e-07
## ENSG00000079215.15 SLC1A3 2.786196e-10 2.016648e-06
## ENSG00000198019.13 FCGR1B 4.888800e-09 2.197275e-05
## ENSG00000183019.7 MCEMP1 5.287178e-09 2.197275e-05
## ENSG00000167680.17 SEMA6B 6.881947e-09 2.197275e-05
## ENSG00000251429.1 AIDAP2 7.083414e-09 2.197275e-05
## ENSG00000154856.13 APCDD1 9.150855e-09 2.483771e-05
## ENSG00000170439.7 METTL7B 1.120430e-08 2.703225e-05
## ENSG00000244115.1 DNAJC25-GNG10 1.421951e-08 3.087624e-05
mean(abs(dge$stat))
## [1] 1.717923
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000271605.6 MILR1 793.1250 0.8555749 0.10416592 65.89361
## ENSG00000079215.15 SLC1A3 106.7268 1.5964974 0.19113598 65.17343
## ENSG00000078124.13 ACER3 1426.9688 0.5820186 0.07244318 63.92028
## ENSG00000154856.13 APCDD1 154.4949 1.5092698 0.18329589 63.59309
## ENSG00000132170.24 PPARG 48.2076 1.9670080 0.24227909 61.41409
## ENSG00000177706.9 FAM20C 750.0239 1.1609098 0.15812472 50.88953
## ENSG00000164821.5 DEFA4 115.4556 -1.7434251 0.23593328 50.83419
## ENSG00000164850.15 GPER1 272.1023 1.9465680 0.25610614 49.85400
## ENSG00000168615.13 ADAM9 1184.7640 0.9129877 0.12721043 49.67899
## ENSG00000179542.16 SLITRK4 168.9429 1.2218693 0.16988399 49.13772
## pvalue padj
## ENSG00000271605.6 MILR1 4.759321e-16 6.291942e-12
## ENSG00000079215.15 SLC1A3 6.858824e-16 6.291942e-12
## ENSG00000078124.13 ACER3 1.295571e-15 7.016127e-12
## ENSG00000154856.13 APCDD1 1.529651e-15 7.016127e-12
## ENSG00000132170.24 PPARG 4.624891e-15 1.697058e-11
## ENSG00000177706.9 FAM20C 9.771359e-13 2.634311e-09
## ENSG00000164821.5 DEFA4 1.005079e-12 2.634311e-09
## ENSG00000164850.15 GPER1 1.656222e-12 3.691311e-09
## ENSG00000168615.13 ADAM9 1.810749e-12 3.691311e-09
## ENSG00000179542.16 SLITRK4 2.386071e-12 4.377725e-09
mean(abs(dge$stat))
## [1] 2.307963
tc_lo_b_t0vpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 494.07416 2.0087618 0.2854317 44.46021
## ENSG00000172232.10 AZU1 208.72804 -1.0821626 0.1655359 41.89490
## ENSG00000142733.17 MAP3K6 876.43730 0.9653672 0.1540806 38.11734
## ENSG00000134243.12 SORT1 2233.24420 0.7976340 0.1301463 36.96559
## ENSG00000177706.9 FAM20C 750.02385 1.2542262 0.2017255 36.87785
## ENSG00000206047.3 DEFA1 2783.53260 -1.9948458 0.3269226 36.07951
## ENSG00000150337.14 FCGR1A 1120.27775 1.4095387 0.2281360 35.96390
## ENSG00000183307.4 TMEM121B 522.66955 0.9532160 0.1591333 34.87441
## ENSG00000164821.5 DEFA4 115.45557 -2.0163450 0.3496044 32.24600
## ENSG00000256713.8 PGA5 24.18784 2.7735595 0.5143974 31.96692
## pvalue padj
## ENSG00000108950.12 FAM20A 2.595793e-11 4.981066e-07
## ENSG00000172232.10 AZU1 9.631362e-11 9.240810e-07
## ENSG00000142733.17 MAP3K6 6.661543e-10 4.260945e-06
## ENSG00000134243.12 SORT1 1.202325e-09 4.826678e-06
## ENSG00000177706.9 FAM20C 1.257668e-09 4.826678e-06
## ENSG00000206047.3 DEFA1 1.894287e-09 5.510193e-06
## ENSG00000150337.14 FCGR1A 2.010076e-09 5.510193e-06
## ENSG00000183307.4 TMEM121B 3.516731e-09 8.435319e-06
## ENSG00000164821.5 DEFA4 1.358364e-08 2.896183e-05
## ENSG00000256713.8 PGA5 1.568207e-08 3.009232e-05
mean(abs(dge$stat))
## [1] 1.107873
tc_lo_b_t0vpod1_adj <- dge
treatment_group==1
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & treatment_group==1 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 25
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 22227 25
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 167 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000137801.11 THBS1 18797.18309 4.030477 0.3963519 83.13806
## ENSG00000064270.13 ATP2C2 402.83240 4.631437 0.4486478 82.12003
## ENSG00000265527.1 MIR5690 14.62137 3.550693 0.4052435 79.61081
## ENSG00000155307.19 SAMSN1 3520.51278 2.536829 0.2835327 72.97227
## ENSG00000198363.18 ASPH 2613.51210 2.670401 0.3125150 65.90632
## ENSG00000273812.3 WI2-87327B8.2 106.74736 3.062129 0.3610669 64.13354
## ENSG00000169902.15 TPST1 550.52177 3.259951 0.3818049 62.95361
## ENSG00000172985.11 SH3RF3 375.84195 2.657477 0.3221656 61.70452
## ENSG00000101187.16 SLCO4A1 275.69907 3.598141 0.4241167 60.66574
## ENSG00000250608.2 RP11-933H2.4 212.69083 1.568772 0.1988214 60.56301
## pvalue padj
## ENSG00000137801.11 THBS1 7.651834e-20 1.423219e-15
## ENSG00000064270.13 ATP2C2 1.280679e-19 1.423219e-15
## ENSG00000265527.1 MIR5690 4.559246e-19 3.377793e-15
## ENSG00000155307.19 SAMSN1 1.314846e-17 7.305941e-14
## ENSG00000198363.18 ASPH 4.728735e-16 2.102017e-12
## ENSG00000273812.3 WI2-87327B8.2 1.162655e-15 4.306863e-12
## ENSG00000169902.15 TPST1 2.116324e-15 6.719630e-12
## ENSG00000172985.11 SH3RF3 3.990636e-15 1.108698e-11
## ENSG00000101187.16 SLCO4A1 6.763709e-15 1.583838e-11
## ENSG00000250608.2 RP11-933H2.4 7.126060e-15 1.583838e-11
mean(abs(dge$stat))
## [1] 3.254617
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000079215.15 SLC1A3 1113.8142 4.951827 0.18709941 521.9372
## ENSG00000179593.16 ALOX15B 870.0898 5.245785 0.20644794 473.6905
## ENSG00000164105.4 SAP30 1512.9030 2.736357 0.12704480 408.9775
## ENSG00000119138.5 KLF9 2326.1339 1.405745 0.07995513 297.8874
## ENSG00000140044.13 JDP2 2561.3681 2.077276 0.12138571 268.4997
## ENSG00000096060.15 FKBP5 18192.7606 3.242083 0.19611138 220.8318
## ENSG00000121933.19 TMIGD3 198.9827 4.037352 0.24706584 218.8149
## ENSG00000279359.1 RP11-36D19.9 164.2215 5.321338 0.33736811 216.5070
## ENSG00000134463.15 ECHDC3 705.0188 3.381691 0.20792654 215.7673
## ENSG00000164056.11 SPRY1 176.8216 2.775841 0.18288624 208.4013
## pvalue padj
## ENSG00000079215.15 SLC1A3 1.603543e-115 3.564195e-111
## ENSG00000179593.16 ALOX15B 5.042984e-105 5.604520e-101
## ENSG00000164105.4 SAP30 6.119167e-91 4.533691e-87
## ENSG00000119138.5 KLF9 9.506737e-67 5.282656e-63
## ENSG00000140044.13 JDP2 2.409385e-60 1.071068e-56
## ENSG00000096060.15 FKBP5 5.956032e-50 2.206412e-46
## ENSG00000121933.19 TMIGD3 1.640155e-49 5.207960e-46
## ENSG00000279359.1 RP11-36D19.9 5.227812e-49 1.452482e-45
## ENSG00000134463.15 ECHDC3 7.580092e-49 1.872030e-45
## ENSG00000164056.11 SPRY1 3.066653e-47 6.816250e-44
mean(abs(dge$stat))
## [1] 5.141268
tc_hi_a_t0veos <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 5 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000169174.11 PCSK9 89.34576 7.212028 0.9670302 608.1842
## ENSG00000179593.16 ALOX15B 870.08982 4.929816 0.2099199 392.1615
## ENSG00000079215.15 SLC1A3 1113.81425 4.721836 0.2077981 362.4938
## ENSG00000079393.21 DUSP13 144.49220 5.927693 0.8491969 346.8927
## ENSG00000118520.15 ARG1 2407.98739 4.886759 0.2217240 333.8596
## ENSG00000279174.1 RP11-1099M24.9 24.00901 5.203995 0.8116484 308.4292
## ENSG00000278621.1 THBS1-AS1 85.37794 5.367301 0.9265756 267.1314
## ENSG00000276107.1 THBS1-IT1 79.67447 5.480677 0.9943344 262.0736
## ENSG00000258476.6 LINC02207 639.25327 3.405272 0.1923522 250.8748
## ENSG00000102010.15 BMX 435.13935 4.687378 0.2741784 230.1977
## pvalue padj
## ENSG00000169174.11 PCSK9 2.777726e-134 6.054332e-130
## ENSG00000179593.16 ALOX15B 2.801057e-87 3.052592e-83
## ENSG00000079215.15 SLC1A3 8.064402e-81 5.859057e-77
## ENSG00000079393.21 DUSP13 2.012841e-77 1.096797e-73
## ENSG00000118520.15 ARG1 1.387337e-74 6.047682e-71
## ENSG00000279174.1 RP11-1099M24.9 4.801869e-69 1.744359e-65
## ENSG00000278621.1 THBS1-AS1 4.787679e-60 1.490746e-56
## ENSG00000276107.1 THBS1-IT1 6.060797e-59 1.651264e-55
## ENSG00000258476.6 LINC02207 1.673893e-56 4.053797e-53
## ENSG00000102010.15 BMX 5.398020e-52 1.176552e-48
mean(abs(dge$stat))
## [1] 4.122525
tc_hi_a_t0veos_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & treatment_group==1 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 23
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21654 23
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 197 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000072310.18 SREBF1 2084.89569 1.374119 0.1490116 83.15173
## ENSG00000132514.14 CLEC10A 562.93900 2.849057 0.3058331 77.72400
## ENSG00000100453.13 GZMB 2459.77305 -2.167593 0.2566876 64.29149
## ENSG00000141744.4 PNMT 71.32042 -3.560621 0.4105019 63.17609
## ENSG00000039523.20 RIPOR1 2980.66339 1.027136 0.1317769 60.13118
## ENSG00000173372.17 C1QA 243.72605 2.839128 0.3490706 59.42622
## ENSG00000079215.15 SLC1A3 1141.83012 -2.763823 0.3345625 58.58345
## ENSG00000164056.11 SPRY1 167.30887 -2.551936 0.3141082 58.50142
## ENSG00000109906.15 ZBTB16 4969.85246 -3.160372 0.3775330 57.93555
## ENSG00000134780.10 DAGLA 188.22694 2.275761 0.2941307 56.08840
## pvalue padj
## ENSG00000072310.18 SREBF1 7.599082e-20 1.645353e-15
## ENSG00000132514.14 CLEC10A 1.184925e-18 1.282800e-14
## ENSG00000100453.13 GZMB 1.073080e-15 7.744779e-12
## ENSG00000141744.4 PNMT 1.890288e-15 1.023213e-11
## ENSG00000039523.20 RIPOR1 8.874140e-15 3.842858e-11
## ENSG00000173372.17 C1QA 1.269660e-14 4.581779e-11
## ENSG00000079215.15 SLC1A3 1.948487e-14 5.498098e-11
## ENSG00000164056.11 SPRY1 2.031442e-14 5.498098e-11
## ENSG00000109906.15 ZBTB16 2.708476e-14 6.515991e-11
## ENSG00000134780.10 DAGLA 6.928475e-14 1.500153e-10
mean(abs(dge$stat))
## [1] 2.710607
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000164056.11 SPRY1 167.3089 -2.617604 0.1771423 204.44134
## ENSG00000172232.10 AZU1 256.6018 -1.932919 0.1889567 97.26575
## ENSG00000079215.15 SLC1A3 1141.8301 -2.760312 0.2632785 90.30577
## ENSG00000185338.7 SOCS1 596.4511 -1.871254 0.1940857 84.44088
## ENSG00000072310.18 SREBF1 2084.8957 1.412415 0.1507135 83.00525
## ENSG00000183542.5 KLRC4 354.2118 -1.386445 0.1535955 77.83535
## ENSG00000132514.14 CLEC10A 562.9390 3.079505 0.3089449 76.49791
## ENSG00000134539.17 KLRD1 4604.7957 -1.629675 0.1808981 74.64582
## ENSG00000146232.17 NFKBIE 631.9206 1.443095 0.1623899 74.42134
## ENSG00000152766.6 ANKRD22 458.2366 -2.338834 0.2536226 72.48926
## pvalue padj
## ENSG00000164056.11 SPRY1 2.242253e-46 4.855376e-42
## ENSG00000172232.10 AZU1 6.061975e-23 6.563300e-19
## ENSG00000079215.15 SLC1A3 2.040563e-21 1.472878e-17
## ENSG00000185338.7 SOCS1 3.958768e-20 2.143079e-16
## ENSG00000072310.18 SREBF1 8.183585e-20 3.544147e-16
## ENSG00000183542.5 KLRC4 1.119980e-18 4.042008e-15
## ENSG00000132514.14 CLEC10A 2.204458e-18 6.819333e-15
## ENSG00000134539.17 KLRD1 5.632088e-18 1.518263e-14
## ENSG00000146232.17 NFKBIE 6.310319e-18 1.518263e-14
## ENSG00000152766.6 ANKRD22 1.679436e-17 3.636650e-14
mean(abs(dge$stat))
## [1] 3.741003
tc_hi_a_eosvpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000235655.3 H3P6 666.4614 4.746055 2.4665593 167.16278
## ENSG00000129538.14 RNASE1 143.7566 4.466465 0.3558303 141.44739
## ENSG00000173372.17 C1QA 243.7261 4.363465 0.3414017 137.48828
## ENSG00000162745.11 OLFML2B 164.2976 3.547194 0.2993985 130.27448
## ENSG00000164047.6 CAMP 508.5888 -3.214854 0.2771307 120.97930
## ENSG00000179921.15 GPBAR1 699.7191 2.243816 0.2141482 104.50115
## ENSG00000010327.10 STAB1 28597.5524 3.172264 0.2939730 101.16186
## ENSG00000092964.18 DPYSL2 1885.5321 2.307491 0.2252137 98.96122
## ENSG00000121966.7 CXCR4 9874.0219 -2.090096 0.2107991 92.51866
## ENSG00000107798.18 LIPA 2304.6076 2.487076 0.2490610 92.15894
## pvalue padj
## ENSG00000235655.3 H3P6 3.082290e-38 6.156567e-34
## ENSG00000129538.14 RNASE1 1.284441e-32 1.282771e-28
## ENSG00000173372.17 C1QA 9.429832e-32 6.278382e-28
## ENSG00000162745.11 OLFML2B 3.568567e-30 1.781964e-26
## ENSG00000164047.6 CAMP 3.861409e-28 1.542556e-24
## ENSG00000179921.15 GPBAR1 1.571024e-24 5.229938e-21
## ENSG00000010327.10 STAB1 8.476605e-24 2.418739e-20
## ENSG00000092964.18 DPYSL2 2.574955e-23 6.429019e-20
## ENSG00000121966.7 CXCR4 6.669304e-22 1.480141e-18
## ENSG00000107798.18 LIPA 7.998711e-22 1.597663e-18
mean(abs(dge$stat))
## [1] 4.128341
tc_hi_a_eosvpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & treatment_group==1 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 24
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21860 24
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 202 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000132170.24 PPARG 140.52036 3.532818 0.3524650 88.66234
## ENSG00000137869.15 CYP19A1 64.76442 5.883919 0.5696610 85.85100
## ENSG00000108950.12 FAM20A 1340.46068 3.336057 0.3491410 80.34614
## ENSG00000167680.17 SEMA6B 402.11326 3.176831 0.3464517 75.20631
## ENSG00000129538.14 RNASE1 145.60592 3.604216 0.3952207 72.68911
## ENSG00000163221.9 S100A12 10264.16050 2.856398 0.3354250 66.09817
## ENSG00000183019.7 MCEMP1 3932.57736 2.760099 0.3325777 63.22748
## ENSG00000136160.17 EDNRB 50.50611 3.689244 0.4606377 56.98972
## ENSG00000139572.4 GPR84 178.40373 2.448494 0.3182039 55.85630
## ENSG00000143546.10 S100A8 54619.02839 2.359509 0.3129705 53.57704
## pvalue padj
## ENSG00000132170.24 PPARG 4.682973e-21 1.023651e-16
## ENSG00000137869.15 CYP19A1 1.940174e-20 2.120513e-16
## ENSG00000108950.12 FAM20A 3.142454e-19 2.289697e-15
## ENSG00000167680.17 SEMA6B 4.240090e-18 2.317103e-14
## ENSG00000129538.14 RNASE1 1.517696e-17 6.635063e-14
## ENSG00000163221.9 S100A12 4.290137e-16 1.562968e-12
## ENSG00000183019.7 MCEMP1 1.841610e-15 5.750822e-12
## ENSG00000136160.17 EDNRB 4.380966e-14 1.197044e-10
## ENSG00000139572.4 GPR84 7.796654e-14 1.893634e-10
## ENSG00000143546.10 S100A8 2.486479e-13 5.372700e-10
mean(abs(dge$stat))
## [1] 3.270127
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1340.4607 3.366682 0.2306195 168.28922
## ENSG00000167680.17 SEMA6B 402.1133 3.146322 0.2374286 142.95644
## ENSG00000060558.4 GNA15 1341.5683 1.264053 0.1097692 127.73054
## ENSG00000161944.16 ASGR2 3254.7476 1.746071 0.1528648 120.24374
## ENSG00000170439.7 METTL7B 116.3331 3.983329 0.3421774 107.34490
## ENSG00000198848.13 CES1 2272.4987 2.167707 0.1974807 105.68611
## ENSG00000150337.14 FCGR1A 1469.0407 1.901307 0.1768162 104.46977
## ENSG00000149534.9 MS4A2 182.2406 -3.426363 0.3062217 100.02166
## ENSG00000104918.8 RETN 1306.2868 2.757746 0.2505300 98.72815
## ENSG00000136830.12 NIBAN2 3298.4497 1.407715 0.1388694 97.07961
## pvalue padj
## ENSG00000108950.12 FAM20A 1.749159e-38 3.823661e-34
## ENSG00000167680.17 SEMA6B 6.008352e-33 6.567129e-29
## ENSG00000060558.4 GNA15 1.285650e-29 9.368100e-26
## ENSG00000161944.16 ASGR2 5.594637e-28 3.057469e-24
## ENSG00000170439.7 METTL7B 3.740640e-25 1.635408e-21
## ENSG00000198848.13 CES1 8.639085e-25 3.147507e-21
## ENSG00000150337.14 FCGR1A 1.596104e-24 4.984405e-21
## ENSG00000149534.9 MS4A2 1.507397e-23 4.118963e-20
## ENSG00000104918.8 RETN 2.896550e-23 7.035398e-20
## ENSG00000136830.12 NIBAN2 6.659500e-23 1.455767e-19
mean(abs(dge$stat))
## [1] 4.576891
tc_hi_a_t0vpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 6 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000139675.13 HNRNPA1L2 87.17300 2.246248 2.7937605 121.66326
## ENSG00000273047.1 RP4-583P15.14 68.10403 6.845862 3.3456051 79.02762
## ENSG00000079215.15 SLC1A3 243.72289 -3.931421 0.6576895 30.34254
## ENSG00000152766.6 ANKRD22 148.74159 -3.029033 0.5699001 27.08705
## ENSG00000123836.15 PFKFB2 1008.07343 -3.231340 0.6102561 26.01151
## ENSG00000112299.8 VNN1 1054.88471 -2.752552 0.5316643 25.54896
## ENSG00000096006.12 CRISP3 128.60621 -3.894003 0.7800306 23.30689
## ENSG00000225313.5 RP11-415J8.3 244.61892 -2.704253 0.5775340 21.27013
## ENSG00000166523.8 CLEC4E 1859.24532 -2.831758 0.6123570 19.79771
## ENSG00000132514.14 CLEC10A 867.89144 2.970602 0.6374807 19.77513
## pvalue padj
## ENSG00000139675.13 HNRNPA1L2 2.735395e-28 5.283963e-24
## ENSG00000273047.1 RP4-583P15.14 6.124756e-19 5.915595e-15
## ENSG00000079215.15 SLC1A3 3.620961e-08 2.331537e-04
## ENSG00000152766.6 ANKRD22 1.944961e-07 9.392701e-04
## ENSG00000123836.15 PFKFB2 3.393876e-07 1.311190e-03
## ENSG00000112299.8 VNN1 4.312986e-07 1.388566e-03
## ENSG00000096006.12 CRISP3 1.381042e-06 3.811083e-03
## ENSG00000225313.5 RP11-415J8.3 3.988988e-06 9.631909e-03
## ENSG00000166523.8 CLEC4E 8.608559e-06 1.682684e-02
## ENSG00000132514.14 CLEC10A 8.710897e-06 1.682684e-02
mean(abs(dge$stat))
## [1] 0.6226131
tc_hi_a_t0vpod1_adj <- dge
treatment_group==2
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & treatment_group==2 & timepoint != "POD1")
ss3$case <- grepl("EOS",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 62
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21828 62
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 206 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108984.15 MAP2K6 1061.6632 1.0618905 0.1237009 73.22654
## ENSG00000197632.9 SERPINB2 236.4370 1.8522465 0.2315241 61.70258
## ENSG00000163221.9 S100A12 6460.5551 2.0941184 0.2625110 60.42488
## ENSG00000096060.15 FKBP5 8486.6198 1.6524287 0.2203608 54.66055
## ENSG00000173744.18 AGFG1 3803.7375 1.3149106 0.1766162 54.63706
## ENSG00000137962.13 ARHGAP29 171.0255 1.0350793 0.1397988 54.54718
## ENSG00000160223.17 ICOSLG 535.8792 -0.7789236 0.1045706 54.29724
## ENSG00000121316.11 PLBD1 10701.9018 1.5491562 0.2089442 53.68398
## ENSG00000172985.11 SH3RF3 275.9671 1.6227396 0.2198581 53.08054
## ENSG00000155307.19 SAMSN1 1816.3282 1.5258123 0.2085799 52.31831
## pvalue padj
## ENSG00000108984.15 MAP2K6 1.155909e-17 2.523119e-13
## ENSG00000197632.9 SERPINB2 3.994572e-15 4.359676e-11
## ENSG00000163221.9 S100A12 7.644064e-15 5.561821e-11
## ENSG00000096060.15 FKBP5 1.432536e-13 5.374181e-10
## ENSG00000173744.18 AGFG1 1.449761e-13 5.374181e-10
## ENSG00000137962.13 ARHGAP29 1.517604e-13 5.374181e-10
## ENSG00000160223.17 ICOSLG 1.723441e-13 5.374181e-10
## ENSG00000121316.11 PLBD1 2.354758e-13 6.424957e-10
## ENSG00000172985.11 SH3RF3 3.201484e-13 7.764666e-10
## ENSG00000155307.19 SAMSN1 4.719543e-13 1.030182e-09
mean(abs(dge$stat))
## [1] 3.847355
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000160223.17 ICOSLG 535.8792 -0.7467822 0.06673933 123.10529
## ENSG00000175130.7 MARCKSL1 967.7293 -0.7435331 0.07273058 102.20699
## ENSG00000197122.12 SRC 1158.8170 -0.8509687 0.08390001 99.68092
## ENSG00000012124.17 CD22 2387.7642 -0.8408112 0.08328323 98.72146
## ENSG00000281162.2 LINC01127 620.5166 1.3982997 0.13632090 95.97391
## ENSG00000072310.18 SREBF1 2015.9050 -0.5726827 0.05889950 93.21485
## ENSG00000183779.7 ZNF703 907.1750 -1.2034120 0.12064105 93.00314
## ENSG00000141682.12 PMAIP1 380.1075 -0.8925346 0.09144513 92.27686
## ENSG00000159958.7 TNFRSF13C 956.9752 -0.5825913 0.06055005 91.45275
## ENSG00000123685.9 BATF3 136.9244 -0.9271236 0.09649069 90.04453
## pvalue padj
## ENSG00000160223.17 ICOSLG 1.322420e-28 2.886578e-24
## ENSG00000175130.7 MARCKSL1 5.001317e-24 5.458438e-20
## ENSG00000197122.12 SRC 1.790381e-23 1.302681e-19
## ENSG00000012124.17 CD22 2.906349e-23 1.585995e-19
## ENSG00000281162.2 LINC01127 1.164075e-22 5.081885e-19
## ENSG00000072310.18 SREBF1 4.691478e-22 1.628100e-18
## ENSG00000183779.7 ZNF703 5.221137e-22 1.628100e-18
## ENSG00000141682.12 PMAIP1 7.536034e-22 2.056207e-18
## ENSG00000159958.7 TNFRSF13C 1.142891e-21 2.771892e-18
## ENSG00000123685.9 BATF3 2.328599e-21 5.082865e-18
mean(abs(dge$stat))
## [1] 5.859661
tc_hi_b_t0veos <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000235655.3 H3P6 485.6623 -1.5372041 1.08740980 386.41247
## ENSG00000141682.12 PMAIP1 380.1075 -0.9773438 0.11827482 66.33028
## ENSG00000160223.17 ICOSLG 535.8792 -0.7106591 0.09165973 59.06123
## ENSG00000072310.18 SREBF1 2015.9050 -0.5405884 0.07334376 53.71543
## ENSG00000175130.7 MARCKSL1 967.7293 -0.7104446 0.09662604 53.07312
## ENSG00000169122.11 FAM110B 126.2614 1.3481101 0.17989510 52.57778
## ENSG00000197122.12 SRC 1158.8170 -0.8436746 0.11482131 52.23331
## ENSG00000242472.1 IGHJ5 134.7030 -0.7059525 0.09835176 51.17250
## ENSG00000281162.2 LINC01127 620.5166 1.4041647 0.18898671 51.03911
## ENSG00000159958.7 TNFRSF13C 956.9752 -0.5526402 0.07709455 50.82674
## pvalue padj
## ENSG00000235655.3 H3P6 4.999158e-86 1.091216e-81
## ENSG00000141682.12 PMAIP1 3.813556e-16 4.162115e-12
## ENSG00000160223.17 ICOSLG 1.528415e-14 1.112074e-10
## ENSG00000072310.18 SREBF1 2.317362e-13 1.264585e-09
## ENSG00000175130.7 MARCKSL1 3.213598e-13 1.402929e-09
## ENSG00000169122.11 FAM110B 4.135421e-13 1.504466e-09
## ENSG00000197122.12 SRC 4.928311e-13 1.536788e-09
## ENSG00000242472.1 IGHJ5 8.459552e-13 2.195993e-09
## ENSG00000281162.2 LINC01127 9.054396e-13 2.195993e-09
## ENSG00000159958.7 TNFRSF13C 1.008897e-12 2.202220e-09
mean(abs(dge$stat))
## [1] 3.170145
tc_hi_b_t0veos_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & treatment_group==2 & timepoint != "T0")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 59
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21618 59
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 140 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000019169.11 MARCO 1146.0967 1.875059 0.1759798 105.73472
## ENSG00000108950.12 FAM20A 1568.7848 2.571190 0.2365273 104.40275
## ENSG00000152270.9 PDE3B 2421.7636 -1.050534 0.1056526 97.18047
## ENSG00000159189.12 C1QC 137.9313 3.169276 0.3023666 93.17740
## ENSG00000171812.13 COL8A2 319.6901 1.639214 0.1672085 91.08459
## ENSG00000099377.14 HSD3B7 172.1246 1.379791 0.1447231 87.60091
## ENSG00000182580.3 EPHB3 106.3407 1.795891 0.1933337 81.54711
## ENSG00000010327.10 STAB1 28836.3295 1.534081 0.1656748 81.52376
## ENSG00000007968.7 E2F2 889.5286 1.487165 0.1627249 79.69012
## ENSG00000145287.11 PLAC8 4921.3813 1.407633 0.1544193 79.58844
## pvalue padj
## ENSG00000019169.11 MARCO 8.429723e-25 1.784575e-20
## ENSG00000108950.12 FAM20A 1.651009e-24 1.784575e-20
## ENSG00000152270.9 PDE3B 6.328795e-23 4.560530e-19
## ENSG00000159189.12 C1QC 4.781096e-22 2.583943e-18
## ENSG00000171812.13 COL8A2 1.376597e-21 5.951856e-18
## ENSG00000099377.14 HSD3B7 8.008774e-21 2.885561e-17
## ENSG00000182580.3 EPHB3 1.711329e-19 4.679420e-16
## ENSG00000010327.10 STAB1 1.731675e-19 4.679420e-16
## ENSG00000007968.7 E2F2 4.379855e-19 9.968419e-16
## ENSG00000145287.11 PLAC8 4.611166e-19 9.968419e-16
mean(abs(dge$stat))
## [1] 4.299517
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1568.7848 2.7614099 0.16317570 235.5619
## ENSG00000165092.13 ALDH1A1 446.1609 -2.2654686 0.13985338 231.1344
## ENSG00000007968.7 E2F2 889.5286 1.4514824 0.10031487 196.5105
## ENSG00000135424.18 ITGA7 449.9249 1.6280069 0.11393975 189.1996
## ENSG00000019169.11 MARCO 1146.0967 1.8405815 0.13607686 164.1851
## ENSG00000111424.12 VDR 653.8612 0.8782710 0.07143206 147.5929
## ENSG00000137959.17 IFI44L 1364.0428 -1.5524693 0.12310558 146.0327
## ENSG00000014257.16 ACP3 906.4581 0.9777736 0.07970893 145.7403
## ENSG00000161944.16 ASGR2 2879.4472 1.2950035 0.10523282 142.3192
## ENSG00000099377.14 HSD3B7 172.1246 1.3976301 0.11829039 131.7320
## pvalue padj
## ENSG00000108950.12 FAM20A 3.651248e-53 7.893267e-49
## ENSG00000165092.13 ALDH1A1 3.372458e-52 3.645290e-48
## ENSG00000007968.7 E2F2 1.206031e-44 8.690661e-41
## ENSG00000135424.18 ITGA7 4.753743e-43 2.569160e-39
## ENSG00000019169.11 MARCO 1.378210e-37 5.958828e-34
## ENSG00000111424.12 VDR 5.822705e-34 2.097921e-30
## ENSG00000137959.17 IFI44L 1.276997e-33 3.943731e-30
## ENSG00000014257.16 ACP3 1.479515e-33 3.998018e-30
## ENSG00000161944.16 ASGR2 8.280973e-33 1.989090e-29
## ENSG00000099377.14 HSD3B7 1.712438e-30 3.701948e-27
mean(abs(dge$stat))
## [1] 6.712274
tc_hi_b_eosvpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$fullBetaConv. Use larger maxit argument with nbinomLRT
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000235655.3 H3P6 608.09437 3.904034 0.98480695 488.6684
## ENSG00000165092.13 ALDH1A1 446.16087 -2.362199 0.15699364 194.3134
## ENSG00000108950.12 FAM20A 1568.78481 2.648635 0.17706493 185.9421
## ENSG00000135424.18 ITGA7 449.92494 1.462964 0.10863910 170.9103
## ENSG00000007968.7 E2F2 889.52858 1.389067 0.11141029 146.0457
## ENSG00000137959.17 IFI44L 1364.04275 -1.542075 0.12864932 131.4918
## ENSG00000019169.11 MARCO 1146.09674 1.789369 0.15470308 120.1663
## ENSG00000137869.15 CYP19A1 85.05678 2.864858 0.24740781 117.7876
## ENSG00000170439.7 METTL7B 212.43471 2.479187 0.21183023 116.6478
## ENSG00000174837.15 ADGRE1 1777.49986 1.059254 0.09711126 113.4690
## pvalue padj
## ENSG00000235655.3 H3P6 2.776805e-108 6.002896e-104
## ENSG00000165092.13 ALDH1A1 3.638032e-44 3.932349e-40
## ENSG00000108950.12 FAM20A 2.444196e-42 1.761288e-38
## ENSG00000135424.18 ITGA7 4.681249e-39 2.529981e-35
## ENSG00000007968.7 E2F2 1.268673e-33 5.485234e-30
## ENSG00000137959.17 IFI44L 1.932714e-30 6.963570e-27
## ENSG00000019169.11 MARCO 5.817281e-28 1.796542e-24
## ENSG00000137869.15 CYP19A1 1.929860e-27 5.214965e-24
## ENSG00000170439.7 METTL7B 3.428459e-27 8.235159e-24
## ENSG00000174837.15 ADGRE1 1.703266e-26 3.682120e-23
mean(abs(dge$stat))
## [1] 4.977048
tc_hi_b_eosvpod1_adj <- dge
ss2 <- as.data.frame(cbind(ss1,sscell))
ss2 <- ss2[order(rownames(ss2)),]
ss2 <- ss2[table(ss2$PG_number)==3,] # must include all 3 timepoints
mx <- xx[,colnames(xx) %in% rownames(ss2)]
mx <- mx[,order(colnames(mx)),]
rownames(ss2) == colnames(mx)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE
ss2$timepoint <- factor(ss2$timepoint, ordered = TRUE,levels=c("T0","EOS","POD1"))
ss2$PG_number <- factor(ss2$PG_number, ordered = FALSE)
str(ss2)
## 'data.frame': 246 obs. of 49 variables:
## $ PG_number : Factor w/ 102 levels "3176","3178",..: 1 2 3 3 4 5 5 6 6 7 ...
## $ sexD : num 1 1 2 2 2 1 1 1 1 1 ...
## $ ageD : int 84 54 70 70 62 58 58 61 61 68 ...
## $ ageCS : num [1:246, 1] 1.9878 -0.6079 0.7765 0.7765 0.0843 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : NULL
## $ weightD : num 60 63.6 70.1 70.1 78.7 ...
## $ asaD : int 3 2 2 2 2 1 1 1 1 2 ...
## $ heightD : num 133 155 170 170 175 158 158 149 149 155 ...
## $ ethnicityCAT : chr "Asian" "Asian" "Asian" "Asian" ...
## $ ethnicityD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ current_smokerD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diabetes_typeD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ daily_insulinD : num 1 1 1 1 1 1 1 1 1 1 ...
## $ creatinine_preopD : int 54 47 109 109 98 50 50 49 49 61 ...
## $ surgery_dominantD : num 1 2 6 6 4 2 2 2 2 4 ...
## $ wound_typeOP : num 2 2 2 2 2 2 2 2 2 2 ...
## $ HbA1c : num 5.7 4.9 5.2 5.2 5.4 ...
## $ bmi : num 33.9 26.5 24.3 24.3 25.7 ...
## $ revised_whodas_preop: int 24 14 12 12 12 12 12 18 18 12 ...
## $ neut_lymph_ratio_d0 : num 1.31 6 1.83 1.83 6.88 ...
## $ neut_lymph_ratio_d1 : num 14 15.62 6.27 6.27 16.57 ...
## $ neut_lymph_ratio_d2 : num 14 9.5 7.67 7.67 12.17 ...
## $ ab_noninfection : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risk : int 4 1 2 2 1 1 1 1 1 1 ...
## $ risk_cat : num 3 1 2 2 1 1 1 1 1 1 ...
## $ bmi_cat : num 4 3 2 2 3 3 3 1 1 2 ...
## $ wound_type_cat : num 2 2 2 2 2 2 2 2 2 2 ...
## $ duration_sx : num 3.067 1.333 5.167 5.167 0.683 ...
## $ anyDex : num 2 2 2 2 2 2 2 2 2 2 ...
## $ treatment_group : int 2 1 2 2 1 1 1 2 2 2 ...
## $ deltacrp : num 277.9 32.7 202.9 202.9 24.8 ...
## $ crp_group : int 4 1 4 4 1 4 4 1 1 1 ...
## $ timepoint : Ord.factor w/ 3 levels "T0"<"EOS"<"POD1": 1 3 3 1 1 3 1 3 1 3 ...
## $ Monocytes.C : num 48.2 20.1 48.7 36.4 15.2 ...
## $ NK : num 0.421 2.007 3.586 2.176 8.347 ...
## $ T.CD8.Memory : num 2.59 10.74 1.81 5.96 14.67 ...
## $ T.CD4.Naive : num 1.57 9.23 2.42 4.61 16.31 ...
## $ T.CD8.Naive : num 11.65 11.69 13.91 12.5 6.73 ...
## $ B.Naive : num 2.158 5.499 0.849 5.065 2.638 ...
## $ T.CD4.Memory : num 15.8 12.9 14.4 16.2 10.1 ...
## $ MAIT : num 0.398 1.474 2.769 1.372 0.525 ...
## $ T.gd.Vd2 : num 1.93 2.05 1.86 2.17 1.85 ...
## $ Neutrophils.LD : num 2.808 3.663 5.722 0.739 14.631 ...
## $ T.gd.non.Vd2 : num 0.473 0.304 0.338 0.519 0.337 ...
## $ Basophils.LD : num 0.74 1.188 0.779 0.343 1.778 ...
## $ Monocytes.NC.I : num 9.98 13.41 2.07 10.35 4.13 ...
## $ B.Memory : num 0.561 4.538 0.205 0.549 1.921 ...
## $ mDCs : num 0.529 0.766 0.45 0.819 0.634 ...
## $ pDCs : num 0.0712 0.3356 0.0858 0.0405 0.075 ...
## $ Plasmablasts : num 0.1371 0.0945 0.1127 0.1229 0.1151 ...
ss3 <- subset(ss2,crp_group==4 & treatment_group==2 & timepoint != "EOS")
ss3$case <- grepl("POD1",ss3$timepoint)
mx <- mx[,colnames(mx) %in% rownames(ss3)]
dim(mx)
## [1] 60649 63
mx <- mx[which(rowMeans(mx)>=10),]
dim(mx)
## [1] 21635 63
# base model
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ case )
## converting counts to integer mode
res <- DESeq(dds,test="LRT",reduced=~1)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 251 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1391.30563 3.715293 0.2045583 275.0285
## ENSG00000132170.24 PPARG 133.12355 3.310490 0.2179034 202.6237
## ENSG00000137869.15 CYP19A1 74.17400 6.571239 0.4247489 179.5044
## ENSG00000170439.7 METTL7B 179.48492 4.893747 0.3280872 170.4416
## ENSG00000138413.14 IDH1 1551.87780 1.177437 0.1009528 134.1505
## ENSG00000163221.9 S100A12 16730.91613 3.703320 0.2946629 131.4332
## ENSG00000109472.14 CPE 11.52067 3.245869 0.2853002 130.2922
## ENSG00000168615.13 ADAM9 1564.93119 1.673417 0.1441738 130.0716
## ENSG00000121316.11 PLBD1 14427.36193 2.145816 0.1827881 129.4533
## ENSG00000135424.18 ITGA7 409.50153 2.180367 0.1895864 124.2631
## pvalue padj
## ENSG00000108950.12 FAM20A 9.099378e-62 1.968650e-57
## ENSG00000132170.24 PPARG 5.588559e-46 6.045424e-42
## ENSG00000137869.15 CYP19A1 6.217852e-41 4.484107e-37
## ENSG00000170439.7 METTL7B 5.925253e-39 3.204821e-35
## ENSG00000138413.14 IDH1 5.064771e-31 2.191526e-27
## ENSG00000163221.9 S100A12 1.990593e-30 7.177748e-27
## ENSG00000109472.14 CPE 3.536855e-30 1.068957e-26
## ENSG00000168615.13 ADAM9 3.952697e-30 1.068957e-26
## ENSG00000121316.11 PLBD1 5.397062e-30 1.297394e-26
## ENSG00000135424.18 ITGA7 7.378083e-29 1.596248e-25
mean(abs(dge$stat))
## [1] 7.737621
# model with PG number as batch
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + case )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced=~PG_number)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000108950.12 FAM20A 1391.3056 3.886896 0.17556813 365.8679
## ENSG00000116574.6 RHOU 1103.0820 1.239058 0.07541680 259.1874
## ENSG00000132170.24 PPARG 133.1236 3.322844 0.18792148 257.4870
## ENSG00000121316.11 PLBD1 14427.3619 2.038051 0.12049015 254.8199
## ENSG00000170439.7 METTL7B 179.4849 5.247994 0.30024921 210.9196
## ENSG00000014257.16 ACP3 835.6431 1.195747 0.08068451 210.9061
## ENSG00000150337.14 FCGR1A 1684.8047 2.207690 0.14485575 200.9897
## ENSG00000168615.13 ADAM9 1564.9312 1.574381 0.10755937 198.4670
## ENSG00000198019.13 FCGR1B 824.1988 2.080885 0.13950435 195.8961
## ENSG00000271605.6 MILR1 880.7529 1.144319 0.08363852 179.7345
## pvalue padj
## ENSG00000108950.12 FAM20A 1.485539e-81 3.213963e-77
## ENSG00000116574.6 RHOU 2.580121e-58 2.791046e-54
## ENSG00000132170.24 PPARG 6.057466e-58 4.368442e-54
## ENSG00000121316.11 PLBD1 2.310405e-57 1.249640e-53
## ENSG00000170439.7 METTL7B 8.654199e-48 3.141813e-44
## ENSG00000014257.16 ACP3 8.713139e-48 3.141813e-44
## ENSG00000150337.14 FCGR1A 1.270175e-45 3.925747e-42
## ENSG00000168615.13 ADAM9 4.511964e-45 1.220204e-41
## ENSG00000198019.13 FCGR1B 1.642260e-44 3.947810e-41
## ENSG00000271605.6 MILR1 5.538548e-41 1.198265e-37
mean(abs(dge$stat))
## [1] 10.07069
tc_hi_b_t0vpod1 <- dge
# model with cell covariates
# Monocytes.C NK T.CD8.Memory T.CD4.Naive Neutrophils.LD
dds <- DESeqDataSetFromMatrix(countData = mx , colData = ss3,
design = ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD + case )
## converting counts to integer mode
## the design formula contains one or more numeric variables that have mean or
## standard deviation larger than 5 (an arbitrary threshold to trigger this message).
## Including numeric variables with large mean can induce collinearity with the intercept.
## Users should center and scale numeric variables in the design to improve GLM convergence.
## factor levels were dropped which had no samples
res <- DESeq(dds,test="LRT",reduced= ~ PG_number + Monocytes.C + NK + T.CD8.Memory + T.CD4.Naive + Neutrophils.LD)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge[order(dge$pvalue),1:6],10)
## baseMean log2FoldChange lfcSE stat
## ENSG00000240247.8 DEFA1B 6706.43307 -3.1402729 0.4696726 1473.7910
## ENSG00000137869.15 CYP19A1 74.17400 6.7234655 0.5637628 904.5017
## ENSG00000281887.3 GIMAP1-GIMAP5 629.98075 -2.4194832 0.6296709 839.7515
## ENSG00000213178.3 RPL22P1 430.37217 -1.5439699 0.5340927 831.2569
## ENSG00000288534.1 TMX2-CTNND1 168.91301 -1.2105872 0.4249622 584.1599
## ENSG00000167434.10 CA4 755.09367 1.6486680 0.5537508 572.9911
## ENSG00000279716.1 AC006128.2 159.11752 -0.9733946 0.4923367 445.3139
## ENSG00000108950.12 FAM20A 1391.30563 3.9369456 0.1853313 316.4386
## ENSG00000132170.24 PPARG 133.12355 3.3464063 0.1936334 235.6933
## ENSG00000227097.5 RPS28P7 77.96304 -5.0451526 1.0456026 214.3986
## pvalue padj
## ENSG00000240247.8 DEFA1B 1.926856e-322 4.168753e-318
## ENSG00000137869.15 CYP19A1 1.030869e-198 1.115142e-194
## ENSG00000281887.3 GIMAP1-GIMAP5 1.229225e-184 8.864760e-181
## ENSG00000213178.3 RPL22P1 8.637833e-183 4.671988e-179
## ENSG00000288534.1 TMX2-CTNND1 4.669107e-129 2.020323e-125
## ENSG00000167434.10 CA4 1.255065e-126 4.525557e-123
## ENSG00000279716.1 AC006128.2 7.550148e-99 2.333535e-95
## ENSG00000108950.12 FAM20A 8.643089e-71 2.337415e-67
## ENSG00000132170.24 PPARG 3.418169e-53 8.216898e-50
## ENSG00000227097.5 RPS28P7 1.507504e-48 3.261485e-45
mean(abs(dge$stat))
## [1] 8.504849
tc_hi_b_t0vpod1_adj <- dge
This one is getting long, so I will continue downstream analysis in a separate script.
For reproducibility
save.image("tca_pairwise.Rdata")
sessionInfo()
## R version 4.4.3 (2025-02-28)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] eulerr_7.0.2 MASS_7.3-65
## [3] mitch_1.19.3 DESeq2_1.44.0
## [5] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [7] MatrixGenerics_1.16.0 matrixStats_1.5.0
## [9] GenomicRanges_1.56.2 GenomeInfoDb_1.40.1
## [11] IRanges_2.38.1 S4Vectors_0.42.1
## [13] BiocGenerics_0.50.0 dplyr_1.1.4
## [15] WGCNA_1.73 fastcluster_1.2.6
## [17] dynamicTreeCut_1.63-1 reshape2_1.4.4
## [19] gplots_3.2.0
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 bitops_1.0-9 gridExtra_2.3
## [4] echarts4r_0.4.5 rlang_1.1.5 magrittr_2.0.3
## [7] compiler_4.4.3 RSQLite_2.3.9 systemfonts_1.2.1
## [10] png_0.1-8 vctrs_0.6.5 stringr_1.5.1
## [13] pkgconfig_2.0.3 crayon_1.5.3 fastmap_1.2.0
## [16] backports_1.5.0 XVector_0.44.0 caTools_1.18.3
## [19] promises_1.3.2 rmarkdown_2.29 UCSC.utils_1.0.0
## [22] preprocessCore_1.66.0 purrr_1.0.4 bit_4.6.0
## [25] xfun_0.51 zlibbioc_1.50.0 cachem_1.1.0
## [28] jsonlite_1.9.1 blob_1.2.4 later_1.4.1
## [31] DelayedArray_0.30.1 BiocParallel_1.38.0 parallel_4.4.3
## [34] cluster_2.1.8.1 R6_2.6.1 RColorBrewer_1.1-3
## [37] bslib_0.9.0 stringi_1.8.4 GGally_2.2.1
## [40] rpart_4.1.24 jquerylib_0.1.4 Rcpp_1.0.14
## [43] iterators_1.0.14 knitr_1.50 base64enc_0.1-3
## [46] httpuv_1.6.15 Matrix_1.7-3 splines_4.4.3
## [49] nnet_7.3-20 tidyselect_1.2.1 rstudioapi_0.17.1
## [52] abind_1.4-8 yaml_2.3.10 doParallel_1.0.17
## [55] codetools_0.2-20 lattice_0.22-6 tibble_3.2.1
## [58] plyr_1.8.9 shiny_1.10.0 KEGGREST_1.44.1
## [61] evaluate_1.0.3 foreign_0.8-89 survival_3.8-3
## [64] ggstats_0.9.0 xml2_1.3.8 Biostrings_2.72.1
## [67] pillar_1.10.1 KernSmooth_2.23-26 checkmate_2.3.2
## [70] foreach_1.5.2 generics_0.1.3 ggplot2_3.5.1
## [73] munsell_0.5.1 scales_1.3.0 xtable_1.8-4
## [76] gtools_3.9.5 glue_1.8.0 Hmisc_5.2-3
## [79] tools_4.4.3 data.table_1.17.0 locfit_1.5-9.12
## [82] grid_4.4.3 impute_1.80.0 tidyr_1.3.1
## [85] AnnotationDbi_1.66.0 colorspace_2.1-1 GenomeInfoDbData_1.2.12
## [88] beeswarm_0.4.0 htmlTable_2.4.3 Formula_1.2-5
## [91] cli_3.6.4 kableExtra_1.4.0 viridisLite_0.4.2
## [94] S4Arrays_1.4.1 svglite_2.1.3 gtable_0.3.6
## [97] sass_0.4.9 digest_0.6.37 SparseArray_1.4.8
## [100] htmlwidgets_1.6.4 memoise_2.0.1.9000 htmltools_0.5.8.1
## [103] lifecycle_1.0.4 httr_1.4.7 mime_0.13
## [106] GO.db_3.19.1 bit64_4.6.0-1