Load data
Also putting the columns in order.
Code
pg1 <- read.table ("pg_ml.tsv" ,sep= " \t " ,header= TRUE ,row.names= 1 )
pg2 <- read.table ("pg_ml_part2.tsv" ,sep= " \t " ,header= TRUE ,row.names= 1 )
pg <- cbind (pg1,pg2)
pg[1 : 4 ,]
b.NGF CTACK Eotaxin FGF.basic G.CSF GM.CSF GRO.a HGF IFN.a2 IFN.g
2001 23.60 282.95 34.43 40.54 80.60 7.46 1345.60 253.58 NA NA
2002 NA 452.07 57.31 40.54 107.43 12.48 1270.00 253.58 NA NA
2003 22.52 203.75 45.29 40.54 107.43 3.33 966.54 225.76 NA 6.23
2004 18.16 588.87 44.78 59.24 96.93 NA 1036.02 239.77 NA 6.23
IL.1a IL.1b IL.1ra IL.2 IL.2Ra IL.3 IL.4 IL.5 IL.6 IL.7 IL.8 IL.9
2001 4.73 0.45 324.85 8.44 91.34 3.22 8.37 NA 1.56 NA 13.54 585.45
2002 25.09 2.81 610.80 7.15 65.42 4.99 NA 365.01 2.84 NA 18.41 517.99
2003 4.73 2.26 374.34 7.15 42.05 5.40 4.37 NA 5.08 NA 12.13 413.08
2004 4.73 1.69 183.04 9.70 63.66 2.22 7.69 NA NA NA 9.26 346.61
IL.10 IL.12.p70. IL.12.p40. IL.13 IL.15 IL.16 IL.17A IL.18 IP.10 LIF
2001 NA 0.38 123.44 NA 529.04 127.76 11.83 79.69 221.82 46.45
2002 3.27 4.49 123.44 NA NA 112.55 9.65 NA 224.25 70.52
2003 8.82 11.13 55.15 1.6 638.16 96.79 0.79 NA 125.81 58.79
2004 NA 2.60 325.39 NA 294.37 127.76 11.83 NA 215.31 92.69
M.CSF MCP.1.MCAF. MCP.3 MIF MIG MIP.1a MIP.1b PDGF.bb RANTES SCF
2001 23.64 89.85 2.25 1559.62 226.93 1.86 278.07 608.61 4560.55 49.56
2002 44.19 115.49 NA 1007.59 180.49 2.73 245.57 525.17 4049.74 61.52
2003 29.78 33.79 NA 1847.22 208.32 2.11 138.60 525.17 650.50 80.77
2004 35.77 82.46 NA 918.96 280.73 1.86 164.51 567.11 2030.22 72.26
SCGF.b SDF.1a TNF.a TNF.b TRAIL VEGF VEGF.D Ghrelin MMP.13
2001 133243.15 1426.25 67.00 710.67 13.93 327.54 159.4830 142.5643 70.97097
2002 92790.18 1648.88 40.22 696.51 389.79 425.02 189.3592 262.1537 44.53349
2003 94154.71 972.72 36.78 252.92 NA 511.57 272.1746 NA 777.39888
2004 126081.46 1578.06 36.78 388.74 NA 425.02 272.1746 2973.9200 658.18179
Collagen TGF.b
2001 33344.52 7588.821
2002 26524.18 5731.419
2003 31607.60 8595.072
2004 33834.11 5292.231
Code
pg <- t (pg)
pg <- pg[,order (colnames (pg))]
pg[1 : 6 ,1 : 8 ]
1001 1002 1003 1004 1005 1006 1007 1008
b.NGF 6.03 20.81 13.39 13.39 22.04 15.86 15.86 14.63
CTACK 317.46 459.95 328.23 345.01 547.81 378.08 704.72 338.93
Eotaxin 32.08 47.89 76.44 78.25 74.74 47.59 85.15 102.43
FGF.basic 120.72 71.84 82.73 71.84 59.13 59.13 59.13 16.68
G.CSF 167.73 99.35 99.35 131.83 115.83 126.55 121.22 99.35
GM.CSF NA NA NA NA NA NA NA 11.02
Sample sheet
Here are the groups:
10**: Adhesive capsulitis (not for inclusion)
20**: Control Instability
30**: Rotator cuff (for inclusion)
40**: Case Osteoarthritis
Also I’m putting the rows in order.
Code
ss <- read.table ("samplesheet.tsv" ,header= TRUE ,row.names= 1 ,sep= " \t " )
ss <- ss[order (rownames (ss)),]
ss %>% kbl (caption = "Sample sheet" ) %>% kable_styling (full_width = FALSE )
Sample sheet
Participant.ID
Patient_Group
Redcap_record_ID
pvt.pub
Age
Sex
Primary.OA
Cuff_Arthropathy
Prosthesis
Affected_Side
Height_cm
Weight_kg
BMI
ASA
Smoking
Diabetes
Hypercholesterolaemia
Hypertension
Thyroid
CRP
Creat
eGFR
Urea
Fast_Glucose
hba1c
Metabolic.Syndrome
2001
AD-CAB 2001
Control_Instability
27
pub
18
M
Left
188
80.0
22.6
2
Yes
No
No
No
No
5.2
87
90
3.9
4.9
5.4
No
2002
AD-CAB 2002
Control_Instability
28
pub
22
F
Right
164
73.0
27.1
1
No
No
No
No
No
2.9
58
90
3.6
3.4
4.8
No
2003
AD-CAB 2003
Control_Instability
29
pvt
21
M
Right
178
105.0
33.0
1
Yes
No
na
No
Yes
3.0
85
90
5.8
4.3
5.1
No
2004
AD-CAB 2004
Control_Instability
30
pvt
19
M
Left
181
94.5
29.0
1
No
No
No
No
Normal
2.9
69
90
5.7
4.8
5.4
No
2005
AD-CAB 2005
Control_Instability
31
27
M
Right
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
No
2006
AD-CAB 2006
Control_Instability
32
pub
19
F
Left
160
47.0
18.4
1
Yes
No
No
No
No
8.2
79
90
3.8
4.2
4.9
No
2007
AD-CAB 2007
Control_Instability
33
pub
18
M
Right
180
74.0
22.8
1
No
No
na
No
Yes
2.9
81
90
5.5
6.6
4.9
No
2008
AD-CAB 2008
Control_Instability
34
pub
21
M
Right
195
129.0
33.9
1
No
No
No
No
No
2.9
96
90
5.9
4.9
4.7
No
2009
AD-CAB 2009
Control_Instability
35
pub
18
M
Right
192
98.0
26.6
2
Yes
No
No
No
No
2.9
82
90
4.1
4.6
5.7
No
2010
AD-CAB 2010
Control_Instability
36
pub
21
M
Left
180
115.0
35.5
2
Yes
No
No
No
No
3.2
84
90
5.4
4.4
5.3
No
2011
AD-CAB 2011
Control_Instability
37
pub
30
M
Left
178
82.0
25.9
1
Ex
No
No
No
No
2.9
94
90
8.2
5.1
5.3
No
2012
AD-CAB 2012
Control_Instability
38
pvt
35
M
Left
188
75.0
21.2
1
No
No
na
No
No
2.9
106
78
4.7
5.4
5.2
No
2013
AD-CAB 2013
Control_Instability
39
pub
21
M
Left
175
96.0
31.3
1
No
No
No
No
No
NA
93
90
4.1
4.6
4.9
No
2014
AD-CAB 2014
Control_Instability
40
pub
29
M
Right
185
105.0
30.7
1
No
No
No
No
No
2.9
81
90
6.5
5.0
4.6
No
2015
AD-CAB 2015
Control_Instability
41
pub
23
M
Left
175
82.0
26.8
1
Yes
No
No
No
No
2.9
94
90
4.5
4.1
4.9
No
2016
AD-CAB 2016
Control_Instability
42
pvt
35
M
Right
187
80.0
22.9
1
No
No
No
No
No
2.9
103
81
4.9
4.5
5.0
No
2017
AD-CAB 2017
Control_Instability
43
pub
19
M
Right
195
118.0
31.0
2
No
No
No
No
No
2.9
95
90
6.2
4.2
4.5
No
2018
AD-CAB 2018
Control_Instability
44
pub
27
M
Right
170
82.0
28.4
1
No
No
No
No
No
NA
95
90
6.2
5.4
5.5
No
2019
AD-CAB 2019
Control_Instability
45
pub
27
M
Left
180
76.0
23.5
1
Ex
No
NA
No
No
2.9
65
90
6.9
4.5
4.9
No
2020
AD-CAB 2020
Control_Instability
46
pub
20
M
Left
181
85.0
25.9
1
No
No
Normal
no
No
2.9
105
86
5.8
4.4
5.0
No
2021
AD-CAB 2021
Control_Instability
47
pub
18
M
Left
176
65.0
21.0
1
No
No
No
No
No
2.9
68
90
4.5
4.3
4.8
No
2022
AD-CAB 2022
Control_Instability
48
pvt
32
F
Left
166
66.0
24.0
1
No
No
na
No
Yes
2.9
52
90
3.1
4.1
5.3
No
2023
AD-CAB 2023
Control_Instability
49
pvt
27
F
Left
170
66.0
22.8
1
No
No
No
No
No
2.9
91
75
5.5
4.3
4.9
No
2024
AD-CAB 2024
Control_Instability
50
pvt
21
M
Right
189
83.0
23.2
1
No
No
na
No
No
2.9
87
90
3.9
4.7
4.9
No
2025
AD-CAB 2025
Control_Instability
51
pvt
21
M
Right
173
92.0
31.0
1
Yes
No
na
No
No
2.9
71
90
4.1
4.7
5.8
No
2026
AD-CAB 2026
Control_Instability
52
pub
23
F
Left
169
57.0
20.0
2
Yes
No
No
No
Yes
2.9
68
90
4.5
4.6
5.1
No
4001
AD-CAB 4001
Case_Osteoarthritis
84
pvt
69
F
Yes
No
Anatomic
Left
158
89.0
36.0
2
No
No
Yes
Yes
Yes
2.9
66
82
5.2
5.7
6.1
YES
4002
AD-CAB 4002
Case_Osteoarthritis
80
pvt
59
M
Yes
No
Anatomic
Left
182
106.0
32.0
1
Ex
No
Yes
No
No
9.3
69
90
4.3
6.2
5.4
YES
4003
AD-CAB 4003
Case_Osteoarthritis
81
pvt
74
F
No
Yes
Reverse
Right
153
87.0
37.0
2
Ex
No
Yes
Yes
No
6.9
77
66
5.0
5.4
5.4
YES
4004
AD-CAB 4004
Case_Osteoarthritis
82
pvt
69
F
Yes
No
Anatomic
Left
165
85.0
32.0
2
No
No
Yes
Yes
Yes
3.9
61
89
5.8
5.8
5.6
YES
4005
AD-CAB 4005
Case_Osteoarthritis
83
pvt
74
F
Yes
No
Anatomic
Left
169
75.0
26.0
2
Ex
No
Yes
Yes
No
2.9
90
72
6.6
5.2
5.7
No
4006
AD-CAB 4006
Case_Osteoarthritis
78
pvt
81
F
Yes
No
Anatomic
Right
150
54.0
24.0
3
No
No
Yes
Yes
No
2.9
105
43
10.5
5.6
6.0
No
4007
AD-CAB 4007
Case_Osteoarthritis
85
pvt
72
M
Yes
No
Anatomic
Right
184
118.0
35.0
2
Ex
No
Yes
Yes
No
12.7
66
90
5.8
5.6
5.6
YES
4008
AD-CAB 4008
Case_Osteoarthritis
86
pub
70
M
Yes
No
Anatomic
Right
172
86.0
29.0
2
No
No
No
Yes
No
2.9
103
64
7.9
5.6
5.6
No
4009
AD-CAB 4009
Case_Osteoarthritis
88
pvt
72
M
No
Yes
Reverse
Right
167
80.0
29.0
3
Ex
No
Yes
Yes
No
2.9
107
59
6.7
5.8
5.5
No
4010
AD-CAB 4010
Case_Osteoarthritis
90
pvt
80
F
No
Yes
Reverse
Left
161
72.0
28.0
2
No
No
Yes
No
Yes
11.2
54
86
8.5
7.3
5.6
No
4011
AD-CAB 4011
Case_Osteoarthritis
87
pub
76
M
No
Yes
Reverse
Right
180
92.0
28.0
2
No
No
Yes
Yes
No
4.0
75
85
7.0
6.0
6.0
No
4012
AD-CAB 4012
Case_Osteoarthritis
91
pub
71
F
Yes
No
Anatomic
Right
163
83.0
31.0
2
No
No
Yes
Yes
No
2.9
73
72
4.8
6.3
5.7
YES
4013
AD-CAB 4013
Case_Osteoarthritis
89
pvt
69
F
No
Yes
Reverse
Left
170
89.0
31.0
2
No
No
Yes
Yes
No
2.9
81
85
5.1
5.4
5.9
YES
4014
AD-CAB 4014
Case_Osteoarthritis
92
pvt
71
M
No
Yes
Reverse
Right
170
99.0
34.0
2
Yes
No
Yes
No
Yes
2.9
69
90
7.0
7.0
7.1
YES
4015
AD-CAB 4015
Case_Osteoarthritis
93
pvt
76
M
Yes
No
Anatomic
Left
183
95.0
28.0
2
Ex
No
Yes
Yes
No
5.6
90
71
6.6
5.5
5.0
No
4016
AD-CAB 4016
Case_Osteoarthritis
99
pvt
76
F
Yes
No
Anatomic
Left
167
54.0
19.0
3
Ex
No
No
No
Yes
2.9
60
86
11.0
4.7
5.3
No
4017
AD-CAB 4017
Case_Osteoarthritis
96
pvt
79
F
No
Yes
Reverse
Right
155
75.0
31.0
2
No
No
No
No
No
9.2
71
70
4.5
5.5
5.1
No
4018
AD-CAB 4018
Case_Osteoarthritis
97
pvt
73
F
Yes
No
Anatomic
Right
167
85.0
30.0
3
Yes
No
Yes
No
No
1.5
78
65
7.3
5.4
5.9
No
4019
AD-CAB 4019
Case_Osteoarthritis
95
pvt
65
F
Yes
No
Anatomic
Left
170
62.0
21.0
2
No
No
No
Yes
No
2.9
103
49
11.0
5.3
5.2
No
4020
AD-CAB 4020
Case_Osteoarthritis
98
pvt
62
M
Yes
No
Anatomic
Left
180
117.0
36.0
2
No
No
Yes
No
No
2.9
80
90
4.9
6.9
5.9
YES
To investigate if secreted proteins and biomarkers in plasma are associated with:
Differentially expressed cytokines in patients with rotator cuff tears who heal/don’t heal
Primary osteoarthritis versus control instability
Rotator cuff arthropathy versus control instability
My custom analysis
Basic analysis.
Need to remove 10** samples from the analysis.
Code
Code
pg <- pg[,grep ("^10" ,colnames (pg),invert= TRUE )]
dim (pg)
Code
colcount <- apply (pg,2 , function (x) {
length (which (! is.na (x)))
})
head (colcount)
2001 2002 2003 2004 2005 2006
47 44 46 43 49 43
Code
barplot (colcount,main= "num cytokines detected per sample" )
Code
rowcount <- apply (pg,1 , function (x) {
length (which (! is.na (x)))
})
head (rowcount)
b.NGF CTACK Eotaxin FGF.basic G.CSF GM.CSF
67 72 72 69 72 44
Code
barplot (rowcount,main= "number of samples with a cytokine detected" )
Code
rowcount <- rowcount[order (rowcount)]
barplot (head (rowcount,20 ),horiz = TRUE , las= 1 ,cex.names = 0.8 ,
xlab= "num samples" ,main= "detection rate" )
MDS plot
Running an MDS plot.
It shows some variability, in particular 4015, 4012, 2007 and some others are not clustered with the bulk of samples.
Code
mds <- cmdscale (dist (t (pg)))
cols <- sapply (strsplit (colnames (pg),"" ),"[[" ,1 )
cols <- gsub ("2" ,"green" ,cols)
cols <- gsub ("1" ,"lightblue" ,cols)
cols <- gsub ("3" ,"pink" ,cols)
cols <- gsub ("4" ,"darkgray" ,cols)
plot (mds, xlab= "Coordinate 1" , ylab= "Coordinate 2" ,
col= cols, cex= 4 , pch= 19 , main= "MDS plot all samples (not transformed)" ,
bty= "n" )
text (mds,labels = colnames (pg))
Now log transform the data and repeat.
It certainly looks better, as there is some clustering of the grops happening. toward the left are the control instability samples (20), on the right are the pink rotator cuff samples (30 ) and intermediate are the osteoarthritis samples (40**).
Code
lpg <- log (pg)
mds <- cmdscale (dist (t (lpg)))
cols <- sapply (strsplit (colnames (pg),"" ),"[[" ,1 )
cols <- gsub ("2" ,"green" ,cols)
cols <- gsub ("1" ,"lightblue" ,cols)
cols <- gsub ("3" ,"pink" ,cols)
cols <- gsub ("4" ,"darkgray" ,cols)
plot (mds, xlab= "Coordinate 1" , ylab= "Coordinate 2" ,
col= cols, cex= 4 , pch= 19 , main= "MDS plot all samples (log transformed)" ,
bty= "n" )
text (mds,labels = colnames (pg))
Now make an MDS plot based on the analytes, rather than samples.
Code
mds <- cmdscale (dist (lpg))
cols <- sapply (strsplit (colnames (pg),"" ),"[[" ,1 )
cols <- gsub ("2" ,"green" ,cols)
cols <- gsub ("1" ,"lightblue" ,cols)
cols <- gsub ("3" ,"pink" ,cols)
cols <- gsub ("4" ,"darkgray" ,cols)
plot (mds, xlab= "Coordinate 1" , ylab= "Coordinate 2" ,
col= "gray" , cex= 4 , pch= 19 , main= "MDS plot all cytokines (log transformed)" ,
bty= "n" )
text (mds,labels = rownames (pg))
Correlation heatmap
This can identify groups of samples with similar profiles, or groups of analytes that are co-regulated.
Code
colfunc <- colorRampPalette (c ("white" , "yellow" , "red" ))
mc <- cor (lpg,method= "pearson" ,use = "pairwise.complete" )
heatmap.2 ( mc, col= colfunc (25 ),scale= "none" ,
trace= "none" ,margins = c (5 ,5 ), cexRow= 0.6 , cexCol= 0.6 , main= "Pearson correlation of samples" )
Code
mc <- cor (lpg,method= "spearman" ,use = "pairwise.complete" )
heatmap.2 ( mc, col= colfunc (25 ),scale= "none" ,
trace= "none" ,margins = c (5 ,5 ), cexRow= 0.6 , cexCol= 0.6 , main= "Spearman correlation of samples" )
Code
mc2 <- cor (t (lpg),method= "pearson" ,use = "pairwise.complete" )
heatmap.2 ( mc2, col= colfunc (25 ),scale= "none" ,
trace= "none" ,margins = c (5 ,5 ), cexRow= 0.6 , cexCol= 0.8 , main= "Pearson correlation of cytokines" )
Code
mc2 <- cor (t (lpg),method= "spearman" ,use = "pairwise.complete" )
heatmap.2 ( mc2, col= colfunc (25 ),scale= "none" ,
trace= "none" ,margins = c (5 ,5 ), cexRow= 0.6 , cexCol= 0.8 , main= "Spearman correlation of cytokines" )
Heatmap of all measurements
Code
colfunc <- colorRampPalette (c ("yellow" , "red" ))
heatmap.2 ( lpg, col= colfunc (25 ),scale= "none" ,
trace= "none" ,margins = c (5 ,5 ), cexRow= 0.6 , cexCol= 0.6 ,
main= "Heatmap of all measurements - no scaling" )
Signatures of healing in rotator cuff patients
TODO: Differentially expressed cytokines in patients with rotator cuff tears who heal/don’t heal
Primary osteoarthritis versus control instability
Using a t-test approach
Code
# filter ss and lpg for only OA and ctrl samples
ss1 <- subset (ss,Patient_Group== "Case_Osteoarthritis" | Patient_Group== "Control_Instability" )
lpg1 <- lpg[,colnames (lpg) %in% rownames (ss1)]
ss1 <- ss1[rownames (ss1) %in% colnames (lpg1),]
case <- factor (ss1$ Patient_Group,levels= c ("Control_Instability" ,"Case_Osteoarthritis" ))
sex <- factor (ss1$ Sex)
age <- scale (ss1$ Age)
# ttest approach (control vs case)
res <- lapply (1 : nrow (lpg1), function (i) {
cy <- lpg1[i,]
NAME <- rownames (lpg1)[i]
ttest <- t.test (cy ~ case )
LFC <- log2 (ttest$ estimate[2 ]/ ttest$ estimate[1 ])
P <- signif (ttest$ p.value,3 )
BASEMEAN <- mean (ttest$ estimate[2 ]/ ttest$ estimate[1 ])
res <- c (BASEMEAN,LFC,P)
names (res) <- c ("basemean" ,"log2fc" ,"pvalue" )
return (res)
})
names (res) <- rownames (lpg1)
resdf <- as.data.frame (do.call (rbind,res))
resdf$ fdr <- p.adjust (resdf$ pvalue)
resdf <- resdf[order (resdf$ pvalue),]
head (resdf,15 ) %>%
kbl (caption = "cytokine t-test results" ) %>%
kable_styling (full_width = FALSE )
cytokine t-test results
basemean
log2fc
pvalue
fdr
MIP.1a
1.4621765
0.5481175
6.90e-05
0.0036570
MIG
1.1176339
0.1604477
9.24e-05
0.0048048
SCF
1.1015783
0.1395720
8.80e-04
0.0448800
CTACK
1.0657225
0.0918318
1.49e-03
0.0745000
IL.8
1.2513194
0.3234501
2.55e-03
0.1249500
MCP.1.MCAF.
1.0762154
0.1059669
3.27e-03
0.1569600
IP.10
1.0746904
0.1039211
8.04e-03
0.3778800
MMP.13
0.8261946
-0.2754464
1.83e-02
0.8418000
Ghrelin
0.7755925
-0.3666293
2.42e-02
1.0000000
Collagen
0.9870389
-0.0188212
3.18e-02
1.0000000
PDGF.bb
0.9615476
-0.0565699
3.22e-02
1.0000000
G.CSF
1.0909800
0.1256247
4.34e-02
1.0000000
IL.10
0.7104481
-0.4931988
4.46e-02
1.0000000
IL.1ra
1.0530692
0.0746003
4.70e-02
1.0000000
TGF.b
0.9761830
-0.0347764
7.54e-02
1.0000000
Volcano plot.
Code
sig <- subset (resdf,fdr< 0.05 )
plot (resdf$ log2fc,- log (resdf$ pvalue),pch= 19 ,
xlab= "log2 fold change" ,ylab= "-log10(pvalue)" ,
main= "volcano plot (ctrl vs OA)" )
points (sig$ log2fc,- log (sig$ pvalue),col= "red" ,pch= 19 )
text (sig$ log2fc,- log (sig$ pvalue)+ 0.25 ,
labels= rownames (sig))
Boxplot of top results.
Code
cys <- rownames (head (resdf,10 ))
null <- lapply (cys,function (cyname) {
cy <- lpg1[rownames (lpg1) == cyname,]
cydat <- unlist (as.vector (resdf[rownames (resdf) == cyname,]))
BASEMEAN <- signif (cydat[1 ],3 )
LFC <- signif (cydat[2 ],3 )
P <- signif (cydat[3 ],3 )
FDR <- signif (cydat[4 ],3 )
boxplot (cy ~ case ,col= "white" ,main= cyname)
beeswarm (cy ~ case,add= TRUE ,pch= 19 ,cex= 1.2 )
mtext (paste ("p =" ,P,"FDR =" ,FDR,"log2FC =" ,LFC,"basemean =" ,BASEMEAN))
})
Next I will use a GLM approach. First I will look at the sex and age in the different groups.
Code
case <- factor (ss1$ Patient_Group,levels= c ("Control_Instability" ,"Case_Osteoarthritis" ))
sex <- factor (ss1$ Sex)
age <- as.vector (scale (ss1$ Age))
ctrl_age <- age[case== "Control_Instability" ]
case_age <- age[case== "Case_Osteoarthritis" ]
boxplot (list ("ctrl" = ctrl_age,"case" = case_age),ylab= "age(scaled)" )
Code
age <- ss1$ Age
ctrl_age <- age[case== "Control_Instability" ]
case_age <- age[case== "Case_Osteoarthritis" ]
boxplot (list ("ctrl" = ctrl_age,"case" = case_age),ylab= "age(unscaled)" )
Code
age <- as.vector (scale (ss1$ Age))
Now look at the balance of sexes in the control and case groups.
Code
ctrl <- table (sex[case== "Control_Instability" ])
case <- table (sex[case== "Case_Osteoarthritis" ])
sexes <- cbind (ctrl,case)
barplot (sexes,col= c ("pink" ,"lightblue" ),
main= "sex balance between groups" ,
ylab= "n participants" )
legend ("topright" , legend= c ("male" , "female" ),
fill= c ("lightblue" , "pink" ), cex= 1.2 )
Now run the GLMs.
Code
# glm approach
case <- factor (ss1$ Patient_Group,levels= c ("Control_Instability" ,"Case_Osteoarthritis" ))
sex <- factor (ss1$ Sex)
age <- scale (ss1$ Age)
i= 2
cy <- lpg1[i,]
NAME <- rownames (lpg)[i]
myglm <- glm (cy ~ case + sex + age )
summary (myglm)
Call:
glm(formula = cy ~ case + sex + age)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.97850 -0.18549 0.02287 0.27734 0.69530
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.086011 0.252494 24.104 <2e-16 ***
caseCase_Osteoarthritis -0.363854 0.531904 -0.684 0.498
sexM 0.006657 0.130693 0.051 0.960
age 0.384140 0.272319 1.411 0.166
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.1429479)
Null deviance: 7.5709 on 43 degrees of freedom
Residual deviance: 5.7179 on 40 degrees of freedom
AIC: 45.081
Number of Fisher Scoring iterations: 2
Code
P <- summary (aov (myglm))[[1 ]][["Pr(>F)" ]][1 ]
glmres <- lapply (rownames (resdf), function (cyname) {
cy <- lpg1[rownames (lpg1) == cyname,]
fit <- glm (cy ~ sex + age + case)
summary (fit)
P <- summary (aov (fit))[[1 ]][["Pr(>F)" ]][1 ]
})
glmres <- abs (unname (unlist (glmres)))
resdf$ pglm <- unname (unlist (glmres))
resdf$ fdrglm <- p.adjust (resdf$ pglm)
head (resdf,15 ) %>%
kbl (caption = "cytokine t-test and GLM results" ) %>%
kable_styling (full_width = FALSE )
cytokine t-test and GLM results
basemean
log2fc
pvalue
fdr
pglm
fdrglm
MIP.1a
1.4621765
0.5481175
6.90e-05
0.0036570
0.0498450
1.0000000
MIG
1.1176339
0.1604477
9.24e-05
0.0048048
0.0530477
1.0000000
SCF
1.1015783
0.1395720
8.80e-04
0.0448800
0.0063282
0.3290685
CTACK
1.0657225
0.0918318
1.49e-03
0.0745000
0.1280656
1.0000000
IL.8
1.2513194
0.3234501
2.55e-03
0.1249500
0.0691095
1.0000000
MCP.1.MCAF.
1.0762154
0.1059669
3.27e-03
0.1569600
0.6514741
1.0000000
IP.10
1.0746904
0.1039211
8.04e-03
0.3778800
0.2279104
1.0000000
MMP.13
0.8261946
-0.2754464
1.83e-02
0.8418000
0.0324642
1.0000000
Ghrelin
0.7755925
-0.3666293
2.42e-02
1.0000000
0.0677190
1.0000000
Collagen
0.9870389
-0.0188212
3.18e-02
1.0000000
0.0007533
0.0399253
PDGF.bb
0.9615476
-0.0565699
3.22e-02
1.0000000
0.0140289
0.7154722
G.CSF
1.0909800
0.1256247
4.34e-02
1.0000000
0.4451483
1.0000000
IL.10
0.7104481
-0.4931988
4.46e-02
1.0000000
0.1019231
1.0000000
IL.1ra
1.0530692
0.0746003
4.70e-02
1.0000000
0.0998255
1.0000000
TGF.b
0.9761830
-0.0347764
7.54e-02
1.0000000
0.6190283
1.0000000
Look at the effect of the other covariates on the data.
Code
res_sex <- lapply (rownames (resdf), function (cyname) {
cy <- lpg1[rownames (lpg1) == cyname,]
fit <- glm (cy ~ sex)
summary (fit)
COEF <- coef (summary (fit))[,1 ][2 ]
})
res_sex <- abs (unname (unlist (res_sex)))
res_age <- lapply (rownames (resdf), function (cyname) {
cy <- lpg1[rownames (lpg1) == cyname,]
fit <- glm (cy ~ age)
summary (fit)
COEF <- coef (summary (fit))[,1 ][2 ]
})
res_age <- abs (unname (unlist (res_age)))
res_case <- lapply (rownames (resdf), function (cyname) {
cy <- lpg1[rownames (lpg1) == cyname,]
fit <- glm (cy ~ case)
summary (fit)
COEF <- coef (summary (fit))[,1 ][2 ]
})
res_case <- abs (unname (unlist (res_case)))
resl <- list ("sex" = res_sex,"age" = res_age,"case" = res_case)
vioplot (resl,ylab= "absolute GLM estimates" )
Code
Code
barplot (sapply (resl,sum),ylab= "sum of GLM estimates" )
Rotator cuff arthropathy versus control instability