Non-linear regression of Elisa data

Introduction

Need more accurate approaches to quantify differences in plasma concentration of analytes that are tolerant of outliers. My suggestion was to generate non-linear regression curves of Elisa data from each visit and use a statistical test to determine the difference.

library("kableExtra")

Example regression

Exponential decay non-linear regression in R

set.seed(1)
x <- 1:20
y <- 5 * exp(-0.3 * x) + 0.5 + rnorm(20, 0, 0.2)
df <- data.frame(x, y)

df |>
  kbl(caption="Synthetic dataset") |>
  kable_paper("hover", full_width = F)

Synthetic dataset
x	y
1	4.0788003
2	3.2807868
3	2.3657226
4	2.3250272
5	1.6815524
6	1.1624008
7	1.2097680
8	1.1012547
9	0.9511838
10	0.6878577
11	0.9867721
12	0.7145873
13	0.4769614
14	0.1320379
15	0.7805312
16	0.5321620
17	0.5272457
18	0.7113501
19	0.6809741
20	0.6311740

fit <- nls(
  y ~ a * exp(-b * x) + c,
  data = df,
  start = list(a = 5, b = 0.3, c = 0.1)
)
summary(fit)

## 
## Formula: y ~ a * exp(-b * x) + c
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  4.74807    0.28013   16.95 4.39e-12 ***
## b  0.28583    0.02858   10.00 1.55e-08 ***
## c  0.53575    0.07390    7.25 1.36e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1887 on 17 degrees of freedom
## 
## Number of iterations to convergence: 3 
## Achieved convergence tolerance: 5.776e-06

plot(df$x, df$y)
curve(coef(fit)["a"] * exp(-coef(fit)["b"] * x) + coef(fit)["c"],
      add = TRUE, lwd = 2, col = "red")

That looks great. Now try fitting a combined model and use an ANOVA (nested-model comparison).

set.seed(100)
x <- 1:20
y <-c( 5 * exp(-0.3 * x) + 0.5 + rnorm(20, 0, 0.2) ,
     10 * exp(-0.5 * x) + 0.5 + rnorm(20, 1, 0.2) )
x <- rep(x,2)

df0 <- data.frame(x, y)
df0$group <- c(rep("A",20),rep("B",20))

df0 |>
  kbl(caption="Synthetic dataset2") |>
  kable_paper("hover", full_width = F)

Synthetic dataset2
x	y	group
1	4.1036526	A
2	3.2703644	A
3	2.5170649	A
4	2.1833280	A
5	1.6390451	A
6	1.3902205	A
7	0.9959240	A
8	1.0964963	A
9	0.6709757	A
10	0.6769629	A
11	0.7023931	A
12	0.6558735	A
13	0.5608828	A
14	0.7229460	A
15	0.5802209	A
16	0.5352854	A
17	0.4527129	A
18	0.6247542	A
19	0.3339670	A
20	0.9744531	A
1	7.4776886	B
2	5.3316065	B
3	3.7836939	B
4	3.0080338	B
5	2.1579742	B
6	1.9101806	B
7	1.6579295	B
8	1.7293453	B
9	1.3795441	B
10	1.6167947	B
11	1.5226450	B
12	1.8762626	B
13	1.4874485	B
14	1.4868801	B
15	1.3675280	B
16	1.4589958	B
17	1.5386162	B
18	1.5846988	B
19	1.7138290	B
20	1.6944944	B

fit_null <- nls(
  y ~ a * exp(-b * x) + c,
  data = df0,
  start = list(a=5, b=0.3, c=0.5)
)

fit_alt <- nls(
  y ~ (a1 + (a2 - a1) * (group == "B")) *
       exp(-(b1 + (b2 - b1) * (group == "B")) * x) +
       (c1 + (c2 - c1) * (group == "B")),
  data = df0,
  start = list(a1=5, a2=5, b1=0.3, b2=0.3, c1=0.5, c2=0.5)
)

anova(fit_null, fit_alt)

## Analysis of Variance Table
## 
## Model 1: y ~ a * exp(-b * x) + c
## Model 2: y ~ (a1 + (a2 - a1) * (group == "B")) * exp(-(b1 + (b2 - b1) * (group == "B")) * x) + (c1 + (c2 - c1) * (group == "B"))
##   Res.Df Res.Sum Sq Df Sum Sq F value    Pr(>F)    
## 1     37    15.6108                                
## 2     34     0.7868  3 14.824  213.52 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Data for patient 82

Now we will look at some real data.

df1 <- read.table("pat82_visit1.tsv",header=TRUE)
x1 <- log10(rep(df1$plasma_dilution,3))
y1 <- c(df1$rep1,df1$rep2,df1$rep3)
df1 <- data.frame(x1, y1)
df1$group <- "A"

df1 |>
  kbl(caption="Patient 82 visit 1") |>
  kable_paper("hover", full_width = F)

Patient 82 visit 1
x1	y1	group
1.698970	884695	A
2.176091	397515	A
2.653212	123688	A
3.130334	44966	A
3.607455	162558	A
4.084576	4368	A
4.561698	75570	A
5.038819	11315	A
1.698970	1061770	A
2.176091	610262	A
2.653212	197542	A
3.130334	127090	A
3.607455	49270	A
4.084576	17749	A
4.561698	58737	A
5.038819	125826	A
1.698970	936790	A
2.176091	455914	A
2.653212	140470	A
3.130334	40562	A
3.607455	20606	A
4.084576	34556	A
4.561698	34663	A
5.038819	35200	A

fit1 <- nls(
  y1 ~ a * exp(-b * x1) + c,
  data = df1,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)
summary(fit1)

## 
## Formula: y1 ~ a * exp(-b * x1) + c
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 1.951e+07  6.584e+06   2.963  0.00742 ** 
## b 1.783e+00  1.974e-01   9.033 1.11e-08 ***
## c 2.974e+04  2.187e+04   1.360  0.18833    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 66820 on 21 degrees of freedom
## 
## Number of iterations to convergence: 12 
## Achieved convergence tolerance: 2.907e-06

df2 <- read.table("pat82_visit2.tsv",header=TRUE)
x2 <- log10(rep(df2$plasma_dilution,3))
y2 <- c(df2$rep1,df2$rep2,df2$rep3)
df2 <- data.frame(x2, y2)
df2$group <- "B"

df2 |>
  kbl(caption="Patient 82 visit 2") |>
  kable_paper("hover", full_width = F)

Patient 82 visit 2
x2	y2	group
1.698970	985013	B
2.176091	438322	B
2.653212	197765	B
3.130334	102960	B
3.607455	27067	B
4.084576	57768	B
4.561698	8434	B
5.038819	9367	B
1.698970	1016042	B
2.176091	450827	B
2.653212	261173	B
3.130334	107921	B
3.607455	41383	B
4.084576	52332	B
4.561698	70778	B
5.038819	49490	B
1.698970	858857	B
2.176091	416976	B
2.653212	203218	B
3.130334	111515	B
3.607455	97148	B
4.084576	22479	B
4.561698	619916	B
5.038819	63341	B

fit2 <- nls(
  y2 ~ a * exp(-b * x2) + c,
  data = df1,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)

## Warning in min(x): no non-missing arguments to min; returning Inf

## Warning in max(x): no non-missing arguments to max; returning -Inf

summary(fit2)

## 
## Formula: y2 ~ a * exp(-b * x2) + c
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 2.623e+07  2.043e+07   1.284 0.213254    
## b 2.002e+00  4.556e-01   4.395 0.000253 ***
## c 8.377e+04  3.875e+04   2.162 0.042348 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 126300 on 21 degrees of freedom
## 
## Number of iterations to convergence: 12 
## Achieved convergence tolerance: 2.365e-06

plot(df1$x,df1$y,pch=15,main="all data",
  xlab="Plasma dilution factor",
  ylab="Absorbance")

points(df2$x,df2$y,col="blue",pch=17)

curve(coef(fit1)["a"] * exp(-coef(fit1)["b"] * x) + coef(fit1)["c"],
      add = TRUE, lwd = 2, col = "black", lty=2)

curve(coef(fit2)["a"] * exp(-coef(fit2)["b"] * x) + coef(fit2)["c"],
      add = TRUE, lwd = 2, col = "blue", lty=2)

It looks like there are some outliers there. Let’s quantify their variation from the regression curve and exclude the bad ones.

df1$expect1 <- coef(fit1)["a"] * exp(-coef(fit1)["b"] * df1$x1) + coef(fit1)["c"]
df1$delta1 <- abs(df1$y1 - df1$expect1)
df1$cv1 <- abs(df1$delta / df1$expect1)
df1clean <- subset(df1,cv1<3)

df1clean |>
  kbl(caption="Patient 82 visit 1 clean") |>
  kable_paper("hover", full_width = F)

Patient 82 visit 1 clean
x1	y1	group	expect1	delta1	cv1
1.698970	884695	A	973642.75	88947.7507	0.0913556
2.176091	397515	A	432952.67	35437.6748	0.0818512
2.653212	123688	A	201982.45	78294.4470	0.3876300
3.130334	44966	A	103317.34	58351.3365	0.5647778
3.607455	162558	A	61169.90	101388.1045	1.6574837
4.084576	4368	A	43165.49	38797.4887	0.8988080
4.561698	75570	A	35474.42	40095.5752	1.1302671
5.038819	11315	A	32188.98	20873.9812	0.6484822
1.698970	1061770	A	973642.75	88127.2493	0.0905129
2.176091	610262	A	432952.67	177309.3252	0.4095351
2.653212	197542	A	201982.45	4440.4470	0.0219843
3.130334	127090	A	103317.34	23772.6635	0.2300937
3.607455	49270	A	61169.90	11899.8955	0.1945384
4.084576	17749	A	43165.49	25416.4887	0.5888150
4.561698	58737	A	35474.42	23262.5752	0.6557562
5.038819	125826	A	32188.98	93637.0188	2.9089774
1.698970	936790	A	973642.75	36852.7507	0.0378504
2.176091	455914	A	432952.67	22961.3252	0.0530343
2.653212	140470	A	201982.45	61512.4470	0.3045435
3.130334	40562	A	103317.34	62755.3365	0.6074037
3.607455	20606	A	61169.90	40563.8955	0.6631349
4.084576	34556	A	43165.49	8609.4887	0.1994531
4.561698	34663	A	35474.42	811.4248	0.0228735
5.038819	35200	A	32188.98	3011.0188	0.0935419

df2$expect2 <- coef(fit2)["a"] * exp(-coef(fit2)["b"] * df2$x2) + coef(fit2)["c"]
df2$delta1 <- abs(df2$y2 - df2$expect2)
df2$cv2 <- abs(df2$delta / df2$expect2)
df2clean <- subset(df2,cv2<3)

df2clean |>
  kbl(caption="Patient 82 visit 2 clean") |>
  kable_paper("hover", full_width = F)

Patient 82 visit 2 clean
	x2	y2	group	expect2	delta1	cv2
1	1.698970	985013	B	957824.30	27188.699	0.0283859
2	2.176091	438322	B	420030.79	18291.208	0.0435473
3	2.653212	197765	B	213133.13	15368.132	0.0721058
4	3.130334	102960	B	133536.33	30576.333	0.2289739
5	3.607455	27067	B	102914.19	75847.187	0.7369945
6	4.084576	57768	B	91133.36	33365.364	0.3661158
7	4.561698	8434	B	86601.09	78167.095	0.9026109
8	5.038819	9367	B	84857.46	75490.459	0.8896149
9	1.698970	1016042	B	957824.30	58217.699	0.0607812
10	2.176091	450827	B	420030.79	30796.208	0.0733189
11	2.653212	261173	B	213133.13	48039.868	0.2253984
12	3.130334	107921	B	133536.33	25615.333	0.1918229
13	3.607455	41383	B	102914.19	61531.187	0.5978883
14	4.084576	52332	B	91133.36	38801.364	0.4257646
15	4.561698	70778	B	86601.09	15823.095	0.1827124
16	5.038819	49490	B	84857.46	35367.459	0.4167867
17	1.698970	858857	B	957824.30	98967.301	0.1033251
18	2.176091	416976	B	420030.79	3054.792	0.0072728
19	2.653212	203218	B	213133.13	9915.132	0.0465208
20	3.130334	111515	B	133536.33	22021.333	0.1649089
21	3.607455	97148	B	102914.19	5766.187	0.0560291
22	4.084576	22479	B	91133.36	68654.364	0.7533395
24	5.038819	63341	B	84857.46	21516.459	0.2535600

Now that we have excluded the outliers, we can run regression again.

fit1c <- nls(
  y1 ~ a * exp(-b * x1) + c,
  data = df1clean,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)
summary(fit1c)

## 
## Formula: y1 ~ a * exp(-b * x1) + c
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 1.951e+07  6.584e+06   2.963  0.00742 ** 
## b 1.783e+00  1.974e-01   9.033 1.11e-08 ***
## c 2.974e+04  2.187e+04   1.360  0.18833    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 66820 on 21 degrees of freedom
## 
## Number of iterations to convergence: 12 
## Achieved convergence tolerance: 2.907e-06

fit2c <- nls(
  y2 ~ a * exp(-b * x2) + c,
  data = df2clean,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)
summary(fit2c)

## 
## Formula: y2 ~ a * exp(-b * x2) + c
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 1.682e+07  2.968e+06   5.666 1.52e-05 ***
## b 1.709e+00  1.036e-01  16.488 4.16e-13 ***
## c 3.039e+04  1.239e+04   2.453   0.0235 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35200 on 20 degrees of freedom
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 2.083e-07

plot(df1clean$x,df1clean$y,pch=15, main="outliers removed",
  xlab="Plasma dilution factor",
  ylab="Absorbance")

points(df2clean$x,df2clean$y,col="blue",pch=17)

curve(coef(fit1c)["a"] * exp(-coef(fit1c)["b"] * x) + coef(fit1c)["c"],
      add = TRUE, lwd = 2, col = "black", lty=2)

curve(coef(fit2c)["a"] * exp(-coef(fit2c)["b"] * x) + coef(fit2c)["c"],
      add = TRUE, lwd = 2, col = "blue", lty=2)

This is looking a lot better.

Calculate difference between curves.

Predict difference between curves. Reliable range is 2-2.5. Calculate foldchange.

newx <- data.frame(x1 = seq(2, 2.5, length.out = 5))
predicted1 <- predict(fit1c, newdata = newx)
newx <- data.frame(x2 = seq(2, 2.5, length.out = 5))
predicted2 <- predict(fit2c, newdata = newx)
pred_diff <- (predicted2-predicted1)/predicted1
pred_diff

## [1] 0.0006868493 0.0096645677 0.0185095083 0.0271438452 0.0354713629

FOLDCHANGE=signif(mean(pred_diff),3)
message(paste("foldchange is",FOLDCHANGE))

## foldchange is 0.0183

Patient 82 combined model with ANOVA model comparison

df1clean$group <- "A"
df2clean$group <- "B"

colnames(df2clean) <- colnames(df1clean)
df0 <- rbind(df1clean,df2clean)

df0 |>
  kbl(caption="Patient 82 visit 1 and 2 clean") |>
  kable_paper("hover", full_width = F)

Patient 82 visit 1 and 2 clean
	x1	y1	group	expect1	delta1	cv1
1	1.698970	884695	A	973642.75	88947.7507	0.0913556
2	2.176091	397515	A	432952.67	35437.6748	0.0818512
3	2.653212	123688	A	201982.45	78294.4470	0.3876300
4	3.130334	44966	A	103317.34	58351.3365	0.5647778
5	3.607455	162558	A	61169.90	101388.1045	1.6574837
6	4.084576	4368	A	43165.49	38797.4887	0.8988080
7	4.561698	75570	A	35474.42	40095.5752	1.1302671
8	5.038819	11315	A	32188.98	20873.9812	0.6484822
9	1.698970	1061770	A	973642.75	88127.2493	0.0905129
10	2.176091	610262	A	432952.67	177309.3252	0.4095351
11	2.653212	197542	A	201982.45	4440.4470	0.0219843
12	3.130334	127090	A	103317.34	23772.6635	0.2300937
13	3.607455	49270	A	61169.90	11899.8955	0.1945384
14	4.084576	17749	A	43165.49	25416.4887	0.5888150
15	4.561698	58737	A	35474.42	23262.5752	0.6557562
16	5.038819	125826	A	32188.98	93637.0188	2.9089774
17	1.698970	936790	A	973642.75	36852.7507	0.0378504
18	2.176091	455914	A	432952.67	22961.3252	0.0530343
19	2.653212	140470	A	201982.45	61512.4470	0.3045435
20	3.130334	40562	A	103317.34	62755.3365	0.6074037
21	3.607455	20606	A	61169.90	40563.8955	0.6631349
22	4.084576	34556	A	43165.49	8609.4887	0.1994531
23	4.561698	34663	A	35474.42	811.4248	0.0228735
24	5.038819	35200	A	32188.98	3011.0188	0.0935419
110	1.698970	985013	B	957824.30	27188.6988	0.0283859
25	2.176091	438322	B	420030.79	18291.2080	0.0435473
31	2.653212	197765	B	213133.13	15368.1320	0.0721058
41	3.130334	102960	B	133536.33	30576.3332	0.2289739
51	3.607455	27067	B	102914.19	75847.1871	0.7369945
61	4.084576	57768	B	91133.36	33365.3636	0.3661158
71	4.561698	8434	B	86601.09	78167.0947	0.9026109
81	5.038819	9367	B	84857.46	75490.4593	0.8896149
91	1.698970	1016042	B	957824.30	58217.6988	0.0607812
101	2.176091	450827	B	420030.79	30796.2080	0.0733189
111	2.653212	261173	B	213133.13	48039.8680	0.2253984
121	3.130334	107921	B	133536.33	25615.3332	0.1918229
131	3.607455	41383	B	102914.19	61531.1871	0.5978883
141	4.084576	52332	B	91133.36	38801.3636	0.4257646
151	4.561698	70778	B	86601.09	15823.0947	0.1827124
161	5.038819	49490	B	84857.46	35367.4593	0.4167867
171	1.698970	858857	B	957824.30	98967.3012	0.1033251
181	2.176091	416976	B	420030.79	3054.7920	0.0072728
191	2.653212	203218	B	213133.13	9915.1320	0.0465208
201	3.130334	111515	B	133536.33	22021.3332	0.1649089
211	3.607455	97148	B	102914.19	5766.1871	0.0560291
221	4.084576	22479	B	91133.36	68654.3636	0.7533395
241	5.038819	63341	B	84857.46	21516.4593	0.2535600

fit_null <- nls(
  y1 ~ a * exp(-b * x1) + c,
  data = df0,
  start = list(a=1e6, b=0.3, c=5e4)
)
fit_null

## Nonlinear regression model
##   model: y1 ~ a * exp(-b * x1) + c
##    data: df0
##         a         b         c 
## 1.818e+07 1.748e+00 3.025e+04 
##  residual sum-of-squares: 1.195e+11
## 
## Number of iterations to convergence: 10 
## Achieved convergence tolerance: 6.906e-06

fit_alt <- nls(
  y1 ~ (a1 + (a2 - a1) * (group == "B")) *
       exp(-(b1 + (b2 - b1) * (group == "B")) * x1) +
       (c1 + (c2 - c1) * (group == "B")),
  data = df0,
  start = list(a1=1e6, a2=1e6, b1=0.3, b2=0.3, c1=5e4, c2=5e4)
)
fit_alt

## Nonlinear regression model
##   model: y1 ~ (a1 + (a2 - a1) * (group == "B")) * exp(-(b1 + (b2 - b1) *     (group == "B")) * x1) + (c1 + (c2 - c1) * (group == "B"))
##    data: df0
##        a1        a2        b1        b2        c1        c2 
## 1.951e+07 1.682e+07 1.783e+00 1.709e+00 2.974e+04 3.039e+04 
##  residual sum-of-squares: 1.185e+11
## 
## Number of iterations to convergence: 12 
## Achieved convergence tolerance: 2.602e-06

anova(fit_null, fit_alt)

## Analysis of Variance Table
## 
## Model 1: y1 ~ a * exp(-b * x1) + c
## Model 2: y1 ~ (a1 + (a2 - a1) * (group == "B")) * exp(-(b1 + (b2 - b1) * (group == "B")) * x1) + (c1 + (c2 - c1) * (group == "B"))
##   Res.Df Res.Sum Sq Df    Sum Sq F value Pr(>F)
## 1     44 1.1950e+11                            
## 2     41 1.1854e+11  3 958898044  0.1106 0.9535

Differences in these curves is not significant (p=0.95).

Data for patient 440

df1 <- read.table("pat440_visit1.tsv",header=TRUE)
x1 <- log10(rep(df1$plasma_dilution,3))
y1 <- c(df1$rep1,df1$rep2,df1$rep3)
df1 <- data.frame(x1, y1)

df1 |>
  kbl(caption="Patient 440 visit 1") |>
  kable_paper("hover", full_width = F)

Patient 440 visit 1
x1	y1
1.698970	1414693
2.176091	814797
2.653212	326273
3.130334	337861
3.607455	29326
4.084576	-48779
4.561698	-53038
5.038819	1233866
1.698970	1549611
2.176091	959106
2.653212	296757
3.130334	59949
3.607455	-24525
4.084576	170459
4.561698	-42896
5.038819	-59817
1.698970	1920647
2.176091	679551
2.653212	482107
3.130334	97169
3.607455	-6346
4.084576	-46687
4.561698	-66151
5.038819	-47045

fit1 <- nls(
  y1 ~ a * exp(-b * x1) + c,
  data = df1,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)
summary(fit1)

## 
## Formula: y1 ~ a * exp(-b * x1) + c
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)   
## a 3.090e+07  2.664e+07    1.16  0.25898   
## b 1.749e+00  5.042e-01    3.47  0.00229 **
## c 6.666e+04  9.657e+04    0.69  0.49754   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 291600 on 21 degrees of freedom
## 
## Number of iterations to convergence: 13 
## Achieved convergence tolerance: 7.853e-06

df2 <- read.table("pat440_visit2.tsv",header=TRUE)
x2 <- log10(rep(df2$plasma_dilution,3))
y2 <- c(df2$rep1,df2$rep2,df2$rep3)
df2 <- data.frame(x2, y2)

df2 |>
  kbl(caption="Patient 440 visit 2") |>
  kable_paper("hover", full_width = F)

Patient 440 visit 2
x2	y2
1.698970	4460929
2.176091	2834260
2.653212	1466181
3.130334	537859
3.607455	150452
4.084576	20123
4.561698	-23541
5.038819	178675
1.698970	4556538
2.176091	2882266
2.653212	2501896
3.130334	828694
3.607455	185501
4.084576	75931
4.561698	-9989
5.038819	87438
1.698970	4582599
2.176091	2834746
2.653212	1392985
3.130334	695306
3.607455	250375
4.084576	38887
4.561698	7726
5.038819	291522

fit2 <- nls(
  y2 ~ a * exp(-b * x2) + c,
  data = df1,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)

## Warning in min(x): no non-missing arguments to min; returning Inf

## Warning in max(x): no non-missing arguments to max; returning -Inf

summary(fit2)

## 
## Formula: y2 ~ a * exp(-b * x2) + c
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## a  2.819e+07  4.896e+06   5.758 1.03e-05 ***
## b  1.034e+00  1.065e-01   9.711 3.23e-09 ***
## c -2.473e+05  1.496e+05  -1.653    0.113    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 275700 on 21 degrees of freedom
## 
## Number of iterations to convergence: 11 
## Achieved convergence tolerance: 1.661e-06

MAX=max(c(df1$y1,df2$y2))

plot(df1$x,df1$y,pch=15,main="all data", ylim=c(0,MAX),
  xlab="Plasma dilution factor",
  ylab="Absorbance")

points(df2$x,df2$y,col="blue",pch=17)

curve(coef(fit1)["a"] * exp(-coef(fit1)["b"] * x) + coef(fit1)["c"],
      add = TRUE, lwd = 2, col = "black", lty=2)

curve(coef(fit2)["a"] * exp(-coef(fit2)["b"] * x) + coef(fit2)["c"],
      add = TRUE, lwd = 2, col = "blue", lty=2)

It looks like there are some outliers there. Let’s quantify their variation from the regression curve and exclude the bad ones.

df1$expect1 <- coef(fit1)["a"] * exp(-coef(fit1)["b"] * df1$x1) + coef(fit1)["c"]
df1$delta1 <- abs(df1$y1 - df1$expect1)
df1$cv1 <- abs(df1$delta / df1$expect1)
df1clean <- subset(df1,cv1<3)

df1clean |>
  kbl(caption="Patient 440 visit 1 clean") |>
  kable_paper("hover", full_width = F)

Patient 440 visit 1 clean
	x1	y1	expect1	delta1	cv1
1	1.698970	1414693	1649005.13	234312.13	0.1420930
2	2.176091	814797	753459.02	61337.98	0.0814085
3	2.653212	326273	364758.56	38485.56	0.1055097
4	3.130334	337861	196048.05	141812.95	0.7233581
5	3.607455	29326	122821.39	93495.39	0.7612305
6	4.084576	-48779	91038.28	139817.28	1.5358076
7	4.561698	-53038	77243.23	130281.23	1.6866363
9	1.698970	1549611	1649005.13	99394.13	0.0602752
10	2.176091	959106	753459.02	205646.98	0.2729372
11	2.653212	296757	364758.56	68001.56	0.1864290
12	3.130334	59949	196048.05	136099.05	0.6942127
13	3.607455	-24525	122821.39	147346.39	1.1996802
14	4.084576	170459	91038.28	79420.72	0.8723881
15	4.561698	-42896	77243.23	120139.23	1.5553367
16	5.038819	-59817	71255.66	131072.66	1.8394702
17	1.698970	1920647	1649005.13	271641.87	0.1647308
18	2.176091	679551	753459.02	73908.02	0.0980916
19	2.653212	482107	364758.56	117348.44	0.3217154
20	3.130334	97169	196048.05	98879.05	0.5043613
21	3.607455	-6346	122821.39	129167.39	1.0516685
22	4.084576	-46687	91038.28	137725.28	1.5128282
23	4.561698	-66151	77243.23	143394.23	1.8563987
24	5.038819	-47045	71255.66	118300.66	1.6602283

df2$expect2 <- coef(fit2)["a"] * exp(-coef(fit2)["b"] * df2$x2) + coef(fit2)["c"]
df2$delta1 <- abs(df2$y2 - df2$expect2)
df2$cv2 <- abs(df2$delta / df2$expect2)
df2clean <- subset(df2,cv2<3)

df2clean |>
  kbl(caption="Patient 440 visit 2 clean") |>
  kable_paper("hover", full_width = F)

Patient 440 visit 2 clean
	x2	y2	expect2	delta1	cv2
1	1.698970	4460929	4618816.747	157887.747	0.0341836
2	2.176091	2834260	2723820.748	110439.252	0.0405457
3	2.653212	1466181	1566784.667	100603.667	0.0642103
4	3.130334	537859	860328.035	322469.035	0.3748210
5	3.607455	150452	428983.687	278531.687	0.6492827
6	4.084576	20123	165615.863	145492.863	0.8784959
8	5.038819	178675	-93373.613	272048.613	2.9135492
9	1.698970	4556538	4618816.747	62278.747	0.0134837
10	2.176091	2882266	2723820.748	158445.252	0.0581702
11	2.653212	2501896	1566784.667	935111.333	0.5968346
12	3.130334	828694	860328.035	31634.035	0.0367697
13	3.607455	185501	428983.687	243482.687	0.5675803
14	4.084576	75931	165615.863	89684.863	0.5415234
16	5.038819	87438	-93373.613	180811.613	1.9364316
17	1.698970	4582599	4618816.747	36217.747	0.0078413
18	2.176091	2834746	2723820.748	110925.252	0.0407241
19	2.653212	1392985	1566784.667	173799.667	0.1109276
20	3.130334	695306	860328.035	165022.035	0.1918129
21	3.607455	250375	428983.687	178608.687	0.4163531
22	4.084576	38887	165615.863	126728.863	0.7651976
23	4.561698	7726	4810.207	2915.793	0.6061679

Now that we have excluded the outliers, we can run regression again.

fit1c <- nls(
  y1 ~ a * exp(-b * x1) + c,
  data = df1clean,
  start = list(a = 1e6, b = 0.3, c = 1e5)
)
summary(fit1c)

## 
## Formula: y1 ~ a * exp(-b * x1) + c
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## a  1.814e+07  4.937e+06   3.674   0.0015 ** 
## b  1.390e+00  1.615e-01   8.606  3.7e-08 ***
## c -7.668e+04  5.022e+04  -1.527   0.1424    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 119900 on 20 degrees of freedom
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 4.78e-07

fit2c <- nls(
  y2 ~ a * exp(-b * x2) + c,
  data = df2clean,
  start = list(a = 5e6, b = 0.3, c = 1e5)
)
summary(fit2c)

## 
## Formula: y2 ~ a * exp(-b * x2) + c
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## a  2.668e+07  4.776e+06   5.586 2.66e-05 ***
## b  9.912e-01  1.143e-01   8.674 7.60e-08 ***
## c -3.427e+05  1.933e+05  -1.772   0.0932 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 281200 on 18 degrees of freedom
## 
## Number of iterations to convergence: 10 
## Achieved convergence tolerance: 1.721e-06

MAX=max(c(df1clean$y1,df2clean$y2))

plot(df1clean$x,df1clean$y,pch=15, main="outliers removed",ylim=c(0,MAX),
  xlab="Plasma dilution factor",
  ylab="Absorbance")

points(df2clean$x,df2clean$y,col="blue",pch=17)

curve(coef(fit1c)["a"] * exp(-coef(fit1c)["b"] * x) + coef(fit1c)["c"],
      add = TRUE, lwd = 2, col = "black", lty=2)

curve(coef(fit2c)["a"] * exp(-coef(fit2c)["b"] * x) + coef(fit2c)["c"],
      add = TRUE, lwd = 2, col = "blue", lty=2)

These curves are different, but we need to model and test them.

This is looking a lot better.

Patient 440 combined model with ANOVA model comparison

df1clean$group <- "A"
df2clean$group <- "B"

colnames(df2clean) <- colnames(df1clean)
df0 <- rbind(df1clean,df2clean)

df0 |>
  kbl(caption="Patient 440 visit 1 and 2") |>
  kable_paper("hover", full_width = F)

Patient 440 visit 1 and 2
	x1	y1	expect1	delta1	cv1	group
1	1.698970	1414693	1649005.126	234312.126	0.1420930	A
2	2.176091	814797	753459.018	61337.982	0.0814085	A
3	2.653212	326273	364758.564	38485.564	0.1055097	A
4	3.130334	337861	196048.051	141812.949	0.7233581	A
5	3.607455	29326	122821.388	93495.388	0.7612305	A
6	4.084576	-48779	91038.282	139817.282	1.5358076	A
7	4.561698	-53038	77243.227	130281.227	1.6866363	A
9	1.698970	1549611	1649005.126	99394.126	0.0602752	A
10	2.176091	959106	753459.018	205646.982	0.2729372	A
11	2.653212	296757	364758.564	68001.564	0.1864290	A
12	3.130334	59949	196048.051	136099.051	0.6942127	A
13	3.607455	-24525	122821.388	147346.388	1.1996802	A
14	4.084576	170459	91038.282	79420.718	0.8723881	A
15	4.561698	-42896	77243.227	120139.227	1.5553367	A
16	5.038819	-59817	71255.658	131072.658	1.8394702	A
17	1.698970	1920647	1649005.126	271641.874	0.1647308	A
18	2.176091	679551	753459.018	73908.018	0.0980916	A
19	2.653212	482107	364758.564	117348.436	0.3217154	A
20	3.130334	97169	196048.051	98879.051	0.5043613	A
21	3.607455	-6346	122821.388	129167.388	1.0516685	A
22	4.084576	-46687	91038.282	137725.282	1.5128282	A
23	4.561698	-66151	77243.227	143394.227	1.8563987	A
24	5.038819	-47045	71255.658	118300.658	1.6602283	A
110	1.698970	4460929	4618816.747	157887.747	0.0341836	B
25	2.176091	2834260	2723820.748	110439.252	0.0405457	B
31	2.653212	1466181	1566784.667	100603.667	0.0642103	B
41	3.130334	537859	860328.035	322469.035	0.3748210	B
51	3.607455	150452	428983.687	278531.687	0.6492827	B
61	4.084576	20123	165615.863	145492.863	0.8784959	B
8	5.038819	178675	-93373.613	272048.613	2.9135492	B
91	1.698970	4556538	4618816.747	62278.747	0.0134837	B
101	2.176091	2882266	2723820.748	158445.252	0.0581702	B
111	2.653212	2501896	1566784.667	935111.333	0.5968346	B
121	3.130334	828694	860328.035	31634.035	0.0367697	B
131	3.607455	185501	428983.687	243482.687	0.5675803	B
141	4.084576	75931	165615.863	89684.863	0.5415234	B
161	5.038819	87438	-93373.613	180811.613	1.9364316	B
171	1.698970	4582599	4618816.747	36217.747	0.0078413	B
181	2.176091	2834746	2723820.748	110925.252	0.0407241	B
191	2.653212	1392985	1566784.667	173799.667	0.1109276	B
201	3.130334	695306	860328.035	165022.035	0.1918129	B
211	3.607455	250375	428983.687	178608.687	0.4163531	B
221	4.084576	38887	165615.863	126728.863	0.7651976	B
231	4.561698	7726	4810.207	2915.793	0.6061679	B

fit_null <- nls(
  y1 ~ a * exp(-b * x1) + c,
  data = df0,
  start = list(a=3e6, b=0.3, c=1e5)
)
fit_null

## Nonlinear regression model
##   model: y1 ~ a * exp(-b * x1) + c
##    data: df0
##          a          b          c 
##  2.078e+07  1.080e+00 -2.001e+05 
##  residual sum-of-squares: 2.379e+13
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 8.609e-06

fit_alt <- nls(
  y1 ~ (a1 + (a2 - a1) * (group == "B")) *
       exp(-(b1 + (b2 - b1) * (group == "B")) * x1) +
       (c1 + (c2 - c1) * (group == "B")),
  data = df0,
  start = list(a1=3e6, a2=3e6, b1=0.3, b2=0.3, c1=1e5, c2=1e5)
)
fit_alt

## Nonlinear regression model
##   model: y1 ~ (a1 + (a2 - a1) * (group == "B")) * exp(-(b1 + (b2 - b1) *     (group == "B")) * x1) + (c1 + (c2 - c1) * (group == "B"))
##    data: df0
##         a1         a2         b1         b2         c1         c2 
##  1.814e+07  2.668e+07  1.390e+00  9.912e-01 -7.668e+04 -3.427e+05 
##  residual sum-of-squares: 1.711e+12
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 5.645e-06

anova(fit_null, fit_alt)

## Analysis of Variance Table
## 
## Model 1: y1 ~ a * exp(-b * x1) + c
## Model 2: y1 ~ (a1 + (a2 - a1) * (group == "B")) * exp(-(b1 + (b2 - b1) * (group == "B")) * x1) + (c1 + (c2 - c1) * (group == "B"))
##   Res.Df Res.Sum Sq Df     Sum Sq F value    Pr(>F)    
## 1     41  2.379e+13                                    
## 2     38  1.711e+12  3 2.2079e+13  163.45 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(fit_null, fit_alt)$Pr[2]

## [1] 9.232574e-22

The difference between curves is significant (p=9.2e-22).