Intro

Here we are analysing samples obtained before patients had a CVD event. We have case samples from patients who later had a CVD event and a control groups which in the follow up period did not have a CVD event.

Load libraries

library("limma")
library("mitch")

library("PLSDAbatch")

library("tibble")
library("tidyr")
library("readr")
library("dplyr")
library("ggplot2")
library("pheatmap")
library("janitor")
library("kableExtra")
library("eulerr")
library("beeswarm")

Load files and clean the data tables

These files were generated by FragPipe.

plexA <- read_tsv("abundance_protein_MD_plexA.tsv")
## Rows: 963 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plexB <- read_tsv("abundance_protein_MD_plexB.tsv")
## Rows: 962 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plexA <- janitor::clean_names(plexA)
plexB <- janitor::clean_names(plexB)

Extract out the protein quantifications.

plexA <- dplyr::select(plexA, protein, gene,
                       x126, x127n, x127c, x128n, x128c,
                       x129n, x129c, x130n, x130c,
                       x131n, x131c, x132n)

plexB <- dplyr::select(plexB, protein, gene,
                       x126, x127n, x127c, x128n, x128c,
                       x129n, x129c, x130n, x130c,
                       x131n, x131c, x132n)

Rename TMT channels to sample IDs.

annotationA <- c(
  x126="19191", x127n="34629", x127c="34862",
  x128n="40431", x128c="40475", x129n="35038",
  x129c="34866", x130n="41436", x130c="41383",
  x131n="40316", x131c="3746", x132n="102953"
)

annotationB <- c(
  x126="40797", x127n="4177", x127c="103376",
  x128n="47542", x128c="47442", x129n="42977",
  x129c="43081", x130n="113835", x130c="113856",
  x131n="19584", x131c="118117", x132n="118008"
)

colnames(plexA)[match(names(annotationA), colnames(plexA))] <- annotationA
colnames(plexB)[match(names(annotationB), colnames(plexB))] <- annotationB

Merge plexes.

merged <- full_join(plexA, plexB, by=c("protein","gene"))
head(merged) |>
    kbl(caption = "Example of merged data") |>
    kable_paper("hover", full_width = F)
Example of merged data
protein gene 19191 34629 34862 40431 40475 35038 34866 41436 41383 40316 3746 102953 40797 4177 103376 47542 47442 42977 43081 113835 113856 19584 118117 118008
sp|A0A075B6H7|KV37_HUMAN IGKV3-7 25.20197 25.39573 25.43256 26.05771 25.05397 25.57650 25.65469 25.37295 25.65501 25.64185 25.43883 25.33301 30.46372 30.34873 30.21146 30.78558 30.36076 30.19026 30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
sp|A0A075B6H9|LV469_HUMAN IGLV4-69 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
sp|A0A075B6I0|LV861_HUMAN IGLV8-61 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964 27.26845 26.57449 28.21526 26.57432 26.75554 NA 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
sp|A0A075B6I1|LV460_HUMAN IGLV4-60 21.13980 NA 20.95866 21.90418 19.86279 21.18880 21.20105 20.12885 20.95064 21.09826 20.35881 NA 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
sp|A0A075B6J9|LV218_HUMAN IGLV2-18 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800
sp|A0A075B6K0|LV316_HUMAN IGLV3-16 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397 20.66449 22.03552 22.04962 21.69365 21.55941 21.37521 22.05910 22.66605 21.11006 20.78442 21.76661 21.22903
dim(merged)
## [1] 1186   26

Expression matrix for Merged plexes including Plex A and Plex B.

expr_combined <- as.data.frame(merged)
protein_info_combined <- expr_combined[, c("protein", "gene")]
rownames(expr_combined) <- expr_combined$protein
expr_combined <- expr_combined[, !(colnames(expr_combined) %in% c("protein","gene"))]
expr_combined <- as.matrix(expr_combined)
storage.mode(expr_combined) <- "numeric"

expr_A <- as.data.frame(plexA)
protein_info_A <- expr_A[, c("protein", "gene")]
rownames(expr_A) <- expr_A$protein
expr_A <- expr_A[, !(colnames(expr_A) %in% c("protein","gene"))]
expr_A <- as.matrix(expr_A)
storage.mode(expr_A) <- "numeric"
head(expr_A) |>
    kbl(caption = "Example of Plex A data") |>
    kable_paper("hover", full_width = F)
Example of Plex A data
19191 34629 34862 40431 40475 35038 34866 41436 41383 40316 3746 102953
sp|A0A075B6H7|KV37_HUMAN 25.20197 25.39573 25.43256 26.05771 25.05397 25.57650 25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964 27.26845 26.57449 28.21526 26.57432 26.75554 NA
sp|A0A075B6I1|LV460_HUMAN 21.13980 NA 20.95866 21.90418 19.86279 21.18880 21.20105 20.12885 20.95064 21.09826 20.35881 NA
sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397
dim(expr_A)
## [1] 963  12
expr_B <- as.data.frame(plexB)
protein_info_B <- expr_B[, c("protein", "gene")]
rownames(expr_B) <- expr_B$protein
expr_B <- expr_B[, !(colnames(expr_B) %in% c("protein","gene"))]
expr_B <- as.matrix(expr_B)
storage.mode(expr_B) <- "numeric"
head(expr_B)|>
    kbl(caption = "Example of Plex B data") |>
    kable_paper("hover", full_width = F)
Example of Plex B data
40797 4177 103376 47542 47442 42977 43081 113835 113856 19584 118117 118008
sp|A0A075B6H7|KV37_HUMAN 30.46372 30.34873 30.21146 30.78558 30.36076 30.19026 30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839 NA 13.41395 17.83912 14.71371 15.67450 14.14982 13.30315 NA 13.41029 14.51386
sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800
dim(expr_B)
## [1] 962  12

Filter proteins

Keep proteins which are present in >=50% of samples.

keep_combined <- rowSums(!is.na(expr_combined)) >= ncol(expr_combined)*0.5
expr_combined_filt <- expr_combined[keep_combined, ]
dim(expr_combined_filt)
## [1] 904  24
keep_A <- rowSums(!is.na(expr_A)) >= ncol(expr_A)*0.5
expr_A_filt <- expr_A[keep_A, ]
dim(expr_A_filt)
## [1] 873  12
keep_B <- rowSums(!is.na(expr_B)) >= ncol(expr_B)*0.5
expr_B_filt <- expr_B[keep_B, ]
dim(expr_B_filt)
## [1] 854  12

QC before normalization

qc_sample_combined <- data.frame(
  Sample = colnames(expr_combined_filt),
  Detected_Proteins = colSums(!is.na(expr_combined_filt)),
  Missing_Percent = colMeans(is.na(expr_combined_filt)) * 100,
  Median_Intensity = apply(expr_combined_filt, 2, median, na.rm = TRUE),
  SD_Intensity = apply(expr_combined_filt, 2, sd, na.rm = TRUE)
)

# Assign plex
plexA_samples <- c(
  "19191","34629","34862","40431","40475","35038",
  "34866","41436","41383","40316","3746","102953"
)

qc_sample_combined$Plex <- ifelse(qc_sample_combined$Sample %in% plexA_samples, "A", "B")

# QC summary by plex
qc_plex_combined <- qc_sample_combined %>%
  group_by(Plex) %>%
  summarise(
    N_Samples = n(),
    Mean_Detected_Proteins = mean(Detected_Proteins),
    Mean_Missing_Percent = mean(Missing_Percent),
    Mean_Median_Intensity = mean(Median_Intensity),
    .groups = "drop"
  )

qc_sample_combined |>
    kbl(caption = "QC on combined data") |>
    kable_paper("hover", full_width = F)
QC on combined data
Sample Detected_Proteins Missing_Percent Median_Intensity SD_Intensity Plex
19191 19191 811 10.28761 22.61437 3.608051 A
34629 34629 768 15.04425 22.52596 3.824270 A
34862 34862 792 12.38938 22.47182 3.687522 A
40431 40431 767 15.15487 22.17860 3.914548 A
40475 40475 768 15.04425 22.27699 3.916427 A
35038 35038 806 10.84071 22.40563 3.668941 A
34866 34866 805 10.95133 22.46457 3.701970 A
41436 41436 779 13.82743 22.34549 3.813249 A
41383 41383 791 12.50000 22.41082 3.736537 A
40316 40316 762 15.70796 22.20311 3.877041 A
3746 3746 797 11.83628 22.94357 3.486882 A
102953 102953 765 15.37611 22.48856 3.758113 A
40797 40797 766 15.26549 22.30942 3.775893 B
4177 4177 782 13.49558 22.93066 3.365816 B
103376 103376 714 21.01770 22.29280 3.759772 B
47542 47542 780 13.71681 22.25514 3.563151 B
47442 47442 731 19.13717 22.16957 3.792444 B
42977 42977 772 14.60177 22.38404 3.503025 B
43081 43081 776 14.15929 22.13263 3.707914 B
113835 113835 726 19.69027 22.26161 3.831329 B
113856 113856 768 15.04425 22.04823 3.841864 B
19584 19584 764 15.48673 22.35815 3.493251 B
118117 118117 727 19.57965 22.31801 3.876589 B
118008 118008 675 25.33186 22.33120 3.854068 B
qc_plex_combined |>
    kbl(caption = "Summary of Plex data QC") |>
    kable_paper("hover", full_width = F)
Summary of Plex data QC
Plex N_Samples Mean_Detected_Proteins Mean_Missing_Percent Mean_Median_Intensity
A 12 784.2500 13.24668 22.44412
B 12 748.4167 17.21055 22.31595

Now we have some QC plots. The first one is the missingness plot.

qc_long_combined <- as.data.frame(expr_combined_filt) %>%
  rownames_to_column("Protein") %>%
  pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
  filter(is.finite(Intensity))

ggplot(qc_sample_combined,
       aes(x = reorder(Sample, Missing_Percent),
           y = Missing_Percent,
           fill = Plex)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(title = "Missingness per Sample",
       x = "Sample", y = "% Missing")

Detected proteins plot.

ggplot(qc_sample_combined,
       aes(x = reorder(Sample, Detected_Proteins),
           y = Detected_Proteins,
           fill = Plex)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(title = "Detected Proteins per Sample",
       x = "Sample", y = "Detected Proteins")

Density plot.

ggplot(qc_long_combined,
       aes(x = Intensity, color = Sample)) +
  geom_density(linewidth = 0.8) +
  theme_bw() +
  labs(title = "Intensity Distribution per Sample",
       x = "Intensity", y = "Density")

Plex-level comparison.

ggplot(qc_plex_combined,
       aes(x = Plex, y = Mean_Detected_Proteins, fill = Plex)) +
  geom_col() +
  theme_bw() +
  labs(title = "Mean Detected Proteins by Plex",
       x = "Plex", y = "Mean Detected Proteins")

Correlation heatmap

A correlation heatmap shows the similarity between samples.

It shows that the samples are clustered into clusters based on plex A/B.

cor_pre_combined <- cor(expr_combined_filt, use = "pairwise.complete.obs")

pheatmap(cor_pre_combined,
         main = "Sample Correlation Heatmap (Pre-normalization)")

Quantile normalization and log2 transform

This needs to be run for combined dataset and separate plexes as well.

expr_combined_norm <- normalizeBetweenArrays(expr_combined_filt, method = "quantile")
expr_combined_log <- log2(expr_combined_norm + 1)
boxplot(expr_combined_log, main = "Post-normalization for both plexes", las = 2)

# Plex A
expr_A_norm <- normalizeBetweenArrays(expr_A_filt, method = "quantile")
expr_A_log <- log2(expr_A_norm + 1)
boxplot(expr_A_log, main = "Post-normalization for Plex A", las = 2)

# plex B
expr_B_norm <- normalizeBetweenArrays(expr_B_filt, method = "quantile")
expr_B_log <- log2(expr_B_norm + 1)
boxplot(expr_B_log, main = "Post-normalization for Plex B", las = 2)

QC after normalization

After normalisation, the signal distributions are uniform, but there’s a shoulder.

The correlation structure is retained as expected.

qc_long_norm_combined <- as.data.frame(expr_combined_log) %>%
  rownames_to_column("Protein") %>%
  pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
  filter(is.finite(Intensity))

ggplot(qc_long_norm_combined,
       aes(x = Intensity, color = Sample)) +
  geom_density(linewidth = 0.8) +
  theme_bw() +
  labs(title = "Post-normalization Intensity Distribution (Combined)",
       x = "Log2 Intensity",
       y = "Density")

cor_post_combined <- cor(expr_combined_log, use = "pairwise.complete.obs")

pheatmap(cor_post_combined,
         main = "Sample Correlation Heatmap (Post-normalization)")

Impute missing values

Here we are using the median method for imputation of missing values.

# Combine Plexes
expr_combined_imp <- expr_combined_log
for(i in 1:ncol(expr_combined_imp)){
  expr_combined_imp[is.na(expr_combined_imp[,i]), i] <-
    median(expr_combined_imp[,i], na.rm = TRUE)
}

# Plex A
expr_A_imp <- expr_A_log

for(i in 1:ncol(expr_A_imp)){
  expr_A_imp[is.na(expr_A_imp[,i]), i] <-
    median(expr_A_imp[,i], na.rm = TRUE)
}

# Plex B
expr_B_imp <- expr_B_log

for(i in 1:ncol(expr_B_imp)){
  expr_B_imp[is.na(expr_B_imp[,i]), i] <-
    median(expr_B_imp[,i], na.rm = TRUE)
}
sum(is.na(expr_combined_imp))
## [1] 0
sum(is.na(expr_A_imp))
## [1] 0
sum(is.na(expr_B_imp))
## [1] 0

Next, we will removing Zero Variance Proteins because they cannot be statistically significant and may cause other problems later on.

expr_combined_imp <- expr_combined_imp[apply(expr_combined_imp, 1, var) != 0, ]
expr_A_imp <- expr_A_imp[apply(expr_A_imp, 1, var) != 0, ]
expr_B_imp <- expr_B_imp[apply(expr_B_imp, 1, var) != 0, ]

Metadata Creation

# Create combined metadata from expression matrix
meta_combined <- data.frame(
  Sample = colnames(expr_combined_imp)
)

# Define Plex (based on known Plex A samples)

plexA_samples <- c(
  "19191","34629","34862","40431","40475","35038",
  "34866","41436","41383","40316","3746","102953"
)

meta_combined$Plex <- ifelse(meta_combined$Sample %in% plexA_samples,
                             "A", "B")

meta_combined$Plex <- factor(meta_combined$Plex)

# Define biological group (Case vs Control)

case_samples <- c(
  "19191","40797","34862","103376","40475","47442",
  "34866","43081","41383","113856","3746","118117"
)

meta_combined$Group <- ifelse(meta_combined$Sample %in% case_samples,
                              "Case", "Control")

meta_combined$Group <- factor(meta_combined$Group,
                              levels = c("Control","Case"))

# Final formatting

rownames(meta_combined) <- meta_combined$Sample

# Create Plex-specific metadata (CORRECT WAY)
meta_A <- meta_combined[meta_combined$Plex == "A", ]
meta_B <- meta_combined[meta_combined$Plex == "B", ]

# Align with expression matrices (CRITICAL)
meta_A <- meta_A[colnames(expr_A_imp), ]
meta_B <- meta_B[colnames(expr_B_imp), ]

# Keep only needed columns
meta_A <- meta_A[, c("Sample","Group")]
meta_B <- meta_B[, c("Sample","Group")]

# SANITY CHECKS (DO NOT SKIP)
# Alignment
all(rownames(meta_A) == colnames(expr_A_imp))
## [1] TRUE
all(rownames(meta_B) == colnames(expr_B_imp))
## [1] TRUE
all(rownames(meta_combined) == colnames(expr_combined_imp))
## [1] TRUE
# Distribution checks
table(meta_combined$Plex, meta_combined$Group)
##    
##     Control Case
##   A       6    6
##   B       6    6
table(meta_A$Group)
## 
## Control    Case 
##       6       6
table(meta_B$Group)
## 
## Control    Case 
##       6       6

PCA Before Batch Correction for combined plexes

pca_pre <- prcomp(t(expr_combined_imp), center = TRUE, scale. = TRUE)

pca_pre_df <- data.frame(
  PC1 = pca_pre$x[,1],
  PC2 = pca_pre$x[,2],
  Sample = colnames(expr_combined_imp),
  Plex = meta_combined$Plex,
  Group = meta_combined$Group
)

ggplot(pca_pre_df, aes(x = PC1, y = PC2, color = Plex, shape = Group)) +
  geom_point(size = 4) +
  geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
  theme_bw() +
  labs(title = "PCA Before Batch Correction",
       x = "PC1",
       y = "PC2")

Batch correction

expr_combined_corrected <- removeBatchEffect(
  expr_combined_imp,
  batch = meta_combined$Plex,
  design = model.matrix(~ Group, data = meta_combined)
)

# Quick check
head(expr_combined_corrected[,1:5])
##                              19191    34629    34862    40431    40475
## sp|A0A075B6H7|KV37_HUMAN  4.833346 4.830965 4.839526 4.871850 4.827196
## sp|A0A075B6H9|LV469_HUMAN 4.702996 4.718612 4.721695 4.712016 4.715457
## sp|A0A075B6I0|LV861_HUMAN 4.825501 4.858319 4.764883 4.807596 4.800510
## sp|A0A075B6I1|LV460_HUMAN 4.638888 4.726509 4.627839 4.711256 4.579935
## sp|A0A075B6J9|LV218_HUMAN 4.595879 4.759264 4.755084 4.708453 4.696748
## sp|A0A075B6K0|LV316_HUMAN 4.471802 4.529905 4.479845 4.517084 4.502633
# Dimensions unchanged
dim(expr_combined_corrected)
## [1] 904  24

PCA On Combined Plexes after removing Batch effect

pca_post <- prcomp(t(expr_combined_corrected), center = TRUE, scale. = TRUE)

pca_post_df <- data.frame(
  PC1 = pca_post$x[,1],
  PC2 = pca_post$x[,2],
  Sample = colnames(expr_combined_corrected),
  Plex = meta_combined$Plex,
  Group = meta_combined$Group
)

ggplot(pca_post_df, aes(x = PC1, y = PC2, color = Group, shape = Plex)) +
  geom_point(size = 4) +
  geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
  theme_bw() +
  labs(title = "PCA After Batch Correction",
       x = "PC1",
       y = "PC2")

Differential Expression Analysis

Combined plexes

First analysis includes batch correction at the limma step

design_combined <- model.matrix(~ Plex + Group, data = meta_combined)

fit_combined <- lmFit(expr_combined_imp, design_combined)
fit_combined <- eBayes(fit_combined)
results_combined <- topTable(
  fit_combined,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_combined$Protein <- rownames(results_combined)
# Add gene annotation
results_combined <- merge(
  results_combined,
  protein_info_combined,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

results_combined <- results_combined[order(results_combined$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value
Protein logFC AveExpr t P.Value adj.P.Val B gene
751 sp|Q27J81|INF2_HUMAN -0.1169760 4.464969 -4.867502 0.0000637 0.0576017 1.6695010 INF2
507 sp|P28482|MK01_HUMAN 0.1460155 4.370219 4.197978 0.0003398 0.1535753 0.0466474 MAPK1
96 sp|O75356|ENTP5_HUMAN 0.0411850 4.456341 3.137454 0.0045940 0.5645256 -2.4416598 ENTPD5
571 sp|P49407|ARRB1_HUMAN 0.0752710 4.480139 3.119861 0.0047905 0.5645256 -2.4810489 ARRB1
891 sp|Q9Y287|ITM2B_HUMAN -0.0322520 4.425711 -3.009591 0.0062189 0.5645256 -2.7258946 ITM2B
609 sp|P59998|ARPC4_HUMAN -0.0810547 4.524232 -2.956053 0.0070519 0.5645256 -2.8434299 ARPC4

Batch correction was done separately before limma.

design_combined <- model.matrix(~ Group, data = meta_combined)

fit_combined <- lmFit(expr_combined_corrected, design_combined)
fit_combined <- eBayes(fit_combined)
results_combined <- topTable(
  fit_combined,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_combined$Protein <- rownames(results_combined)
# Add gene annotation
results_combined <- merge(
  results_combined,
  protein_info_combined,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

results_combined <- results_combined[order(results_combined$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value
Protein logFC AveExpr t P.Value adj.P.Val B gene
751 sp|Q27J81|INF2_HUMAN -0.1169760 4.464969 -4.974535 0.0000436 0.0394328 2.0060272 INF2
507 sp|P28482|MK01_HUMAN 0.1460155 4.370219 4.288700 0.0002506 0.1132865 0.3085421 MAPK1
96 sp|O75356|ENTP5_HUMAN 0.0411850 4.456341 3.211806 0.0037135 0.5064708 -2.2717706 ENTPD5
571 sp|P49407|ARRB1_HUMAN 0.0752710 4.480139 3.188448 0.0039299 0.5064708 -2.3251860 ARRB1
891 sp|Q9Y287|ITM2B_HUMAN -0.0322520 4.425711 -3.084598 0.0050485 0.5064708 -2.5607553 ITM2B
757 sp|Q5JSH3|WDR44_HUMAN 0.0271306 4.503781 3.023974 0.0058367 0.5064708 -2.6967557 WDR44

Make sure to do volcano plot, box/beeswarm plot and heatmap.

Volcano plot

sig <- subset(results_combined,adj.P.Val<0.05)

plot(results_combined$logFC, -log10(results_combined$P.Value),
  cex=0.6,pch=19,col="darkgray",
  xlab="log2 fold change",ylab="-log10(p-value)")

abline(v=0,lty=2,lwd=1)
points(sig$logFC, -log10(sig$P.Value) , col="red",pch=19,cex=0.7)
text(sig$logFC+0.015, -log10(sig$P.Value), labels=sig$gene)

sig <- subset(results_combined,adj.P.Val<0.05)

plot(log10(results_combined$AveExpr),results_combined$logFC,
  cex=0.6,pch=19,col="darkgray",
  xlab="log10 basemean",ylab="log2 fold change")

abline(h=0,lty=2,lwd=1)
points(log10(sig$AveExpr),sig$logFC,col="red",pch=19,cex=0.7)
text(log10(sig$AveExpr)+0.004,sig$logFC,labels=sig$gene)

Beeswarm plot using batch corrected data.

par(mfrow=c(3,3))

null <- lapply(1:9,function(i) {
  mygene <- results_combined$gene[i]
  myprot <- results_combined$Protein[i]
  mydat <- expr_combined_corrected[which(rownames(expr_combined_corrected) == myprot),]
  mygroups <- meta_combined[match(names(mydat),meta_combined$Sample),3]
  ctrl <- mydat[which(mygroups=="Control")]
  case <- mydat[which(mygroups=="Case")]
  xl <- list("Ctrl"=ctrl,"Case"=case)
  boxplot(xl,ylab="Relative protein expression",main=mygene,col="white",cex=0)
  beeswarm(xl,add=TRUE,cex=1.5,pch=19,col="darkgray")
} )

Now look at the ratio between MAPK1 and INF2.

prota="sp|P28482|MK01_HUMAN"
protb="sp|Q27J81|INF2_HUMAN"
dat_a <- expr_combined_corrected[which(rownames(expr_combined_corrected)==prota),]
dat_b <- expr_combined_corrected[which(rownames(expr_combined_corrected)==protb),]
abrat <- dat_a / dat_b
case_samples <- meta_combined[which(meta_combined$Group=="Case"),"Sample"]
ctrl_samples <- meta_combined[which(meta_combined$Group=="Control"),"Sample"]

case_dat <- abrat[which(names(abrat) %in% case_samples)]
ctrl_dat <- abrat[which(names(abrat) %in% ctrl_samples)]

rl <- list("Ctrl"=ctrl_dat, "Case"=case_dat)
boxplot(rl,ylab="Ratio",main="Ratio MAPK1/INF2",col="white",cex=0)
beeswarm(rl,add=TRUE,cex=1.5,pch=19,col="darkgray")

t.test(case_dat,ctrl_dat)
## 
##  Welch Two Sample t-test
## 
## data:  case_dat and ctrl_dat
## t = 6.4566, df = 22, p-value = 1.701e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03962877 0.07713260
## sample estimates:
## mean of x mean of y 
## 1.0084825 0.9501018

Separate plexes

Analyse plex A and plex B.

design_A <- model.matrix(~ Group, data = meta_A)
fit_A <- lmFit(expr_A_imp, design_A)
fit_A <- eBayes(fit_A)
results_A <- topTable(
  fit_A,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_A$Protein <- rownames(results_A)
# Add gene annotation
results_A <- merge(
  results_A,
  protein_info_A,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

# View
results_A <- results_A[order(results_A$P.Value),]

head(results_A,10) |>
    kbl(caption = "Top ranked proteins by p-value in plex A") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value in plex A
Protein logFC AveExpr t P.Value adj.P.Val B gene
740 sp|Q86UF1|TSN33_HUMAN -0.2007621 4.396326 -4.595562 0.0006315 0.3477544 -0.0924426 TSPAN33
270 sp|P06702|S10A9_HUMAN -0.1394058 4.711956 -4.203125 0.0012514 0.3477544 -0.7472006 S100A9
715 sp|Q27J81|INF2_HUMAN -0.1226512 4.448334 -4.067512 0.0015918 0.3477544 -0.9777981 INF2
546 sp|P49593|PPM1F_HUMAN 0.1064798 4.172522 3.995491 0.0018102 0.3477544 -1.1010580 PPM1F
863 sp|Q9Y287|ITM2B_HUMAN -0.0563506 4.300458 -3.616202 0.0035936 0.3477544 -1.7577260 ITM2B
253 sp|P05109|S10A8_HUMAN -0.1256702 4.779495 -3.570575 0.0039059 0.3477544 -1.8373967 S100A8
63 sp|O14974|MYPT1_HUMAN -0.2520287 4.360400 -3.529357 0.0042118 0.3477544 -1.9094565 PPP1R12A
335 sp|P0DP01|HV108_HUMAN -0.1724802 4.563498 -3.239505 0.0071775 0.3477544 -2.4176917 IGHV1-8
587 sp|P61106|RAB14_HUMAN 0.0912982 4.462361 3.077161 0.0096891 0.3477544 -2.7024736 RAB14
12 sp|A0A075B6S2|KVD29_HUMAN 0.0737667 4.787900 3.072875 0.0097662 0.3477544 -2.7099842 IGKV2D-29

Plex B.

design_B <- model.matrix(~ Group, data = meta_B)
fit_B <- lmFit(expr_B_imp, design_B)
fit_B <- eBayes(fit_B)
results_B <- topTable(
  fit_B,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_B$Protein <- rownames(results_B)
# Add gene annotation
results_B <- merge(
  results_B,
  protein_info_B,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

# View
results_B <- results_B[order(results_B$P.Value),]

head(results_B,10) |>
    kbl(caption = "Top ranked proteins by p-value in plex B") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value in plex B
Protein logFC AveExpr t P.Value adj.P.Val B gene
112 sp|P00387|NB5R3_HUMAN -0.1768389 4.440961 -6.330268 0.0000379 0.0323369 2.5541250 CYB5R3
528 sp|P40227|TCPZ_HUMAN -0.1501753 4.378868 -5.615484 0.0001136 0.0371781 1.5320233 CCT6A
590 sp|P61106|RAB14_HUMAN -0.1788209 4.385867 -5.528161 0.0001306 0.0371781 1.4014708 RAB14
575 sp|P55084|ECHB_HUMAN -0.2729338 4.251690 -5.063074 0.0002790 0.0595748 0.6855750 HADHB
282 sp|P07203|GPX1_HUMAN -0.1583329 4.361424 -4.767197 0.0004594 0.0784686 0.2126430 GPX1
89 sp|O75356|ENTP5_HUMAN 0.0867614 4.370177 4.514696 0.0007096 0.1010049 -0.2010684 ENTPD5
827 sp|Q9UBR2|CATZ_HUMAN 0.0824984 4.454822 4.367655 0.0009176 0.1116510 -0.4459936 CTSZ
458 sp|P24539|AT5F1_HUMAN -0.1709199 4.334835 -4.195393 0.0012440 0.1116510 -0.7363886 ATP5PB
741 sp|Q8IYS5|OSCAR_HUMAN 0.1121910 4.272896 4.132539 0.0013914 0.1116510 -0.8432130 OSCAR
244 sp|P04899|GNAI2_HUMAN -0.1958671 4.296231 -4.072116 0.0015501 0.1116510 -0.9463140 GNAI2

Comparison of Plex A and B results

The top 50 up and down regulated proteins are selected from Plex A and B and these lists were compared.

plexA_up <- head(subset(results_A,logFC>0),50)[,1]
plexA_dn <- head(subset(results_A,logFC<0),50)[,1]
plexB_up <- head(subset(results_B,logFC>0),50)[,1]
plexB_dn <- head(subset(results_B,logFC<0),50)[,1]

v1 <- list("A up"=plexA_up, "A dn"=plexA_dn, "B up"=plexB_up, "B dn"=plexB_dn)

plot(euler(v1), quantities = TRUE)

intersect(plexA_up,plexB_up)
## character(0)
intersect(plexA_dn,plexB_dn)
## [1] "sp|Q27J81|INF2_HUMAN"
intersect(plexA_up,plexB_dn)
## [1] "sp|P61106|RAB14_HUMAN" "sp|Q9UBW5|BIN2_HUMAN"  "sp|Q3ZCW2|LEGL_HUMAN" 
## [4] "sp|P14770|GPIX_HUMAN"  "sp|Q15555|MARE2_HUMAN" "sp|Q86UX7|URP2_HUMAN" 
## [7] "sp|Q14247|SRC8_HUMAN"
intersect(plexB_up,plexA_dn)
## [1] "sp|P18428|LBP_HUMAN"   "sp|Q5D862|FILA2_HUMAN" "sp|P01766|HV313_HUMAN"

Ranking of proteins by logFC

results_A$score <- sign(results_A$logFC) * -log10(results_A$P.Value)
results_B$score <- sign(results_B$logFC) * -log10(results_B$P.Value)

results_m <- merge(results_A,results_B,by="Protein")

ranks_m <- results_m[,c("Protein","score.x","score.y")]
rownames(ranks_m) <- ranks_m$Protein ; ranks_m$Protein=NULL
colnames(ranks_m) <- c("Plex A","Plex B")

plot(ranks_m)

mylm <- lm(ranks_m$`Plex B` ~ ranks_m$`Plex A`)
abline(mylm,lwd=2,lty=2,col="red")
COR=signif(cor(ranks_m),3)[2,1]
COR
## [1] -0.551
mtext(paste("Pearson correlation=",COR))

Cross batch analysis

Look at the proteins that are differentially expressed across batches.

IGHV and IHKV are over-represented.

Would be interesting to look at the composition of these proteins to understand the batch effect better.

design_batch <- model.matrix(~ Plex , data = meta_combined)

fit_batch <- lmFit(expr_combined_imp, design_batch)

fit_batch <- eBayes(fit_batch)

summary(decideTests(fit_batch))
##        (Intercept) PlexB
## Down             0   299
## NotSig           0   331
## Up             904   274
results_batch <- topTable(
  fit_batch,
  coef = ncol(summary(decideTests(fit_batch))),
  number = Inf,
  sort.by = "P"
)

results_batch$Protein <- rownames(results_batch)

# Add gene annotation
results_batch <- merge(
  results_batch,
  protein_info_combined,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

results_batch <- results_batch[order(results_batch$P.Value),]

head(results_batch,10) |>
    kbl(caption = "Proteins related to Plex A/B differences.") |>
    kable_paper("hover", full_width = F)
Proteins related to Plex A/B differences.
Protein logFC AveExpr t P.Value adj.P.Val B gene
713 sp|Q14204|DYHC1_HUMAN -0.4230034 4.335732 -39.76442 0 0 43.95389 DYNC1H1
861 sp|Q9NY33|DPP3_HUMAN -0.3047488 4.394859 -34.77114 0 0 40.80698 DPP3
2 sp|A0A075B6H9|LV469_HUMAN -0.4129798 4.707875 -32.80910 0 0 39.43752 IGLV4-69
1 sp|A0A075B6H7|KV37_HUMAN 0.2517374 4.847043 32.61892 0 0 39.30028 IGKV3-7
37 sp|A0A0C4DH31|HV118_HUMAN -0.2425323 4.820578 -32.13371 0 0 38.94635 IGHV1-18
174 sp|P01814|HV270_HUMAN 0.2878827 4.691175 31.74363 0 0 38.65779 IGHV2-70
215 sp|P02765|FETUA_HUMAN -0.2652081 4.856746 -30.92674 0 0 38.04153 AHSG
538 sp|P35542|SAA4_HUMAN 0.2109984 4.810501 29.13942 0 0 36.63325 SAA4
100 sp|O75822|EIF3J_HUMAN 0.1900294 4.642248 28.85470 0 0 36.40087 EIF3J
23 sp|A0A0B4J1U7|HV601_HUMAN 0.3305690 4.712518 28.55146 0 0 36.15082 IGHV6-1
head(subset(results_batch,logFC>0),20) |>
    kbl(caption = "Proteins with higher expression in Plex B.") |>
    kable_paper("hover", full_width = F)
Proteins with higher expression in Plex B.
Protein logFC AveExpr t P.Value adj.P.Val B gene
1 sp|A0A075B6H7|KV37_HUMAN 0.2517374 4.847043 32.61892 0 0 39.30028 IGKV3-7
174 sp|P01814|HV270_HUMAN 0.2878827 4.691175 31.74363 0 0 38.65779 IGHV2-70
538 sp|P35542|SAA4_HUMAN 0.2109984 4.810501 29.13942 0 0 36.63325 SAA4
100 sp|O75822|EIF3J_HUMAN 0.1900294 4.642248 28.85470 0 0 36.40087 EIF3J
23 sp|A0A0B4J1U7|HV601_HUMAN 0.3305690 4.712518 28.55146 0 0 36.15082 IGHV6-1
21 sp|A0A0A0MT89|KJ01_HUMAN 0.5697265 4.262369 28.24926 0 0 35.89898 IGKJ1
155 sp|P01619|KV320_HUMAN 0.2411374 4.845132 27.88502 0 0 35.59180 IGKV3-20
846 sp|Q9HBR0|S38AA_HUMAN 0.2405421 4.426961 27.06020 0 0 34.88120 SLC38A10
374 sp|P11047|LAMC1_HUMAN 0.2874736 4.403495 27.03158 0 0 34.85616 LAMC1
12 sp|A0A075B6R9|KVD24_HUMAN 0.2853694 4.626380 24.22532 0 0 32.26496 IGKV2D-24
294 sp|P07225|PROS_HUMAN 0.1403271 4.861195 24.21825 0 0 32.25807 PROS1
780 sp|Q8IWV2|CNTN4_HUMAN 0.3482763 4.373094 23.43664 0 0 31.48412 CNTN4
77 sp|O15394|NCAM2_HUMAN 0.2564100 4.419027 23.04011 0 0 31.08198 NCAM2
556 sp|P41222|PTGDS_HUMAN 0.3360664 4.526600 22.39538 0 0 30.41383 PTGDS
789 sp|Q8NBS9|TXND5_HUMAN 0.2134845 4.440490 21.43401 0 0 29.38300 TXNDC5
720 sp|Q14623|IHH_HUMAN 0.2363269 4.429068 21.14677 0 0 29.06660 IHH
806 sp|Q93070|NAR4_HUMAN 0.2486540 4.422905 21.13923 0 0 29.05824 ART4
748 sp|Q16775|GLO2_HUMAN 0.3274800 4.383492 21.12337 0 0 29.04065 HAGH
828 sp|Q9BRK5|CAB45_HUMAN 0.2748577 4.409803 20.49463 0 0 28.33314 SDF4
81 sp|O43242|PSMD3_HUMAN 0.2209955 4.436734 19.83966 0 0 27.57449 PSMD3
head(subset(results_batch,logFC<0),20) |>
    kbl(caption = "Proteins with higher expression in Plex A.") |>
    kable_paper("hover", full_width = F)
Proteins with higher expression in Plex A.
Protein logFC AveExpr t P.Value adj.P.Val B gene
713 sp|Q14204|DYHC1_HUMAN -0.4230034 4.335732 -39.76442 0 0 43.95389 DYNC1H1
861 sp|Q9NY33|DPP3_HUMAN -0.3047488 4.394859 -34.77114 0 0 40.80698 DPP3
2 sp|A0A075B6H9|LV469_HUMAN -0.4129798 4.707875 -32.80910 0 0 39.43752 IGLV4-69
37 sp|A0A0C4DH31|HV118_HUMAN -0.2425323 4.820578 -32.13371 0 0 38.94635 IGHV1-18
215 sp|P02765|FETUA_HUMAN -0.2652081 4.856746 -30.92674 0 0 38.04153 AHSG
206 sp|P02746|C1QB_HUMAN -0.1673451 4.860047 -28.29022 0 0 35.93327 C1QB
53 sp|A6NMY6|AXA2L_HUMAN -0.3200210 4.387223 -27.54504 0 0 35.30147 ANXA2P2
209 sp|P02749|APOH_HUMAN -0.2119714 4.846936 -26.55510 0 0 34.43536 APOH
321 sp|P08237|PFKAM_HUMAN -0.3363273 4.379070 -24.70965 0 0 32.73241 PFKM
140 sp|P01019|ANGT_HUMAN -0.1383361 4.841181 -23.77543 0 0 31.82260 AGT
331 sp|P08575|PTPRC_HUMAN -0.2611470 4.416660 -23.66786 0 0 31.71563 PTPRC
800 sp|Q92530|PSMF1_HUMAN -0.4651059 4.314681 -22.98949 0 0 31.03016 PSMF1
48 sp|A0A0J9YX35|HV64D_HUMAN -0.2875261 4.728430 -22.93122 0 0 30.97039 IGHV3-64D
579 sp|P50148|GNAQ_HUMAN -0.3877185 4.481589 -22.67411 0 0 30.70489 GNAQ
746 sp|Q16698|DECR_HUMAN -0.3726718 4.360898 -20.61462 0 0 28.46970 DECR1
167 sp|P01743|HV146_HUMAN -0.1826418 4.636493 -19.22869 0 0 26.84588 IGHV1-46
129 sp|P00740|FA9_HUMAN -0.1775446 4.690761 -19.03372 0 0 26.60894 F9
856 sp|Q9NSC7|SIA7A_HUMAN -0.3343780 4.380045 -18.76737 0 0 26.28170 ST6GALNAC1
670 sp|Q05315|LEG10_HUMAN -0.5500179 4.272225 -18.38986 0 0 25.81067 CLC
728 sp|Q15084|PDIA6_HUMAN -0.5164362 4.244026 -18.06663 0 0 25.40046 PDIA6

PLSDA-batch

expr_combined_pl_corr <- PLSDA_batch(
    X = t(expr_combined_imp),
    Y.trt = meta_combined$Group, Y.bat = meta_combined$Plex,
    ncomp.trt = 1, ncomp.bat = 1,
    mode = "regression"
)

expr_combined_pl_corr$X.nobatch[1:5,1:7]
##       sp|A0A075B6H7|KV37_HUMAN sp|A0A075B6H9|LV469_HUMAN
## 19191                 4.837655                  4.695762
## 34629                 4.829669                  4.720578
## 34862                 4.841301                  4.718620
## 40431                 4.866308                  4.720954
## 40475                 4.823830                  4.720825
##       sp|A0A075B6I0|LV861_HUMAN sp|A0A075B6I1|LV460_HUMAN
## 19191                  4.823905                  4.645763
## 34629                  4.857660                  4.725371
## 34862                  4.763711                  4.631092
## 40431                  4.807648                  4.704044
## 40475                  4.800197                  4.575834
##       sp|A0A075B6J9|LV218_HUMAN sp|A0A075B6K0|LV316_HUMAN
## 19191                  4.598065                  4.471714
## 34629                  4.758746                  4.530039
## 34862                  4.756047                  4.479858
## 40431                  4.705885                  4.517386
## 40475                  4.695230                  4.502849
##       sp|A0A075B6K4|LV310_HUMAN
## 19191                  4.795010
## 34629                  4.877746
## 34862                  4.875459
## 40431                  4.895919
## 40475                  4.855380
expr_combined_pl_corrmx <- t(expr_combined_pl_corr$X.nobatch)
expr_combined_pl_corrmx[1:6,1:4]
##                              19191    34629    34862    40431
## sp|A0A075B6H7|KV37_HUMAN  4.837655 4.829669 4.841301 4.866308
## sp|A0A075B6H9|LV469_HUMAN 4.695762 4.720578 4.718620 4.720954
## sp|A0A075B6I0|LV861_HUMAN 4.823905 4.857660 4.763711 4.807648
## sp|A0A075B6I1|LV460_HUMAN 4.645763 4.725371 4.631092 4.704044
## sp|A0A075B6J9|LV218_HUMAN 4.598065 4.758746 4.756047 4.705885
## sp|A0A075B6K0|LV316_HUMAN 4.471714 4.530039 4.479858 4.517386

Limma on PLSDA-batch corrected data

# stash the old analysis
results_combined0 <- results_combined

design_combined <- model.matrix(~ Group, data = meta_combined)

fit_combined <- lmFit(expr_combined_pl_corrmx, design_combined)
fit_combined <- eBayes(fit_combined)
results_combined <- topTable(
  fit_combined,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_combined$Protein <- rownames(results_combined)
# Add gene annotation
results_combined <- merge(
  results_combined,
  protein_info_combined,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

results_combined <- results_combined[order(results_combined$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value (limma done on PLSDA-batch-corrected data)") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value (limma done on PLSDA-batch-corrected data)
Protein logFC AveExpr t P.Value adj.P.Val B gene
751 sp|Q27J81|INF2_HUMAN -0.1169760 4.464969 -4.987563 0.0000422 0.0381130 2.0408304 INF2
507 sp|P28482|MK01_HUMAN 0.1460155 4.370219 4.340409 0.0002195 0.0992365 0.4386394 MAPK1
891 sp|Q9Y287|ITM2B_HUMAN -0.0322520 4.425711 -3.288005 0.0030836 0.5117635 -2.0937802 ITM2B
871 sp|Q9UBQ0|VPS29_HUMAN -0.0557655 4.404436 -3.186019 0.0039520 0.5117635 -2.3280071 VPS29
571 sp|P49407|ARRB1_HUMAN 0.0752710 4.480139 3.172493 0.0040836 0.5117635 -2.3588576 ARRB1
844 sp|Q9HBB8|CDHR5_HUMAN 0.0384885 4.294720 3.055350 0.0054138 0.5117635 -2.6237749 CDHR5

Beeswarm plot using batch corrected data.

par(mfrow=c(3,3))

null <- lapply(1:9,function(i) {
  mygene <- results_combined$gene[i]
  myprot <- results_combined$Protein[i]
  mydat <- expr_combined_pl_corrmx[which(rownames(expr_combined_pl_corrmx) == myprot),]
  mygroups <- meta_combined[match(names(mydat),meta_combined$Sample),3]
  ctrl <- mydat[which(mygroups=="Control")]
  case <- mydat[which(mygroups=="Case")]
  xl <- list("Ctrl"=ctrl,"Case"=case)
  boxplot(xl,ylab="Relative protein expression",main=mygene,col="white",cex=0)
  beeswarm(xl,add=TRUE,cex=1.5,pch=19,col="darkgray")
} )

Now look at the ratio between MAPK1 and INF2.

prota="sp|P28482|MK01_HUMAN"
protb="sp|Q27J81|INF2_HUMAN"
dat_a <- expr_combined_pl_corrmx[which(rownames(expr_combined_pl_corrmx)==prota),]
dat_b <- expr_combined_pl_corrmx[which(rownames(expr_combined_pl_corrmx)==protb),]
abrat <- dat_a / dat_b
case_samples <- meta_combined[which(meta_combined$Group=="Case"),"Sample"]
ctrl_samples <- meta_combined[which(meta_combined$Group=="Control"),"Sample"]

case_dat <- abrat[which(names(abrat) %in% case_samples)]
ctrl_dat <- abrat[which(names(abrat) %in% ctrl_samples)]

rl <- list("Ctrl"=ctrl_dat, "Case"=case_dat)
boxplot(rl,ylab="Ratio",main="Ratio MAPK1/INF2",col="white",cex=0)
beeswarm(rl,add=TRUE,cex=1.5,pch=19,col="darkgray")

t.test(case_dat,ctrl_dat)
## 
##  Welch Two Sample t-test
## 
## data:  case_dat and ctrl_dat
## t = 6.4733, df = 22, p-value = 1.638e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03967759 0.07708572
## sample estimates:
## mean of x mean of y 
## 1.0084856 0.9501039

Incorporate time-to-event

tte <- read.table("time-to-event.tsv",header=TRUE)

meta_combined$timetoevent <- tte[match(meta_combined$Sample,tte$SampleID),"time_to_event"]

Pathway Enrichment Analysis

Using the combined protein profiles.

Some of the pathways are known to be involved in CVD.

Can try GOBP as well.

m <- mitch_import(results_combined,DEtype="limma",geneIDcol="gene")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 904
## Note: no. genes in output = 904
## Note: estimated proportion of input genes in output = 1
reactome_sets <- gmt_import("ReactomePathways_2026-04-02.gmt")

mres <- mitch_calc(m,  reactome_sets, minsetsize = 5, cores=8)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mres$enrichment_result,10) |>
    kbl(caption = "Top ranked Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked Reactome pathways by p-value
set setSize pANOVA s.dist p.adjustANOVA
317 Metabolism of lipids 30 0.0016612 -0.3366895 0.4883308
374 Peptide hormone metabolism 15 0.0039444 0.4329209 0.4883308
197 Formation of the cornified envelope 28 0.0041411 0.3175147 0.4883308
282 Keratinization 28 0.0041411 0.3175147 0.4883308
383 Platelet Adhesion to exposed collagen 10 0.0045581 -0.5203579 0.4883308
548 Syndecan interactions 10 0.0053484 -0.5109620 0.4883308
151 Diseases associated with O-glycosylation of proteins 9 0.0056877 -0.5344507 0.4883308
354 Non-integrin membrane-ECM interactions 19 0.0064894 -0.3641392 0.4883308
353 Neutrophil degranulation 152 0.0073285 0.1375980 0.4901951
327 Mitochondrial protein degradation 6 0.0091798 -0.6158129 0.5526257
head(subset(mres$enrichment_result,s.dist>0),10) |>
    kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked upregulated Reactome pathways by p-value
set setSize pANOVA s.dist p.adjustANOVA
374 Peptide hormone metabolism 15 0.0039444 0.4329209 0.4883308
197 Formation of the cornified envelope 28 0.0041411 0.3175147 0.4883308
282 Keratinization 28 0.0041411 0.3175147 0.4883308
353 Neutrophil degranulation 152 0.0073285 0.1375980 0.4901951
260 Innate Immune System 277 0.0171146 0.0992924 0.7473162
155 Diseases of carbohydrate metabolism 6 0.0184553 0.5571641 0.7473162
296 Lysosome Vesicle Biogenesis 6 0.0300802 0.5129918 0.7473162
253 Immune System 337 0.0448381 0.0796896 0.7570306
107 Complement cascade 91 0.0557145 0.1221497 0.8762406
312 Metabolism of Angiotensinogen to Angiotensins 7 0.0723389 0.3938525 0.8762406
head(subset(mres$enrichment_result,s.dist<0),10) |>
    kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked upregulated Reactome pathways by p-value
set setSize pANOVA s.dist p.adjustANOVA
317 Metabolism of lipids 30 0.0016612 -0.3366895 0.4883308
383 Platelet Adhesion to exposed collagen 10 0.0045581 -0.5203579 0.4883308
548 Syndecan interactions 10 0.0053484 -0.5109620 0.4883308
151 Diseases associated with O-glycosylation of proteins 9 0.0056877 -0.5344507 0.4883308
354 Non-integrin membrane-ECM interactions 19 0.0064894 -0.3641392 0.4883308
327 Mitochondrial protein degradation 6 0.0091798 -0.6158129 0.5526257
170 EPHA-mediated growth cone collapse 7 0.0110258 -0.5566173 0.6034118
88 Cellular responses to stimuli 86 0.0122852 -0.1638142 0.6163070
433 RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function 5 0.0200070 -0.6022247 0.7473162
422 RHOBTB GTPase Cycle 8 0.0205369 -0.4748884 0.7473162
if ( file.exists("mitchreport1.html") ) { unlink("mitchreport1.html") }
mitch_report(mres,"mitchreport1.html")
## Dataset saved as " /tmp/Rtmp5EBewr/mitchreport1.rds ".
## 
## 
## processing file: mitch.Rmd
## 1/36                             
## 2/36 [checklibraries]            
## 3/36                             
## 4/36 [peek]                      
## 5/36                             
## 6/36 [metrics]                   
## 7/36                             
## 8/36 [scatterplot]
## 9/36                             
## 10/36 [contourplot]               
## 11/36                             
## 12/36 [input_geneset_metrics1]    
## 13/36                             
## 14/36 [input_geneset_metrics2]
## 15/36                             
## 16/36 [input_geneset_metrics3]
## 17/36                             
## 18/36 [echart1d]                  
## 19/36 [echart2d]                  
## 20/36                             
## 21/36 [heatmap]                   
## 22/36                             
## 23/36 [effectsize]                
## 24/36                             
## 25/36 [results_table]             
## 26/36                             
## 27/36 [results_table_complete]    
## 28/36                             
## 29/36 [detailed_geneset_reports1d]
## 30/36                             
## 31/36 [detailed_geneset_reports2d]
## 32/36                             
## 33/36 [network]                   
## 34/36                             
## 35/36 [session_info]              
## 36/36
## output file: /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md
## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/Rtmp5EBewr/mitch_report.html --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /mnt/data/rlibs/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/Rtmp5EBewr/rmarkdown-str144e3d3adddcc5.html
## 
## Output created: /tmp/Rtmp5EBewr/mitch_report.html
## [1] TRUE

GOBP

#download.file("https://data.broadinstitute.org/gsea-msigdb/msigdb/release/2026.1.Hs/c5.go.v2026.1.Hs.symbols.gmt",destfile="  c5.go.v2026.1.Hs.symbols.gmt")
#https://data.broadinstitute.org/gsea-msigdb/msigdb/release/2026.1.Hs/c5.go.bp.v2026.1.Hs.symbols.gmt
gobp_sets <- gmt_import("c5.go.bp.v2026.1.Hs.symbols.gmt")

names(gobp_sets) <- gsub("GOBP_","",names(gobp_sets))

names(gobp_sets) <- gsub("_"," ",names(gobp_sets))

mres2 <- mitch_calc(m,  gobp_sets, minsetsize = 5, cores=8)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mres2$enrichment_result,10) |>
    kbl(caption = "Top ranked GO sets by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked GO sets by p-value
set setSize pANOVA s.dist p.adjustANOVA
572 INTERMEDIATE FILAMENT BASED PROCESS 19 0.0003630 0.4763009 0.3075625
1144 POSITIVE REGULATION OF GTPASE ACTIVITY 6 0.0006944 -0.8006682 0.3075625
1171 POSITIVE REGULATION OF LEUKOCYTE MEDIATED IMMUNITY 19 0.0007658 -0.4496580 0.3075625
1702 REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE 10 0.0009163 0.6076063 0.3075625
573 INTERMEDIATE FILAMENT ORGANIZATION 17 0.0010166 0.4637575 0.3075625
427 ESTABLISHMENT OF PROTEIN LOCALIZATION TO MITOCHONDRION 7 0.0010640 -0.7158783 0.3075625
1831 RESPONSE TO TRANSFORMING GROWTH FACTOR BETA 24 0.0010669 -0.3902462 0.3075625
1431 REGULATION OF CELL CYCLE 37 0.0014081 -0.3090184 0.3120112
549 IMMUNE RESPONSE TO TUMOR CELL 6 0.0015476 -0.7475872 0.3120112
703 MITOTIC CELL CYCLE PHASE TRANSITION 21 0.0015739 -0.4023621 0.3120112
head(subset(mres2$enrichment_result,s.dist>0),10) |>
    kbl(caption = "Top ranked upregulated GO sets by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked upregulated GO sets by p-value
set setSize pANOVA s.dist p.adjustANOVA
572 INTERMEDIATE FILAMENT BASED PROCESS 19 0.0003630 0.4763009 0.3075625
1702 REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE 10 0.0009163 0.6076063 0.3075625
573 INTERMEDIATE FILAMENT ORGANIZATION 17 0.0010166 0.4637575 0.3075625
1166 POSITIVE REGULATION OF KINASE ACTIVITY 8 0.0030881 0.6060268 0.3120112
1335 PROTEIN N LINKED GLYCOSYLATION 10 0.0036117 0.5337808 0.3207063
516 HEMATOPOIETIC OR LYMPHOID ORGAN DEVELOPMENT 8 0.0044569 0.5825893 0.3207063
563 INSULIN RECEPTOR SIGNALING PATHWAY 12 0.0046088 0.4749626 0.3207063
1704 REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE MEDIATED BY A CHEMICAL SIGNAL 8 0.0061454 0.5613839 0.3545307
1076 POSITIVE REGULATION OF BLOOD PRESSURE 5 0.0065027 0.7041157 0.3545307
1173 POSITIVE REGULATION OF LEUKOCYTE PROLIFERATION 19 0.0069226 0.3612846 0.3545307
head(subset(mres2$enrichment_result,s.dist<0),10) |>
    kbl(caption = "Top ranked upregulated GO sets by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked upregulated GO sets by p-value
set setSize pANOVA s.dist p.adjustANOVA
1144 POSITIVE REGULATION OF GTPASE ACTIVITY 6 0.0006944 -0.8006682 0.3075625
1171 POSITIVE REGULATION OF LEUKOCYTE MEDIATED IMMUNITY 19 0.0007658 -0.4496580 0.3075625
427 ESTABLISHMENT OF PROTEIN LOCALIZATION TO MITOCHONDRION 7 0.0010640 -0.7158783 0.3075625
1831 RESPONSE TO TRANSFORMING GROWTH FACTOR BETA 24 0.0010669 -0.3902462 0.3075625
1431 REGULATION OF CELL CYCLE 37 0.0014081 -0.3090184 0.3120112
549 IMMUNE RESPONSE TO TUMOR CELL 6 0.0015476 -0.7475872 0.3120112
703 MITOTIC CELL CYCLE PHASE TRANSITION 21 0.0015739 -0.4023621 0.3120112
359 DNA METABOLIC PROCESS 34 0.0017689 -0.3151454 0.3120112
1727 REGULATION OF T CELL MEDIATED IMMUNITY 12 0.0021199 -0.5149477 0.3120112
1832 RESPONSE TO TUMOR CELL 9 0.0021783 -0.5920546 0.3120112
if ( file.exists("mitchreport2.html") ) { unlink("mitchreport2.html") }
mitch_report(mres2,"mitchreport2.html")
## Dataset saved as " /tmp/Rtmp5EBewr/mitchreport2.rds ".
## 
## 
## processing file: mitch.Rmd
## 1/36                             
## 2/36 [checklibraries]            
## 3/36                             
## 4/36 [peek]                      
## 5/36                             
## 6/36 [metrics]                   
## 7/36                             
## 8/36 [scatterplot]
## 9/36                             
## 10/36 [contourplot]               
## 11/36                             
## 12/36 [input_geneset_metrics1]    
## 13/36                             
## 14/36 [input_geneset_metrics2]
## 15/36                             
## 16/36 [input_geneset_metrics3]
## 17/36                             
## 18/36 [echart1d]                  
## 19/36 [echart2d]                  
## 20/36                             
## 21/36 [heatmap]                   
## 22/36                             
## 23/36 [effectsize]                
## 24/36                             
## 25/36 [results_table]             
## 26/36                             
## 27/36 [results_table_complete]    
## 28/36                             
## 29/36 [detailed_geneset_reports1d]
## 30/36                             
## 31/36 [detailed_geneset_reports2d]
## 32/36                             
## 33/36 [network]                   
## 34/36                             
## 35/36 [session_info]              
## 36/36
## output file: /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md
## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/Rtmp5EBewr/mitch_report.html --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /mnt/data/rlibs/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/Rtmp5EBewr/rmarkdown-str144e3d1c5267c3.html
## 
## Output created: /tmp/Rtmp5EBewr/mitch_report.html
## [1] TRUE

Session Info

sessionInfo()
## R version 4.6.0 (2026-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gtools_3.9.5     beeswarm_0.4.0   tibble_3.3.1     tidyr_1.3.2     
##  [5] readr_2.2.0      eulerr_7.1.0     kableExtra_1.4.0 janitor_2.2.1   
##  [9] pheatmap_1.0.13  ggplot2_4.0.3    dplyr_1.2.1      PLSDAbatch_2.0.0
## [13] mitch_1.24.0     limma_3.68.2    
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3    rstudioapi_0.18.0     jsonlite_2.0.0       
##   [4] magrittr_2.0.5        farver_2.1.2          nloptr_2.2.1         
##   [7] rmarkdown_2.31        vctrs_0.7.3           minqa_1.2.8          
##  [10] rstatix_0.7.3         htmltools_0.5.9       broom_1.0.13         
##  [13] Formula_1.2-5         sass_0.4.10           KernSmooth_2.23-26   
##  [16] bslib_0.11.0          htmlwidgets_1.6.4     plyr_1.8.9           
##  [19] echarts4r_0.5.0       lubridate_1.9.5       cachem_1.1.0         
##  [22] igraph_2.3.1          mime_0.13             lifecycle_1.0.5      
##  [25] pkgconfig_2.0.3       Matrix_1.7-5          R6_2.6.1             
##  [28] fastmap_1.2.0         rbibutils_2.4.1       shiny_1.13.0         
##  [31] snakecase_0.11.1      digest_0.6.39         numDeriv_2016.8-1.1  
##  [34] rARPACK_0.11-0        GGally_2.4.0          RSpectra_0.16-2      
##  [37] textshaping_1.0.5     ellipse_0.5.0         ggpubr_0.6.3         
##  [40] labeling_0.4.3        timechange_0.4.0      polyclip_1.10-7      
##  [43] abind_1.4-8           compiler_4.6.0        bit64_4.8.0          
##  [46] withr_3.0.2           S7_0.2.2              backports_1.5.1      
##  [49] BiocParallel_1.46.0   carData_3.0-6         performance_0.16.0   
##  [52] ggstats_0.13.0        gplots_3.3.0          ggsignif_0.6.4       
##  [55] MASS_7.3-65           corpcor_1.6.10        caTools_1.18.3       
##  [58] tools_4.6.0           otel_0.2.0            httpuv_1.6.17        
##  [61] glue_1.8.1            nlme_3.1-169          promises_1.5.0       
##  [64] grid_4.6.0            polylabelr_1.0.0      reshape2_1.4.5       
##  [67] generics_0.1.4        gtable_0.3.6          tzdb_0.5.0           
##  [70] hms_1.1.4             xml2_1.5.2            car_3.1-5            
##  [73] ggrepel_0.9.8         pillar_1.11.1         stringr_1.6.0        
##  [76] vroom_1.7.1           later_1.4.8           splines_4.6.0        
##  [79] lattice_0.22-9        bit_4.6.0             tidyselect_1.2.1     
##  [82] mixOmics_6.36.0       knitr_1.51            reformulas_0.4.4     
##  [85] gridExtra_2.3         svglite_2.2.2         xfun_0.57            
##  [88] statmod_1.5.2         matrixStats_1.5.0     stringi_1.8.7        
##  [91] statnet.common_4.13.0 yaml_2.3.12           boot_1.3-32          
##  [94] evaluate_1.0.5        codetools_0.2-20      cli_3.6.6            
##  [97] xtable_1.8-8          systemfonts_1.3.2     Rdpack_2.6.6         
## [100] jquerylib_0.1.4       network_1.20.0        dichromat_2.0-0.1    
## [103] Rcpp_1.1.1-1.1        coda_0.19-4.1         parallel_4.6.0       
## [106] bitops_1.0-9          lme4_2.0-1            viridisLite_0.4.3    
## [109] lmerTest_3.2-1        scales_1.4.0          insight_1.5.0        
## [112] purrr_1.2.2           crayon_1.5.3          rlang_1.2.0
save.image("analysis4_mz.Rdata")