Protein Signatures as Biomarkers for Cardiovascular Disease in HIV Patients

1. Installing Packages

#{r install-packages, eval=FALSE} #install.packages(c("tidyverse","readr","janitor","pheatmap","ggplot2")) #install.packages("BiocManager") #BiocManager::install(c("limma","clusterProfiler","org.Hs.eg.db","umap")) #BiocManager::install("ReactomePA") #

2.Load all R libraries needed for data processing and visualization.

library(BiocManager)
library(dplyr)
library(tidyverse)
library(readr)
library(janitor)
library(limma)
library(pheatmap)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(umap)
library(ReactomePA)
library(tibble)
library(knitr)
library(mitch)
library(stringr)
library(msigdbr)
library(beeswarm)
library("kableExtra")
library("eulerr")

3. Load files

plexA <- read_tsv("abundance_protein_MD_plexA.tsv")

## Rows: 963 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

plexB <- read_tsv("abundance_protein_MD_plexB.tsv")

## Rows: 962 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

plexA <- janitor::clean_names(plexA)
plexB <- janitor::clean_names(plexB)

4. Keep relevant columns

plexA <- dplyr::select(plexA, protein, gene,
                       x126, x127n, x127c, x128n, x128c,
                       x129n, x129c, x130n, x130c,
                       x131n, x131c, x132n)

plexB <- dplyr::select(plexB, protein, gene,
                       x126, x127n, x127c, x128n, x128c,
                       x129n, x129c, x130n, x130c,
                       x131n, x131c, x132n)

5. Rename TMT channels to sample IDs

annotationA <- c(
  x126="19191", x127n="34629", x127c="34862",
  x128n="40431", x128c="40475", x129n="35038",
  x129c="34866", x130n="41436", x130c="41383",
  x131n="40316", x131c="3746", x132n="102953"
)

annotationB <- c(
  x126="40797", x127n="4177", x127c="103376",
  x128n="47542", x128c="47442", x129n="42977",
  x129c="43081", x130n="113835", x130c="113856",
  x131n="19584", x131c="118117", x132n="118008"
)

colnames(plexA)[match(names(annotationA), colnames(plexA))] <- annotationA
colnames(plexB)[match(names(annotationB), colnames(plexB))] <- annotationB

6. Merge plexes

merged <- full_join(plexA, plexB, by=c("protein","gene"))
head(merged)

## # A tibble: 6 × 26
##   protein  gene  `19191` `34629` `34862` `40431` `40475` `35038` `34866` `41436`
##   <chr>    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 sp|A0A0… IGKV…    25.2    25.4    25.4    26.1    25.1    25.6    25.7    25.4
## 2 sp|A0A0… IGLV…    29.0    29.8    29.6    29.5    29.4    28.8    29.9    29.4
## 3 sp|A0A0… IGLV…    27.5    28.5    26.6    27.4    27.4    26.8    27.3    26.6
## 4 sp|A0A0… IGLV…    21.1    NA      21.0    21.9    19.9    21.2    21.2    20.1
## 5 sp|A0A0… IGLV…    22.4    25.3    25.1    24.0    23.9    24.5    24.7    24.3
## 6 sp|A0A0… IGLV…    21.4    22.3    21.4    21.7    21.5    23.8    22.1    22.6
## # ℹ 16 more variables: `41383` <dbl>, `40316` <dbl>, `3746` <dbl>,
## #   `102953` <dbl>, `40797` <dbl>, `4177` <dbl>, `103376` <dbl>, `47542` <dbl>,
## #   `47442` <dbl>, `42977` <dbl>, `43081` <dbl>, `113835` <dbl>,
## #   `113856` <dbl>, `19584` <dbl>, `118117` <dbl>, `118008` <dbl>

7. Expression matrix for Merged plexes, Plex A and Plex B

expr_combined <- as.data.frame(merged)
protein_info_combined <- expr_combined[, c("protein", "gene")]
rownames(expr_combined) <- expr_combined$protein
expr_combined <- expr_combined[, !(colnames(expr_combined) %in% c("protein","gene"))]
expr_combined <- as.matrix(expr_combined)
storage.mode(expr_combined) <- "numeric"

expr_A <- as.data.frame(plexA)
protein_info_A <- expr_A[, c("protein", "gene")]
rownames(expr_A) <- expr_A$protein
expr_A <- expr_A[, !(colnames(expr_A) %in% c("protein","gene"))]
expr_A <- as.matrix(expr_A)
storage.mode(expr_A) <- "numeric"
head(expr_A)

##                              19191    34629    34862    40431    40475    35038
## sp|A0A075B6H7|KV37_HUMAN  25.20197 25.39573 25.43255 26.05771 25.05397 25.57650
## sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860
## sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964
## sp|A0A075B6I1|LV460_HUMAN 21.13980       NA 20.95866 21.90418 19.86279 21.18880
## sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498
## sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554
##                              34866    41436    41383    40316     3746   102953
## sp|A0A075B6H7|KV37_HUMAN  25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
## sp|A0A075B6H9|LV469_HUMAN 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
## sp|A0A075B6I0|LV861_HUMAN 27.26845 26.57449 28.21526 26.57432 26.75554       NA
## sp|A0A075B6I1|LV460_HUMAN 21.20105 20.12885 20.95064 21.09826 20.35881       NA
## sp|A0A075B6J9|LV218_HUMAN 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
## sp|A0A075B6K0|LV316_HUMAN 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397

expr_B <- as.data.frame(plexB)
protein_info_B <- expr_B[, c("protein", "gene")]
rownames(expr_B) <- expr_B$protein
expr_B <- expr_B[, !(colnames(expr_B) %in% c("protein","gene"))]
expr_B <- as.matrix(expr_B)
storage.mode(expr_B) <- "numeric"
head(expr_B)

##                              40797     4177   103376    47542    47442    42977
## sp|A0A075B6H7|KV37_HUMAN  30.46372 30.34873 30.21146 30.78558 30.36076 30.19026
## sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953
## sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399
## sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525
## sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839       NA 13.41395 17.83912 14.71371
## sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357
##                              43081   113835   113856    19584   118117   118008
## sp|A0A075B6H7|KV37_HUMAN  30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
## sp|A0A075B6H9|LV469_HUMAN 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
## sp|A0A075B6I0|LV861_HUMAN 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
## sp|A0A075B6I1|LV460_HUMAN 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
## sp|A0A075B6I4|LVX54_HUMAN 15.67450 14.14982 13.30315       NA 13.41029 14.51386
## sp|A0A075B6J9|LV218_HUMAN 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800

8. Filter proteins (present in >=50% of samples)

keep_combined <- rowSums(!is.na(expr_combined)) >= ncol(expr_combined)*0.5
expr_combined_filt <- expr_combined[keep_combined, ]

keep_A <- rowSums(!is.na(expr_A)) >= ncol(expr_A)*0.5
expr_A_filt <- expr_A[keep_A, ]
head(expr_A_filt)

##                              19191    34629    34862    40431    40475    35038
## sp|A0A075B6H7|KV37_HUMAN  25.20197 25.39573 25.43255 26.05771 25.05397 25.57650
## sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860
## sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964
## sp|A0A075B6I1|LV460_HUMAN 21.13980       NA 20.95866 21.90418 19.86279 21.18880
## sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498
## sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554
##                              34866    41436    41383    40316     3746   102953
## sp|A0A075B6H7|KV37_HUMAN  25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
## sp|A0A075B6H9|LV469_HUMAN 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
## sp|A0A075B6I0|LV861_HUMAN 27.26845 26.57449 28.21526 26.57432 26.75554       NA
## sp|A0A075B6I1|LV460_HUMAN 21.20105 20.12885 20.95064 21.09826 20.35881       NA
## sp|A0A075B6J9|LV218_HUMAN 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
## sp|A0A075B6K0|LV316_HUMAN 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397

keep_B <- rowSums(!is.na(expr_B)) >= ncol(expr_B)*0.5
expr_B_filt <- expr_B[keep_B, ]
head(expr_B_filt)

##                              40797     4177   103376    47542    47442    42977
## sp|A0A075B6H7|KV37_HUMAN  30.46372 30.34873 30.21146 30.78558 30.36076 30.19026
## sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953
## sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399
## sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525
## sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839       NA 13.41395 17.83912 14.71371
## sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357
##                              43081   113835   113856    19584   118117   118008
## sp|A0A075B6H7|KV37_HUMAN  30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
## sp|A0A075B6H9|LV469_HUMAN 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
## sp|A0A075B6I0|LV861_HUMAN 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
## sp|A0A075B6I1|LV460_HUMAN 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
## sp|A0A075B6I4|LVX54_HUMAN 15.67450 14.14982 13.30315       NA 13.41029 14.51386
## sp|A0A075B6J9|LV218_HUMAN 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800

9. QC before normalization

qc_sample_combined <- data.frame(
  Sample = colnames(expr_combined_filt),
  Detected_Proteins = colSums(!is.na(expr_combined_filt)),
  Missing_Percent = colMeans(is.na(expr_combined_filt)) * 100,
  Median_Intensity = apply(expr_combined_filt, 2, median, na.rm = TRUE),
  SD_Intensity = apply(expr_combined_filt, 2, sd, na.rm = TRUE)
)

# Assign plex
plexA_samples <- c(
  "19191","34629","34862","40431","40475","35038",
  "34866","41436","41383","40316","3746","102953"
)

qc_sample_combined$Plex <- ifelse(qc_sample_combined$Sample %in% plexA_samples, "A", "B")

# QC summary by plex
qc_plex_combined <- qc_sample_combined %>%
  group_by(Plex) %>%
  summarise(
    N_Samples = n(),
    Mean_Detected_Proteins = mean(Detected_Proteins),
    Mean_Missing_Percent = mean(Missing_Percent),
    Mean_Median_Intensity = mean(Median_Intensity),
    .groups = "drop"
  )


kable(qc_sample_combined)

	Sample	Detected_Proteins	Missing_Percent	Median_Intensity	SD_Intensity	Plex
19191	19191	811	10.28761	22.61437	3.608051	A
34629	34629	768	15.04425	22.52596	3.824270	A
34862	34862	792	12.38938	22.47182	3.687522	A
40431	40431	767	15.15487	22.17860	3.914548	A
40475	40475	768	15.04425	22.27699	3.916427	A
35038	35038	806	10.84071	22.40563	3.668941	A
34866	34866	805	10.95133	22.46457	3.701970	A
41436	41436	779	13.82743	22.34549	3.813249	A
41383	41383	791	12.50000	22.41082	3.736537	A
40316	40316	762	15.70796	22.20311	3.877041	A
3746	3746	797	11.83628	22.94357	3.486882	A
102953	102953	765	15.37611	22.48856	3.758113	A
40797	40797	766	15.26549	22.30942	3.775893	B
4177	4177	782	13.49558	22.93066	3.365816	B
103376	103376	714	21.01770	22.29280	3.759772	B
47542	47542	780	13.71681	22.25514	3.563151	B
47442	47442	731	19.13717	22.16957	3.792444	B
42977	42977	772	14.60177	22.38404	3.503025	B
43081	43081	776	14.15929	22.13263	3.707914	B
113835	113835	726	19.69027	22.26161	3.831329	B
113856	113856	768	15.04425	22.04823	3.841864	B
19584	19584	764	15.48673	22.35815	3.493251	B
118117	118117	727	19.57965	22.31801	3.876589	B
118008	118008	675	25.33186	22.33120	3.854068	B

kable(qc_plex_combined)

Plex	N_Samples	Mean_Detected_Proteins	Mean_Missing_Percent	Mean_Median_Intensity
A	12	784.2500	13.24668	22.44412
B	12	748.4167	17.21055	22.31595

# QC plots
qc_long_combined <- as.data.frame(expr_combined_filt) %>%
  rownames_to_column("Protein") %>%
  pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
  filter(is.finite(Intensity))

# Missingness plot
ggplot(qc_sample_combined,
       aes(x = reorder(Sample, Missing_Percent),
           y = Missing_Percent,
           fill = Plex)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(title = "Missingness per Sample",
       x = "Sample", y = "% Missing")

# Detected proteins plot
ggplot(qc_sample_combined,
       aes(x = reorder(Sample, Detected_Proteins),
           y = Detected_Proteins,
           fill = Plex)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(title = "Detected Proteins per Sample",
       x = "Sample", y = "Detected Proteins")

# Density plot
ggplot(qc_long_combined,
       aes(x = Intensity, color = Sample)) +
  geom_density(linewidth = 0.8) +
  theme_bw() +
  labs(title = "Intensity Distribution per Sample",
       x = "Intensity", y = "Density")

# Correlation heatmap
cor_pre_combined <- cor(expr_combined_filt, use = "pairwise.complete.obs")

pheatmap(cor_pre_combined,
         main = "Sample Correlation Heatmap (Pre-normalization)")

# Plex-level comparison
ggplot(qc_plex_combined,
       aes(x = Plex, y = Mean_Detected_Proteins, fill = Plex)) +
  geom_col() +
  theme_bw() +
  labs(title = "Mean Detected Proteins by Plex",
       x = "Plex", y = "Mean Detected Proteins")

10. Quantile normalization and log2 transform

expr_combined_norm <- normalizeBetweenArrays(expr_combined_filt, method = "quantile")
expr_combined_log <- log2(expr_combined_norm + 1)
boxplot(expr_combined_log, main = "Post-normalization for both plexes", las = 2)

# Plex A
expr_A_norm <- normalizeBetweenArrays(expr_A_filt, method = "quantile")
expr_A_log <- log2(expr_A_norm + 1)
boxplot(expr_A_log, main = "Post-normalization for Plex A", las = 2)

# plex B
expr_B_norm <- normalizeBetweenArrays(expr_B_filt, method = "quantile")
expr_B_log <- log2(expr_B_norm + 1)
boxplot(expr_B_log, main = "Post-normalization for Plex B", las = 2)

11. QC after normalization

qc_long_norm_combined <- as.data.frame(expr_combined_log) %>%
  rownames_to_column("Protein") %>%
  pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
  filter(is.finite(Intensity))

ggplot(qc_long_norm_combined,
       aes(x = Intensity, color = Sample)) +
  geom_density(linewidth = 0.8) +
  theme_bw() +
  labs(title = "Post-normalization Intensity Distribution (Combined)",
       x = "Log2 Intensity",
       y = "Density")

cor_post_combined <- cor(expr_combined_log, use = "pairwise.complete.obs")

pheatmap(cor_post_combined,
         main = "Sample Correlation Heatmap (Post-normalization)")

12. Impute missing values (median)

# Combine Plexes
expr_combined_imp <- expr_combined_log
for(i in 1:ncol(expr_combined_imp)){
  expr_combined_imp[is.na(expr_combined_imp[,i]), i] <- 
    median(expr_combined_imp[,i], na.rm = TRUE)
}

# Plex A
expr_A_imp <- expr_A_log

for(i in 1:ncol(expr_A_imp)){
  expr_A_imp[is.na(expr_A_imp[,i]), i] <- 
    median(expr_A_imp[,i], na.rm = TRUE)
}

# Plex B
expr_B_imp <- expr_B_log

for(i in 1:ncol(expr_B_imp)){
  expr_B_imp[is.na(expr_B_imp[,i]), i] <- 
    median(expr_B_imp[,i], na.rm = TRUE)
}
sum(is.na(expr_combined_imp))

## [1] 0

sum(is.na(expr_A_imp))

## [1] 0

sum(is.na(expr_B_imp))

## [1] 0

# Removing Zero Variance Proteins
expr_combined_imp <- expr_combined_imp[apply(expr_combined_imp, 1, var) != 0, ]
expr_A_imp <- expr_A_imp[apply(expr_A_imp, 1, var) != 0, ]
expr_B_imp <- expr_B_imp[apply(expr_B_imp, 1, var) != 0, ]

Metadata Creation

############################################
# METADATA CREATION (SINGLE SOURCE OF TRUTH)
############################################

# Create combined metadata from expression matrix
meta_combined <- data.frame(
  Sample = colnames(expr_combined_imp)
)

############################################
# Define Plex (based on known Plex A samples)
############################################

plexA_samples <- c(
  "19191","34629","34862","40431","40475","35038",
  "34866","41436","41383","40316","3746","102953"
)

meta_combined$Plex <- ifelse(meta_combined$Sample %in% plexA_samples,
                             "A", "B")

meta_combined$Plex <- factor(meta_combined$Plex)

############################################
# Define biological group (Case vs Control)
############################################

case_samples <- c(
  "19191","40797","34862","103376","40475","47442",
  "34866","43081","41383","113856","3746","118117"
)

meta_combined$Group <- ifelse(meta_combined$Sample %in% case_samples,
                              "Case", "Control")

meta_combined$Group <- factor(meta_combined$Group,
                              levels = c("Control","Case"))

############################################
# Final formatting
############################################

rownames(meta_combined) <- meta_combined$Sample

############################################
# Create Plex-specific metadata (CORRECT WAY)
############################################

meta_A <- meta_combined[meta_combined$Plex == "A", ]
meta_B <- meta_combined[meta_combined$Plex == "B", ]

############################################
# Align with expression matrices (CRITICAL)
############################################

meta_A <- meta_A[colnames(expr_A_imp), ]
meta_B <- meta_B[colnames(expr_B_imp), ]

############################################
# Keep only needed columns
############################################

meta_A <- meta_A[, c("Sample","Group")]
meta_B <- meta_B[, c("Sample","Group")]

############################################
# SANITY CHECKS (DO NOT SKIP)
############################################

# Alignment
all(rownames(meta_A) == colnames(expr_A_imp))

## [1] TRUE

all(rownames(meta_B) == colnames(expr_B_imp))

## [1] TRUE

all(rownames(meta_combined) == colnames(expr_combined_imp))

## [1] TRUE

# Distribution checks
table(meta_combined$Plex, meta_combined$Group)

##    
##     Control Case
##   A       6    6
##   B       6    6

table(meta_A$Group)

## 
## Control    Case 
##       6       6

table(meta_B$Group)

## 
## Control    Case 
##       6       6

PCA Before Batch Correction for combined plexes

pca_pre <- prcomp(t(expr_combined_imp), center = TRUE, scale. = TRUE)

pca_pre_df <- data.frame(
  PC1 = pca_pre$x[,1],
  PC2 = pca_pre$x[,2],
  Sample = colnames(expr_combined_imp),
  Plex = meta_combined$Plex,
  Group = meta_combined$Group
)

ggplot(pca_pre_df, aes(x = PC1, y = PC2, color = Plex, shape = Group)) +
  geom_point(size = 4) +
  geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
  theme_bw() +
  labs(title = "PCA Before Batch Correction",
       x = "PC1",
       y = "PC2")

batch Correction

expr_combined_corrected <- removeBatchEffect(
  expr_combined_imp,
  batch = meta_combined$Plex,
  design = model.matrix(~ Group, data = meta_combined)
)

# Quick check
head(expr_combined_corrected[,1:5])

##                              19191    34629    34862    40431    40475
## sp|A0A075B6H7|KV37_HUMAN  4.833346 4.830965 4.839526 4.871850 4.827196
## sp|A0A075B6H9|LV469_HUMAN 4.702996 4.718612 4.721695 4.712016 4.715457
## sp|A0A075B6I0|LV861_HUMAN 4.825501 4.858319 4.764883 4.807596 4.800510
## sp|A0A075B6I1|LV460_HUMAN 4.638888 4.726509 4.627839 4.711256 4.579935
## sp|A0A075B6J9|LV218_HUMAN 4.595879 4.759264 4.755084 4.708453 4.696748
## sp|A0A075B6K0|LV316_HUMAN 4.471802 4.529905 4.479845 4.517084 4.502633

# Dimensions unchanged
dim(expr_combined_corrected)

## [1] 904  24

PCA On Combined Plexes after removing Batch effect

pca_post <- prcomp(t(expr_combined_corrected), center = TRUE, scale. = TRUE)

pca_post_df <- data.frame(
  PC1 = pca_post$x[,1],
  PC2 = pca_post$x[,2],
  Sample = colnames(expr_combined_corrected),
  Plex = meta_combined$Plex,
  Group = meta_combined$Group
)

ggplot(pca_post_df, aes(x = PC1, y = PC2, color = Group, shape = Plex)) +
  geom_point(size = 4) +
  geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
  theme_bw() +
  labs(title = "PCA After Batch Correction",
       x = "PC1",
       y = "PC2")

Differential Expression Analysis

Combined plexes

design_combined <- model.matrix(~ Plex + Group, data = meta_combined)
fit_combined <- lmFit(expr_combined_imp, design_combined)
fit_combined <- eBayes(fit_combined)
results_combined <- topTable(
  fit_combined,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_combined$Protein <- rownames(results_combined)
# Add gene annotation
results_combined <- merge(
  results_combined,
  protein_info_combined,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)
# Add significance labels
results_combined$Significance <- "Not Significant"

results_combined$Significance[
  results_combined$adj.P.Val < 0.05 & results_combined$logFC > 1
] <- "Upregulated"
results_combined$Significance[
  results_combined$adj.P.Val < 0.05 & results_combined$logFC < -1
] <- "Downregulated"
# View

results_combined <- results_combined[order(results_combined$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value") |>
    kable_paper("hover", full_width = F)

Top ranked proteins by p-value
	Protein	logFC	AveExpr	t	P.Value	adj.P.Val	B	gene	Significance
751	sp\|Q27J81\|INF2_HUMAN	-0.1169760	4.464969	-4.867502	0.0000637	0.0576017	1.6695010	INF2	Not Significant
507	sp\|P28482\|MK01_HUMAN	0.1460155	4.370219	4.197978	0.0003398	0.1535753	0.0466474	MAPK1	Not Significant
96	sp\|O75356\|ENTP5_HUMAN	0.0411850	4.456341	3.137454	0.0045940	0.5645256	-2.4416598	ENTPD5	Not Significant
571	sp\|P49407\|ARRB1_HUMAN	0.0752710	4.480139	3.119861	0.0047905	0.5645256	-2.4810489	ARRB1	Not Significant
891	sp\|Q9Y287\|ITM2B_HUMAN	-0.0322520	4.425711	-3.009591	0.0062189	0.5645256	-2.7258946	ITM2B	Not Significant
609	sp\|P59998\|ARPC4_HUMAN	-0.0810547	4.524232	-2.956053	0.0070519	0.5645256	-2.8434299	ARPC4	Not Significant

Separate plexes

Analyse plex A and plex B.

design_A <- model.matrix(~ Group, data = meta_A)
fit_A <- lmFit(expr_A_imp, design_A)
fit_A <- eBayes(fit_A)
results_A <- topTable(
  fit_A,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_A$Protein <- rownames(results_A)
# Add gene annotation
results_A <- merge(
  results_A,
  protein_info_A,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

# Add significance labels
results_A$Significance <- "Not Significant"
results_A$Significance[
  results_A$adj.P.Val < 0.05 & results_A$logFC > 1
] <- "Upregulated"
results_A$Significance[
  results_A$adj.P.Val < 0.05 & results_A$logFC < -1
] <- "Downregulated"

# View
results_A <- results_A[order(results_A$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value in plex A") |>
    kable_paper("hover", full_width = F)

Top ranked proteins by p-value in plex A
	Protein	logFC	AveExpr	t	P.Value	adj.P.Val	B	gene	Significance
751	sp\|Q27J81\|INF2_HUMAN	-0.1169760	4.464969	-4.867502	0.0000637	0.0576017	1.6695010	INF2	Not Significant
507	sp\|P28482\|MK01_HUMAN	0.1460155	4.370219	4.197978	0.0003398	0.1535753	0.0466474	MAPK1	Not Significant
96	sp\|O75356\|ENTP5_HUMAN	0.0411850	4.456341	3.137454	0.0045940	0.5645256	-2.4416598	ENTPD5	Not Significant
571	sp\|P49407\|ARRB1_HUMAN	0.0752710	4.480139	3.119861	0.0047905	0.5645256	-2.4810489	ARRB1	Not Significant
891	sp\|Q9Y287\|ITM2B_HUMAN	-0.0322520	4.425711	-3.009591	0.0062189	0.5645256	-2.7258946	ITM2B	Not Significant
609	sp\|P59998\|ARPC4_HUMAN	-0.0810547	4.524232	-2.956053	0.0070519	0.5645256	-2.8434299	ARPC4	Not Significant

head(results_A)

##                   Protein       logFC  AveExpr         t      P.Value adj.P.Val
## 740 sp|Q86UF1|TSN33_HUMAN -0.20076209 4.396326 -4.595562 0.0006315126 0.3477544
## 270 sp|P06702|S10A9_HUMAN -0.13940577 4.711956 -4.203125 0.0012513585 0.3477544
## 715  sp|Q27J81|INF2_HUMAN -0.12265117 4.448334 -4.067512 0.0015917637 0.3477544
## 546 sp|P49593|PPM1F_HUMAN  0.10647979 4.172522  3.995491 0.0018102257 0.3477544
## 863 sp|Q9Y287|ITM2B_HUMAN -0.05635063 4.300458 -3.616202 0.0035936408 0.3477544
## 253 sp|P05109|S10A8_HUMAN -0.12567021 4.779495 -3.570575 0.0039058992 0.3477544
##               B    gene    Significance
## 740 -0.09244264 TSPAN33 Not Significant
## 270 -0.74720064  S100A9 Not Significant
## 715 -0.97779807    INF2 Not Significant
## 546 -1.10105802   PPM1F Not Significant
## 863 -1.75772600   ITM2B Not Significant
## 253 -1.83739665  S100A8 Not Significant

Plex B.

design_B <- model.matrix(~ Group, data = meta_B)
fit_B <- lmFit(expr_B_imp, design_B)
fit_B <- eBayes(fit_B)
results_B <- topTable(
  fit_B,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_B$Protein <- rownames(results_B)
# Add gene annotation
results_B <- merge(
  results_B,
  protein_info_B,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)
# Add significance labels
results_B$Significance <- "Not Significant"
results_B$Significance[
  results_B$adj.P.Val < 0.05 & results_B$logFC > 1
] <- "Upregulated"
results_B$Significance[
  results_B$adj.P.Val < 0.05 & results_B$logFC < -1
] <- "Downregulated"

# View
results_B <- results_B[order(results_B$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value in plex B") |>
    kable_paper("hover", full_width = F)

Top ranked proteins by p-value in plex B
	Protein	logFC	AveExpr	t	P.Value	adj.P.Val	B	gene	Significance
751	sp\|Q27J81\|INF2_HUMAN	-0.1169760	4.464969	-4.867502	0.0000637	0.0576017	1.6695010	INF2	Not Significant
507	sp\|P28482\|MK01_HUMAN	0.1460155	4.370219	4.197978	0.0003398	0.1535753	0.0466474	MAPK1	Not Significant
96	sp\|O75356\|ENTP5_HUMAN	0.0411850	4.456341	3.137454	0.0045940	0.5645256	-2.4416598	ENTPD5	Not Significant
571	sp\|P49407\|ARRB1_HUMAN	0.0752710	4.480139	3.119861	0.0047905	0.5645256	-2.4810489	ARRB1	Not Significant
891	sp\|Q9Y287\|ITM2B_HUMAN	-0.0322520	4.425711	-3.009591	0.0062189	0.5645256	-2.7258946	ITM2B	Not Significant
609	sp\|P59998\|ARPC4_HUMAN	-0.0810547	4.524232	-2.956053	0.0070519	0.5645256	-2.8434299	ARPC4	Not Significant

head(results_B)

##                   Protein       logFC  AveExpr         t      P.Value
## 112 sp|P00387|NB5R3_HUMAN -0.17683895 4.440961 -6.330268 3.786522e-05
## 528  sp|P40227|TCPZ_HUMAN -0.15017532 4.378868 -5.615484 1.136361e-04
## 590 sp|P61106|RAB14_HUMAN -0.17882092 4.385867 -5.528161 1.306021e-04
## 575  sp|P55084|ECHB_HUMAN -0.27293379 4.251690 -5.063074 2.790389e-04
## 282  sp|P07203|GPX1_HUMAN -0.15833288 4.361424 -4.767197 4.594180e-04
## 89  sp|O75356|ENTP5_HUMAN  0.08676138 4.370177  4.514696 7.096365e-04
##      adj.P.Val          B   gene    Significance
## 112 0.03233690  2.5541250 CYB5R3 Not Significant
## 528 0.03717808  1.5320233  CCT6A Not Significant
## 590 0.03717808  1.4014708  RAB14 Not Significant
## 575 0.05957481  0.6855750  HADHB Not Significant
## 282 0.07846859  0.2126430   GPX1 Not Significant
## 89  0.10100493 -0.2010684 ENTPD5 Not Significant

Comparison of Plex A and B results

The top 50 up and down regulated proteins are selected from Plex A and B and these lists were compared.

plexA_up <- head(subset(results_A,logFC>0),50)[,1]
plexA_dn <- head(subset(results_A,logFC<0),50)[,1]
plexB_up <- head(subset(results_B,logFC>0),50)[,1]
plexB_dn <- head(subset(results_B,logFC<0),50)[,1]

v1 <- list("A up"=plexA_up, "A dn"=plexA_dn, "B up"=plexB_up, "B dn"=plexB_dn)

plot(euler(v1), quantities = TRUE)

intersect(plexA_up,plexB_up)

## character(0)

intersect(plexA_dn,plexB_dn)

## [1] "sp|Q27J81|INF2_HUMAN"

intersect(plexA_up,plexB_dn)

## [1] "sp|P61106|RAB14_HUMAN" "sp|Q9UBW5|BIN2_HUMAN"  "sp|Q3ZCW2|LEGL_HUMAN" 
## [4] "sp|P14770|GPIX_HUMAN"  "sp|Q15555|MARE2_HUMAN" "sp|Q86UX7|URP2_HUMAN" 
## [7] "sp|Q14247|SRC8_HUMAN"

intersect(plexB_up,plexA_dn)

## [1] "sp|P18428|LBP_HUMAN"   "sp|Q5D862|FILA2_HUMAN" "sp|P01766|HV313_HUMAN"

Ranking of proteins by logFC

results_A$score <- sign(results_A$logFC) * -log10(results_A$P.Value)
results_B$score <- sign(results_B$logFC) * -log10(results_B$P.Value)

results_m <- merge(results_A,results_B,by="Protein")

ranks_m <- results_m[,c("Protein","score.x","score.y")]
rownames(ranks_m) <- ranks_m$Protein ; ranks_m$Protein=NULL
colnames(ranks_m) <- c("Plex A","Plex B")

plot(ranks_m)

mylm <- lm(ranks_m$`Plex B` ~ ranks_m$`Plex A`)
abline(mylm,lwd=2,lty=2,col="red")
COR=signif(cor(ranks_m),3)[2,1]
COR

## [1] -0.551

mtext(paste("Pearson correlation=",COR))

Pathway Enrichment Analysis

Using the combined protein profiles.

Some of the pathways are known to be involved in CVD.

m <- mitch_import(results_combined,DEtype="limma",geneIDcol="gene")

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 904

## Note: no. genes in output = 904

## Note: estimated proportion of input genes in output = 1

reactome_sets <- gmt_import("ReactomePathways_2026-04-02.gmt")

mres <- mitch_calc(m,  reactome_sets, minsetsize = 5, cores=8)

## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.

head(mres$enrichment_result,10) |>
    kbl(caption = "Top ranked Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)

Top ranked Reactome pathways by p-value
	set	setSize	pANOVA	s.dist	p.adjustANOVA
317	Metabolism of lipids	30	0.0017402	-0.3352403	0.4962473
374	Peptide hormone metabolism	15	0.0040461	0.4317210	0.4962473
197	Formation of the cornified envelope	28	0.0041121	0.3177593	0.4962473
282	Keratinization	28	0.0041121	0.3177593	0.4962473
383	Platelet Adhesion to exposed collagen	10	0.0044372	-0.5219239	0.4962473
548	Syndecan interactions	10	0.0053484	-0.5109620	0.4962473
151	Diseases associated with O-glycosylation of proteins	9	0.0059401	-0.5317194	0.4962473
354	Non-integrin membrane-ECM interactions	19	0.0065954	-0.3634255	0.4962473
353	Neutrophil degranulation	152	0.0074190	0.1373880	0.4962473
327	Mitochondrial protein degradation	6	0.0091377	-0.6161841	0.5500882

head(subset(mres$enrichment_result,s.dist>0),10) |>
    kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)

Top ranked upregulated Reactome pathways by p-value
	set	setSize	pANOVA	s.dist	p.adjustANOVA
374	Peptide hormone metabolism	15	0.0040461	0.4317210	0.4962473
197	Formation of the cornified envelope	28	0.0041121	0.3177593	0.4962473
282	Keratinization	28	0.0041121	0.3177593	0.4962473
353	Neutrophil degranulation	152	0.0074190	0.1373880	0.4962473
260	Innate Immune System	277	0.0187814	0.0978645	0.7508590
155	Diseases of carbohydrate metabolism	6	0.0190101	0.5545657	0.7508590
296	Lysosome Vesicle Biogenesis	6	0.0294880	0.5148478	0.7508590
253	Immune System	337	0.0488998	0.0782347	0.7956135
107	Complement cascade	91	0.0592232	0.1204466	0.8987126
312	Metabolism of Angiotensinogen to Angiotensins	7	0.0716482	0.3948081	0.8987126

head(subset(mres$enrichment_result,s.dist<0),10) |>
    kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)

Top ranked upregulated Reactome pathways by p-value
	set	setSize	pANOVA	s.dist	p.adjustANOVA
317	Metabolism of lipids	30	0.0017402	-0.3352403	0.4962473
383	Platelet Adhesion to exposed collagen	10	0.0044372	-0.5219239	0.4962473
548	Syndecan interactions	10	0.0053484	-0.5109620	0.4962473
151	Diseases associated with O-glycosylation of proteins	9	0.0059401	-0.5317194	0.4962473
354	Non-integrin membrane-ECM interactions	19	0.0065954	-0.3634255	0.4962473
327	Mitochondrial protein degradation	6	0.0091377	-0.6161841	0.5500882
170	EPHA-mediated growth cone collapse	7	0.0103976	-0.5610766	0.5690322
88	Cellular responses to stimuli	86	0.0129358	-0.1626201	0.6489475
433	RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function	5	0.0178210	-0.6133482	0.7508590
422	RHOBTB GTPase Cycle	8	0.0216807	-0.4707031	0.7508590

if ( file.exists("mitchreport1.html") ) { unlink("mitchreport1.html") }
mitch_report(mres,"mitchreport1.html")

## Dataset saved as " /tmp/RtmpM1seI5/mitchreport1.rds ".

## 
## 
## processing file: mitch.Rmd

## 1/36                             
## 2/36 [checklibraries]            
## 3/36                             
## 4/36 [peek]                      
## 5/36                             
## 6/36 [metrics]                   
## 7/36                             
## 8/36 [scatterplot]

## 9/36                             
## 10/36 [contourplot]               
## 11/36                             
## 12/36 [input_geneset_metrics1]    
## 13/36                             
## 14/36 [input_geneset_metrics2]

## 15/36                             
## 16/36 [input_geneset_metrics3]

## 17/36                             
## 18/36 [echart1d]                  
## 19/36 [echart2d]                  
## 20/36                             
## 21/36 [heatmap]                   
## 22/36                             
## 23/36 [effectsize]                
## 24/36                             
## 25/36 [results_table]             
## 26/36                             
## 27/36 [results_table_complete]    
## 28/36                             
## 29/36 [detailed_geneset_reports1d]

## 30/36                             
## 31/36 [detailed_geneset_reports2d]
## 32/36                             
## 33/36 [network]                   
## 34/36                             
## 35/36 [session_info]              
## 36/36

## output file: /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md

## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpM1seI5/mitch_report.html --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /mnt/data/rlibs/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpM1seI5/rmarkdown-str13c6855b1103e5.html

## 
## Output created: /tmp/RtmpM1seI5/mitch_report.html

## [1] TRUE

IG Filtering

# plex A
ig_filter_A <- grepl("IG|KV|LV|IGH|IGK|IGL", rownames(expr_A_imp))
expr_A_clean <- expr_A_imp[!ig_filter_A, ]

# Plex B
ig_filter_B <- grepl("IG|KV|LV|IGH|IGK|IGL", rownames(expr_B_imp))
expr_B_clean <- expr_B_imp[!ig_filter_B, ]

# DE For Plex A
design_A <- model.matrix(~ Group, data = meta_A)

fit_A_clean <- lmFit(expr_A_clean, design_A)
fit_A_clean <- eBayes(fit_A_clean)

results_A_clean <- topTable(fit_A_clean, number = Inf)

## Removing intercept from test coefficients

results_A_clean$Protein <- rownames(results_A_clean)

# DE for Plex B
design_B <- model.matrix(~ Group, data = meta_B)

fit_B_clean <- lmFit(expr_B_clean, design_B)
fit_B_clean <- eBayes(fit_B_clean)

results_B_clean <- topTable(fit_B_clean, number = Inf)

## Removing intercept from test coefficients

results_B_clean$Protein <- rownames(results_B_clean)

head(results_A_clean)

##                             logFC  AveExpr         t      P.Value adj.P.Val
## sp|Q86UF1|TSN33_HUMAN -0.20076209 4.396326 -4.590756 0.0006400107 0.3320219
## sp|P06702|S10A9_HUMAN -0.13940577 4.711956 -4.198574 0.0012668477 0.3320219
## sp|Q27J81|INF2_HUMAN  -0.12265117 4.448334 -4.063033 0.0016109123 0.3320219
## sp|P49593|PPM1F_HUMAN  0.10647979 4.172522  3.990973 0.0018319207 0.3320219
## sp|Q9Y287|ITM2B_HUMAN -0.05635063 4.300458 -3.611170 0.0036379341 0.3320219
## sp|P05109|S10A8_HUMAN -0.12567021 4.779495 -3.566743 0.0039450988 0.3320219
##                                B               Protein
## sp|Q86UF1|TSN33_HUMAN -0.1018677 sp|Q86UF1|TSN33_HUMAN
## sp|P06702|S10A9_HUMAN -0.7548051 sp|P06702|S10A9_HUMAN
## sp|Q27J81|INF2_HUMAN  -0.9848090  sp|Q27J81|INF2_HUMAN
## sp|P49593|PPM1F_HUMAN -1.1078887 sp|P49593|PPM1F_HUMAN
## sp|Q9Y287|ITM2B_HUMAN -1.7641828 sp|Q9Y287|ITM2B_HUMAN
## sp|P05109|S10A8_HUMAN -1.8416104 sp|P05109|S10A8_HUMAN

head(results_B_clean)

##                             logFC  AveExpr         t      P.Value  adj.P.Val
## sp|P00387|NB5R3_HUMAN -0.17683895 4.440961 -6.325361 3.860124e-05 0.03076519
## sp|P40227|TCPZ_HUMAN  -0.15017532 4.378868 -5.611405 1.154988e-04 0.03531205
## sp|P61106|RAB14_HUMAN -0.17882092 4.385867 -5.523120 1.329186e-04 0.03531205
## sp|P55084|ECHB_HUMAN  -0.27293379 4.251690 -5.057160 2.840958e-04 0.05660608
## sp|P07203|GPX1_HUMAN  -0.15833288 4.361424 -4.762751 4.662760e-04 0.07432439
## sp|O75356|ENTP5_HUMAN  0.08676138 4.370177  4.513899 7.152604e-04 0.09501042
##                                B               Protein
## sp|P00387|NB5R3_HUMAN  2.5247563 sp|P00387|NB5R3_HUMAN
## sp|P40227|TCPZ_HUMAN   1.5147695  sp|P40227|TCPZ_HUMAN
## sp|P61106|RAB14_HUMAN  1.3840769 sp|P61106|RAB14_HUMAN
## sp|P55084|ECHB_HUMAN   0.6734127  sp|P55084|ECHB_HUMAN
## sp|P07203|GPX1_HUMAN   0.2068041  sp|P07203|GPX1_HUMAN
## sp|O75356|ENTP5_HUMAN -0.1976055 sp|O75356|ENTP5_HUMAN

Top 50 up and down regulated genes after IG removal

# plex A
# top 50 up
top50_up_A <- results_A_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC > 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)

# top 50 down
top50_down_A <- results_A_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC < 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)

# Plex B
# Top 50 UP
top50_up_B <- results_B_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC > 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)
# Top 50 down
top50_down_B <- results_B_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC < 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)

up_A <- top50_up_A$Protein
down_A <- top50_down_A$Protein

up_B <- top50_up_B$Protein
down_B <- top50_down_B$Protein

# Up overlap
common_up <- intersect(up_A, up_B)

# Down overlap
common_down <- intersect(down_A, down_B)

# Print counts
cat("Common UP proteins:", length(common_up), "\n")

## Common UP proteins: 0

cat("Common DOWN proteins:", length(common_down), "\n")

## Common DOWN proteins: 1

library(eulerr)

# UP proteins Euler
fit_up <- euler(list(
  "Plex A Up" = up_A,
  "Plex B Up" = up_B
))

plot(fit_up,
     fills = c("#4CAF50", "#2196F3"),
     alpha = 0.6,
     quantities = TRUE,
     main = "Top 50 Upregulated Proteins Overlap")

# down regulated
fit_down <- euler(list(
  "Plex A Down" = down_A,
  "Plex B Down" = down_B
))

plot(fit_down,
     fills = c("#FF7043", "#AB47BC"),
     alpha = 0.6,
     quantities = TRUE,
     main = "Top 50 Downregulated Proteins Overlap")

Pathway Enrichment

results_A_clean$gene <- str_extract(results_A_clean$Protein, "(?<=\\|)[A-Z0-9]+(?=_HUMAN)")
results_B_clean$gene <- str_extract(results_B_clean$Protein, "(?<=\\|)[A-Z0-9]+(?=_HUMAN)")

# Plex A CLEAN
rank_A_clean <- results_A_clean %>%
  dplyr::select(gene, logFC) %>%
  filter(!is.na(gene)) %>%
  group_by(gene) %>%
  summarise(logFC = mean(logFC), .groups = "drop")

gene_list_A_clean <- rank_A_clean$logFC
names(gene_list_A_clean) <- rank_A_clean$gene

df_A_clean <- data.frame(logFC = gene_list_A_clean)

# run mitch
mitch_A_clean <- mitch_calc(df_A_clean, reactome_sets, minsetsize = 10)

## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.

res_mitch_A_clean <- mitch_A_clean$enrichment_result %>%
  as.data.frame() %>%
  dplyr::arrange(p.adjustANOVA)

# Plex B
rank_B_clean <- results_B_clean %>%
  dplyr::select(gene, logFC) %>%
  filter(!is.na(gene)) %>%
  group_by(gene) %>%
  summarise(logFC = mean(logFC), .groups = "drop")

gene_list_B_clean <- rank_B_clean$logFC
names(gene_list_B_clean) <- rank_B_clean$gene

df_B_clean <- data.frame(logFC = gene_list_B_clean)

mitch_B_clean <- mitch_calc(df_B_clean, reactome_sets, minsetsize = 10)

## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.

res_mitch_B_clean <- mitch_B_clean$enrichment_result %>%
  as.data.frame() %>%
  dplyr::arrange(p.adjustANOVA)



go_A_clean <- enrichGO(
  gene = names(gene_list_A_clean),
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  readable = TRUE
)

## 'select()' returned 1:many mapping between keys and columns

## Warning in bitr(gene, fromType = fromType, toType = "ENTREZID", OrgDb = OrgDb):
## 57.63% of input gene IDs are fail to map...

go_A_clean_df <- as.data.frame(go_A_clean)

top_mitch_A <- res_mitch_A_clean %>%
  arrange(p.adjustANOVA) %>%
  slice_head(n = 20)

kable(top_mitch_A, 
      caption = "Top Reactome Pathways (Plex A - IG Removed)")

Top Reactome Pathways (Plex A - IG Removed)
set	setSize	pANOVA	s.dist	p.adjustANOVA
Membrane Trafficking	29	0.0000001	0.5896115	0.0000041
Platelet activation, signaling and aggregation	41	0.0000769	0.3645997	0.0020477
Vesicle-mediated transport	44	0.0000798	0.3518475	0.0020477
Diseases of signal transduction by growth factor receptors and second messengers	21	0.0001673	0.4790548	0.0032208
Hemostasis	57	0.0003507	0.2827278	0.0054003
Cell-Cell communication	21	0.0009534	0.4209333	0.0102977
Signaling by Rho GTPases	26	0.0010699	0.3757881	0.0102977
Signaling by Rho GTPases, Miro GTPases and RHOBTB3	26	0.0010699	0.3757881	0.0102977
Post-translational protein modification	65	0.0016095	0.2350541	0.0131230
Cell junction organization	17	0.0017043	0.4433035	0.0131230
RHO GTPase Effectors	16	0.0023828	0.4422478	0.0166797
MAPK family signaling cascades	18	0.0040415	0.3952004	0.0200841
MAPK1/MAPK3 signaling	16	0.0044199	0.4145392	0.0200841
RAF/MAP kinase cascade	16	0.0044199	0.4145392	0.0200841
Platelet degranulation	32	0.0045531	0.2951080	0.0200841
Response to elevated platelet cytosolic Ca2+	32	0.0045531	0.2951080	0.0200841
Leishmania infection	11	0.0048288	0.4934743	0.0200841
Parasitic Infection Pathways	11	0.0048288	0.4934743	0.0200841
Asparagine N-linked glycosylation	15	0.0049558	0.4223881	0.0200841
Signal Transduction	76	0.0052251	0.1940214	0.0201167

top_mitch_B <- res_mitch_B_clean %>%
  arrange(p.adjustANOVA) %>%
  slice_head(n = 20)

kable(top_mitch_B, 
      caption = "Top Reactome Pathways (Plex B - IG Removed)")

Top Reactome Pathways (Plex B - IG Removed)
set	setSize	pANOVA	s.dist	p.adjustANOVA
Signaling by Rho GTPases	24	0.0000042	-0.5475420	0.0001170
Signaling by Rho GTPases, Miro GTPases and RHOBTB3	24	0.0000042	-0.5475420	0.0001170
Cell junction organization	13	0.0000047	-0.7350863	0.0001170
Platelet activation, signaling and aggregation	38	0.0000338	-0.3964358	0.0006246
RHO GTPase Effectors	13	0.0001247	-0.6175432	0.0016676
Hemostasis	51	0.0001352	-0.3179835	0.0016676
Membrane Trafficking	24	0.0002094	-0.4424321	0.0022138
Cell-Cell communication	16	0.0003042	-0.5252881	0.0028137
Muscle contraction	10	0.0005403	-0.6343075	0.0039979
Smooth Muscle Contraction	10	0.0005403	-0.6343075	0.0039979
Platelet degranulation	30	0.0007806	-0.3602781	0.0048135
Response to elevated platelet cytosolic Ca2+	30	0.0007806	-0.3602781	0.0048135
EPH-Ephrin signaling	13	0.0012376	-0.5206044	0.0070449
Cellular responses to stimuli	26	0.0033516	-0.3373242	0.0177158
RHO GTPase cycle	15	0.0065504	-0.4088662	0.0323154
Vesicle-mediated transport	36	0.0069872	-0.2654402	0.0323160
FCGR3A-mediated phagocytosis	10	0.0124551	-0.4589581	0.0460839
Fcgamma receptor (FCGR) dependent phagocytosis	10	0.0124551	-0.4589581	0.0460839
Leishmania phagocytosis	10	0.0124551	-0.4589581	0.0460839
Parasite infection	10	0.0124551	-0.4589581	0.0460839

Session Info

sessionInfo()

## R version 4.6.0 (2026-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0       gtools_3.9.5           beeswarm_0.4.0        
##  [4] eulerr_7.1.0           msigdbr_26.1.0         mitch_1.24.0          
##  [7] knitr_1.51             ReactomePA_1.56.0      umap_0.2.10.0         
## [10] org.Hs.eg.db_3.23.1    AnnotationDbi_1.74.0   IRanges_2.46.0        
## [13] S4Vectors_0.50.1       Biobase_2.72.0         BiocGenerics_0.58.1   
## [16] generics_0.1.4         clusterProfiler_4.20.0 pheatmap_1.0.13       
## [19] limma_3.68.2           janitor_2.2.1          lubridate_1.9.5       
## [22] forcats_1.0.1          stringr_1.6.0          purrr_1.2.2           
## [25] readr_2.2.0            tidyr_1.3.2            tibble_3.3.1          
## [28] ggplot2_4.0.3          tidyverse_2.0.0        dplyr_1.2.1           
## [31] BiocManager_1.30.27   
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.6.0           later_1.4.8             bitops_1.0-9           
##   [4] ggplotify_0.1.3         polyclip_1.10-7         enrichit_0.1.4         
##   [7] graph_1.90.0            lifecycle_1.0.5         httr2_1.2.2            
##  [10] vroom_1.7.1             processx_3.9.0          lattice_0.22-9         
##  [13] MASS_7.3-65             magrittr_2.0.5          sass_0.4.10            
##  [16] rmarkdown_2.31          jquerylib_0.1.4         yaml_2.3.12            
##  [19] httpuv_1.6.17           otel_0.2.0              ggtangle_0.1.2         
##  [22] askpass_1.2.1           reticulate_1.46.0       DBI_1.3.0              
##  [25] RColorBrewer_1.1-3      ggraph_2.2.2            yulab.utils_0.2.4      
##  [28] tweenr_2.0.3            rappdirs_0.3.4          aisdk_1.1.0            
##  [31] gdtools_0.5.0           enrichplot_1.32.0       ggrepel_0.9.8          
##  [34] tidytree_0.4.7          reactome.db_1.96.0      RSpectra_0.16-2        
##  [37] svglite_2.2.2           DOSE_4.6.0              xml2_1.5.2             
##  [40] ggforce_0.5.0           tidyselect_1.2.1        aplot_0.2.9            
##  [43] farver_2.1.2            viridis_0.6.5           Seqinfo_1.2.0          
##  [46] jsonlite_2.0.0          tidygraph_1.3.1         systemfonts_1.3.2      
##  [49] polylabelr_1.0.0        tools_4.6.0             ggnewscale_0.5.2       
##  [52] treeio_1.36.1           Rcpp_1.1.1-1.1          glue_1.8.1             
##  [55] gridExtra_2.3           xfun_0.57               qvalue_2.44.0          
##  [58] withr_3.0.2             fastmap_1.2.0           GGally_2.4.0           
##  [61] openssl_2.4.1           caTools_1.18.3          callr_3.7.6            
##  [64] digest_0.6.39           timechange_0.4.0        R6_2.6.1               
##  [67] mime_0.13               gridGraphics_0.5-1      textshaping_1.0.5      
##  [70] GO.db_3.23.1            dichromat_2.0-0.1       RSQLite_3.52.0         
##  [73] utf8_1.2.6              fontLiberation_0.1.0    graphlayouts_1.2.3     
##  [76] httr_1.4.8              htmlwidgets_1.6.4       scatterpie_0.2.6       
##  [79] ggstats_0.13.0          graphite_1.58.0         pkgconfig_2.0.3        
##  [82] gtable_0.3.6            blob_1.3.0              S7_0.2.2               
##  [85] XVector_0.52.0          htmltools_0.5.9         fontBitstreamVera_0.1.1
##  [88] echarts4r_0.5.0         scales_1.4.0            png_0.1-9              
##  [91] snakecase_0.11.1        ggfun_0.2.0             rstudioapi_0.18.0      
##  [94] tzdb_0.5.0              reshape2_1.4.5          curl_7.1.0             
##  [97] coda_0.19-4.1           statnet.common_4.13.0   nlme_3.1-169           
## [100] cachem_1.1.0            KernSmooth_2.23-26      parallel_4.6.0         
## [103] pillar_1.11.1           grid_4.6.0              vctrs_0.7.3            
## [106] gplots_3.3.0            promises_1.5.0          tidydr_0.0.6           
## [109] xtable_1.8-8            cluster_2.1.8.2         evaluate_1.0.5         
## [112] cli_3.6.6               compiler_4.6.0          rlang_1.2.0            
## [115] crayon_1.5.3            labeling_0.4.3          ps_1.9.3               
## [118] plyr_1.8.9              fs_2.1.0                ggiraph_0.9.6          
## [121] stringi_1.8.7           network_1.20.0          viridisLite_0.4.3      
## [124] babelgene_22.9          assertthat_0.2.1        Biostrings_2.80.0      
## [127] lazyeval_0.2.3          GOSemSim_2.38.0         fontquiver_0.2.1       
## [130] Matrix_1.7-5            hms_1.1.4               patchwork_1.3.2        
## [133] bit64_4.8.0             KEGGREST_1.52.0         statmod_1.5.2          
## [136] shiny_1.13.0            igraph_2.3.1            memoise_2.0.1          
## [139] bslib_0.11.0            ggtree_4.2.0            bit_4.6.0              
## [142] ape_5.8-1               gson_0.1.0

save.image("Analysis2.rdata")