1. Installing Packages

#{r install-packages, eval=FALSE} #install.packages(c("tidyverse","readr","janitor","pheatmap","ggplot2")) #install.packages("BiocManager") #BiocManager::install(c("limma","clusterProfiler","org.Hs.eg.db","umap")) #BiocManager::install("ReactomePA") #

2.Load all R libraries needed for data processing and visualization.

library(BiocManager)
library(dplyr)
library(tidyverse)
library(readr)
library(janitor)
library(limma)
library(pheatmap)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(umap)
library(ReactomePA)
library(tibble)
library(knitr)
library(mitch)
library(stringr)
library(msigdbr)
library(beeswarm)
library("kableExtra")
library("eulerr")

3. Load files

plexA <- read_tsv("abundance_protein_MD_plexA.tsv")
## Rows: 963 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plexB <- read_tsv("abundance_protein_MD_plexB.tsv")
## Rows: 962 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plexA <- janitor::clean_names(plexA)
plexB <- janitor::clean_names(plexB)

4. Keep relevant columns

plexA <- dplyr::select(plexA, protein, gene,
                       x126, x127n, x127c, x128n, x128c,
                       x129n, x129c, x130n, x130c,
                       x131n, x131c, x132n)

plexB <- dplyr::select(plexB, protein, gene,
                       x126, x127n, x127c, x128n, x128c,
                       x129n, x129c, x130n, x130c,
                       x131n, x131c, x132n)

5. Rename TMT channels to sample IDs

annotationA <- c(
  x126="19191", x127n="34629", x127c="34862",
  x128n="40431", x128c="40475", x129n="35038",
  x129c="34866", x130n="41436", x130c="41383",
  x131n="40316", x131c="3746", x132n="102953"
)

annotationB <- c(
  x126="40797", x127n="4177", x127c="103376",
  x128n="47542", x128c="47442", x129n="42977",
  x129c="43081", x130n="113835", x130c="113856",
  x131n="19584", x131c="118117", x132n="118008"
)

colnames(plexA)[match(names(annotationA), colnames(plexA))] <- annotationA
colnames(plexB)[match(names(annotationB), colnames(plexB))] <- annotationB

6. Merge plexes

merged <- full_join(plexA, plexB, by=c("protein","gene"))
head(merged)
## # A tibble: 6 × 26
##   protein  gene  `19191` `34629` `34862` `40431` `40475` `35038` `34866` `41436`
##   <chr>    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 sp|A0A0… IGKV…    25.2    25.4    25.4    26.1    25.1    25.6    25.7    25.4
## 2 sp|A0A0… IGLV…    29.0    29.8    29.6    29.5    29.4    28.8    29.9    29.4
## 3 sp|A0A0… IGLV…    27.5    28.5    26.6    27.4    27.4    26.8    27.3    26.6
## 4 sp|A0A0… IGLV…    21.1    NA      21.0    21.9    19.9    21.2    21.2    20.1
## 5 sp|A0A0… IGLV…    22.4    25.3    25.1    24.0    23.9    24.5    24.7    24.3
## 6 sp|A0A0… IGLV…    21.4    22.3    21.4    21.7    21.5    23.8    22.1    22.6
## # ℹ 16 more variables: `41383` <dbl>, `40316` <dbl>, `3746` <dbl>,
## #   `102953` <dbl>, `40797` <dbl>, `4177` <dbl>, `103376` <dbl>, `47542` <dbl>,
## #   `47442` <dbl>, `42977` <dbl>, `43081` <dbl>, `113835` <dbl>,
## #   `113856` <dbl>, `19584` <dbl>, `118117` <dbl>, `118008` <dbl>

7. Expression matrix for Merged plexes, Plex A and Plex B

expr_combined <- as.data.frame(merged)
protein_info_combined <- expr_combined[, c("protein", "gene")]
rownames(expr_combined) <- expr_combined$protein
expr_combined <- expr_combined[, !(colnames(expr_combined) %in% c("protein","gene"))]
expr_combined <- as.matrix(expr_combined)
storage.mode(expr_combined) <- "numeric"

expr_A <- as.data.frame(plexA)
protein_info_A <- expr_A[, c("protein", "gene")]
rownames(expr_A) <- expr_A$protein
expr_A <- expr_A[, !(colnames(expr_A) %in% c("protein","gene"))]
expr_A <- as.matrix(expr_A)
storage.mode(expr_A) <- "numeric"
head(expr_A)
##                              19191    34629    34862    40431    40475    35038
## sp|A0A075B6H7|KV37_HUMAN  25.20197 25.39573 25.43255 26.05771 25.05397 25.57650
## sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860
## sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964
## sp|A0A075B6I1|LV460_HUMAN 21.13980       NA 20.95866 21.90418 19.86279 21.18880
## sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498
## sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554
##                              34866    41436    41383    40316     3746   102953
## sp|A0A075B6H7|KV37_HUMAN  25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
## sp|A0A075B6H9|LV469_HUMAN 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
## sp|A0A075B6I0|LV861_HUMAN 27.26845 26.57449 28.21526 26.57432 26.75554       NA
## sp|A0A075B6I1|LV460_HUMAN 21.20105 20.12885 20.95064 21.09826 20.35881       NA
## sp|A0A075B6J9|LV218_HUMAN 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
## sp|A0A075B6K0|LV316_HUMAN 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397
expr_B <- as.data.frame(plexB)
protein_info_B <- expr_B[, c("protein", "gene")]
rownames(expr_B) <- expr_B$protein
expr_B <- expr_B[, !(colnames(expr_B) %in% c("protein","gene"))]
expr_B <- as.matrix(expr_B)
storage.mode(expr_B) <- "numeric"
head(expr_B)
##                              40797     4177   103376    47542    47442    42977
## sp|A0A075B6H7|KV37_HUMAN  30.46372 30.34873 30.21146 30.78558 30.36076 30.19026
## sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953
## sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399
## sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525
## sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839       NA 13.41395 17.83912 14.71371
## sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357
##                              43081   113835   113856    19584   118117   118008
## sp|A0A075B6H7|KV37_HUMAN  30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
## sp|A0A075B6H9|LV469_HUMAN 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
## sp|A0A075B6I0|LV861_HUMAN 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
## sp|A0A075B6I1|LV460_HUMAN 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
## sp|A0A075B6I4|LVX54_HUMAN 15.67450 14.14982 13.30315       NA 13.41029 14.51386
## sp|A0A075B6J9|LV218_HUMAN 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800

8. Filter proteins (present in >=50% of samples)

keep_combined <- rowSums(!is.na(expr_combined)) >= ncol(expr_combined)*0.5
expr_combined_filt <- expr_combined[keep_combined, ]

keep_A <- rowSums(!is.na(expr_A)) >= ncol(expr_A)*0.5
expr_A_filt <- expr_A[keep_A, ]
head(expr_A_filt)
##                              19191    34629    34862    40431    40475    35038
## sp|A0A075B6H7|KV37_HUMAN  25.20197 25.39573 25.43255 26.05771 25.05397 25.57650
## sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860
## sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964
## sp|A0A075B6I1|LV460_HUMAN 21.13980       NA 20.95866 21.90418 19.86279 21.18880
## sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498
## sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554
##                              34866    41436    41383    40316     3746   102953
## sp|A0A075B6H7|KV37_HUMAN  25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
## sp|A0A075B6H9|LV469_HUMAN 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
## sp|A0A075B6I0|LV861_HUMAN 27.26845 26.57449 28.21526 26.57432 26.75554       NA
## sp|A0A075B6I1|LV460_HUMAN 21.20105 20.12885 20.95064 21.09826 20.35881       NA
## sp|A0A075B6J9|LV218_HUMAN 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
## sp|A0A075B6K0|LV316_HUMAN 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397
keep_B <- rowSums(!is.na(expr_B)) >= ncol(expr_B)*0.5
expr_B_filt <- expr_B[keep_B, ]
head(expr_B_filt)
##                              40797     4177   103376    47542    47442    42977
## sp|A0A075B6H7|KV37_HUMAN  30.46372 30.34873 30.21146 30.78558 30.36076 30.19026
## sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953
## sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399
## sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525
## sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839       NA 13.41395 17.83912 14.71371
## sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357
##                              43081   113835   113856    19584   118117   118008
## sp|A0A075B6H7|KV37_HUMAN  30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
## sp|A0A075B6H9|LV469_HUMAN 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
## sp|A0A075B6I0|LV861_HUMAN 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
## sp|A0A075B6I1|LV460_HUMAN 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
## sp|A0A075B6I4|LVX54_HUMAN 15.67450 14.14982 13.30315       NA 13.41029 14.51386
## sp|A0A075B6J9|LV218_HUMAN 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800

9. QC before normalization

qc_sample_combined <- data.frame(
  Sample = colnames(expr_combined_filt),
  Detected_Proteins = colSums(!is.na(expr_combined_filt)),
  Missing_Percent = colMeans(is.na(expr_combined_filt)) * 100,
  Median_Intensity = apply(expr_combined_filt, 2, median, na.rm = TRUE),
  SD_Intensity = apply(expr_combined_filt, 2, sd, na.rm = TRUE)
)

# Assign plex
plexA_samples <- c(
  "19191","34629","34862","40431","40475","35038",
  "34866","41436","41383","40316","3746","102953"
)

qc_sample_combined$Plex <- ifelse(qc_sample_combined$Sample %in% plexA_samples, "A", "B")

# QC summary by plex
qc_plex_combined <- qc_sample_combined %>%
  group_by(Plex) %>%
  summarise(
    N_Samples = n(),
    Mean_Detected_Proteins = mean(Detected_Proteins),
    Mean_Missing_Percent = mean(Missing_Percent),
    Mean_Median_Intensity = mean(Median_Intensity),
    .groups = "drop"
  )


kable(qc_sample_combined)
Sample Detected_Proteins Missing_Percent Median_Intensity SD_Intensity Plex
19191 19191 811 10.28761 22.61437 3.608051 A
34629 34629 768 15.04425 22.52596 3.824270 A
34862 34862 792 12.38938 22.47182 3.687522 A
40431 40431 767 15.15487 22.17860 3.914548 A
40475 40475 768 15.04425 22.27699 3.916427 A
35038 35038 806 10.84071 22.40563 3.668941 A
34866 34866 805 10.95133 22.46457 3.701970 A
41436 41436 779 13.82743 22.34549 3.813249 A
41383 41383 791 12.50000 22.41082 3.736537 A
40316 40316 762 15.70796 22.20311 3.877041 A
3746 3746 797 11.83628 22.94357 3.486882 A
102953 102953 765 15.37611 22.48856 3.758113 A
40797 40797 766 15.26549 22.30942 3.775893 B
4177 4177 782 13.49558 22.93066 3.365816 B
103376 103376 714 21.01770 22.29280 3.759772 B
47542 47542 780 13.71681 22.25514 3.563151 B
47442 47442 731 19.13717 22.16957 3.792444 B
42977 42977 772 14.60177 22.38404 3.503025 B
43081 43081 776 14.15929 22.13263 3.707914 B
113835 113835 726 19.69027 22.26161 3.831329 B
113856 113856 768 15.04425 22.04823 3.841864 B
19584 19584 764 15.48673 22.35815 3.493251 B
118117 118117 727 19.57965 22.31801 3.876589 B
118008 118008 675 25.33186 22.33120 3.854068 B
kable(qc_plex_combined)
Plex N_Samples Mean_Detected_Proteins Mean_Missing_Percent Mean_Median_Intensity
A 12 784.2500 13.24668 22.44412
B 12 748.4167 17.21055 22.31595
# QC plots
qc_long_combined <- as.data.frame(expr_combined_filt) %>%
  rownames_to_column("Protein") %>%
  pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
  filter(is.finite(Intensity))

# Missingness plot
ggplot(qc_sample_combined,
       aes(x = reorder(Sample, Missing_Percent),
           y = Missing_Percent,
           fill = Plex)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(title = "Missingness per Sample",
       x = "Sample", y = "% Missing")

# Detected proteins plot
ggplot(qc_sample_combined,
       aes(x = reorder(Sample, Detected_Proteins),
           y = Detected_Proteins,
           fill = Plex)) +
  geom_col() +
  coord_flip() +
  theme_bw() +
  labs(title = "Detected Proteins per Sample",
       x = "Sample", y = "Detected Proteins")

# Density plot
ggplot(qc_long_combined,
       aes(x = Intensity, color = Sample)) +
  geom_density(linewidth = 0.8) +
  theme_bw() +
  labs(title = "Intensity Distribution per Sample",
       x = "Intensity", y = "Density")

# Correlation heatmap
cor_pre_combined <- cor(expr_combined_filt, use = "pairwise.complete.obs")

pheatmap(cor_pre_combined,
         main = "Sample Correlation Heatmap (Pre-normalization)")

# Plex-level comparison
ggplot(qc_plex_combined,
       aes(x = Plex, y = Mean_Detected_Proteins, fill = Plex)) +
  geom_col() +
  theme_bw() +
  labs(title = "Mean Detected Proteins by Plex",
       x = "Plex", y = "Mean Detected Proteins")

10. Quantile normalization and log2 transform

expr_combined_norm <- normalizeBetweenArrays(expr_combined_filt, method = "quantile")
expr_combined_log <- log2(expr_combined_norm + 1)
boxplot(expr_combined_log, main = "Post-normalization for both plexes", las = 2)

# Plex A
expr_A_norm <- normalizeBetweenArrays(expr_A_filt, method = "quantile")
expr_A_log <- log2(expr_A_norm + 1)
boxplot(expr_A_log, main = "Post-normalization for Plex A", las = 2)

# plex B
expr_B_norm <- normalizeBetweenArrays(expr_B_filt, method = "quantile")
expr_B_log <- log2(expr_B_norm + 1)
boxplot(expr_B_log, main = "Post-normalization for Plex B", las = 2)

11. QC after normalization

qc_long_norm_combined <- as.data.frame(expr_combined_log) %>%
  rownames_to_column("Protein") %>%
  pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
  filter(is.finite(Intensity))

ggplot(qc_long_norm_combined,
       aes(x = Intensity, color = Sample)) +
  geom_density(linewidth = 0.8) +
  theme_bw() +
  labs(title = "Post-normalization Intensity Distribution (Combined)",
       x = "Log2 Intensity",
       y = "Density")

cor_post_combined <- cor(expr_combined_log, use = "pairwise.complete.obs")

pheatmap(cor_post_combined,
         main = "Sample Correlation Heatmap (Post-normalization)")

12. Impute missing values (median)

# Combine Plexes
expr_combined_imp <- expr_combined_log
for(i in 1:ncol(expr_combined_imp)){
  expr_combined_imp[is.na(expr_combined_imp[,i]), i] <- 
    median(expr_combined_imp[,i], na.rm = TRUE)
}

# Plex A
expr_A_imp <- expr_A_log

for(i in 1:ncol(expr_A_imp)){
  expr_A_imp[is.na(expr_A_imp[,i]), i] <- 
    median(expr_A_imp[,i], na.rm = TRUE)
}

# Plex B
expr_B_imp <- expr_B_log

for(i in 1:ncol(expr_B_imp)){
  expr_B_imp[is.na(expr_B_imp[,i]), i] <- 
    median(expr_B_imp[,i], na.rm = TRUE)
}
sum(is.na(expr_combined_imp))
## [1] 0
sum(is.na(expr_A_imp))
## [1] 0
sum(is.na(expr_B_imp))
## [1] 0
# Removing Zero Variance Proteins
expr_combined_imp <- expr_combined_imp[apply(expr_combined_imp, 1, var) != 0, ]
expr_A_imp <- expr_A_imp[apply(expr_A_imp, 1, var) != 0, ]
expr_B_imp <- expr_B_imp[apply(expr_B_imp, 1, var) != 0, ]

Metadata Creation

############################################
# METADATA CREATION (SINGLE SOURCE OF TRUTH)
############################################

# Create combined metadata from expression matrix
meta_combined <- data.frame(
  Sample = colnames(expr_combined_imp)
)

############################################
# Define Plex (based on known Plex A samples)
############################################

plexA_samples <- c(
  "19191","34629","34862","40431","40475","35038",
  "34866","41436","41383","40316","3746","102953"
)

meta_combined$Plex <- ifelse(meta_combined$Sample %in% plexA_samples,
                             "A", "B")

meta_combined$Plex <- factor(meta_combined$Plex)

############################################
# Define biological group (Case vs Control)
############################################

case_samples <- c(
  "19191","40797","34862","103376","40475","47442",
  "34866","43081","41383","113856","3746","118117"
)

meta_combined$Group <- ifelse(meta_combined$Sample %in% case_samples,
                              "Case", "Control")

meta_combined$Group <- factor(meta_combined$Group,
                              levels = c("Control","Case"))

############################################
# Final formatting
############################################

rownames(meta_combined) <- meta_combined$Sample

############################################
# Create Plex-specific metadata (CORRECT WAY)
############################################

meta_A <- meta_combined[meta_combined$Plex == "A", ]
meta_B <- meta_combined[meta_combined$Plex == "B", ]

############################################
# Align with expression matrices (CRITICAL)
############################################

meta_A <- meta_A[colnames(expr_A_imp), ]
meta_B <- meta_B[colnames(expr_B_imp), ]

############################################
# Keep only needed columns
############################################

meta_A <- meta_A[, c("Sample","Group")]
meta_B <- meta_B[, c("Sample","Group")]

############################################
# SANITY CHECKS (DO NOT SKIP)
############################################

# Alignment
all(rownames(meta_A) == colnames(expr_A_imp))
## [1] TRUE
all(rownames(meta_B) == colnames(expr_B_imp))
## [1] TRUE
all(rownames(meta_combined) == colnames(expr_combined_imp))
## [1] TRUE
# Distribution checks
table(meta_combined$Plex, meta_combined$Group)
##    
##     Control Case
##   A       6    6
##   B       6    6
table(meta_A$Group)
## 
## Control    Case 
##       6       6
table(meta_B$Group)
## 
## Control    Case 
##       6       6

PCA Before Batch Correction for combined plexes

pca_pre <- prcomp(t(expr_combined_imp), center = TRUE, scale. = TRUE)

pca_pre_df <- data.frame(
  PC1 = pca_pre$x[,1],
  PC2 = pca_pre$x[,2],
  Sample = colnames(expr_combined_imp),
  Plex = meta_combined$Plex,
  Group = meta_combined$Group
)

ggplot(pca_pre_df, aes(x = PC1, y = PC2, color = Plex, shape = Group)) +
  geom_point(size = 4) +
  geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
  theme_bw() +
  labs(title = "PCA Before Batch Correction",
       x = "PC1",
       y = "PC2")

batch Correction

expr_combined_corrected <- removeBatchEffect(
  expr_combined_imp,
  batch = meta_combined$Plex,
  design = model.matrix(~ Group, data = meta_combined)
)

# Quick check
head(expr_combined_corrected[,1:5])
##                              19191    34629    34862    40431    40475
## sp|A0A075B6H7|KV37_HUMAN  4.833346 4.830965 4.839526 4.871850 4.827196
## sp|A0A075B6H9|LV469_HUMAN 4.702996 4.718612 4.721695 4.712016 4.715457
## sp|A0A075B6I0|LV861_HUMAN 4.825501 4.858319 4.764883 4.807596 4.800510
## sp|A0A075B6I1|LV460_HUMAN 4.638888 4.726509 4.627839 4.711256 4.579935
## sp|A0A075B6J9|LV218_HUMAN 4.595879 4.759264 4.755084 4.708453 4.696748
## sp|A0A075B6K0|LV316_HUMAN 4.471802 4.529905 4.479845 4.517084 4.502633
# Dimensions unchanged
dim(expr_combined_corrected)
## [1] 904  24

PCA On Combined Plexes after removing Batch effect

pca_post <- prcomp(t(expr_combined_corrected), center = TRUE, scale. = TRUE)

pca_post_df <- data.frame(
  PC1 = pca_post$x[,1],
  PC2 = pca_post$x[,2],
  Sample = colnames(expr_combined_corrected),
  Plex = meta_combined$Plex,
  Group = meta_combined$Group
)

ggplot(pca_post_df, aes(x = PC1, y = PC2, color = Group, shape = Plex)) +
  geom_point(size = 4) +
  geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
  theme_bw() +
  labs(title = "PCA After Batch Correction",
       x = "PC1",
       y = "PC2")

Differential Expression Analysis

Combined plexes

design_combined <- model.matrix(~ Plex + Group, data = meta_combined)
fit_combined <- lmFit(expr_combined_imp, design_combined)
fit_combined <- eBayes(fit_combined)
results_combined <- topTable(
  fit_combined,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_combined$Protein <- rownames(results_combined)
# Add gene annotation
results_combined <- merge(
  results_combined,
  protein_info_combined,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)
# Add significance labels
results_combined$Significance <- "Not Significant"

results_combined$Significance[
  results_combined$adj.P.Val < 0.05 & results_combined$logFC > 1
] <- "Upregulated"
results_combined$Significance[
  results_combined$adj.P.Val < 0.05 & results_combined$logFC < -1
] <- "Downregulated"
# View

results_combined <- results_combined[order(results_combined$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value
Protein logFC AveExpr t P.Value adj.P.Val B gene Significance
751 sp|Q27J81|INF2_HUMAN -0.1169760 4.464969 -4.867502 0.0000637 0.0576017 1.6695010 INF2 Not Significant
507 sp|P28482|MK01_HUMAN 0.1460155 4.370219 4.197978 0.0003398 0.1535753 0.0466474 MAPK1 Not Significant
96 sp|O75356|ENTP5_HUMAN 0.0411850 4.456341 3.137454 0.0045940 0.5645256 -2.4416598 ENTPD5 Not Significant
571 sp|P49407|ARRB1_HUMAN 0.0752710 4.480139 3.119861 0.0047905 0.5645256 -2.4810489 ARRB1 Not Significant
891 sp|Q9Y287|ITM2B_HUMAN -0.0322520 4.425711 -3.009591 0.0062189 0.5645256 -2.7258946 ITM2B Not Significant
609 sp|P59998|ARPC4_HUMAN -0.0810547 4.524232 -2.956053 0.0070519 0.5645256 -2.8434299 ARPC4 Not Significant

Separate plexes

Analyse plex A and plex B.

design_A <- model.matrix(~ Group, data = meta_A)
fit_A <- lmFit(expr_A_imp, design_A)
fit_A <- eBayes(fit_A)
results_A <- topTable(
  fit_A,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_A$Protein <- rownames(results_A)
# Add gene annotation
results_A <- merge(
  results_A,
  protein_info_A,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)

# Add significance labels
results_A$Significance <- "Not Significant"
results_A$Significance[
  results_A$adj.P.Val < 0.05 & results_A$logFC > 1
] <- "Upregulated"
results_A$Significance[
  results_A$adj.P.Val < 0.05 & results_A$logFC < -1
] <- "Downregulated"

# View
results_A <- results_A[order(results_A$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value in plex A") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value in plex A
Protein logFC AveExpr t P.Value adj.P.Val B gene Significance
751 sp|Q27J81|INF2_HUMAN -0.1169760 4.464969 -4.867502 0.0000637 0.0576017 1.6695010 INF2 Not Significant
507 sp|P28482|MK01_HUMAN 0.1460155 4.370219 4.197978 0.0003398 0.1535753 0.0466474 MAPK1 Not Significant
96 sp|O75356|ENTP5_HUMAN 0.0411850 4.456341 3.137454 0.0045940 0.5645256 -2.4416598 ENTPD5 Not Significant
571 sp|P49407|ARRB1_HUMAN 0.0752710 4.480139 3.119861 0.0047905 0.5645256 -2.4810489 ARRB1 Not Significant
891 sp|Q9Y287|ITM2B_HUMAN -0.0322520 4.425711 -3.009591 0.0062189 0.5645256 -2.7258946 ITM2B Not Significant
609 sp|P59998|ARPC4_HUMAN -0.0810547 4.524232 -2.956053 0.0070519 0.5645256 -2.8434299 ARPC4 Not Significant
head(results_A)
##                   Protein       logFC  AveExpr         t      P.Value adj.P.Val
## 740 sp|Q86UF1|TSN33_HUMAN -0.20076209 4.396326 -4.595562 0.0006315126 0.3477544
## 270 sp|P06702|S10A9_HUMAN -0.13940577 4.711956 -4.203125 0.0012513585 0.3477544
## 715  sp|Q27J81|INF2_HUMAN -0.12265117 4.448334 -4.067512 0.0015917637 0.3477544
## 546 sp|P49593|PPM1F_HUMAN  0.10647979 4.172522  3.995491 0.0018102257 0.3477544
## 863 sp|Q9Y287|ITM2B_HUMAN -0.05635063 4.300458 -3.616202 0.0035936408 0.3477544
## 253 sp|P05109|S10A8_HUMAN -0.12567021 4.779495 -3.570575 0.0039058992 0.3477544
##               B    gene    Significance
## 740 -0.09244264 TSPAN33 Not Significant
## 270 -0.74720064  S100A9 Not Significant
## 715 -0.97779807    INF2 Not Significant
## 546 -1.10105802   PPM1F Not Significant
## 863 -1.75772600   ITM2B Not Significant
## 253 -1.83739665  S100A8 Not Significant

Plex B.

design_B <- model.matrix(~ Group, data = meta_B)
fit_B <- lmFit(expr_B_imp, design_B)
fit_B <- eBayes(fit_B)
results_B <- topTable(
  fit_B,
  coef = "GroupCase",
  number = Inf,
  sort.by = "P"
)
results_B$Protein <- rownames(results_B)
# Add gene annotation
results_B <- merge(
  results_B,
  protein_info_B,
  by.x = "Protein",
  by.y = "protein",
  all.x = TRUE
)
# Add significance labels
results_B$Significance <- "Not Significant"
results_B$Significance[
  results_B$adj.P.Val < 0.05 & results_B$logFC > 1
] <- "Upregulated"
results_B$Significance[
  results_B$adj.P.Val < 0.05 & results_B$logFC < -1
] <- "Downregulated"

# View
results_B <- results_B[order(results_B$P.Value),]

head(results_combined) |>
    kbl(caption = "Top ranked proteins by p-value in plex B") |>
    kable_paper("hover", full_width = F)
Top ranked proteins by p-value in plex B
Protein logFC AveExpr t P.Value adj.P.Val B gene Significance
751 sp|Q27J81|INF2_HUMAN -0.1169760 4.464969 -4.867502 0.0000637 0.0576017 1.6695010 INF2 Not Significant
507 sp|P28482|MK01_HUMAN 0.1460155 4.370219 4.197978 0.0003398 0.1535753 0.0466474 MAPK1 Not Significant
96 sp|O75356|ENTP5_HUMAN 0.0411850 4.456341 3.137454 0.0045940 0.5645256 -2.4416598 ENTPD5 Not Significant
571 sp|P49407|ARRB1_HUMAN 0.0752710 4.480139 3.119861 0.0047905 0.5645256 -2.4810489 ARRB1 Not Significant
891 sp|Q9Y287|ITM2B_HUMAN -0.0322520 4.425711 -3.009591 0.0062189 0.5645256 -2.7258946 ITM2B Not Significant
609 sp|P59998|ARPC4_HUMAN -0.0810547 4.524232 -2.956053 0.0070519 0.5645256 -2.8434299 ARPC4 Not Significant
head(results_B)
##                   Protein       logFC  AveExpr         t      P.Value
## 112 sp|P00387|NB5R3_HUMAN -0.17683895 4.440961 -6.330268 3.786522e-05
## 528  sp|P40227|TCPZ_HUMAN -0.15017532 4.378868 -5.615484 1.136361e-04
## 590 sp|P61106|RAB14_HUMAN -0.17882092 4.385867 -5.528161 1.306021e-04
## 575  sp|P55084|ECHB_HUMAN -0.27293379 4.251690 -5.063074 2.790389e-04
## 282  sp|P07203|GPX1_HUMAN -0.15833288 4.361424 -4.767197 4.594180e-04
## 89  sp|O75356|ENTP5_HUMAN  0.08676138 4.370177  4.514696 7.096365e-04
##      adj.P.Val          B   gene    Significance
## 112 0.03233690  2.5541250 CYB5R3 Not Significant
## 528 0.03717808  1.5320233  CCT6A Not Significant
## 590 0.03717808  1.4014708  RAB14 Not Significant
## 575 0.05957481  0.6855750  HADHB Not Significant
## 282 0.07846859  0.2126430   GPX1 Not Significant
## 89  0.10100493 -0.2010684 ENTPD5 Not Significant

Comparison of Plex A and B results

The top 50 up and down regulated proteins are selected from Plex A and B and these lists were compared.

plexA_up <- head(subset(results_A,logFC>0),50)[,1]
plexA_dn <- head(subset(results_A,logFC<0),50)[,1]
plexB_up <- head(subset(results_B,logFC>0),50)[,1]
plexB_dn <- head(subset(results_B,logFC<0),50)[,1]

v1 <- list("A up"=plexA_up, "A dn"=plexA_dn, "B up"=plexB_up, "B dn"=plexB_dn)

plot(euler(v1), quantities = TRUE)

intersect(plexA_up,plexB_up)
## character(0)
intersect(plexA_dn,plexB_dn)
## [1] "sp|Q27J81|INF2_HUMAN"
intersect(plexA_up,plexB_dn)
## [1] "sp|P61106|RAB14_HUMAN" "sp|Q9UBW5|BIN2_HUMAN"  "sp|Q3ZCW2|LEGL_HUMAN" 
## [4] "sp|P14770|GPIX_HUMAN"  "sp|Q15555|MARE2_HUMAN" "sp|Q86UX7|URP2_HUMAN" 
## [7] "sp|Q14247|SRC8_HUMAN"
intersect(plexB_up,plexA_dn)
## [1] "sp|P18428|LBP_HUMAN"   "sp|Q5D862|FILA2_HUMAN" "sp|P01766|HV313_HUMAN"

Ranking of proteins by logFC

results_A$score <- sign(results_A$logFC) * -log10(results_A$P.Value)
results_B$score <- sign(results_B$logFC) * -log10(results_B$P.Value)

results_m <- merge(results_A,results_B,by="Protein")

ranks_m <- results_m[,c("Protein","score.x","score.y")]
rownames(ranks_m) <- ranks_m$Protein ; ranks_m$Protein=NULL
colnames(ranks_m) <- c("Plex A","Plex B")

plot(ranks_m)

mylm <- lm(ranks_m$`Plex B` ~ ranks_m$`Plex A`)
abline(mylm,lwd=2,lty=2,col="red")
COR=signif(cor(ranks_m),3)[2,1]
COR
## [1] -0.551
mtext(paste("Pearson correlation=",COR))

Pathway Enrichment Analysis

Using the combined protein profiles.

Some of the pathways are known to be involved in CVD.

m <- mitch_import(results_combined,DEtype="limma",geneIDcol="gene")
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 904
## Note: no. genes in output = 904
## Note: estimated proportion of input genes in output = 1
reactome_sets <- gmt_import("ReactomePathways_2026-04-02.gmt")

mres <- mitch_calc(m,  reactome_sets, minsetsize = 5, cores=8)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(mres$enrichment_result,10) |>
    kbl(caption = "Top ranked Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked Reactome pathways by p-value
set setSize pANOVA s.dist p.adjustANOVA
317 Metabolism of lipids 30 0.0017402 -0.3352403 0.4962473
374 Peptide hormone metabolism 15 0.0040461 0.4317210 0.4962473
197 Formation of the cornified envelope 28 0.0041121 0.3177593 0.4962473
282 Keratinization 28 0.0041121 0.3177593 0.4962473
383 Platelet Adhesion to exposed collagen 10 0.0044372 -0.5219239 0.4962473
548 Syndecan interactions 10 0.0053484 -0.5109620 0.4962473
151 Diseases associated with O-glycosylation of proteins 9 0.0059401 -0.5317194 0.4962473
354 Non-integrin membrane-ECM interactions 19 0.0065954 -0.3634255 0.4962473
353 Neutrophil degranulation 152 0.0074190 0.1373880 0.4962473
327 Mitochondrial protein degradation 6 0.0091377 -0.6161841 0.5500882
head(subset(mres$enrichment_result,s.dist>0),10) |>
    kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked upregulated Reactome pathways by p-value
set setSize pANOVA s.dist p.adjustANOVA
374 Peptide hormone metabolism 15 0.0040461 0.4317210 0.4962473
197 Formation of the cornified envelope 28 0.0041121 0.3177593 0.4962473
282 Keratinization 28 0.0041121 0.3177593 0.4962473
353 Neutrophil degranulation 152 0.0074190 0.1373880 0.4962473
260 Innate Immune System 277 0.0187814 0.0978645 0.7508590
155 Diseases of carbohydrate metabolism 6 0.0190101 0.5545657 0.7508590
296 Lysosome Vesicle Biogenesis 6 0.0294880 0.5148478 0.7508590
253 Immune System 337 0.0488998 0.0782347 0.7956135
107 Complement cascade 91 0.0592232 0.1204466 0.8987126
312 Metabolism of Angiotensinogen to Angiotensins 7 0.0716482 0.3948081 0.8987126
head(subset(mres$enrichment_result,s.dist<0),10) |>
    kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
    kable_paper("hover", full_width = F)
Top ranked upregulated Reactome pathways by p-value
set setSize pANOVA s.dist p.adjustANOVA
317 Metabolism of lipids 30 0.0017402 -0.3352403 0.4962473
383 Platelet Adhesion to exposed collagen 10 0.0044372 -0.5219239 0.4962473
548 Syndecan interactions 10 0.0053484 -0.5109620 0.4962473
151 Diseases associated with O-glycosylation of proteins 9 0.0059401 -0.5317194 0.4962473
354 Non-integrin membrane-ECM interactions 19 0.0065954 -0.3634255 0.4962473
327 Mitochondrial protein degradation 6 0.0091377 -0.6161841 0.5500882
170 EPHA-mediated growth cone collapse 7 0.0103976 -0.5610766 0.5690322
88 Cellular responses to stimuli 86 0.0129358 -0.1626201 0.6489475
433 RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function 5 0.0178210 -0.6133482 0.7508590
422 RHOBTB GTPase Cycle 8 0.0216807 -0.4707031 0.7508590
if ( file.exists("mitchreport1.html") ) { unlink("mitchreport1.html") }
mitch_report(mres,"mitchreport1.html")
## Dataset saved as " /tmp/RtmpM1seI5/mitchreport1.rds ".
## 
## 
## processing file: mitch.Rmd
## 1/36                             
## 2/36 [checklibraries]            
## 3/36                             
## 4/36 [peek]                      
## 5/36                             
## 6/36 [metrics]                   
## 7/36                             
## 8/36 [scatterplot]
## 9/36                             
## 10/36 [contourplot]               
## 11/36                             
## 12/36 [input_geneset_metrics1]    
## 13/36                             
## 14/36 [input_geneset_metrics2]
## 15/36                             
## 16/36 [input_geneset_metrics3]
## 17/36                             
## 18/36 [echart1d]                  
## 19/36 [echart2d]                  
## 20/36                             
## 21/36 [heatmap]                   
## 22/36                             
## 23/36 [effectsize]                
## 24/36                             
## 25/36 [results_table]             
## 26/36                             
## 27/36 [results_table_complete]    
## 28/36                             
## 29/36 [detailed_geneset_reports1d]
## 30/36                             
## 31/36 [detailed_geneset_reports2d]
## 32/36                             
## 33/36 [network]                   
## 34/36                             
## 35/36 [session_info]              
## 36/36
## output file: /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md
## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpM1seI5/mitch_report.html --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /mnt/data/rlibs/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpM1seI5/rmarkdown-str13c6855b1103e5.html
## 
## Output created: /tmp/RtmpM1seI5/mitch_report.html
## [1] TRUE

IG Filtering

# plex A
ig_filter_A <- grepl("IG|KV|LV|IGH|IGK|IGL", rownames(expr_A_imp))
expr_A_clean <- expr_A_imp[!ig_filter_A, ]

# Plex B
ig_filter_B <- grepl("IG|KV|LV|IGH|IGK|IGL", rownames(expr_B_imp))
expr_B_clean <- expr_B_imp[!ig_filter_B, ]

# DE For Plex A
design_A <- model.matrix(~ Group, data = meta_A)

fit_A_clean <- lmFit(expr_A_clean, design_A)
fit_A_clean <- eBayes(fit_A_clean)

results_A_clean <- topTable(fit_A_clean, number = Inf)
## Removing intercept from test coefficients
results_A_clean$Protein <- rownames(results_A_clean)

# DE for Plex B
design_B <- model.matrix(~ Group, data = meta_B)

fit_B_clean <- lmFit(expr_B_clean, design_B)
fit_B_clean <- eBayes(fit_B_clean)

results_B_clean <- topTable(fit_B_clean, number = Inf)
## Removing intercept from test coefficients
results_B_clean$Protein <- rownames(results_B_clean)

head(results_A_clean)
##                             logFC  AveExpr         t      P.Value adj.P.Val
## sp|Q86UF1|TSN33_HUMAN -0.20076209 4.396326 -4.590756 0.0006400107 0.3320219
## sp|P06702|S10A9_HUMAN -0.13940577 4.711956 -4.198574 0.0012668477 0.3320219
## sp|Q27J81|INF2_HUMAN  -0.12265117 4.448334 -4.063033 0.0016109123 0.3320219
## sp|P49593|PPM1F_HUMAN  0.10647979 4.172522  3.990973 0.0018319207 0.3320219
## sp|Q9Y287|ITM2B_HUMAN -0.05635063 4.300458 -3.611170 0.0036379341 0.3320219
## sp|P05109|S10A8_HUMAN -0.12567021 4.779495 -3.566743 0.0039450988 0.3320219
##                                B               Protein
## sp|Q86UF1|TSN33_HUMAN -0.1018677 sp|Q86UF1|TSN33_HUMAN
## sp|P06702|S10A9_HUMAN -0.7548051 sp|P06702|S10A9_HUMAN
## sp|Q27J81|INF2_HUMAN  -0.9848090  sp|Q27J81|INF2_HUMAN
## sp|P49593|PPM1F_HUMAN -1.1078887 sp|P49593|PPM1F_HUMAN
## sp|Q9Y287|ITM2B_HUMAN -1.7641828 sp|Q9Y287|ITM2B_HUMAN
## sp|P05109|S10A8_HUMAN -1.8416104 sp|P05109|S10A8_HUMAN
head(results_B_clean)
##                             logFC  AveExpr         t      P.Value  adj.P.Val
## sp|P00387|NB5R3_HUMAN -0.17683895 4.440961 -6.325361 3.860124e-05 0.03076519
## sp|P40227|TCPZ_HUMAN  -0.15017532 4.378868 -5.611405 1.154988e-04 0.03531205
## sp|P61106|RAB14_HUMAN -0.17882092 4.385867 -5.523120 1.329186e-04 0.03531205
## sp|P55084|ECHB_HUMAN  -0.27293379 4.251690 -5.057160 2.840958e-04 0.05660608
## sp|P07203|GPX1_HUMAN  -0.15833288 4.361424 -4.762751 4.662760e-04 0.07432439
## sp|O75356|ENTP5_HUMAN  0.08676138 4.370177  4.513899 7.152604e-04 0.09501042
##                                B               Protein
## sp|P00387|NB5R3_HUMAN  2.5247563 sp|P00387|NB5R3_HUMAN
## sp|P40227|TCPZ_HUMAN   1.5147695  sp|P40227|TCPZ_HUMAN
## sp|P61106|RAB14_HUMAN  1.3840769 sp|P61106|RAB14_HUMAN
## sp|P55084|ECHB_HUMAN   0.6734127  sp|P55084|ECHB_HUMAN
## sp|P07203|GPX1_HUMAN   0.2068041  sp|P07203|GPX1_HUMAN
## sp|O75356|ENTP5_HUMAN -0.1976055 sp|O75356|ENTP5_HUMAN

Top 50 up and down regulated genes after IG removal

# plex A
# top 50 up
top50_up_A <- results_A_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC > 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)

# top 50 down
top50_down_A <- results_A_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC < 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)

# Plex B
# Top 50 UP
top50_up_B <- results_B_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC > 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)
# Top 50 down
top50_down_B <- results_B_clean %>%
  dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC < 0) %>%
  dplyr::arrange(P.Value) %>%
  dplyr::slice_head(n = 50)

up_A <- top50_up_A$Protein
down_A <- top50_down_A$Protein

up_B <- top50_up_B$Protein
down_B <- top50_down_B$Protein

# Up overlap
common_up <- intersect(up_A, up_B)

# Down overlap
common_down <- intersect(down_A, down_B)

# Print counts
cat("Common UP proteins:", length(common_up), "\n")
## Common UP proteins: 0
cat("Common DOWN proteins:", length(common_down), "\n")
## Common DOWN proteins: 1
library(eulerr)

# UP proteins Euler
fit_up <- euler(list(
  "Plex A Up" = up_A,
  "Plex B Up" = up_B
))

plot(fit_up,
     fills = c("#4CAF50", "#2196F3"),
     alpha = 0.6,
     quantities = TRUE,
     main = "Top 50 Upregulated Proteins Overlap")

# down regulated
fit_down <- euler(list(
  "Plex A Down" = down_A,
  "Plex B Down" = down_B
))

plot(fit_down,
     fills = c("#FF7043", "#AB47BC"),
     alpha = 0.6,
     quantities = TRUE,
     main = "Top 50 Downregulated Proteins Overlap")

Pathway Enrichment

results_A_clean$gene <- str_extract(results_A_clean$Protein, "(?<=\\|)[A-Z0-9]+(?=_HUMAN)")
results_B_clean$gene <- str_extract(results_B_clean$Protein, "(?<=\\|)[A-Z0-9]+(?=_HUMAN)")

# Plex A CLEAN
rank_A_clean <- results_A_clean %>%
  dplyr::select(gene, logFC) %>%
  filter(!is.na(gene)) %>%
  group_by(gene) %>%
  summarise(logFC = mean(logFC), .groups = "drop")

gene_list_A_clean <- rank_A_clean$logFC
names(gene_list_A_clean) <- rank_A_clean$gene

df_A_clean <- data.frame(logFC = gene_list_A_clean)

# run mitch
mitch_A_clean <- mitch_calc(df_A_clean, reactome_sets, minsetsize = 10)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
res_mitch_A_clean <- mitch_A_clean$enrichment_result %>%
  as.data.frame() %>%
  dplyr::arrange(p.adjustANOVA)

# Plex B
rank_B_clean <- results_B_clean %>%
  dplyr::select(gene, logFC) %>%
  filter(!is.na(gene)) %>%
  group_by(gene) %>%
  summarise(logFC = mean(logFC), .groups = "drop")

gene_list_B_clean <- rank_B_clean$logFC
names(gene_list_B_clean) <- rank_B_clean$gene

df_B_clean <- data.frame(logFC = gene_list_B_clean)

mitch_B_clean <- mitch_calc(df_B_clean, reactome_sets, minsetsize = 10)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
res_mitch_B_clean <- mitch_B_clean$enrichment_result %>%
  as.data.frame() %>%
  dplyr::arrange(p.adjustANOVA)



go_A_clean <- enrichGO(
  gene = names(gene_list_A_clean),
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  readable = TRUE
)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene, fromType = fromType, toType = "ENTREZID", OrgDb = OrgDb):
## 57.63% of input gene IDs are fail to map...
go_A_clean_df <- as.data.frame(go_A_clean)

top_mitch_A <- res_mitch_A_clean %>%
  arrange(p.adjustANOVA) %>%
  slice_head(n = 20)

kable(top_mitch_A, 
      caption = "Top Reactome Pathways (Plex A - IG Removed)")
Top Reactome Pathways (Plex A - IG Removed)
set setSize pANOVA s.dist p.adjustANOVA
Membrane Trafficking 29 0.0000001 0.5896115 0.0000041
Platelet activation, signaling and aggregation 41 0.0000769 0.3645997 0.0020477
Vesicle-mediated transport 44 0.0000798 0.3518475 0.0020477
Diseases of signal transduction by growth factor receptors and second messengers 21 0.0001673 0.4790548 0.0032208
Hemostasis 57 0.0003507 0.2827278 0.0054003
Cell-Cell communication 21 0.0009534 0.4209333 0.0102977
Signaling by Rho GTPases 26 0.0010699 0.3757881 0.0102977
Signaling by Rho GTPases, Miro GTPases and RHOBTB3 26 0.0010699 0.3757881 0.0102977
Post-translational protein modification 65 0.0016095 0.2350541 0.0131230
Cell junction organization 17 0.0017043 0.4433035 0.0131230
RHO GTPase Effectors 16 0.0023828 0.4422478 0.0166797
MAPK family signaling cascades 18 0.0040415 0.3952004 0.0200841
MAPK1/MAPK3 signaling 16 0.0044199 0.4145392 0.0200841
RAF/MAP kinase cascade 16 0.0044199 0.4145392 0.0200841
Platelet degranulation 32 0.0045531 0.2951080 0.0200841
Response to elevated platelet cytosolic Ca2+ 32 0.0045531 0.2951080 0.0200841
Leishmania infection 11 0.0048288 0.4934743 0.0200841
Parasitic Infection Pathways 11 0.0048288 0.4934743 0.0200841
Asparagine N-linked glycosylation 15 0.0049558 0.4223881 0.0200841
Signal Transduction 76 0.0052251 0.1940214 0.0201167
top_mitch_B <- res_mitch_B_clean %>%
  arrange(p.adjustANOVA) %>%
  slice_head(n = 20)

kable(top_mitch_B, 
      caption = "Top Reactome Pathways (Plex B - IG Removed)")
Top Reactome Pathways (Plex B - IG Removed)
set setSize pANOVA s.dist p.adjustANOVA
Signaling by Rho GTPases 24 0.0000042 -0.5475420 0.0001170
Signaling by Rho GTPases, Miro GTPases and RHOBTB3 24 0.0000042 -0.5475420 0.0001170
Cell junction organization 13 0.0000047 -0.7350863 0.0001170
Platelet activation, signaling and aggregation 38 0.0000338 -0.3964358 0.0006246
RHO GTPase Effectors 13 0.0001247 -0.6175432 0.0016676
Hemostasis 51 0.0001352 -0.3179835 0.0016676
Membrane Trafficking 24 0.0002094 -0.4424321 0.0022138
Cell-Cell communication 16 0.0003042 -0.5252881 0.0028137
Muscle contraction 10 0.0005403 -0.6343075 0.0039979
Smooth Muscle Contraction 10 0.0005403 -0.6343075 0.0039979
Platelet degranulation 30 0.0007806 -0.3602781 0.0048135
Response to elevated platelet cytosolic Ca2+ 30 0.0007806 -0.3602781 0.0048135
EPH-Ephrin signaling 13 0.0012376 -0.5206044 0.0070449
Cellular responses to stimuli 26 0.0033516 -0.3373242 0.0177158
RHO GTPase cycle 15 0.0065504 -0.4088662 0.0323154
Vesicle-mediated transport 36 0.0069872 -0.2654402 0.0323160
FCGR3A-mediated phagocytosis 10 0.0124551 -0.4589581 0.0460839
Fcgamma receptor (FCGR) dependent phagocytosis 10 0.0124551 -0.4589581 0.0460839
Leishmania phagocytosis 10 0.0124551 -0.4589581 0.0460839
Parasite infection 10 0.0124551 -0.4589581 0.0460839

Session Info

sessionInfo()
## R version 4.6.0 (2026-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0       gtools_3.9.5           beeswarm_0.4.0        
##  [4] eulerr_7.1.0           msigdbr_26.1.0         mitch_1.24.0          
##  [7] knitr_1.51             ReactomePA_1.56.0      umap_0.2.10.0         
## [10] org.Hs.eg.db_3.23.1    AnnotationDbi_1.74.0   IRanges_2.46.0        
## [13] S4Vectors_0.50.1       Biobase_2.72.0         BiocGenerics_0.58.1   
## [16] generics_0.1.4         clusterProfiler_4.20.0 pheatmap_1.0.13       
## [19] limma_3.68.2           janitor_2.2.1          lubridate_1.9.5       
## [22] forcats_1.0.1          stringr_1.6.0          purrr_1.2.2           
## [25] readr_2.2.0            tidyr_1.3.2            tibble_3.3.1          
## [28] ggplot2_4.0.3          tidyverse_2.0.0        dplyr_1.2.1           
## [31] BiocManager_1.30.27   
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.6.0           later_1.4.8             bitops_1.0-9           
##   [4] ggplotify_0.1.3         polyclip_1.10-7         enrichit_0.1.4         
##   [7] graph_1.90.0            lifecycle_1.0.5         httr2_1.2.2            
##  [10] vroom_1.7.1             processx_3.9.0          lattice_0.22-9         
##  [13] MASS_7.3-65             magrittr_2.0.5          sass_0.4.10            
##  [16] rmarkdown_2.31          jquerylib_0.1.4         yaml_2.3.12            
##  [19] httpuv_1.6.17           otel_0.2.0              ggtangle_0.1.2         
##  [22] askpass_1.2.1           reticulate_1.46.0       DBI_1.3.0              
##  [25] RColorBrewer_1.1-3      ggraph_2.2.2            yulab.utils_0.2.4      
##  [28] tweenr_2.0.3            rappdirs_0.3.4          aisdk_1.1.0            
##  [31] gdtools_0.5.0           enrichplot_1.32.0       ggrepel_0.9.8          
##  [34] tidytree_0.4.7          reactome.db_1.96.0      RSpectra_0.16-2        
##  [37] svglite_2.2.2           DOSE_4.6.0              xml2_1.5.2             
##  [40] ggforce_0.5.0           tidyselect_1.2.1        aplot_0.2.9            
##  [43] farver_2.1.2            viridis_0.6.5           Seqinfo_1.2.0          
##  [46] jsonlite_2.0.0          tidygraph_1.3.1         systemfonts_1.3.2      
##  [49] polylabelr_1.0.0        tools_4.6.0             ggnewscale_0.5.2       
##  [52] treeio_1.36.1           Rcpp_1.1.1-1.1          glue_1.8.1             
##  [55] gridExtra_2.3           xfun_0.57               qvalue_2.44.0          
##  [58] withr_3.0.2             fastmap_1.2.0           GGally_2.4.0           
##  [61] openssl_2.4.1           caTools_1.18.3          callr_3.7.6            
##  [64] digest_0.6.39           timechange_0.4.0        R6_2.6.1               
##  [67] mime_0.13               gridGraphics_0.5-1      textshaping_1.0.5      
##  [70] GO.db_3.23.1            dichromat_2.0-0.1       RSQLite_3.52.0         
##  [73] utf8_1.2.6              fontLiberation_0.1.0    graphlayouts_1.2.3     
##  [76] httr_1.4.8              htmlwidgets_1.6.4       scatterpie_0.2.6       
##  [79] ggstats_0.13.0          graphite_1.58.0         pkgconfig_2.0.3        
##  [82] gtable_0.3.6            blob_1.3.0              S7_0.2.2               
##  [85] XVector_0.52.0          htmltools_0.5.9         fontBitstreamVera_0.1.1
##  [88] echarts4r_0.5.0         scales_1.4.0            png_0.1-9              
##  [91] snakecase_0.11.1        ggfun_0.2.0             rstudioapi_0.18.0      
##  [94] tzdb_0.5.0              reshape2_1.4.5          curl_7.1.0             
##  [97] coda_0.19-4.1           statnet.common_4.13.0   nlme_3.1-169           
## [100] cachem_1.1.0            KernSmooth_2.23-26      parallel_4.6.0         
## [103] pillar_1.11.1           grid_4.6.0              vctrs_0.7.3            
## [106] gplots_3.3.0            promises_1.5.0          tidydr_0.0.6           
## [109] xtable_1.8-8            cluster_2.1.8.2         evaluate_1.0.5         
## [112] cli_3.6.6               compiler_4.6.0          rlang_1.2.0            
## [115] crayon_1.5.3            labeling_0.4.3          ps_1.9.3               
## [118] plyr_1.8.9              fs_2.1.0                ggiraph_0.9.6          
## [121] stringi_1.8.7           network_1.20.0          viridisLite_0.4.3      
## [124] babelgene_22.9          assertthat_0.2.1        Biostrings_2.80.0      
## [127] lazyeval_0.2.3          GOSemSim_2.38.0         fontquiver_0.2.1       
## [130] Matrix_1.7-5            hms_1.1.4               patchwork_1.3.2        
## [133] bit64_4.8.0             KEGGREST_1.52.0         statmod_1.5.2          
## [136] shiny_1.13.0            igraph_2.3.1            memoise_2.0.1          
## [139] bslib_0.11.0            ggtree_4.2.0            bit_4.6.0              
## [142] ape_5.8-1               gson_0.1.0
save.image("Analysis2.rdata")