1. Installing Packages
#{r install-packages, eval=FALSE} #install.packages(c("tidyverse","readr","janitor","pheatmap","ggplot2")) #install.packages("BiocManager") #BiocManager::install(c("limma","clusterProfiler","org.Hs.eg.db","umap")) #BiocManager::install("ReactomePA") #
2.Load all R libraries needed for data processing and visualization.
library(BiocManager)
library(dplyr)
library(tidyverse)
library(readr)
library(janitor)
library(limma)
library(pheatmap)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(umap)
library(ReactomePA)
library(tibble)
library(knitr)
library(mitch)
library(stringr)
library(msigdbr)
library(beeswarm)
library("kableExtra")
library("eulerr")
3. Load files
plexA <- read_tsv("abundance_protein_MD_plexA.tsv")
## Rows: 963 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plexB <- read_tsv("abundance_protein_MD_plexB.tsv")
## Rows: 962 Columns: 27
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): Index, Gene, Protein, Protein ID, Entry Name, Protein Description,...
## dbl (19): NumberPSM, MaxPepProb, ReferenceIntensity, _126, _127N, _127C, _12...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plexA <- janitor::clean_names(plexA)
plexB <- janitor::clean_names(plexB)
4. Keep relevant columns
plexA <- dplyr::select(plexA, protein, gene,
x126, x127n, x127c, x128n, x128c,
x129n, x129c, x130n, x130c,
x131n, x131c, x132n)
plexB <- dplyr::select(plexB, protein, gene,
x126, x127n, x127c, x128n, x128c,
x129n, x129c, x130n, x130c,
x131n, x131c, x132n)
5. Rename TMT channels to sample IDs
annotationA <- c(
x126="19191", x127n="34629", x127c="34862",
x128n="40431", x128c="40475", x129n="35038",
x129c="34866", x130n="41436", x130c="41383",
x131n="40316", x131c="3746", x132n="102953"
)
annotationB <- c(
x126="40797", x127n="4177", x127c="103376",
x128n="47542", x128c="47442", x129n="42977",
x129c="43081", x130n="113835", x130c="113856",
x131n="19584", x131c="118117", x132n="118008"
)
colnames(plexA)[match(names(annotationA), colnames(plexA))] <- annotationA
colnames(plexB)[match(names(annotationB), colnames(plexB))] <- annotationB
6. Merge plexes
merged <- full_join(plexA, plexB, by=c("protein","gene"))
head(merged)
## # A tibble: 6 × 26
## protein gene `19191` `34629` `34862` `40431` `40475` `35038` `34866` `41436`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 sp|A0A0… IGKV… 25.2 25.4 25.4 26.1 25.1 25.6 25.7 25.4
## 2 sp|A0A0… IGLV… 29.0 29.8 29.6 29.5 29.4 28.8 29.9 29.4
## 3 sp|A0A0… IGLV… 27.5 28.5 26.6 27.4 27.4 26.8 27.3 26.6
## 4 sp|A0A0… IGLV… 21.1 NA 21.0 21.9 19.9 21.2 21.2 20.1
## 5 sp|A0A0… IGLV… 22.4 25.3 25.1 24.0 23.9 24.5 24.7 24.3
## 6 sp|A0A0… IGLV… 21.4 22.3 21.4 21.7 21.5 23.8 22.1 22.6
## # ℹ 16 more variables: `41383` <dbl>, `40316` <dbl>, `3746` <dbl>,
## # `102953` <dbl>, `40797` <dbl>, `4177` <dbl>, `103376` <dbl>, `47542` <dbl>,
## # `47442` <dbl>, `42977` <dbl>, `43081` <dbl>, `113835` <dbl>,
## # `113856` <dbl>, `19584` <dbl>, `118117` <dbl>, `118008` <dbl>
7. Expression matrix for Merged plexes, Plex A and Plex B
expr_combined <- as.data.frame(merged)
protein_info_combined <- expr_combined[, c("protein", "gene")]
rownames(expr_combined) <- expr_combined$protein
expr_combined <- expr_combined[, !(colnames(expr_combined) %in% c("protein","gene"))]
expr_combined <- as.matrix(expr_combined)
storage.mode(expr_combined) <- "numeric"
expr_A <- as.data.frame(plexA)
protein_info_A <- expr_A[, c("protein", "gene")]
rownames(expr_A) <- expr_A$protein
expr_A <- expr_A[, !(colnames(expr_A) %in% c("protein","gene"))]
expr_A <- as.matrix(expr_A)
storage.mode(expr_A) <- "numeric"
head(expr_A)
## 19191 34629 34862 40431 40475 35038
## sp|A0A075B6H7|KV37_HUMAN 25.20197 25.39573 25.43255 26.05771 25.05397 25.57650
## sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860
## sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964
## sp|A0A075B6I1|LV460_HUMAN 21.13980 NA 20.95866 21.90418 19.86279 21.18880
## sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498
## sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554
## 34866 41436 41383 40316 3746 102953
## sp|A0A075B6H7|KV37_HUMAN 25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
## sp|A0A075B6H9|LV469_HUMAN 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
## sp|A0A075B6I0|LV861_HUMAN 27.26845 26.57449 28.21526 26.57432 26.75554 NA
## sp|A0A075B6I1|LV460_HUMAN 21.20105 20.12885 20.95064 21.09826 20.35881 NA
## sp|A0A075B6J9|LV218_HUMAN 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
## sp|A0A075B6K0|LV316_HUMAN 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397
expr_B <- as.data.frame(plexB)
protein_info_B <- expr_B[, c("protein", "gene")]
rownames(expr_B) <- expr_B$protein
expr_B <- expr_B[, !(colnames(expr_B) %in% c("protein","gene"))]
expr_B <- as.matrix(expr_B)
storage.mode(expr_B) <- "numeric"
head(expr_B)
## 40797 4177 103376 47542 47442 42977
## sp|A0A075B6H7|KV37_HUMAN 30.46372 30.34873 30.21146 30.78558 30.36076 30.19026
## sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953
## sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399
## sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525
## sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839 NA 13.41395 17.83912 14.71371
## sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357
## 43081 113835 113856 19584 118117 118008
## sp|A0A075B6H7|KV37_HUMAN 30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
## sp|A0A075B6H9|LV469_HUMAN 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
## sp|A0A075B6I0|LV861_HUMAN 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
## sp|A0A075B6I1|LV460_HUMAN 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
## sp|A0A075B6I4|LVX54_HUMAN 15.67450 14.14982 13.30315 NA 13.41029 14.51386
## sp|A0A075B6J9|LV218_HUMAN 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800
8. Filter proteins (present in >=50% of samples)
keep_combined <- rowSums(!is.na(expr_combined)) >= ncol(expr_combined)*0.5
expr_combined_filt <- expr_combined[keep_combined, ]
keep_A <- rowSums(!is.na(expr_A)) >= ncol(expr_A)*0.5
expr_A_filt <- expr_A[keep_A, ]
head(expr_A_filt)
## 19191 34629 34862 40431 40475 35038
## sp|A0A075B6H7|KV37_HUMAN 25.20197 25.39573 25.43255 26.05771 25.05397 25.57650
## sp|A0A075B6H9|LV469_HUMAN 28.95035 29.82285 29.61003 29.53702 29.44966 28.84860
## sp|A0A075B6I0|LV861_HUMAN 27.53609 28.54878 26.58017 27.43782 27.43139 26.83964
## sp|A0A075B6I1|LV460_HUMAN 21.13980 NA 20.95866 21.90418 19.86279 21.18880
## sp|A0A075B6J9|LV218_HUMAN 22.38769 25.30212 25.12898 24.02298 23.89291 24.49498
## sp|A0A075B6K0|LV316_HUMAN 21.37483 22.29617 21.39598 21.69973 21.51293 23.78554
## 34866 41436 41383 40316 3746 102953
## sp|A0A075B6H7|KV37_HUMAN 25.65469 25.37295 25.65501 25.64185 25.43883 25.33301
## sp|A0A075B6H9|LV469_HUMAN 29.91425 29.41320 29.00432 28.83877 28.59919 29.14621
## sp|A0A075B6I0|LV861_HUMAN 27.26845 26.57449 28.21526 26.57432 26.75554 NA
## sp|A0A075B6I1|LV460_HUMAN 21.20105 20.12885 20.95064 21.09826 20.35881 NA
## sp|A0A075B6J9|LV218_HUMAN 24.70158 24.32961 24.30966 24.58185 23.43995 23.82350
## sp|A0A075B6K0|LV316_HUMAN 22.11370 22.61187 20.93010 22.14908 20.87886 20.41397
keep_B <- rowSums(!is.na(expr_B)) >= ncol(expr_B)*0.5
expr_B_filt <- expr_B[keep_B, ]
head(expr_B_filt)
## 40797 4177 103376 47542 47442 42977
## sp|A0A075B6H7|KV37_HUMAN 30.46372 30.34873 30.21146 30.78558 30.36076 30.19026
## sp|A0A075B6H9|LV469_HUMAN 22.40007 21.18373 21.26245 21.90925 22.59531 21.24953
## sp|A0A075B6I0|LV861_HUMAN 25.58149 26.12823 26.52763 25.72071 26.59545 25.47399
## sp|A0A075B6I1|LV460_HUMAN 27.79615 26.93927 25.90896 26.48209 28.84106 26.96525
## sp|A0A075B6I4|LVX54_HUMAN 14.60397 13.68839 NA 13.41395 17.83912 14.71371
## sp|A0A075B6J9|LV218_HUMAN 27.34292 26.35223 24.59685 26.37291 25.30866 25.64357
## 43081 113835 113856 19584 118117 118008
## sp|A0A075B6H7|KV37_HUMAN 30.28124 30.56661 29.76611 30.72821 30.20665 30.00076
## sp|A0A075B6H9|LV469_HUMAN 21.98843 21.22654 21.40015 22.00258 21.02142 21.46372
## sp|A0A075B6I0|LV861_HUMAN 26.13838 25.35584 25.89193 25.69072 25.72252 26.67365
## sp|A0A075B6I1|LV460_HUMAN 28.56674 27.15404 27.20759 27.49989 27.18150 27.81810
## sp|A0A075B6I4|LVX54_HUMAN 15.67450 14.14982 13.30315 NA 13.41029 14.51386
## sp|A0A075B6J9|LV218_HUMAN 26.10869 25.59368 27.73543 25.64763 26.16884 26.54800
9. QC before normalization
qc_sample_combined <- data.frame(
Sample = colnames(expr_combined_filt),
Detected_Proteins = colSums(!is.na(expr_combined_filt)),
Missing_Percent = colMeans(is.na(expr_combined_filt)) * 100,
Median_Intensity = apply(expr_combined_filt, 2, median, na.rm = TRUE),
SD_Intensity = apply(expr_combined_filt, 2, sd, na.rm = TRUE)
)
# Assign plex
plexA_samples <- c(
"19191","34629","34862","40431","40475","35038",
"34866","41436","41383","40316","3746","102953"
)
qc_sample_combined$Plex <- ifelse(qc_sample_combined$Sample %in% plexA_samples, "A", "B")
# QC summary by plex
qc_plex_combined <- qc_sample_combined %>%
group_by(Plex) %>%
summarise(
N_Samples = n(),
Mean_Detected_Proteins = mean(Detected_Proteins),
Mean_Missing_Percent = mean(Missing_Percent),
Mean_Median_Intensity = mean(Median_Intensity),
.groups = "drop"
)
kable(qc_sample_combined)
| 19191 |
19191 |
811 |
10.28761 |
22.61437 |
3.608051 |
A |
| 34629 |
34629 |
768 |
15.04425 |
22.52596 |
3.824270 |
A |
| 34862 |
34862 |
792 |
12.38938 |
22.47182 |
3.687522 |
A |
| 40431 |
40431 |
767 |
15.15487 |
22.17860 |
3.914548 |
A |
| 40475 |
40475 |
768 |
15.04425 |
22.27699 |
3.916427 |
A |
| 35038 |
35038 |
806 |
10.84071 |
22.40563 |
3.668941 |
A |
| 34866 |
34866 |
805 |
10.95133 |
22.46457 |
3.701970 |
A |
| 41436 |
41436 |
779 |
13.82743 |
22.34549 |
3.813249 |
A |
| 41383 |
41383 |
791 |
12.50000 |
22.41082 |
3.736537 |
A |
| 40316 |
40316 |
762 |
15.70796 |
22.20311 |
3.877041 |
A |
| 3746 |
3746 |
797 |
11.83628 |
22.94357 |
3.486882 |
A |
| 102953 |
102953 |
765 |
15.37611 |
22.48856 |
3.758113 |
A |
| 40797 |
40797 |
766 |
15.26549 |
22.30942 |
3.775893 |
B |
| 4177 |
4177 |
782 |
13.49558 |
22.93066 |
3.365816 |
B |
| 103376 |
103376 |
714 |
21.01770 |
22.29280 |
3.759772 |
B |
| 47542 |
47542 |
780 |
13.71681 |
22.25514 |
3.563151 |
B |
| 47442 |
47442 |
731 |
19.13717 |
22.16957 |
3.792444 |
B |
| 42977 |
42977 |
772 |
14.60177 |
22.38404 |
3.503025 |
B |
| 43081 |
43081 |
776 |
14.15929 |
22.13263 |
3.707914 |
B |
| 113835 |
113835 |
726 |
19.69027 |
22.26161 |
3.831329 |
B |
| 113856 |
113856 |
768 |
15.04425 |
22.04823 |
3.841864 |
B |
| 19584 |
19584 |
764 |
15.48673 |
22.35815 |
3.493251 |
B |
| 118117 |
118117 |
727 |
19.57965 |
22.31801 |
3.876589 |
B |
| 118008 |
118008 |
675 |
25.33186 |
22.33120 |
3.854068 |
B |
kable(qc_plex_combined)
| A |
12 |
784.2500 |
13.24668 |
22.44412 |
| B |
12 |
748.4167 |
17.21055 |
22.31595 |
# QC plots
qc_long_combined <- as.data.frame(expr_combined_filt) %>%
rownames_to_column("Protein") %>%
pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
filter(is.finite(Intensity))
# Missingness plot
ggplot(qc_sample_combined,
aes(x = reorder(Sample, Missing_Percent),
y = Missing_Percent,
fill = Plex)) +
geom_col() +
coord_flip() +
theme_bw() +
labs(title = "Missingness per Sample",
x = "Sample", y = "% Missing")

# Detected proteins plot
ggplot(qc_sample_combined,
aes(x = reorder(Sample, Detected_Proteins),
y = Detected_Proteins,
fill = Plex)) +
geom_col() +
coord_flip() +
theme_bw() +
labs(title = "Detected Proteins per Sample",
x = "Sample", y = "Detected Proteins")

# Density plot
ggplot(qc_long_combined,
aes(x = Intensity, color = Sample)) +
geom_density(linewidth = 0.8) +
theme_bw() +
labs(title = "Intensity Distribution per Sample",
x = "Intensity", y = "Density")

# Correlation heatmap
cor_pre_combined <- cor(expr_combined_filt, use = "pairwise.complete.obs")
pheatmap(cor_pre_combined,
main = "Sample Correlation Heatmap (Pre-normalization)")
# Plex-level comparison
ggplot(qc_plex_combined,
aes(x = Plex, y = Mean_Detected_Proteins, fill = Plex)) +
geom_col() +
theme_bw() +
labs(title = "Mean Detected Proteins by Plex",
x = "Plex", y = "Mean Detected Proteins")
11. QC after normalization
qc_long_norm_combined <- as.data.frame(expr_combined_log) %>%
rownames_to_column("Protein") %>%
pivot_longer(-Protein, names_to = "Sample", values_to = "Intensity") %>%
filter(is.finite(Intensity))
ggplot(qc_long_norm_combined,
aes(x = Intensity, color = Sample)) +
geom_density(linewidth = 0.8) +
theme_bw() +
labs(title = "Post-normalization Intensity Distribution (Combined)",
x = "Log2 Intensity",
y = "Density")

cor_post_combined <- cor(expr_combined_log, use = "pairwise.complete.obs")
pheatmap(cor_post_combined,
main = "Sample Correlation Heatmap (Post-normalization)")
Metadata Creation
############################################
# METADATA CREATION (SINGLE SOURCE OF TRUTH)
############################################
# Create combined metadata from expression matrix
meta_combined <- data.frame(
Sample = colnames(expr_combined_imp)
)
############################################
# Define Plex (based on known Plex A samples)
############################################
plexA_samples <- c(
"19191","34629","34862","40431","40475","35038",
"34866","41436","41383","40316","3746","102953"
)
meta_combined$Plex <- ifelse(meta_combined$Sample %in% plexA_samples,
"A", "B")
meta_combined$Plex <- factor(meta_combined$Plex)
############################################
# Define biological group (Case vs Control)
############################################
case_samples <- c(
"19191","40797","34862","103376","40475","47442",
"34866","43081","41383","113856","3746","118117"
)
meta_combined$Group <- ifelse(meta_combined$Sample %in% case_samples,
"Case", "Control")
meta_combined$Group <- factor(meta_combined$Group,
levels = c("Control","Case"))
############################################
# Final formatting
############################################
rownames(meta_combined) <- meta_combined$Sample
############################################
# Create Plex-specific metadata (CORRECT WAY)
############################################
meta_A <- meta_combined[meta_combined$Plex == "A", ]
meta_B <- meta_combined[meta_combined$Plex == "B", ]
############################################
# Align with expression matrices (CRITICAL)
############################################
meta_A <- meta_A[colnames(expr_A_imp), ]
meta_B <- meta_B[colnames(expr_B_imp), ]
############################################
# Keep only needed columns
############################################
meta_A <- meta_A[, c("Sample","Group")]
meta_B <- meta_B[, c("Sample","Group")]
############################################
# SANITY CHECKS (DO NOT SKIP)
############################################
# Alignment
all(rownames(meta_A) == colnames(expr_A_imp))
## [1] TRUE
all(rownames(meta_B) == colnames(expr_B_imp))
## [1] TRUE
all(rownames(meta_combined) == colnames(expr_combined_imp))
## [1] TRUE
# Distribution checks
table(meta_combined$Plex, meta_combined$Group)
##
## Control Case
## A 6 6
## B 6 6
table(meta_A$Group)
##
## Control Case
## 6 6
table(meta_B$Group)
##
## Control Case
## 6 6
PCA Before Batch Correction for combined plexes
pca_pre <- prcomp(t(expr_combined_imp), center = TRUE, scale. = TRUE)
pca_pre_df <- data.frame(
PC1 = pca_pre$x[,1],
PC2 = pca_pre$x[,2],
Sample = colnames(expr_combined_imp),
Plex = meta_combined$Plex,
Group = meta_combined$Group
)
ggplot(pca_pre_df, aes(x = PC1, y = PC2, color = Plex, shape = Group)) +
geom_point(size = 4) +
geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
theme_bw() +
labs(title = "PCA Before Batch Correction",
x = "PC1",
y = "PC2")

batch Correction
expr_combined_corrected <- removeBatchEffect(
expr_combined_imp,
batch = meta_combined$Plex,
design = model.matrix(~ Group, data = meta_combined)
)
# Quick check
head(expr_combined_corrected[,1:5])
## 19191 34629 34862 40431 40475
## sp|A0A075B6H7|KV37_HUMAN 4.833346 4.830965 4.839526 4.871850 4.827196
## sp|A0A075B6H9|LV469_HUMAN 4.702996 4.718612 4.721695 4.712016 4.715457
## sp|A0A075B6I0|LV861_HUMAN 4.825501 4.858319 4.764883 4.807596 4.800510
## sp|A0A075B6I1|LV460_HUMAN 4.638888 4.726509 4.627839 4.711256 4.579935
## sp|A0A075B6J9|LV218_HUMAN 4.595879 4.759264 4.755084 4.708453 4.696748
## sp|A0A075B6K0|LV316_HUMAN 4.471802 4.529905 4.479845 4.517084 4.502633
# Dimensions unchanged
dim(expr_combined_corrected)
## [1] 904 24
PCA On Combined Plexes after removing Batch effect
pca_post <- prcomp(t(expr_combined_corrected), center = TRUE, scale. = TRUE)
pca_post_df <- data.frame(
PC1 = pca_post$x[,1],
PC2 = pca_post$x[,2],
Sample = colnames(expr_combined_corrected),
Plex = meta_combined$Plex,
Group = meta_combined$Group
)
ggplot(pca_post_df, aes(x = PC1, y = PC2, color = Group, shape = Plex)) +
geom_point(size = 4) +
geom_text(aes(label = Sample), vjust = -0.5, size = 3) +
theme_bw() +
labs(title = "PCA After Batch Correction",
x = "PC1",
y = "PC2")

Differential Expression Analysis
Combined plexes
design_combined <- model.matrix(~ Plex + Group, data = meta_combined)
fit_combined <- lmFit(expr_combined_imp, design_combined)
fit_combined <- eBayes(fit_combined)
results_combined <- topTable(
fit_combined,
coef = "GroupCase",
number = Inf,
sort.by = "P"
)
results_combined$Protein <- rownames(results_combined)
# Add gene annotation
results_combined <- merge(
results_combined,
protein_info_combined,
by.x = "Protein",
by.y = "protein",
all.x = TRUE
)
# Add significance labels
results_combined$Significance <- "Not Significant"
results_combined$Significance[
results_combined$adj.P.Val < 0.05 & results_combined$logFC > 1
] <- "Upregulated"
results_combined$Significance[
results_combined$adj.P.Val < 0.05 & results_combined$logFC < -1
] <- "Downregulated"
# View
results_combined <- results_combined[order(results_combined$P.Value),]
head(results_combined) |>
kbl(caption = "Top ranked proteins by p-value") |>
kable_paper("hover", full_width = F)
Top ranked proteins by p-value
|
|
Protein
|
logFC
|
AveExpr
|
t
|
P.Value
|
adj.P.Val
|
B
|
gene
|
Significance
|
|
751
|
sp|Q27J81|INF2_HUMAN
|
-0.1169760
|
4.464969
|
-4.867502
|
0.0000637
|
0.0576017
|
1.6695010
|
INF2
|
Not Significant
|
|
507
|
sp|P28482|MK01_HUMAN
|
0.1460155
|
4.370219
|
4.197978
|
0.0003398
|
0.1535753
|
0.0466474
|
MAPK1
|
Not Significant
|
|
96
|
sp|O75356|ENTP5_HUMAN
|
0.0411850
|
4.456341
|
3.137454
|
0.0045940
|
0.5645256
|
-2.4416598
|
ENTPD5
|
Not Significant
|
|
571
|
sp|P49407|ARRB1_HUMAN
|
0.0752710
|
4.480139
|
3.119861
|
0.0047905
|
0.5645256
|
-2.4810489
|
ARRB1
|
Not Significant
|
|
891
|
sp|Q9Y287|ITM2B_HUMAN
|
-0.0322520
|
4.425711
|
-3.009591
|
0.0062189
|
0.5645256
|
-2.7258946
|
ITM2B
|
Not Significant
|
|
609
|
sp|P59998|ARPC4_HUMAN
|
-0.0810547
|
4.524232
|
-2.956053
|
0.0070519
|
0.5645256
|
-2.8434299
|
ARPC4
|
Not Significant
|
Separate plexes
Analyse plex A and plex B.
design_A <- model.matrix(~ Group, data = meta_A)
fit_A <- lmFit(expr_A_imp, design_A)
fit_A <- eBayes(fit_A)
results_A <- topTable(
fit_A,
coef = "GroupCase",
number = Inf,
sort.by = "P"
)
results_A$Protein <- rownames(results_A)
# Add gene annotation
results_A <- merge(
results_A,
protein_info_A,
by.x = "Protein",
by.y = "protein",
all.x = TRUE
)
# Add significance labels
results_A$Significance <- "Not Significant"
results_A$Significance[
results_A$adj.P.Val < 0.05 & results_A$logFC > 1
] <- "Upregulated"
results_A$Significance[
results_A$adj.P.Val < 0.05 & results_A$logFC < -1
] <- "Downregulated"
# View
results_A <- results_A[order(results_A$P.Value),]
head(results_combined) |>
kbl(caption = "Top ranked proteins by p-value in plex A") |>
kable_paper("hover", full_width = F)
Top ranked proteins by p-value in plex A
|
|
Protein
|
logFC
|
AveExpr
|
t
|
P.Value
|
adj.P.Val
|
B
|
gene
|
Significance
|
|
751
|
sp|Q27J81|INF2_HUMAN
|
-0.1169760
|
4.464969
|
-4.867502
|
0.0000637
|
0.0576017
|
1.6695010
|
INF2
|
Not Significant
|
|
507
|
sp|P28482|MK01_HUMAN
|
0.1460155
|
4.370219
|
4.197978
|
0.0003398
|
0.1535753
|
0.0466474
|
MAPK1
|
Not Significant
|
|
96
|
sp|O75356|ENTP5_HUMAN
|
0.0411850
|
4.456341
|
3.137454
|
0.0045940
|
0.5645256
|
-2.4416598
|
ENTPD5
|
Not Significant
|
|
571
|
sp|P49407|ARRB1_HUMAN
|
0.0752710
|
4.480139
|
3.119861
|
0.0047905
|
0.5645256
|
-2.4810489
|
ARRB1
|
Not Significant
|
|
891
|
sp|Q9Y287|ITM2B_HUMAN
|
-0.0322520
|
4.425711
|
-3.009591
|
0.0062189
|
0.5645256
|
-2.7258946
|
ITM2B
|
Not Significant
|
|
609
|
sp|P59998|ARPC4_HUMAN
|
-0.0810547
|
4.524232
|
-2.956053
|
0.0070519
|
0.5645256
|
-2.8434299
|
ARPC4
|
Not Significant
|
head(results_A)
## Protein logFC AveExpr t P.Value adj.P.Val
## 740 sp|Q86UF1|TSN33_HUMAN -0.20076209 4.396326 -4.595562 0.0006315126 0.3477544
## 270 sp|P06702|S10A9_HUMAN -0.13940577 4.711956 -4.203125 0.0012513585 0.3477544
## 715 sp|Q27J81|INF2_HUMAN -0.12265117 4.448334 -4.067512 0.0015917637 0.3477544
## 546 sp|P49593|PPM1F_HUMAN 0.10647979 4.172522 3.995491 0.0018102257 0.3477544
## 863 sp|Q9Y287|ITM2B_HUMAN -0.05635063 4.300458 -3.616202 0.0035936408 0.3477544
## 253 sp|P05109|S10A8_HUMAN -0.12567021 4.779495 -3.570575 0.0039058992 0.3477544
## B gene Significance
## 740 -0.09244264 TSPAN33 Not Significant
## 270 -0.74720064 S100A9 Not Significant
## 715 -0.97779807 INF2 Not Significant
## 546 -1.10105802 PPM1F Not Significant
## 863 -1.75772600 ITM2B Not Significant
## 253 -1.83739665 S100A8 Not Significant
Plex B.
design_B <- model.matrix(~ Group, data = meta_B)
fit_B <- lmFit(expr_B_imp, design_B)
fit_B <- eBayes(fit_B)
results_B <- topTable(
fit_B,
coef = "GroupCase",
number = Inf,
sort.by = "P"
)
results_B$Protein <- rownames(results_B)
# Add gene annotation
results_B <- merge(
results_B,
protein_info_B,
by.x = "Protein",
by.y = "protein",
all.x = TRUE
)
# Add significance labels
results_B$Significance <- "Not Significant"
results_B$Significance[
results_B$adj.P.Val < 0.05 & results_B$logFC > 1
] <- "Upregulated"
results_B$Significance[
results_B$adj.P.Val < 0.05 & results_B$logFC < -1
] <- "Downregulated"
# View
results_B <- results_B[order(results_B$P.Value),]
head(results_combined) |>
kbl(caption = "Top ranked proteins by p-value in plex B") |>
kable_paper("hover", full_width = F)
Top ranked proteins by p-value in plex B
|
|
Protein
|
logFC
|
AveExpr
|
t
|
P.Value
|
adj.P.Val
|
B
|
gene
|
Significance
|
|
751
|
sp|Q27J81|INF2_HUMAN
|
-0.1169760
|
4.464969
|
-4.867502
|
0.0000637
|
0.0576017
|
1.6695010
|
INF2
|
Not Significant
|
|
507
|
sp|P28482|MK01_HUMAN
|
0.1460155
|
4.370219
|
4.197978
|
0.0003398
|
0.1535753
|
0.0466474
|
MAPK1
|
Not Significant
|
|
96
|
sp|O75356|ENTP5_HUMAN
|
0.0411850
|
4.456341
|
3.137454
|
0.0045940
|
0.5645256
|
-2.4416598
|
ENTPD5
|
Not Significant
|
|
571
|
sp|P49407|ARRB1_HUMAN
|
0.0752710
|
4.480139
|
3.119861
|
0.0047905
|
0.5645256
|
-2.4810489
|
ARRB1
|
Not Significant
|
|
891
|
sp|Q9Y287|ITM2B_HUMAN
|
-0.0322520
|
4.425711
|
-3.009591
|
0.0062189
|
0.5645256
|
-2.7258946
|
ITM2B
|
Not Significant
|
|
609
|
sp|P59998|ARPC4_HUMAN
|
-0.0810547
|
4.524232
|
-2.956053
|
0.0070519
|
0.5645256
|
-2.8434299
|
ARPC4
|
Not Significant
|
head(results_B)
## Protein logFC AveExpr t P.Value
## 112 sp|P00387|NB5R3_HUMAN -0.17683895 4.440961 -6.330268 3.786522e-05
## 528 sp|P40227|TCPZ_HUMAN -0.15017532 4.378868 -5.615484 1.136361e-04
## 590 sp|P61106|RAB14_HUMAN -0.17882092 4.385867 -5.528161 1.306021e-04
## 575 sp|P55084|ECHB_HUMAN -0.27293379 4.251690 -5.063074 2.790389e-04
## 282 sp|P07203|GPX1_HUMAN -0.15833288 4.361424 -4.767197 4.594180e-04
## 89 sp|O75356|ENTP5_HUMAN 0.08676138 4.370177 4.514696 7.096365e-04
## adj.P.Val B gene Significance
## 112 0.03233690 2.5541250 CYB5R3 Not Significant
## 528 0.03717808 1.5320233 CCT6A Not Significant
## 590 0.03717808 1.4014708 RAB14 Not Significant
## 575 0.05957481 0.6855750 HADHB Not Significant
## 282 0.07846859 0.2126430 GPX1 Not Significant
## 89 0.10100493 -0.2010684 ENTPD5 Not Significant
Comparison of Plex A and B results
The top 50 up and down regulated proteins are selected from Plex A and B and these lists were compared.
plexA_up <- head(subset(results_A,logFC>0),50)[,1]
plexA_dn <- head(subset(results_A,logFC<0),50)[,1]
plexB_up <- head(subset(results_B,logFC>0),50)[,1]
plexB_dn <- head(subset(results_B,logFC<0),50)[,1]
v1 <- list("A up"=plexA_up, "A dn"=plexA_dn, "B up"=plexB_up, "B dn"=plexB_dn)
plot(euler(v1), quantities = TRUE)

intersect(plexA_up,plexB_up)
## character(0)
intersect(plexA_dn,plexB_dn)
## [1] "sp|Q27J81|INF2_HUMAN"
intersect(plexA_up,plexB_dn)
## [1] "sp|P61106|RAB14_HUMAN" "sp|Q9UBW5|BIN2_HUMAN" "sp|Q3ZCW2|LEGL_HUMAN"
## [4] "sp|P14770|GPIX_HUMAN" "sp|Q15555|MARE2_HUMAN" "sp|Q86UX7|URP2_HUMAN"
## [7] "sp|Q14247|SRC8_HUMAN"
intersect(plexB_up,plexA_dn)
## [1] "sp|P18428|LBP_HUMAN" "sp|Q5D862|FILA2_HUMAN" "sp|P01766|HV313_HUMAN"
Ranking of proteins by logFC
results_A$score <- sign(results_A$logFC) * -log10(results_A$P.Value)
results_B$score <- sign(results_B$logFC) * -log10(results_B$P.Value)
results_m <- merge(results_A,results_B,by="Protein")
ranks_m <- results_m[,c("Protein","score.x","score.y")]
rownames(ranks_m) <- ranks_m$Protein ; ranks_m$Protein=NULL
colnames(ranks_m) <- c("Plex A","Plex B")
plot(ranks_m)
mylm <- lm(ranks_m$`Plex B` ~ ranks_m$`Plex A`)
abline(mylm,lwd=2,lty=2,col="red")
COR=signif(cor(ranks_m),3)[2,1]
COR
## [1] -0.551
mtext(paste("Pearson correlation=",COR))

Pathway Enrichment Analysis
Using the combined protein profiles.
Some of the pathways are known to be involved in CVD.
m <- mitch_import(results_combined,DEtype="limma",geneIDcol="gene")
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 904
## Note: no. genes in output = 904
## Note: estimated proportion of input genes in output = 1
reactome_sets <- gmt_import("ReactomePathways_2026-04-02.gmt")
mres <- mitch_calc(m, reactome_sets, minsetsize = 5, cores=8)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
head(mres$enrichment_result,10) |>
kbl(caption = "Top ranked Reactome pathways by p-value") |>
kable_paper("hover", full_width = F)
Top ranked Reactome pathways by p-value
|
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
|
317
|
Metabolism of lipids
|
30
|
0.0017402
|
-0.3352403
|
0.4962473
|
|
374
|
Peptide hormone metabolism
|
15
|
0.0040461
|
0.4317210
|
0.4962473
|
|
197
|
Formation of the cornified envelope
|
28
|
0.0041121
|
0.3177593
|
0.4962473
|
|
282
|
Keratinization
|
28
|
0.0041121
|
0.3177593
|
0.4962473
|
|
383
|
Platelet Adhesion to exposed collagen
|
10
|
0.0044372
|
-0.5219239
|
0.4962473
|
|
548
|
Syndecan interactions
|
10
|
0.0053484
|
-0.5109620
|
0.4962473
|
|
151
|
Diseases associated with O-glycosylation of proteins
|
9
|
0.0059401
|
-0.5317194
|
0.4962473
|
|
354
|
Non-integrin membrane-ECM interactions
|
19
|
0.0065954
|
-0.3634255
|
0.4962473
|
|
353
|
Neutrophil degranulation
|
152
|
0.0074190
|
0.1373880
|
0.4962473
|
|
327
|
Mitochondrial protein degradation
|
6
|
0.0091377
|
-0.6161841
|
0.5500882
|
head(subset(mres$enrichment_result,s.dist>0),10) |>
kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
kable_paper("hover", full_width = F)
Top ranked upregulated Reactome pathways by p-value
|
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
|
374
|
Peptide hormone metabolism
|
15
|
0.0040461
|
0.4317210
|
0.4962473
|
|
197
|
Formation of the cornified envelope
|
28
|
0.0041121
|
0.3177593
|
0.4962473
|
|
282
|
Keratinization
|
28
|
0.0041121
|
0.3177593
|
0.4962473
|
|
353
|
Neutrophil degranulation
|
152
|
0.0074190
|
0.1373880
|
0.4962473
|
|
260
|
Innate Immune System
|
277
|
0.0187814
|
0.0978645
|
0.7508590
|
|
155
|
Diseases of carbohydrate metabolism
|
6
|
0.0190101
|
0.5545657
|
0.7508590
|
|
296
|
Lysosome Vesicle Biogenesis
|
6
|
0.0294880
|
0.5148478
|
0.7508590
|
|
253
|
Immune System
|
337
|
0.0488998
|
0.0782347
|
0.7956135
|
|
107
|
Complement cascade
|
91
|
0.0592232
|
0.1204466
|
0.8987126
|
|
312
|
Metabolism of Angiotensinogen to Angiotensins
|
7
|
0.0716482
|
0.3948081
|
0.8987126
|
head(subset(mres$enrichment_result,s.dist<0),10) |>
kbl(caption = "Top ranked upregulated Reactome pathways by p-value") |>
kable_paper("hover", full_width = F)
Top ranked upregulated Reactome pathways by p-value
|
|
set
|
setSize
|
pANOVA
|
s.dist
|
p.adjustANOVA
|
|
317
|
Metabolism of lipids
|
30
|
0.0017402
|
-0.3352403
|
0.4962473
|
|
383
|
Platelet Adhesion to exposed collagen
|
10
|
0.0044372
|
-0.5219239
|
0.4962473
|
|
548
|
Syndecan interactions
|
10
|
0.0053484
|
-0.5109620
|
0.4962473
|
|
151
|
Diseases associated with O-glycosylation of proteins
|
9
|
0.0059401
|
-0.5317194
|
0.4962473
|
|
354
|
Non-integrin membrane-ECM interactions
|
19
|
0.0065954
|
-0.3634255
|
0.4962473
|
|
327
|
Mitochondrial protein degradation
|
6
|
0.0091377
|
-0.6161841
|
0.5500882
|
|
170
|
EPHA-mediated growth cone collapse
|
7
|
0.0103976
|
-0.5610766
|
0.5690322
|
|
88
|
Cellular responses to stimuli
|
86
|
0.0129358
|
-0.1626201
|
0.6489475
|
|
433
|
RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function
|
5
|
0.0178210
|
-0.6133482
|
0.7508590
|
|
422
|
RHOBTB GTPase Cycle
|
8
|
0.0216807
|
-0.4707031
|
0.7508590
|
if ( file.exists("mitchreport1.html") ) { unlink("mitchreport1.html") }
mitch_report(mres,"mitchreport1.html")
## Dataset saved as " /tmp/RtmpM1seI5/mitchreport1.rds ".
##
##
## processing file: mitch.Rmd
## 1/36
## 2/36 [checklibraries]
## 3/36
## 4/36 [peek]
## 5/36
## 6/36 [metrics]
## 7/36
## 8/36 [scatterplot]
## 9/36
## 10/36 [contourplot]
## 11/36
## 12/36 [input_geneset_metrics1]
## 13/36
## 14/36 [input_geneset_metrics2]
## 15/36
## 16/36 [input_geneset_metrics3]
## 17/36
## 18/36 [echart1d]
## 19/36 [echart2d]
## 20/36
## 21/36 [heatmap]
## 22/36
## 23/36 [effectsize]
## 24/36
## 25/36 [results_table]
## 26/36
## 27/36 [results_table_complete]
## 28/36
## 29/36 [detailed_geneset_reports1d]
## 30/36
## 31/36 [detailed_geneset_reports2d]
## 32/36
## 33/36 [network]
## 34/36
## 35/36 [session_info]
## 36/36
## output file: /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md
## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/CVD_TMT/cvd_proteomics_tmt/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/RtmpM1seI5/mitch_report.html --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /mnt/data/rlibs/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /mnt/data/rlibs/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpM1seI5/rmarkdown-str13c6855b1103e5.html
##
## Output created: /tmp/RtmpM1seI5/mitch_report.html
## [1] TRUE
IG Filtering
# plex A
ig_filter_A <- grepl("IG|KV|LV|IGH|IGK|IGL", rownames(expr_A_imp))
expr_A_clean <- expr_A_imp[!ig_filter_A, ]
# Plex B
ig_filter_B <- grepl("IG|KV|LV|IGH|IGK|IGL", rownames(expr_B_imp))
expr_B_clean <- expr_B_imp[!ig_filter_B, ]
# DE For Plex A
design_A <- model.matrix(~ Group, data = meta_A)
fit_A_clean <- lmFit(expr_A_clean, design_A)
fit_A_clean <- eBayes(fit_A_clean)
results_A_clean <- topTable(fit_A_clean, number = Inf)
## Removing intercept from test coefficients
results_A_clean$Protein <- rownames(results_A_clean)
# DE for Plex B
design_B <- model.matrix(~ Group, data = meta_B)
fit_B_clean <- lmFit(expr_B_clean, design_B)
fit_B_clean <- eBayes(fit_B_clean)
results_B_clean <- topTable(fit_B_clean, number = Inf)
## Removing intercept from test coefficients
results_B_clean$Protein <- rownames(results_B_clean)
head(results_A_clean)
## logFC AveExpr t P.Value adj.P.Val
## sp|Q86UF1|TSN33_HUMAN -0.20076209 4.396326 -4.590756 0.0006400107 0.3320219
## sp|P06702|S10A9_HUMAN -0.13940577 4.711956 -4.198574 0.0012668477 0.3320219
## sp|Q27J81|INF2_HUMAN -0.12265117 4.448334 -4.063033 0.0016109123 0.3320219
## sp|P49593|PPM1F_HUMAN 0.10647979 4.172522 3.990973 0.0018319207 0.3320219
## sp|Q9Y287|ITM2B_HUMAN -0.05635063 4.300458 -3.611170 0.0036379341 0.3320219
## sp|P05109|S10A8_HUMAN -0.12567021 4.779495 -3.566743 0.0039450988 0.3320219
## B Protein
## sp|Q86UF1|TSN33_HUMAN -0.1018677 sp|Q86UF1|TSN33_HUMAN
## sp|P06702|S10A9_HUMAN -0.7548051 sp|P06702|S10A9_HUMAN
## sp|Q27J81|INF2_HUMAN -0.9848090 sp|Q27J81|INF2_HUMAN
## sp|P49593|PPM1F_HUMAN -1.1078887 sp|P49593|PPM1F_HUMAN
## sp|Q9Y287|ITM2B_HUMAN -1.7641828 sp|Q9Y287|ITM2B_HUMAN
## sp|P05109|S10A8_HUMAN -1.8416104 sp|P05109|S10A8_HUMAN
head(results_B_clean)
## logFC AveExpr t P.Value adj.P.Val
## sp|P00387|NB5R3_HUMAN -0.17683895 4.440961 -6.325361 3.860124e-05 0.03076519
## sp|P40227|TCPZ_HUMAN -0.15017532 4.378868 -5.611405 1.154988e-04 0.03531205
## sp|P61106|RAB14_HUMAN -0.17882092 4.385867 -5.523120 1.329186e-04 0.03531205
## sp|P55084|ECHB_HUMAN -0.27293379 4.251690 -5.057160 2.840958e-04 0.05660608
## sp|P07203|GPX1_HUMAN -0.15833288 4.361424 -4.762751 4.662760e-04 0.07432439
## sp|O75356|ENTP5_HUMAN 0.08676138 4.370177 4.513899 7.152604e-04 0.09501042
## B Protein
## sp|P00387|NB5R3_HUMAN 2.5247563 sp|P00387|NB5R3_HUMAN
## sp|P40227|TCPZ_HUMAN 1.5147695 sp|P40227|TCPZ_HUMAN
## sp|P61106|RAB14_HUMAN 1.3840769 sp|P61106|RAB14_HUMAN
## sp|P55084|ECHB_HUMAN 0.6734127 sp|P55084|ECHB_HUMAN
## sp|P07203|GPX1_HUMAN 0.2068041 sp|P07203|GPX1_HUMAN
## sp|O75356|ENTP5_HUMAN -0.1976055 sp|O75356|ENTP5_HUMAN
Top 50 up and down regulated genes after IG removal
# plex A
# top 50 up
top50_up_A <- results_A_clean %>%
dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC > 0) %>%
dplyr::arrange(P.Value) %>%
dplyr::slice_head(n = 50)
# top 50 down
top50_down_A <- results_A_clean %>%
dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC < 0) %>%
dplyr::arrange(P.Value) %>%
dplyr::slice_head(n = 50)
# Plex B
# Top 50 UP
top50_up_B <- results_B_clean %>%
dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC > 0) %>%
dplyr::arrange(P.Value) %>%
dplyr::slice_head(n = 50)
# Top 50 down
top50_down_B <- results_B_clean %>%
dplyr::filter(!is.na(logFC) & !is.na(P.Value) & logFC < 0) %>%
dplyr::arrange(P.Value) %>%
dplyr::slice_head(n = 50)
up_A <- top50_up_A$Protein
down_A <- top50_down_A$Protein
up_B <- top50_up_B$Protein
down_B <- top50_down_B$Protein
# Up overlap
common_up <- intersect(up_A, up_B)
# Down overlap
common_down <- intersect(down_A, down_B)
# Print counts
cat("Common UP proteins:", length(common_up), "\n")
## Common UP proteins: 0
cat("Common DOWN proteins:", length(common_down), "\n")
## Common DOWN proteins: 1
library(eulerr)
# UP proteins Euler
fit_up <- euler(list(
"Plex A Up" = up_A,
"Plex B Up" = up_B
))
plot(fit_up,
fills = c("#4CAF50", "#2196F3"),
alpha = 0.6,
quantities = TRUE,
main = "Top 50 Upregulated Proteins Overlap")

# down regulated
fit_down <- euler(list(
"Plex A Down" = down_A,
"Plex B Down" = down_B
))
plot(fit_down,
fills = c("#FF7043", "#AB47BC"),
alpha = 0.6,
quantities = TRUE,
main = "Top 50 Downregulated Proteins Overlap")

Pathway Enrichment
results_A_clean$gene <- str_extract(results_A_clean$Protein, "(?<=\\|)[A-Z0-9]+(?=_HUMAN)")
results_B_clean$gene <- str_extract(results_B_clean$Protein, "(?<=\\|)[A-Z0-9]+(?=_HUMAN)")
# Plex A CLEAN
rank_A_clean <- results_A_clean %>%
dplyr::select(gene, logFC) %>%
filter(!is.na(gene)) %>%
group_by(gene) %>%
summarise(logFC = mean(logFC), .groups = "drop")
gene_list_A_clean <- rank_A_clean$logFC
names(gene_list_A_clean) <- rank_A_clean$gene
df_A_clean <- data.frame(logFC = gene_list_A_clean)
# run mitch
mitch_A_clean <- mitch_calc(df_A_clean, reactome_sets, minsetsize = 10)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
res_mitch_A_clean <- mitch_A_clean$enrichment_result %>%
as.data.frame() %>%
dplyr::arrange(p.adjustANOVA)
# Plex B
rank_B_clean <- results_B_clean %>%
dplyr::select(gene, logFC) %>%
filter(!is.na(gene)) %>%
group_by(gene) %>%
summarise(logFC = mean(logFC), .groups = "drop")
gene_list_B_clean <- rank_B_clean$logFC
names(gene_list_B_clean) <- rank_B_clean$gene
df_B_clean <- data.frame(logFC = gene_list_B_clean)
mitch_B_clean <- mitch_calc(df_B_clean, reactome_sets, minsetsize = 10)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
res_mitch_B_clean <- mitch_B_clean$enrichment_result %>%
as.data.frame() %>%
dplyr::arrange(p.adjustANOVA)
go_A_clean <- enrichGO(
gene = names(gene_list_A_clean),
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
readable = TRUE
)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene, fromType = fromType, toType = "ENTREZID", OrgDb = OrgDb):
## 57.63% of input gene IDs are fail to map...
go_A_clean_df <- as.data.frame(go_A_clean)
top_mitch_A <- res_mitch_A_clean %>%
arrange(p.adjustANOVA) %>%
slice_head(n = 20)
kable(top_mitch_A,
caption = "Top Reactome Pathways (Plex A - IG Removed)")
Top Reactome Pathways (Plex A - IG Removed)
| Membrane Trafficking |
29 |
0.0000001 |
0.5896115 |
0.0000041 |
| Platelet activation, signaling and aggregation |
41 |
0.0000769 |
0.3645997 |
0.0020477 |
| Vesicle-mediated transport |
44 |
0.0000798 |
0.3518475 |
0.0020477 |
| Diseases of signal transduction by growth factor receptors and second messengers |
21 |
0.0001673 |
0.4790548 |
0.0032208 |
| Hemostasis |
57 |
0.0003507 |
0.2827278 |
0.0054003 |
| Cell-Cell communication |
21 |
0.0009534 |
0.4209333 |
0.0102977 |
| Signaling by Rho GTPases |
26 |
0.0010699 |
0.3757881 |
0.0102977 |
| Signaling by Rho GTPases, Miro GTPases and RHOBTB3 |
26 |
0.0010699 |
0.3757881 |
0.0102977 |
| Post-translational protein modification |
65 |
0.0016095 |
0.2350541 |
0.0131230 |
| Cell junction organization |
17 |
0.0017043 |
0.4433035 |
0.0131230 |
| RHO GTPase Effectors |
16 |
0.0023828 |
0.4422478 |
0.0166797 |
| MAPK family signaling cascades |
18 |
0.0040415 |
0.3952004 |
0.0200841 |
| MAPK1/MAPK3 signaling |
16 |
0.0044199 |
0.4145392 |
0.0200841 |
| RAF/MAP kinase cascade |
16 |
0.0044199 |
0.4145392 |
0.0200841 |
| Platelet degranulation |
32 |
0.0045531 |
0.2951080 |
0.0200841 |
| Response to elevated platelet cytosolic Ca2+ |
32 |
0.0045531 |
0.2951080 |
0.0200841 |
| Leishmania infection |
11 |
0.0048288 |
0.4934743 |
0.0200841 |
| Parasitic Infection Pathways |
11 |
0.0048288 |
0.4934743 |
0.0200841 |
| Asparagine N-linked glycosylation |
15 |
0.0049558 |
0.4223881 |
0.0200841 |
| Signal Transduction |
76 |
0.0052251 |
0.1940214 |
0.0201167 |
top_mitch_B <- res_mitch_B_clean %>%
arrange(p.adjustANOVA) %>%
slice_head(n = 20)
kable(top_mitch_B,
caption = "Top Reactome Pathways (Plex B - IG Removed)")
Top Reactome Pathways (Plex B - IG Removed)
| Signaling by Rho GTPases |
24 |
0.0000042 |
-0.5475420 |
0.0001170 |
| Signaling by Rho GTPases, Miro GTPases and RHOBTB3 |
24 |
0.0000042 |
-0.5475420 |
0.0001170 |
| Cell junction organization |
13 |
0.0000047 |
-0.7350863 |
0.0001170 |
| Platelet activation, signaling and aggregation |
38 |
0.0000338 |
-0.3964358 |
0.0006246 |
| RHO GTPase Effectors |
13 |
0.0001247 |
-0.6175432 |
0.0016676 |
| Hemostasis |
51 |
0.0001352 |
-0.3179835 |
0.0016676 |
| Membrane Trafficking |
24 |
0.0002094 |
-0.4424321 |
0.0022138 |
| Cell-Cell communication |
16 |
0.0003042 |
-0.5252881 |
0.0028137 |
| Muscle contraction |
10 |
0.0005403 |
-0.6343075 |
0.0039979 |
| Smooth Muscle Contraction |
10 |
0.0005403 |
-0.6343075 |
0.0039979 |
| Platelet degranulation |
30 |
0.0007806 |
-0.3602781 |
0.0048135 |
| Response to elevated platelet cytosolic Ca2+ |
30 |
0.0007806 |
-0.3602781 |
0.0048135 |
| EPH-Ephrin signaling |
13 |
0.0012376 |
-0.5206044 |
0.0070449 |
| Cellular responses to stimuli |
26 |
0.0033516 |
-0.3373242 |
0.0177158 |
| RHO GTPase cycle |
15 |
0.0065504 |
-0.4088662 |
0.0323154 |
| Vesicle-mediated transport |
36 |
0.0069872 |
-0.2654402 |
0.0323160 |
| FCGR3A-mediated phagocytosis |
10 |
0.0124551 |
-0.4589581 |
0.0460839 |
| Fcgamma receptor (FCGR) dependent phagocytosis |
10 |
0.0124551 |
-0.4589581 |
0.0460839 |
| Leishmania phagocytosis |
10 |
0.0124551 |
-0.4589581 |
0.0460839 |
| Parasite infection |
10 |
0.0124551 |
-0.4589581 |
0.0460839 |
Session Info
sessionInfo()
## R version 4.6.0 (2026-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] kableExtra_1.4.0 gtools_3.9.5 beeswarm_0.4.0
## [4] eulerr_7.1.0 msigdbr_26.1.0 mitch_1.24.0
## [7] knitr_1.51 ReactomePA_1.56.0 umap_0.2.10.0
## [10] org.Hs.eg.db_3.23.1 AnnotationDbi_1.74.0 IRanges_2.46.0
## [13] S4Vectors_0.50.1 Biobase_2.72.0 BiocGenerics_0.58.1
## [16] generics_0.1.4 clusterProfiler_4.20.0 pheatmap_1.0.13
## [19] limma_3.68.2 janitor_2.2.1 lubridate_1.9.5
## [22] forcats_1.0.1 stringr_1.6.0 purrr_1.2.2
## [25] readr_2.2.0 tidyr_1.3.2 tibble_3.3.1
## [28] ggplot2_4.0.3 tidyverse_2.0.0 dplyr_1.2.1
## [31] BiocManager_1.30.27
##
## loaded via a namespace (and not attached):
## [1] splines_4.6.0 later_1.4.8 bitops_1.0-9
## [4] ggplotify_0.1.3 polyclip_1.10-7 enrichit_0.1.4
## [7] graph_1.90.0 lifecycle_1.0.5 httr2_1.2.2
## [10] vroom_1.7.1 processx_3.9.0 lattice_0.22-9
## [13] MASS_7.3-65 magrittr_2.0.5 sass_0.4.10
## [16] rmarkdown_2.31 jquerylib_0.1.4 yaml_2.3.12
## [19] httpuv_1.6.17 otel_0.2.0 ggtangle_0.1.2
## [22] askpass_1.2.1 reticulate_1.46.0 DBI_1.3.0
## [25] RColorBrewer_1.1-3 ggraph_2.2.2 yulab.utils_0.2.4
## [28] tweenr_2.0.3 rappdirs_0.3.4 aisdk_1.1.0
## [31] gdtools_0.5.0 enrichplot_1.32.0 ggrepel_0.9.8
## [34] tidytree_0.4.7 reactome.db_1.96.0 RSpectra_0.16-2
## [37] svglite_2.2.2 DOSE_4.6.0 xml2_1.5.2
## [40] ggforce_0.5.0 tidyselect_1.2.1 aplot_0.2.9
## [43] farver_2.1.2 viridis_0.6.5 Seqinfo_1.2.0
## [46] jsonlite_2.0.0 tidygraph_1.3.1 systemfonts_1.3.2
## [49] polylabelr_1.0.0 tools_4.6.0 ggnewscale_0.5.2
## [52] treeio_1.36.1 Rcpp_1.1.1-1.1 glue_1.8.1
## [55] gridExtra_2.3 xfun_0.57 qvalue_2.44.0
## [58] withr_3.0.2 fastmap_1.2.0 GGally_2.4.0
## [61] openssl_2.4.1 caTools_1.18.3 callr_3.7.6
## [64] digest_0.6.39 timechange_0.4.0 R6_2.6.1
## [67] mime_0.13 gridGraphics_0.5-1 textshaping_1.0.5
## [70] GO.db_3.23.1 dichromat_2.0-0.1 RSQLite_3.52.0
## [73] utf8_1.2.6 fontLiberation_0.1.0 graphlayouts_1.2.3
## [76] httr_1.4.8 htmlwidgets_1.6.4 scatterpie_0.2.6
## [79] ggstats_0.13.0 graphite_1.58.0 pkgconfig_2.0.3
## [82] gtable_0.3.6 blob_1.3.0 S7_0.2.2
## [85] XVector_0.52.0 htmltools_0.5.9 fontBitstreamVera_0.1.1
## [88] echarts4r_0.5.0 scales_1.4.0 png_0.1-9
## [91] snakecase_0.11.1 ggfun_0.2.0 rstudioapi_0.18.0
## [94] tzdb_0.5.0 reshape2_1.4.5 curl_7.1.0
## [97] coda_0.19-4.1 statnet.common_4.13.0 nlme_3.1-169
## [100] cachem_1.1.0 KernSmooth_2.23-26 parallel_4.6.0
## [103] pillar_1.11.1 grid_4.6.0 vctrs_0.7.3
## [106] gplots_3.3.0 promises_1.5.0 tidydr_0.0.6
## [109] xtable_1.8-8 cluster_2.1.8.2 evaluate_1.0.5
## [112] cli_3.6.6 compiler_4.6.0 rlang_1.2.0
## [115] crayon_1.5.3 labeling_0.4.3 ps_1.9.3
## [118] plyr_1.8.9 fs_2.1.0 ggiraph_0.9.6
## [121] stringi_1.8.7 network_1.20.0 viridisLite_0.4.3
## [124] babelgene_22.9 assertthat_0.2.1 Biostrings_2.80.0
## [127] lazyeval_0.2.3 GOSemSim_2.38.0 fontquiver_0.2.1
## [130] Matrix_1.7-5 hms_1.1.4 patchwork_1.3.2
## [133] bit64_4.8.0 KEGGREST_1.52.0 statmod_1.5.2
## [136] shiny_1.13.0 igraph_2.3.1 memoise_2.0.1
## [139] bslib_0.11.0 ggtree_4.2.0 bit_4.6.0
## [142] ape_5.8-1 gson_0.1.0
save.image("Analysis2.rdata")