Intro

Here we are running an analysis of DIA based proteomics of control and VLCAD knock-out cells. Samples 1-4 are KO and 5-8 are control.

I have run fragpipe with LFQ pipeline.

Here I will do an exploratory data analysis.

This will include QC analysis, PCA, quantile normalisation and differential expression with limma.

library("pcaMethods") #BioC
library("limma")
library("kableExtra")
library("gplots")
library("mitch")

Now I’ll read in the data.

# Load the data
x <- read.table("report.pg_matrix.tsv", header=TRUE, sep="\t")

# Clean column names
colnames(x) <- gsub("X.matt_proteomics.250711_MM_11262_","", colnames(x))
colnames(x) <- gsub("_uncalibrated.mzML", "", colnames(x))

# tidy cols
y <- x[,c("Protein.Group","Protein.Ids","Genes","1","2","3","4","5","6","7","8")]

# Set row names
rownames(y) <- paste(y$Protein.Ids, y$Genes, sep = "_")

# Extract intensity data
y <- y[,4:11]

# fix colnames
colnames(y) <- c("KO1","KO2","KO3","KO4","CTL1","CTL2","CTL3","CTL4")


# Display data dimensions and first few rows
cat("Data dimensions:", nrow(y), "proteins x", ncol(y), "samples\n")

## Data dimensions: 7899 proteins x 8 samples

cat("FIRST 6 ROWS OF INTENSITY DATA\n")

## FIRST 6 ROWS OF INTENSITY DATA

head(y) %>%
  kbl(caption="Top few rows of the dataset") %>%
  kable_paper("hover", full_width = F)

Top few rows of the dataset
	KO1	KO2	KO3	KO4	CTL1	CTL2	CTL3	CTL4
A0A075B6K5_IGLV3-9	694323	2083250	NA	NA	NA	NA	NA	NA
A0A075B6P5_IGKV2-28	3447730	11593300	NA	NA	NA	NA	NA	NA
A0A075B6R9_IGKV2D-24	996053	1531320	NA	NA	NA	NA	NA	NA
A0A075B6S5_IGKV1-27	622822	2574890	NA	NA	NA	NA	NA	NA
A0A096LNW5_NOTCH2NLR	396606	310606	455893	606542	627783	374798	554069	355685
A0A096LP01_SMIM26	6361420	2306680	1930910	2496140	6221610	1555130	1960550	1772430

Quality control

Now we can run an analysis of missing values.

total_values <- nrow(y) * ncol(y)
total_missing <- sum(is.na(y))
missing_percentage <- (total_missing / total_values) * 100

cat("\nOVERALL MISSING DATA SUMMARY\n")

## 
## OVERALL MISSING DATA SUMMARY

cat("Total values:", total_values, "\n")

## Total values: 63192

cat("Total missing:", total_missing, "\n")

## Total missing: 1716

cat("Overall missing percentage:", round(missing_percentage, 2), "%\n")

## Overall missing percentage: 2.72 %

# Missing data per sample
missing_per_sample <- colSums(is.na(y))
missing_per_sample_pct <- (missing_per_sample / nrow(y)) * 100

missing_summary <- data.frame(
  Sample = names(missing_per_sample),
  Missing_Count = missing_per_sample,
  Missing_Percent = round(missing_per_sample_pct, 2)
)

cat("\nMISSING DATA PER SAMPLE\n")

## 
## MISSING DATA PER SAMPLE

print(missing_summary)

##      Sample Missing_Count Missing_Percent
## KO1     KO1           224            2.84
## KO2     KO2           139            1.76
## KO3     KO3           209            2.65
## KO4     KO4           253            3.20
## CTL1   CTL1           224            2.84
## CTL2   CTL2           190            2.41
## CTL3   CTL3           201            2.54
## CTL4   CTL4           276            3.49

# by protein
nas <- apply(X = y, MARGIN = 1, function(x){ 
  length(which(is.na(x))) 
})
missing_proteins_table <- table(nas)

cat("\nDATA DISTRIBUTION ACROSS PROTEINS\n")

## 
## DATA DISTRIBUTION ACROSS PROTEINS

cat("Number of proteins by missing value count:\n")

## Number of proteins by missing value count:

print(missing_proteins_table)

## nas
##    0    1    2    3    4    5    6    7 
## 7349  189   94   59   55   39   51   63

cat("\nSummary of missing values per protein:\n")

## 
## Summary of missing values per protein:

print(summary(nas))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2172  0.0000  7.0000

Overall 2.7% missing values which is very good.

Now remove proteins with more than 4 missing values.

table(nas >4)

## 
## FALSE  TRUE 
##  7746   153

nrow(y[nas <5,])

## [1] 7746

y2 <- y[nas <5,]

dim(y2)

## [1] 7746    8

dim(y)

## [1] 7899    8

summary(y2)

##       KO1                 KO2                 KO3           
##  Min.   :1.715e+04   Min.   :1.657e+04   Min.   :1.170e+04  
##  1st Qu.:2.533e+06   1st Qu.:2.657e+06   1st Qu.:2.639e+06  
##  Median :7.094e+06   Median :7.440e+06   Median :7.442e+06  
##  Mean   :3.505e+07   Mean   :3.504e+07   Mean   :3.494e+07  
##  3rd Qu.:2.108e+07   3rd Qu.:2.070e+07   3rd Qu.:2.180e+07  
##  Max.   :5.253e+09   Max.   :1.298e+10   Max.   :4.627e+09  
##  NA's   :117         NA's   :79          NA's   :71         
##       KO4                 CTL1                CTL2          
##  Min.   :1.426e+04   Min.   :1.784e+04   Min.   :1.841e+04  
##  1st Qu.:2.497e+06   1st Qu.:2.517e+06   1st Qu.:2.626e+06  
##  Median :7.007e+06   Median :7.125e+06   Median :7.238e+06  
##  Mean   :3.382e+07   Mean   :3.523e+07   Mean   :3.173e+07  
##  3rd Qu.:2.068e+07   3rd Qu.:2.059e+07   3rd Qu.:1.986e+07  
##  Max.   :3.798e+09   Max.   :5.020e+09   Max.   :5.979e+09  
##  NA's   :125         NA's   :88          NA's   :70         
##       CTL3                CTL4          
##  Min.   :1.576e+04   Min.   :4.673e+03  
##  1st Qu.:2.532e+06   1st Qu.:2.304e+06  
##  Median :7.153e+06   Median :6.602e+06  
##  Mean   :3.223e+07   Mean   :3.193e+07  
##  3rd Qu.:2.083e+07   3rd Qu.:1.954e+07  
##  Max.   :3.392e+09   Max.   :3.772e+09  
##  NA's   :74          NA's   :150

apply(y2,2,median,na.rm=TRUE)

##     KO1     KO2     KO3     KO4    CTL1    CTL2    CTL3    CTL4 
## 7093900 7440070 7442300 7007090 7124725 7238305 7152880 6601615

apply(y2,2,mean,na.rm=TRUE)

##      KO1      KO2      KO3      KO4     CTL1     CTL2     CTL3     CTL4 
## 35051253 35037385 34939285 33822843 35230167 31734946 32229620 31926156

Intensities are fairly even.

Data Visualization - Raw Distribution

Boxplot of raw and log-transformed abundances

# Boxplot of raw abundances
par(mfrow = c(1, 1), mar = c(5, 4, 3, 1))

# Raw scale boxplot
boxplot(y2, las = 2, main = "Raw Abundance Distribution",
        ylab = "Abundance", cex.axis = 0.6, col = rainbow(ncol(y2)))

# Log-transformed boxplot for better visualization
y_log <- log10(y2 + 1)  # Add 1 to handle zeros if any
boxplot(y_log, las = 2, main = "Log10(Abundance + 1) Distribution",
        ylab = "Log10(Abundance + 1)", cex.axis = 0.6, col = rainbow(ncol(y2)))

Protein Abundance Analysis

# Protein Abundance Analysis
# Calculate protein statistics
protein_stats <- data.frame(
  Protein = rownames(y),
  Mean_Abundance = rowMeans(y, na.rm = TRUE),
  Median_Abundance = apply(y, 1, median, na.rm = TRUE),
  SD_Abundance = apply(y, 1, sd, na.rm = TRUE),
  CV = apply(y, 1, sd, na.rm = TRUE) / rowMeans(y, na.rm = TRUE),
  Missing_Count = nas
)

cat("\nPROTEIN ABUNDANCE SUMMARY\n")

## 
## PROTEIN ABUNDANCE SUMMARY

cat("Mean abundance across all proteins:\n")

## Mean abundance across all proteins:

print(summary(protein_stats$Mean_Abundance))

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.907e+04 2.490e+06 7.040e+06 3.283e+07 2.059e+07 3.710e+09

cat("\nCoefficient of Variation (CV) summary:\n")

## 
## Coefficient of Variation (CV) summary:

print(summary(protein_stats$CV))

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## 0.007651 0.170949 0.236206 0.275446 0.333498 2.761049       63

# Top 10 most abundant proteins
high_abundance <- head(protein_stats[order(protein_stats$Mean_Abundance, decreasing = TRUE), ], 10)

cat("\nTOP 10 MOST ABUNDANT PROTEINS\n")

## 
## TOP 10 MOST ABUNDANT PROTEINS

print(high_abundance[, c("Protein", "Mean_Abundance", "Missing_Count")])

##                       Protein Mean_Abundance Missing_Count
## P05141_SLC25A5 P05141_SLC25A5     3710495000             0
## P06576_ATP5F1B P06576_ATP5F1B     3405280000             0
## P62805_H4C1       P62805_H4C1     3382420000             0
## P60709_ACTB       P60709_ACTB     3335985000             0
## P25705_ATP5F1A P25705_ATP5F1A     3031916250             0
## P04406_GAPDH     P04406_GAPDH     2599235000             0
## Q00325_SLC25A3 Q00325_SLC25A3     2422585000             0
## P06899_H2BC11   P06899_H2BC11     2301392125             0
## P10809_HSPD1     P10809_HSPD1     2273151375             0
## P62979_RPS27A   P62979_RPS27A     2201783750             0

Top 10 most variable proteins by CV

high_variable <- head(protein_stats[order(protein_stats$CV, decreasing = TRUE), ], 10)

cat("\nTOP 10 MOST VARIABLE PROTEINS (by Coefficient of Variation)\n")

## 
## TOP 10 MOST VARIABLE PROTEINS (by Coefficient of Variation)

#Round only the numeric columns, keep Protein column as-is
high_variable_display <- high_variable[, c("Protein", "Mean_Abundance", "CV", "Missing_Count")]
high_variable_display[, c("Mean_Abundance", "CV")] <- round(high_variable_display[, c("Mean_Abundance", "CV")], 3)
high_variable_display[, "Missing_Count"] <- round(high_variable_display[, "Missing_Count"], 0)  # Missing count should be integer
print(high_variable_display)

##                         Protein Mean_Abundance    CV Missing_Count
## P02538_KRT6A       P02538_KRT6A      211717589 2.761             0
## P08779_KRT16       P08779_KRT16      121250752 2.710             0
## P04259_KRT6B       P04259_KRT6B     1685411300 2.709             0
## P06702_S100A9     P06702_S100A9       83771732 2.696             0
## P31151_S100A7     P31151_S100A7       44475176 2.626             0
## P02533_KRT14       P02533_KRT14      338393075 2.588             0
## P48594_SERPINB4 P48594_SERPINB4       32009080 2.579             0
## Q04695_KRT17       Q04695_KRT17      206012900 2.520             0
## Q13835_PKP1         Q13835_PKP1        4272266 2.423             0
## Q08188_TGM3         Q08188_TGM3       22738735 2.415             0

Looks like keratin is highly variable could be some contamination going on.

Visualization of CV distribution

hist(protein_stats$CV[!is.infinite(protein_stats$CV)], 
     breaks = 50, main = "Distribution of Coefficient of Variation", 
     xlab = "CV", ylab = "Frequency", 
     col = "lightblue", border = "darkblue")

Now look at the sample correlation matrix.

cor_matrix <- cor(y2, use = "complete.obs")

cat("\nSAMPLE CORRELATION MATRIX\n")

## 
## SAMPLE CORRELATION MATRIX

print(round(cor_matrix, 3))

##        KO1   KO2   KO3   KO4  CTL1  CTL2  CTL3  CTL4
## KO1  1.000 0.616 0.904 0.909 0.985 0.789 0.947 0.942
## KO2  0.616 1.000 0.676 0.626 0.631 0.678 0.646 0.590
## KO3  0.904 0.676 1.000 0.973 0.914 0.900 0.971 0.929
## KO4  0.909 0.626 0.973 1.000 0.907 0.807 0.967 0.962
## CTL1 0.985 0.631 0.914 0.907 1.000 0.845 0.961 0.937
## CTL2 0.789 0.678 0.900 0.807 0.845 1.000 0.863 0.754
## CTL3 0.947 0.646 0.971 0.967 0.961 0.863 1.000 0.978
## CTL4 0.942 0.590 0.929 0.962 0.937 0.754 0.978 1.000

# Identify sample groups
chrom_samples <- grepl("Chrom", colnames(y))
sol_samples <- grepl("Sol", colnames(y))

# Calculate group-wise correlations
chrom_cor <- cor_matrix[chrom_samples, chrom_samples]
sol_cor <- cor_matrix[sol_samples, sol_samples]
between_cor <- cor_matrix[chrom_samples, sol_samples]

cat("\nCORRELATION SUMMARY\n")

## 
## CORRELATION SUMMARY

cat("Average within-Chromatin correlation:", round(mean(chrom_cor[upper.tri(chrom_cor)]), 3), "\n")

## Average within-Chromatin correlation: NaN

cat("Average within-Soluble correlation:", round(mean(sol_cor[upper.tri(sol_cor)]), 3), "\n")

## Average within-Soluble correlation: NaN

cat("Average between-condition correlation:", round(mean(between_cor), 3), "\n")

## Average between-condition correlation: NaN

cor_matrix

##            KO1       KO2       KO3       KO4      CTL1      CTL2      CTL3
## KO1  1.0000000 0.6156414 0.9042915 0.9090907 0.9854008 0.7893350 0.9466495
## KO2  0.6156414 1.0000000 0.6761849 0.6262485 0.6308259 0.6779291 0.6457640
## KO3  0.9042915 0.6761849 1.0000000 0.9729884 0.9143130 0.9001092 0.9714041
## KO4  0.9090907 0.6262485 0.9729884 1.0000000 0.9072548 0.8071333 0.9670573
## CTL1 0.9854008 0.6308259 0.9143130 0.9072548 1.0000000 0.8448395 0.9605245
## CTL2 0.7893350 0.6779291 0.9001092 0.8071333 0.8448395 1.0000000 0.8628217
## CTL3 0.9466495 0.6457640 0.9714041 0.9670573 0.9605245 0.8628217 1.0000000
## CTL4 0.9415118 0.5895684 0.9285356 0.9615625 0.9374458 0.7537676 0.9778751
##           CTL4
## KO1  0.9415118
## KO2  0.5895684
## KO3  0.9285356
## KO4  0.9615625
## CTL1 0.9374458
## CTL2 0.7537676
## CTL3 0.9778751
## CTL4 1.0000000

heatmap.2(cor_matrix,trace="none")

Looks like KO2 could be an outlier.

heatmap.2(as.matrix(y2[grep("KRT",rownames(y2)),]),trace="none",scale="row",margin=c(5,10))

The heatmap shows KO2 is contaminated with keratin. Suggest excluding it from downstream analysis.

Repeat analysis with y_log

cor_matrix <- cor(y_log, use = "complete.obs")

cat("\nSAMPLE CORRELATION MATRIX\n")

## 
## SAMPLE CORRELATION MATRIX

print(round(cor_matrix, 3))

##        KO1   KO2   KO3   KO4  CTL1  CTL2  CTL3  CTL4
## KO1  1.000 0.955 0.973 0.964 0.987 0.948 0.969 0.961
## KO2  0.955 1.000 0.970 0.946 0.944 0.968 0.963 0.933
## KO3  0.973 0.970 1.000 0.983 0.966 0.964 0.989 0.975
## KO4  0.964 0.946 0.983 1.000 0.958 0.932 0.976 0.984
## CTL1 0.987 0.944 0.966 0.958 1.000 0.960 0.976 0.964
## CTL2 0.948 0.968 0.964 0.932 0.960 1.000 0.973 0.931
## CTL3 0.969 0.963 0.989 0.976 0.976 0.973 1.000 0.983
## CTL4 0.961 0.933 0.975 0.984 0.964 0.931 0.983 1.000

# Identify sample groups
chrom_samples <- grepl("Chrom", colnames(y))
sol_samples <- grepl("Sol", colnames(y))

# Calculate group-wise correlations
chrom_cor <- cor_matrix[chrom_samples, chrom_samples]
sol_cor <- cor_matrix[sol_samples, sol_samples]
between_cor <- cor_matrix[chrom_samples, sol_samples]

cat("\nCORRELATION SUMMARY\n")

## 
## CORRELATION SUMMARY

cat("Average within-Chromatin correlation:", round(mean(chrom_cor[upper.tri(chrom_cor)]), 3), "\n")

## Average within-Chromatin correlation: NaN

cat("Average within-Soluble correlation:", round(mean(sol_cor[upper.tri(sol_cor)]), 3), "\n")

## Average within-Soluble correlation: NaN

cat("Average between-condition correlation:", round(mean(between_cor), 3), "\n")

## Average between-condition correlation: NaN

cor_matrix

##            KO1       KO2       KO3       KO4      CTL1      CTL2      CTL3
## KO1  1.0000000 0.9550537 0.9728069 0.9644101 0.9867622 0.9482181 0.9693253
## KO2  0.9550537 1.0000000 0.9703504 0.9456029 0.9441793 0.9676906 0.9625101
## KO3  0.9728069 0.9703504 1.0000000 0.9830744 0.9659350 0.9640514 0.9893335
## KO4  0.9644101 0.9456029 0.9830744 1.0000000 0.9584659 0.9319293 0.9762458
## CTL1 0.9867622 0.9441793 0.9659350 0.9584659 1.0000000 0.9598686 0.9759373
## CTL2 0.9482181 0.9676906 0.9640514 0.9319293 0.9598686 1.0000000 0.9729682
## CTL3 0.9693253 0.9625101 0.9893335 0.9762458 0.9759373 0.9729682 1.0000000
## CTL4 0.9611229 0.9334854 0.9747769 0.9840200 0.9639182 0.9311667 0.9827680
##           CTL4
## KO1  0.9611229
## KO2  0.9334854
## KO3  0.9747769
## KO4  0.9840200
## CTL1 0.9639182
## CTL2 0.9311667
## CTL3 0.9827680
## CTL4 1.0000000

heatmap.2(cor_matrix,trace="none")

After log transformation, the contamination is not so obvious.

Check mito enrichment

mito <- readLines("mitocarta_gene.txt")
genenames <- sapply(strsplit(paste(rownames(y2),"_NONE",sep=""),"_"),"[[",2)

mito2 <- y2[which(genenames %in% mito),]
nonmito2 <- y2[which(! genenames %in% mito),]

mitofrac2 <- colSums(mito2,na.rm=TRUE) / colSums(y2,na.rm=TRUE)
mitofrac2

##       KO1       KO2       KO3       KO4      CTL1      CTL2      CTL3      CTL4 
## 0.3601878 0.2736487 0.3367755 0.3685658 0.3330282 0.2424464 0.3158987 0.3565576

Data normalisation

Handle remaining missing values in filtered data

for(i in 1:ncol(y2)) {
  if(any(is.na(y2[,i]))) {
    min_val <- min(y2[,i], na.rm = TRUE)
    y2[is.na(y2[,i]), i] <- min_val * 0.1
  }
}

cat("Missing values after imputation:", sum(is.na(y2)), "\n")

## Missing values after imputation: 0

#Log10 transformation
yl <- log10(y2 + 1)
cat("Log10 transformation completed\n")

## Log10 transformation completed

#Between-array normalization
yn <- normalizeBetweenArrays(yl, method = "quantile", targets = NULL, cyclic.method = "fast")
cat("Between-array normalization completed\n")

## Between-array normalization completed

cat("Final normalized data dimensions:", nrow(yn), "x", ncol(yn), "\n")

## Final normalized data dimensions: 7746 x 8

NORMALIZATION VALIDATION

Before and after comparison

cat("Raw data range:", round(range(y2), 0), "\n")

## Raw data range: 467 12982700000

cat("Normalized data range:", round(range(yn), 2), "\n")

## Normalized data range: 3.13 9.71

# Sample median variability
raw_medians <- apply(y2, 2, median)
norm_medians <- apply(yn, 2, median)

cat("CV of sample medians (raw):", round(sd(raw_medians)/mean(raw_medians), 3), "\n")

## CV of sample medians (raw): 0.045

cat("CV of sample medians (normalized):", round(sd(norm_medians)/mean(norm_medians), 3), "\n")

## CV of sample medians (normalized): 0

# Correlation preservation
cor_raw <- cor(y2)
cor_norm <- cor(yn)

ko_idx <- grepl("KO", colnames(yn))
ctl_idx <- grepl("CTL", colnames(yn))

cat("Within-Chromatin correlation (normalized):", 
    round(mean(cor_norm[ko_idx, ko_idx][upper.tri(cor_norm[ko_idx, ko_idx])]), 3), "\n")

## Within-Chromatin correlation (normalized): 0.858

cat("Within-Soluble correlation (normalized):", 
    round(mean(cor_norm[ctl_idx, ctl_idx][upper.tri(cor_norm[ctl_idx, ctl_idx])]), 3), "\n")

## Within-Soluble correlation (normalized): 0.874

VISUALIZATION COMPARISON

par(mfrow = c(2, 2), mar = c(6, 4, 3, 1))

# Raw data
boxplot(log10(y2 + 1), las = 2, main = "Raw Data (Log10 scale)",
        cex.axis = 0.6, col = rainbow(ncol(y2)))

# Log10 transformed
boxplot(yl, las = 2, main = "Log10 Transformed",
        cex.axis = 0.6, col = rainbow(ncol(yl)))

# Final normalized(quantile)
boxplot(yn, las = 2, main = "Final Normalized",
        cex.axis = 0.6, col = rainbow(ncol(yn)))

par(mfrow = c(1, 1))

Looks OK.

Sample completeness

sample_completeness <- (1 - missing_per_sample_pct/100) * 100
cat("\nSample completeness (%):\n")

## 
## Sample completeness (%):

for(i in 1:length(sample_completeness)) {
  cat(names(sample_completeness)[i], ":", round(sample_completeness[i], 1), "%\n")
}

## KO1 : 97.2 %
## KO2 : 98.2 %
## KO3 : 97.4 %
## KO4 : 96.8 %
## CTL1 : 97.2 %
## CTL2 : 97.6 %
## CTL3 : 97.5 %
## CTL4 : 96.5 %

PCA Analysis

Use filtered data (z) with fewer missing values Transpose data so samples are rows (pcaMethods expects this format)

Perform PPCA (handles missing values)

pc <- pca(t(y2), nPcs = 3, method = "ppca")

print(pc)

## ppca calculated PCA
## Importance of component(s):
##                  PC1    PC2    PC3
## R2            0.6516 0.1821 0.1125
## Cumulative R2 0.6516 0.8337 0.9463
## 7746     Variables
## 8    Samples
## 0    NAs ( 0 %)
## 3    Calculated component(s)
## Data was mean centered before running PCA 
## Data was NOT scaled before running PCA 
## Scores structure:
## [1] 8 3
## Loadings structure:
## [1] 7746    3

cat("\nPCA Summary:\n")

## 
## PCA Summary:

cat("Number of components:", pc@nPcs, "\n")

## Number of components: 3

cat("Method used:", pc@method, "\n")

## Method used: ppca

cat("Variance explained by each PC:\n")

## Variance explained by each PC:

print(round(pc@R2, 3))

## [1] 0.652 0.182 0.113

cat("\nCumulative variance explained:\n")

## 
## Cumulative variance explained:

print(round(pc@R2cum, 3))

## [1] 0.652 0.834 0.946

# Scores plot (samples)
par(mar = c(5,4,3,1))
slplot(pc, main = "PCA Scores Plot", cex = 0.8, cex.axis = 0.8)

# Manual scores plot with sample group colors
scores <- pc@scores
chrom_color <- ifelse(grepl("KO", rownames(scores)), "red", "blue")

plot(scores[,1], scores[,2], col = chrom_color, pch = 19, cex = 1.5,
     main = "PCA Scores - PC1 vs PC2",
     xlab = paste0("PC1 (", round(pc@R2[1]*100, 1), "% variance)"),
     ylab = paste0("PC2 (", round(pc@R2[2]*100, 1), "% variance)"))

# Add sample labels
text(scores[,1], scores[,2], labels = rownames(scores), pos = 3, cex = 0.7)

# Add legend
legend("topright", legend = c("KO", "CTL"),
       col = c("red", "blue"), pch = 19, cex = 0.8)

Now exclude KO2

y3 <- y2[,grep("KO2",colnames(y2),invert=TRUE)]

pc <- pca(t(y3), nPcs = 3, method = "ppca")

print(pc)

## ppca calculated PCA
## Importance of component(s):
##                  PC1    PC2    PC3
## R2            0.5476 0.3058 0.1038
## Cumulative R2 0.5476 0.8534 0.9572
## 7746     Variables
## 7    Samples
## 0    NAs ( 0 %)
## 3    Calculated component(s)
## Data was mean centered before running PCA 
## Data was NOT scaled before running PCA 
## Scores structure:
## [1] 7 3
## Loadings structure:
## [1] 7746    3

cat("\nPCA Summary:\n")

## 
## PCA Summary:

cat("Number of components:", pc@nPcs, "\n")

## Number of components: 3

cat("Method used:", pc@method, "\n")

## Method used: ppca

cat("Variance explained by each PC:\n")

## Variance explained by each PC:

print(round(pc@R2, 3))

## [1] 0.548 0.306 0.104

cat("\nCumulative variance explained:\n")

## 
## Cumulative variance explained:

print(round(pc@R2cum, 3))

## [1] 0.548 0.853 0.957

# Scores plot (samples)
par(mar = c(5,4,3,1))
slplot(pc, main = "PCA Scores Plot", cex = 0.8, cex.axis = 0.8)

# Manual scores plot with sample group colors
scores <- pc@scores
chrom_color <- ifelse(grepl("KO", rownames(scores)), "red", "blue")

plot(scores[,1], scores[,2], col = chrom_color, pch = 19, cex = 1.5,
     main = "PCA Scores - PC1 vs PC2",
     xlab = paste0("PC1 (", round(pc@R2[1]*100, 1), "% variance)"),
     ylab = paste0("PC2 (", round(pc@R2[2]*100, 1), "% variance)"))

# Add sample labels
text(scores[,1], scores[,2], labels = rownames(scores), pos = 3, cex = 0.7)

# Add legend
legend("topleft", legend = c("KO", "CTL"),
       col = c("red", "blue"), pch = 19, cex = 0.8)

Normalisation part 2

Now do normalisation on the data after removing KO2.

#Log10 transformation
yl <- log10(y3 + 1)
cat("Log10 transformation completed\n")

## Log10 transformation completed

#Between-array normalization
yn <- normalizeBetweenArrays(yl, method = "quantile", targets = NULL, cyclic.method = "fast")
cat("Between-array normalization completed\n")

## Between-array normalization completed

cat("Final normalized data dimensions:", nrow(yn), "x", ncol(yn), "\n")

## Final normalized data dimensions: 7746 x 7

Differential analysis

Make a sample sheet.

ss <- as.data.frame(colnames(yn))
ss$ko <- grepl("KO",ss[,1])
rownames(ss) <- ss[,1]
ss[,1]=NULL

ss$mitofrac <- mitofrac2[-2]

ss %>%
  kbl(caption="Sample sheet") %>%
  kable_paper("hover", full_width = F)

Sample sheet
	ko	mitofrac
KO1	TRUE	0.3601878
KO3	TRUE	0.3367755
KO4	TRUE	0.3685658
CTL1	FALSE	0.3330282
CTL2	FALSE	0.2424464
CTL3	FALSE	0.3158987
CTL4	FALSE	0.3565576

Design matrix.

design <- model.matrix(~ ko, data = ss)

print(design)

##      (Intercept) koTRUE
## KO1            1      1
## KO3            1      1
## KO4            1      1
## CTL1           1      0
## CTL2           1      0
## CTL3           1      0
## CTL4           1      0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$ko
## [1] "contr.treatment"

Fit linear models and apply empirical Bayes moderation

fit <- lmFit(yn, design)
fit <- eBayes(fit)

# Summary of differential expression
de_summary <- summary(decideTests(fit))

de_summary

##        (Intercept) koTRUE
## Down             0      4
## NotSig           0   7740
## Up            7746      2

Looking at the DE results

results <- topTable(fit, coef = 2, number = Inf)

# Calculate key statistics
total_proteins <- nrow(results)
significant_proteins <- sum(results$adj.P.Val < 0.05)
sig_rate <- round((significant_proteins / total_proteins) * 100, 1)

# Display most significant results
head(results, 20) %>%
  kbl(caption="Top 20 DE proteins") %>%
  kable_paper("hover", full_width = F)

Top 20 DE proteins
	logFC	AveExpr	t	P.Value	adj.P.Val	B
Q16720_ATP2B3	-3.9979017	5.404929	-79.215082	0.0000000	0.0000002	7.2817783
Q6UWL6_KIRREL2	-2.4877394	4.541980	-28.165804	0.0000000	0.0000659	6.4597870
P54803_GALC	-2.9248379	4.791750	-27.912631	0.0000000	0.0000659	6.4444630
Q7Z6P3_RAB44	-3.1937750	4.945428	-23.722454	0.0000001	0.0001493	6.1299787
P19827_ITIH1	3.2713868	4.661376	22.070456	0.0000001	0.0001949	5.9653324
Q15022_SUZ12	1.9898705	4.147861	14.184665	0.0000025	0.0031751	4.5748017
P09471_GNAO1	-0.3278428	7.347893	-8.977974	0.0000490	0.0541705	2.4665274
P49748_ACADVL	-0.9316407	8.094585	-8.495590	0.0000695	0.0673340	2.1772248
Q96QV1_HHIP	1.7861241	4.461318	7.294129	0.0001804	0.1552317	1.3553864
Q8N111_CEND1	1.0056142	5.827519	7.161533	0.0002019	0.1559058	1.2548389
Q5UCC4_EMC10	-0.5261417	6.917398	-7.054691	0.0002214	0.1559058	1.1722738
Q9H019_MTFR1L	0.2671044	7.184493	6.539372	0.0003508	0.2264695	0.7538082
Q53GS7_GLE1	0.2618606	6.697547	6.277462	0.0004481	0.2639324	0.5276234
P20138_CD33	0.5269739	5.968220	6.211754	0.0004770	0.2639324	0.4693845
Q9NZU0_FLRT3	0.3401387	6.879011	6.127510	0.0005172	0.2671036	0.3938203
P28676_GCA	0.3257526	6.680174	5.712750	0.0007799	0.3491965	0.0066896
Q9Y624_F11R	-0.2782516	7.384211	-5.639829	0.0008401	0.3491965	-0.0640430
Q8NFH5_NUP35	0.2915181	7.271447	5.608404	0.0008676	0.3491965	-0.0947765
Q14435_GALNT3	-0.3441410	6.568961	-5.575920	0.0008971	0.3491965	-0.1267058
Q49AM1_MTERF2	1.8027949	4.354474	5.545081	0.0009262	0.3491965	-0.1571711

upregulated <- results[results$logFC > 0 & results$adj.P.Val < 0.05, ]    # KO-enriched
downregulated <- results[results$logFC < 0 & results$adj.P.Val < 0.05, ]  # CTL-enriched

upregulated %>%
  kbl(caption="Upregulated proteins") %>%
  kable_paper("hover", full_width = F)

Upregulated proteins
	logFC	AveExpr	t	P.Value	adj.P.Val	B
P19827_ITIH1	3.271387	4.661376	22.07046	1.0e-07	0.0001949	5.965332
Q15022_SUZ12	1.989871	4.147861	14.18467	2.5e-06	0.0031751	4.574802

downregulated %>%
  kbl(caption="Downregulated proteins") %>%
  kable_paper("hover", full_width = F)

Downregulated proteins
	logFC	AveExpr	t	P.Value	adj.P.Val	B
Q16720_ATP2B3	-3.997902	5.404929	-79.21508	0e+00	0.0000002	7.281778
Q6UWL6_KIRREL2	-2.487739	4.541980	-28.16580	0e+00	0.0000659	6.459787
P54803_GALC	-2.924838	4.791750	-27.91263	0e+00	0.0000659	6.444463
Q7Z6P3_RAB44	-3.193775	4.945428	-23.72245	1e-07	0.0001493	6.129979

sig_up <- nrow(upregulated)
sig_down <- nrow(downregulated)
not_sig <- total_proteins - sig_up - sig_down

# Create classification summary
classification_summary <- data.frame(
  Category = c("Chromatin-enriched", "Soluble-enriched", "Not Significant"),
  Count = c(sig_up, sig_down, not_sig),
  Percentage = round(c(sig_up, sig_down, not_sig) / total_proteins * 100, 1)
)

classification_summary %>%
  kbl(caption="DE summary") %>%
  kable_paper("hover", full_width = F)

DE summary
Category	Count	Percentage
Chromatin-enriched	2	0.0
Soluble-enriched	4	0.1
Not Significant	7740	99.9

Volcano plot.

sig <- subset(results,adj.P.Val < 0.05)

plot(results$logFC,-log10(results$P.Value),
  pch=19,cex=0.6,col="gray",bty="n",
  xlab="log2 fold change",
  ylab="-log10(p-value)")

points(sig$logFC,-log10(sig$P.Value),pch=19,cex=0.65,col="red")

protnames <- sapply(strsplit(rownames(sig),"_"),"[[",2)
text(sig$logFC,-log10(sig$P.Value),labels=protnames,srt = -30)

Heatmap

topn=10
top <- as.matrix(yn[which(rownames(yn) %in% rownames(head(results,topn))),])

colfunc <- colorRampPalette(c("blue", "white", "red"))

heatmap.2(cor_matrix,trace="none",scale="row")

heatmap.2( top, col=colfunc(25),scale="row",
 trace="none",margins = c(5,15), cexRow=0.9, main="Top 10 genes")

Output results as tsv.

ym <- merge(results,yn,by=0)

colnames(ym)[1] <- "ProteinID"
write.table(x=ym,file="vlcad_ko_de.tsv",quote=F,sep="\t",row.names=FALSE)

ym <- ym[order(ym$P.Value),]

Pathway Enrichment

First get the pathways from Reactome.

Then run mitch.

pw <- gmt_import("ReactomePathways_2025-10-31.gmt")

gt <- data.frame(rownames(results))
gt$name <- sapply(lapply(strsplit(gt[,1],"_"),function(x) { c(x,"x") } ),"[[",2) #substitute x if there's no gene name
table(which(gt$name =="x"))

## 
##   74 3115 
##    1    1

m <- mitch_import(x=results,DEtype="limma",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 7746

## Note: no. genes in output = 7745

## Note: estimated proportion of input genes in output = 1

# sanity checks
head(m[order(m$x),,drop=FALSE])

##                  x
## ATP2B3  -79.215082
## KIRREL2 -28.165804
## GALC    -27.912631
## RAB44   -23.722454
## GNAO1    -8.977974
## ACADVL   -8.495590

tail(m[order(m$x),,drop=FALSE])

##                x
## GLE1    6.277462
## MTFR1L  6.539372
## CEND1   7.161533
## HHIP    7.294129
## SUZ12  14.184665
## ITIH1  22.070456

mres <- mitch_calc(x=m,genesets=pw,minsetsize=5,cores=8,priority="effect")

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

NTOT <- nrow(mres$enrichment_result)

mresig <- subset(mres$enrichment_result,p.adjustANOVA<0.05)

NSIG <- nrow((mresig))

NUP <- nrow(subset(mresig,s.dist>0))
NDN <- nrow(subset(mresig,s.dist<0))

head(subset(mresig,s.dist>0),20) %>%
  kbl(caption="Top upregulated pathways (effect size)") %>%
  kable_paper("hover", full_width = F)

Top upregulated pathways (effect size)
	set	setSize	pANOVA	s.dist	p.adjustANOVA
1745	rRNA modification in the mitochondrion	13	0.0000001	0.8586295	0.0000017
845	Mitochondrial ribosome-associated quality control	89	0.0000000	0.8264738	0.0000000
243	Classical antibody-mediated complement activation	6	0.0006917	0.7998449	0.0043403
279	Creation of C4 and C2 activators	6	0.0006917	0.7998449	0.0043403
674	Initial triggering of complement	9	0.0000695	0.7657417	0.0006565
334	Defective TPR may confer susceptibility towards thyroid papillary carcinoma (TPC)	29	0.0000000	0.7631701	0.0000000
640	IP3 and IP4 transport between cytosol and nucleus	29	0.0000000	0.7631701	0.0000000
641	IP6 and IP7 transport between cytosol and nucleus	29	0.0000000	0.7631701	0.0000000
642	IPs transport between nucleus and cytosol	29	0.0000000	0.7631701	0.0000000
1218	Regulation of Glucokinase by Glucokinase Regulatory Protein	29	0.0000000	0.7631701	0.0000000
977	PDH complex synthesizes acetyl-CoA from PYR	5	0.0042314	0.7387080	0.0207092
1283	Regulation of pyruvate dehydrogenase (PDH) complex	12	0.0000119	0.7302470	0.0001544
1372	SUMOylation of SUMOylation proteins	31	0.0000000	0.7206503	0.0000000
734	Ketone body metabolism	7	0.0010709	0.7140272	0.0063140
1075	Propionyl-CoA catabolism	5	0.0058501	0.7117829	0.0274832
1525	Synthesis of Ketone Bodies	6	0.0025577	0.7111599	0.0137457
375	Diseases of branched-chain amino acid catabolism	12	0.0000244	0.7036295	0.0002858
841	Mitochondrial iron-sulfur cluster biogenesis	13	0.0000116	0.7024951	0.0001529
877	NEP/NS2 Interacts with the Cellular Export Machinery	31	0.0000000	0.6990558	0.0000000
1080	Protein lipoylation	9	0.0003029	0.6955073	0.0022175

head(subset(mresig,s.dist<0),20) %>%
  kbl(caption="Top downregulated pathways (effect size)") %>%
  kable_paper("hover", full_width = F)

Top downregulated pathways (effect size)
	set	setSize	pANOVA	s.dist	p.adjustANOVA
1178	RUNX2 regulates genes involved in cell migration	5	0.0017631	-0.8076486	0.0099928
1171	RSK activation	5	0.0054010	-0.7185013	0.0255782
905	Nef mediated downregulation of MHC class I complex cell surface expression	9	0.0004455	-0.6760313	0.0030339
909	Negative feedback regulation of MAPK pathway	5	0.0093903	-0.6708527	0.0411439
306	DNA Damage Reversal	6	0.0087700	-0.6179524	0.0389113
1312	Reversal of alkylation damage by DNA dioxygenases	6	0.0087700	-0.6179524	0.0389113
1063	Prevention of phagosomal-lysosomal fusion	6	0.0108181	-0.6008959	0.0462159
906	Nef-mediates down modulation of cell surface receptors by recruiting them to clathrin adapters	16	0.0000330	-0.5996571	0.0003598
1515	Suppression of phagosomal maturation	9	0.0018810	-0.5984718	0.0106265
58	Activation of caspases through apoptosome-mediated cleavage	6	0.0116380	-0.5948658	0.0488238
511	Frs2-mediated activation	8	0.0063474	-0.5573543	0.0295820
1074	Prolonged ERK activation events	10	0.0025341	-0.5515191	0.0137381
509	Formation of tubulin folding intermediates by CCT/TriC	20	0.0000329	-0.5365696	0.0003598
1603	The role of Nef in HIV-1 replication and disease pathogenesis	20	0.0000354	-0.5344207	0.0003818
201	Caspase-mediated cleavage of cytoskeletal proteins	10	0.0035746	-0.5322043	0.0184181
1532	Synthesis of PIPs at the Golgi membrane	14	0.0006035	-0.5296302	0.0039258
519	G beta:gamma signalling through PI3Kgamma	11	0.0033178	-0.5115076	0.0172981
1533	Synthesis of PIPs at the early endosome membrane	14	0.0010124	-0.5075669	0.0059893
570	Glucagon-type ligand receptors	9	0.0100870	-0.4954039	0.0439773
1534	Synthesis of PIPs at the late endosome membrane	9	0.0107493	-0.4911525	0.0460648

Bar plot of top results.

ups <- head(subset(mresig,s.dist>0),20)[,c("s.dist")]
names(ups) <- head(subset(mresig,s.dist>0),20)[,c("set")]

dns <- head(subset(mresig,s.dist<0),20)[,c("s.dist")]
names(dns) <- head(subset(mresig,s.dist<0),20)[,c("set")]

tops <- c(ups,dns)

tops <- sort(tops)

par(mar=c(5,25,5,1))

cols <- c(rep("blue",length(dns)),rep("red",length(ups)))

barplot(abs(tops),
  horiz=TRUE,las=1,cex.names=0.65,col=cols,
  main="top DE Reactomes",
  xlab="ES",
  cex.main=0.9)

Make detailed pathway report.

mitch_report(res=mres,outfile="vlcad_ko_reactome.html",overwrite=TRUE)

## Note: overwriting existing report

## Dataset saved as " /tmp/Rtmp6yFyhj/vlcad_ko_reactome.rds ".

## 
## 
## processing file: mitch.Rmd

## 1/36                             
## 2/36 [checklibraries]            
## 3/36                             
## 4/36 [peek]                      
## 5/36                             
## 6/36 [metrics]                   
## 7/36                             
## 8/36 [scatterplot]

## 9/36                             
## 10/36 [contourplot]               
## 11/36                             
## 12/36 [input_geneset_metrics1]    
## 13/36                             
## 14/36 [input_geneset_metrics2]

## 15/36                             
## 16/36 [input_geneset_metrics3]

## 17/36                             
## 18/36 [echart1d]                  
## 19/36 [echart2d]                  
## 20/36                             
## 21/36 [heatmap]                   
## 22/36                             
## 23/36 [effectsize]                
## 24/36                             
## 25/36 [results_table]             
## 26/36                             
## 27/36 [results_table_complete]    
## 28/36                             
## 29/36 [detailed_geneset_reports1d]

## 30/36                             
## 31/36 [detailed_geneset_reports2d]
## 32/36                             
## 33/36 [network]

## 34/36                             
## 35/36 [session_info]              
## 36/36

## output file: /mnt/data/mdz/projects/matt/VLCAD_KO_proteomics/mitch.knit.md

## /home/mdz/anaconda3/bin/pandoc +RTS -K512m -RTS /mnt/data/mdz/projects/matt/VLCAD_KO_proteomics/mitch.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output /tmp/Rtmp6yFyhj/mitch_report.html --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /usr/local/lib/R/site-library/rmarkdown/rmarkdown/lua/latex-div.lua --self-contained --variable bs3=TRUE --section-divs --template /usr/local/lib/R/site-library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/Rtmp6yFyhj/rmarkdown-str22491b3618a586.html

## 
## Output created: /tmp/Rtmp6yFyhj/mitch_report.html

## [1] TRUE

Now to exclude proteins not included in the mitocarta 3.0

#Log10 transformation
yl <- log10(y3 + 1)
cat("Log10 transformation completed\n")

## Log10 transformation completed

# remove nonmito genes
ylm <- yl[genenames %in% mito,]

#Between-array normalization
ynm <- normalizeBetweenArrays(ylm, method = "quantile", targets = NULL, cyclic.method = "fast")
cat("Between-array normalization completed\n")

## Between-array normalization completed

cat("Final normalized data dimensions:", nrow(ynm), "x", ncol(ynm), "\n")

## Final normalized data dimensions: 970 x 7

fit <- lmFit(ynm, design)
fit <- eBayes(fit)

# Summary of differential expression
de_summary2 <- summary(decideTests(fit))

de_summary2

##        (Intercept) koTRUE
## Down             0      1
## NotSig           0    969
## Up             970      0

results <- topTable(fit, coef = 2, number = Inf)

# Calculate key statistics
total_proteins <- nrow(results)
significant_proteins <- sum(results$adj.P.Val < 0.05)
sig_rate <- round((significant_proteins / total_proteins) * 100, 1)

# Display most significant results
head(results, 20) %>%
  kbl(caption="Top 20 DE proteins") %>%
  kable_paper("hover", full_width = F)

Top 20 DE proteins
	logFC	AveExpr	t	P.Value	adj.P.Val	B
P49748_ACADVL	-0.9886404	8.079648	-11.436537	0.0000047	0.0045452	3.8155570
P49448_GLUD2	-0.3335719	6.432548	-6.087435	0.0003613	0.1228884	0.6040007
O15239_NDUFA1	-0.3624427	6.975618	-6.039722	0.0003801	0.1228884	0.5603807
Q96TC7_RMDN3	-0.2409756	7.740955	-5.435486	0.0007391	0.1660609	-0.0233680
O95178_NDUFB2	-0.2589000	6.096188	-5.145725	0.0010343	0.1660609	-0.3247449
Q9NQE9_HINT3	-0.2335068	6.802493	-5.079432	0.0011188	0.1660609	-0.3957169
P27105_STOM	-0.4464345	6.981229	-5.021813	0.0011984	0.1660609	-0.4580220
Q7Z6M4_MTERF4	0.2546649	6.926563	4.641960	0.0019079	0.2313366	-0.8833126
Q9H4K7_MTG2	0.1787095	7.554958	4.456684	0.0024121	0.2599656	-1.0999897
Q9NWX6_THG1L	0.2029737	6.974353	3.819173	0.0056188	0.5155023	-1.8913269
P23378_GLDC	-0.2522858	7.599438	-3.723253	0.0064144	0.5155023	-2.0163102
P12532_CKMT1A	-0.3351368	7.472854	-3.722974	0.0064169	0.5155023	-2.0166759
Q8N183_NDUFAF2	0.1538279	8.433794	3.615423	0.0074558	0.5155023	-2.1585432
Q9H4B0_OSGEPL1	0.1807376	7.384455	3.411703	0.0099509	0.5155023	-2.4320012
Q9P0J6_MRPL36	0.2563553	6.218910	3.409270	0.0099856	0.5155023	-2.4353035
Q969Z3_MTARC2	-0.1831623	7.018149	-3.405682	0.0100370	0.5155023	-2.4401733
P14406_COX7A2	-0.2367301	8.382974	-3.389153	0.0102776	0.5155023	-2.4626320
O14957_UQCR11	-0.3130263	7.391760	-3.337953	0.0110624	0.5155023	-2.5324303
Q96HP4_OXNAD1	0.1479719	7.258565	3.301370	0.0116621	0.5155023	-2.5825121
Q9BRQ6_CHCHD6	-0.1882930	7.128908	-3.289064	0.0118715	0.5155023	-2.5993971

upregulated <- results[results$logFC > 0 & results$adj.P.Val < 0.05, ]    # KO-enriched
downregulated <- results[results$logFC < 0 & results$adj.P.Val < 0.05, ]  # CTL-enriched

upregulated %>%
  kbl(caption="Upregulated proteins") %>%
  kable_paper("hover", full_width = F)

Upregulated proteins
logFC	AveExpr	t	P.Value	adj.P.Val	B

downregulated %>%
  kbl(caption="Downregulated proteins") %>%
  kable_paper("hover", full_width = F)

Downregulated proteins
	logFC	AveExpr	t	P.Value	adj.P.Val	B
P49748_ACADVL	-0.9886404	8.079648	-11.43654	4.7e-06	0.0045452	3.815557

sig_up <- nrow(upregulated)
sig_down <- nrow(downregulated)
not_sig <- total_proteins - sig_up - sig_down

# Create classification summary
classification_summary <- data.frame(
  Category = c("Chromatin-enriched", "Soluble-enriched", "Not Significant"),
  Count = c(sig_up, sig_down, not_sig),
  Percentage = round(c(sig_up, sig_down, not_sig) / total_proteins * 100, 1)
)

classification_summary %>%
  kbl(caption="DE summary") %>%
  kable_paper("hover", full_width = F)

DE summary
Category	Count	Percentage
Chromatin-enriched	0	0.0
Soluble-enriched	1	0.1
Not Significant	969	99.9

sig <- subset(results,adj.P.Val < 0.05)

plot(results$logFC,-log10(results$P.Value),
  pch=19,cex=0.6,col="gray",bty="n",
  xlab="log2 fold change",
  ylab="-log10(p-value)")

points(sig$logFC,-log10(sig$P.Value),pch=19,cex=0.65,col="red")

protnames <- sapply(strsplit(rownames(sig),"_"),"[[",2)
text(sig$logFC,-log10(sig$P.Value),labels=protnames,srt = -30)

Session information

For reproducibility.

sessionInfo()

## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gtools_3.9.5        mitch_1.20.0        gplots_3.2.0       
## [4] kableExtra_1.4.0    limma_3.64.3        pcaMethods_2.0.0   
## [7] Biobase_2.68.0      BiocGenerics_0.54.0 generics_0.1.4     
## 
## loaded via a namespace (and not attached):
##  [1] beeswarm_0.4.0        gtable_0.3.6          xfun_0.52            
##  [4] bslib_0.9.0           ggplot2_3.5.2         htmlwidgets_1.6.4    
##  [7] caTools_1.18.3        lattice_0.22-7        GGally_2.3.0         
## [10] vctrs_0.6.5           tools_4.5.2           bitops_1.0-9         
## [13] parallel_4.5.2        tibble_3.3.0          pkgconfig_2.0.3      
## [16] KernSmooth_2.23-26    RColorBrewer_1.1-3    S7_0.2.0             
## [19] lifecycle_1.0.4       compiler_4.5.2        farver_2.1.2         
## [22] stringr_1.5.1         textshaping_1.0.1     statmod_1.5.0        
## [25] httpuv_1.6.16         htmltools_0.5.8.1     sass_0.4.10          
## [28] yaml_2.3.10           pillar_1.11.0         later_1.4.2          
## [31] jquerylib_0.1.4       tidyr_1.3.1           MASS_7.3-65          
## [34] cachem_1.1.0          mime_0.13             network_1.19.0       
## [37] ggstats_0.10.0        tidyselect_1.2.1      digest_0.6.37        
## [40] stringi_1.8.7         reshape2_1.4.4        dplyr_1.1.4          
## [43] purrr_1.1.0           fastmap_1.2.0         grid_4.5.2           
## [46] cli_3.6.5             magrittr_2.0.3        dichromat_2.0-0.1    
## [49] withr_3.0.2           scales_1.4.0          promises_1.3.3       
## [52] rmarkdown_2.29        gridExtra_2.3         coda_0.19-4.1        
## [55] shiny_1.11.1          evaluate_1.0.4        knitr_1.50           
## [58] viridisLite_0.4.2     rlang_1.1.6           Rcpp_1.1.0           
## [61] xtable_1.8-4          glue_1.8.0            echarts4r_0.4.5      
## [64] xml2_1.3.8            svglite_2.2.1         rstudioapi_0.17.1    
## [67] jsonlite_2.0.0        plyr_1.8.9            statnet.common_4.12.0
## [70] R6_2.6.1              systemfonts_1.2.3

Analysis of proteomics data for Shashwat - VLCAD knock out

Burnet Bioinformatics Group

2025-11-06