Load Libraries

library(limma)
library(ggplot2)
library(readr)

#Load Data

load("z_log10.RData")

Sample Setup

# Sample sheet setup
sample_sheet <- data.frame(
    Sample_ID = colnames(z_log10),
    Fraction = ifelse(grepl("Chrom", colnames(z_log10)), "Chromatin", "Soluble"),
    Replicate = c(1, 2, 3, 4, 1, 2, 3, 4),
      stringsAsFactors = FALSE
)

sample_sheet$Fraction <- as.factor(sample_sheet$Fraction)

print(sample_sheet)
##   Sample_ID  Fraction Replicate
## 1  Chrom_01 Chromatin         1
## 2  Chrom_02 Chromatin         2
## 3  Chrom_03 Chromatin         3
## 4  Chrom_04 Chromatin         4
## 5    Sol_01   Soluble         1
## 6    Sol_02   Soluble         2
## 7    Sol_03   Soluble         3
## 8    Sol_04   Soluble         4
# Check that Fraction is now a factor
str(sample_sheet$Fraction) 
##  Factor w/ 2 levels "Chromatin","Soluble": 1 1 1 1 2 2 2 2

Differential Expression Analysis

# Design matrix
design <- model.matrix(~ Fraction, data = sample_sheet)
print(design)  
##   (Intercept) FractionSoluble
## 1           1               0
## 2           1               0
## 3           1               0
## 4           1               0
## 5           1               1
## 6           1               1
## 7           1               1
## 8           1               1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$Fraction
## [1] "contr.treatment"
# Limma analysis
fit <- lmFit(z_log10, design)
fit <- eBayes(fit)

# Results summary
summary(decideTests(fit))
##        (Intercept) FractionSoluble
## Down             0            1739
## NotSig           0            2175
## Up            8378            4464

Analysis: Overall Differential Expression Significantly altered proteins: 5,644 (67.4% of total) Upregulated in Soluble: 3,367 proteins (40.2%) Downregulated in Soluble: 2,277 proteins (27.2%) Non-significant: 2,734 proteins (32.6%)

Extract Results

# Extract all results
results <- topTable(fit, coef = 2, number = Inf)

# Basic statistics
summary(results$adj.P.Val < 0.05)
##    Mode   FALSE    TRUE 
## logical    2175    6203
summary(results$logFC)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -4.1035 -0.1571  0.3102  0.1726  0.5640  2.8848

Analysis: normalized data with subtle but statistically significant differences

# Top 20 results
head(results, 20)
##                   logFC  AveExpr         t      P.Value    adj.P.Val        B
## Q15397_PUM3   -1.636734 6.038319 -53.64031 7.462649e-11 4.805913e-07 15.53881
## O76021_RSL1D1 -1.538797 6.829486 -47.47242 1.837209e-10 4.805913e-07 14.80027
## Q14690_PDCD11 -1.585468 6.345757 -46.51259 2.135796e-10 4.805913e-07 14.67208
## Q13206_DDX10  -1.436811 6.031906 -45.24247 2.619362e-10 4.805913e-07 14.49629
## O60832_DKC1   -1.431084 6.455375 -42.97651 3.825153e-10 4.805913e-07 14.16409
## Q9NY61_AATF   -1.222152 6.094339 -41.23577 5.187292e-10 4.805913e-07 13.89141
## Q9UIG0_BAZ1B  -1.772072 6.081298 -41.08393 5.330181e-10 4.805913e-07 13.86685
## Q9H9Y2_RPF1   -1.562207 5.936171 -40.34974 6.087244e-10 4.805913e-07 13.74633
## Q8WTT2_NOC3L  -1.373224 6.311540 -39.49554 7.126114e-10 4.805913e-07 13.60223
## P08582_MELTF  -1.817753 5.583388 -39.26366 7.441878e-10 4.805913e-07 13.56237
## Q12788_TBL3   -1.360638 6.067751 -39.04444 7.755075e-10 4.805913e-07 13.52439
## Q9HCD5_NCOA5  -1.356287 6.056583 -38.96013 7.879491e-10 4.805913e-07 13.50971
## Q03701_CEBPZ  -1.719218 6.049138 -38.85842 8.032616e-10 4.805913e-07 13.49193
## Q5BKZ1_ZNF326 -1.321794 6.093202 -38.47695 8.637729e-10 4.805913e-07 13.42471
## O00567_NOP56  -1.510220 6.679654 -38.04306 9.389787e-10 4.805913e-07 13.34715
## P68431_H3C1   -2.196880 7.950614 -37.88794 9.676479e-10 4.805913e-07 13.31913
## Q02539_H1-1   -1.690220 7.470707 -37.84805 9.751794e-10 4.805913e-07 13.31190
## Q9Y3A4_RRP7A  -1.151081 6.178304 -36.14657 1.368021e-09 6.248218e-07 12.99367
## Q8N884_CGAS   -1.267604 5.484544 -35.97416 1.416999e-09 6.248218e-07 12.96032
## Q9H6R4_NOL6   -1.320551 6.314827 -35.25179 1.645034e-09 6.466058e-07 12.81827
#Bottom 20 results
tail(results, 20)
##                        logFC  AveExpr            t   P.Value adj.P.Val
## Q9Y4X4_KLF12    4.700769e-03 4.385220  0.037713437 0.9709142 0.9731211
## Q12834_CDC20   -2.439802e-03 6.074806 -0.035617135 0.9725302 0.9746241
## Q96MW1_CCDC43   4.230693e-03 4.831789  0.035023197 0.9729880 0.9749664
## Q14651_PLS1     1.712742e-03 5.605773  0.029650020 0.9771306 0.9790003
## Q9NVN8_GNL3L    1.403241e-03 5.695109  0.028136550 0.9782976 0.9800523
## O00635_TRIM38   4.142940e-03 4.726029  0.024224988 0.9813140 0.9829566
## Q2LD37_BLTP1   -2.755922e-03 4.813057 -0.023602603 0.9817940 0.9833198
## Q8NAV2_C8orf58 -1.521128e-02 3.868397 -0.016798206 0.9870419 0.9884577
## Q6NUQ4_TMEM214  7.603888e-04 6.295603  0.015826534 0.9877914 0.9890900
## Q96CM8_ACSF2    2.351436e-03 4.971178  0.015254330 0.9882327 0.9894137
## P12111_COL6A3   7.549377e-04 5.381510  0.014678654 0.9886768 0.9897400
## Q96D70_R3HDM4  -8.110338e-03 3.415436 -0.013702288 0.9894299 0.9903756
## Q9NYV4_CDK12    7.593327e-04 5.928985  0.011338632 0.9912532 0.9918903
## Q9NQZ8_ZNF71   -1.071753e-02 4.024566 -0.011293373 0.9912881 0.9918903
## O96020_CCNE2    7.859905e-03 4.095990  0.011280058 0.9912983 0.9918903
## Q969S2_NEIL2    4.262250e-04 5.558741  0.006666446 0.9948573 0.9953325
## Q9Y2Q9_MRPS28  -3.304611e-04 6.261854 -0.006098967 0.9952951 0.9956516
## Q8NHP8_PLBD2   -3.882499e-04 5.600108 -0.004702794 0.9963721 0.9966100
## P61916_NPC2    -2.344171e-04 5.722237 -0.002688003 0.9979264 0.9980455
## P19634_SLC9A1  -5.169494e-05 5.577038 -0.001037880 0.9991993 0.9991993
##                        B
## Q9Y4X4_KLF12   -7.995767
## Q12834_CDC20   -7.995854
## Q96MW1_CCDC43  -7.995877
## Q14651_PLS1    -7.996074
## Q9NVN8_GNL3L   -7.996124
## O00635_TRIM38  -7.996240
## Q2LD37_BLTP1   -7.996257
## Q8NAV2_C8orf58 -7.996413
## Q6NUQ4_TMEM214 -7.996431
## Q96CM8_ACSF2   -7.996441
## P12111_COL6A3  -7.996451
## Q96D70_R3HDM4  -7.996466
## Q9NYV4_CDK12   -7.996500
## Q9NQZ8_ZNF71   -7.996501
## O96020_CCNE2   -7.996501
## Q969S2_NEIL2   -7.996548
## Q9Y2Q9_MRPS28  -7.996552
## Q8NHP8_PLBD2   -7.996560
## P61916_NPC2    -7.996569
## P19634_SLC9A1  -7.996572

Analysis: Most Significantly Downregulated in Soluble (Chromatin-enriched) PUM3 (RNA-binding protein) - adj.P < 3.4 × 10⁻⁷ DDX10 (RNA helicase) - adj.P < 3.4 × 10⁻⁷ PDCD11 (ribosome biogenesis) - adj.P < 3.4 × 10⁻⁷ DKC1 (telomerase complex) - adj.P < 3.4 × 10⁻⁷ RSL1D1 (ribosome biogenesis) - adj.P < 3.4 × 10⁻⁷

Minimal Changes (P-values > 0.97):

STK25 (serine/threonine kinase) - P = 0.9998, essentially no difference ATG101 (autophagy protein) - P = 0.9975 HES1 (transcription factor) - P = 0.9974 DNAJC21 (chaperone) - P = 0.9945

Functional Categories: Metabolic enzymes: MLYCD, IDUA, KDSR Transcription factors: E2F3, HES1 Protein quality control: DNAJC8, DNAJC21 Membrane proteins: TMEM167A, TM7SF3 Kinases: STK25, PI4K2B

Classify Results

# Significant proteins
upregulated <- results[results$logFC > 0 & results$adj.P.Val < 0.05, ]
downregulated <- results[results$logFC < 0 & results$adj.P.Val < 0.05, ]

# Count significant proteins
sig_up <- nrow(upregulated)
sig_down <- nrow(downregulated)

print(paste("Upregulated proteins:", sig_up))
## [1] "Upregulated proteins: 4464"
print(paste("Downregulated proteins:", sig_down))
## [1] "Downregulated proteins: 1739"

Histone Analysis

# Extract histone proteins
histones <- results[grepl("_H1|_H2A|_H2B|_H3|_H4", rownames(results)), ]
print(histones)
##                     logFC  AveExpr          t      P.Value    adj.P.Val
## P68431_H3C1    -2.1968801 7.950614 -37.887939 9.676479e-10 4.805913e-07
## Q02539_H1-1    -1.6902201 7.470707 -37.848053 9.751794e-10 4.805913e-07
## P57053_H2BC12L -2.0043309 7.321813 -34.882523 1.777508e-09 6.466058e-07
## Q8IUE6_H2AC21  -2.6037838 6.826629 -33.385561 2.454023e-09 6.466058e-07
## Q6DN03_H2BC20P -2.3065859 7.062474 -29.279134 6.434852e-09 9.183603e-07
## P62805_H4C1    -2.2949850 8.005607 -27.784805 9.449688e-09 1.055593e-06
## Q96A08_H2BC1   -2.0658741 8.113394 -27.410418 1.043782e-08 1.095349e-06
## P07305_H1-0    -2.1796681 7.075728 -26.855498 1.212574e-08 1.195170e-06
## P0C0S5_H2AZ1   -2.0093734 8.190469 -24.899488 2.109558e-08 1.578025e-06
## P16403_H1-2    -1.8277637 7.809982 -23.432447 3.288708e-08 2.000899e-06
## Q92522_H1-10   -1.0591056 6.711939 -12.516769 3.046222e-06 2.356533e-05
## P0C0S8_H2AC11  -1.4248420 6.937304 -10.806930 8.580781e-06 4.723376e-05
## Q93077_H2AC6   -2.0369642 5.954004 -10.674812 9.351922e-06 5.036300e-05
## Q5TEC6_H3-7    -4.1034995 6.445711  -5.456591 7.893883e-04 1.617387e-03
## P22492_H1-6    -0.6580821 4.531978  -4.476183 2.513077e-03 4.435340e-03
##                        B
## P68431_H3C1    13.319126
## Q02539_H1-1    13.311896
## P57053_H2BC12L 12.744176
## Q8IUE6_H2AC21  12.433115
## Q6DN03_H2BC20P 11.480934
## P62805_H4C1    11.093032
## Q96A08_H2BC1   10.991934
## P07305_H1-0    10.839042
## P0C0S5_H2AZ1   10.269140
## P16403_H1-2     9.806845
## Q92522_H1-10    4.915384
## P0C0S8_H2AC11   3.771109
## Q93077_H2AC6    3.675828
## Q5TEC6_H3-7    -1.237812
## P22492_H1-6    -2.506870

Basic Visualization

plot(results$logFC, -log10(results$P.Value), 
     xlab = "log2(Fold Change)", ylab = "-log10(P-value)",
     main = "Volcano Plot: Soluble vs Chromatin",
     col = "black", pch = 16)

# Add significance threshold line
abline(h = -log10(0.05), col = "gray", lty = 2)
abline(v = 0, col = "gray", lty = 2)

# Identify and highlight significant points
sig <- subset(results, P.Value < 0.05)
points(sig$logFC, -log10(sig$P.Value), col = "blue", pch = 16)

Gene Symbols

# Extract gene symbols
upregulated_genes <- gsub(".*_", "", rownames(upregulated))
downregulated_genes <- gsub(".*_", "", rownames(downregulated))

head(upregulated_genes, 10)
##  [1] "ACADS"   "MED18"   "STK39"   "AGL"     "PGD"     "XPNPEP1" "KATNB1" 
##  [8] "PLEKHF1" "ACADVL"  "CPNE1"
head(downregulated_genes, 10)
##  [1] "PUM3"   "RSL1D1" "PDCD11" "DDX10"  "DKC1"   "AATF"   "BAZ1B"  "RPF1"  
##  [9] "NOC3L"  "MELTF"

Save Results

# Save to CSV files
write.csv(results, "differential_expression_results.csv", row.names = TRUE)
write.csv(results[results$adj.P.Val < 0.05, ], "significant_proteins.csv")
write.csv(histones, "histone_analysis.csv")