library(limma)
library(ggplot2)
library(readr)
#Load Data
load("z_log10.RData")
# Sample sheet setup
sample_sheet <- data.frame(
Sample_ID = colnames(z_log10),
Fraction = ifelse(grepl("Chrom", colnames(z_log10)), "Chromatin", "Soluble"),
Replicate = c(1, 2, 3, 4, 1, 2, 3, 4),
stringsAsFactors = FALSE
)
sample_sheet$Fraction <- as.factor(sample_sheet$Fraction)
print(sample_sheet)
## Sample_ID Fraction Replicate
## 1 Chrom_01 Chromatin 1
## 2 Chrom_02 Chromatin 2
## 3 Chrom_03 Chromatin 3
## 4 Chrom_04 Chromatin 4
## 5 Sol_01 Soluble 1
## 6 Sol_02 Soluble 2
## 7 Sol_03 Soluble 3
## 8 Sol_04 Soluble 4
# Check that Fraction is now a factor
str(sample_sheet$Fraction)
## Factor w/ 2 levels "Chromatin","Soluble": 1 1 1 1 2 2 2 2
# Design matrix
design <- model.matrix(~ Fraction, data = sample_sheet)
print(design)
## (Intercept) FractionSoluble
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 1
## 6 1 1
## 7 1 1
## 8 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$Fraction
## [1] "contr.treatment"
# Limma analysis
fit <- lmFit(z_log10, design)
fit <- eBayes(fit)
# Results summary
summary(decideTests(fit))
## (Intercept) FractionSoluble
## Down 0 1739
## NotSig 0 2175
## Up 8378 4464
Analysis: Overall Differential Expression Significantly altered proteins: 5,644 (67.4% of total) Upregulated in Soluble: 3,367 proteins (40.2%) Downregulated in Soluble: 2,277 proteins (27.2%) Non-significant: 2,734 proteins (32.6%)
# Extract all results
results <- topTable(fit, coef = 2, number = Inf)
# Basic statistics
summary(results$adj.P.Val < 0.05)
## Mode FALSE TRUE
## logical 2175 6203
summary(results$logFC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.1035 -0.1571 0.3102 0.1726 0.5640 2.8848
Analysis: normalized data with subtle but statistically significant differences
# Top 20 results
head(results, 20)
## logFC AveExpr t P.Value adj.P.Val B
## Q15397_PUM3 -1.636734 6.038319 -53.64031 7.462649e-11 4.805913e-07 15.53881
## O76021_RSL1D1 -1.538797 6.829486 -47.47242 1.837209e-10 4.805913e-07 14.80027
## Q14690_PDCD11 -1.585468 6.345757 -46.51259 2.135796e-10 4.805913e-07 14.67208
## Q13206_DDX10 -1.436811 6.031906 -45.24247 2.619362e-10 4.805913e-07 14.49629
## O60832_DKC1 -1.431084 6.455375 -42.97651 3.825153e-10 4.805913e-07 14.16409
## Q9NY61_AATF -1.222152 6.094339 -41.23577 5.187292e-10 4.805913e-07 13.89141
## Q9UIG0_BAZ1B -1.772072 6.081298 -41.08393 5.330181e-10 4.805913e-07 13.86685
## Q9H9Y2_RPF1 -1.562207 5.936171 -40.34974 6.087244e-10 4.805913e-07 13.74633
## Q8WTT2_NOC3L -1.373224 6.311540 -39.49554 7.126114e-10 4.805913e-07 13.60223
## P08582_MELTF -1.817753 5.583388 -39.26366 7.441878e-10 4.805913e-07 13.56237
## Q12788_TBL3 -1.360638 6.067751 -39.04444 7.755075e-10 4.805913e-07 13.52439
## Q9HCD5_NCOA5 -1.356287 6.056583 -38.96013 7.879491e-10 4.805913e-07 13.50971
## Q03701_CEBPZ -1.719218 6.049138 -38.85842 8.032616e-10 4.805913e-07 13.49193
## Q5BKZ1_ZNF326 -1.321794 6.093202 -38.47695 8.637729e-10 4.805913e-07 13.42471
## O00567_NOP56 -1.510220 6.679654 -38.04306 9.389787e-10 4.805913e-07 13.34715
## P68431_H3C1 -2.196880 7.950614 -37.88794 9.676479e-10 4.805913e-07 13.31913
## Q02539_H1-1 -1.690220 7.470707 -37.84805 9.751794e-10 4.805913e-07 13.31190
## Q9Y3A4_RRP7A -1.151081 6.178304 -36.14657 1.368021e-09 6.248218e-07 12.99367
## Q8N884_CGAS -1.267604 5.484544 -35.97416 1.416999e-09 6.248218e-07 12.96032
## Q9H6R4_NOL6 -1.320551 6.314827 -35.25179 1.645034e-09 6.466058e-07 12.81827
#Bottom 20 results
tail(results, 20)
## logFC AveExpr t P.Value adj.P.Val
## Q9Y4X4_KLF12 4.700769e-03 4.385220 0.037713437 0.9709142 0.9731211
## Q12834_CDC20 -2.439802e-03 6.074806 -0.035617135 0.9725302 0.9746241
## Q96MW1_CCDC43 4.230693e-03 4.831789 0.035023197 0.9729880 0.9749664
## Q14651_PLS1 1.712742e-03 5.605773 0.029650020 0.9771306 0.9790003
## Q9NVN8_GNL3L 1.403241e-03 5.695109 0.028136550 0.9782976 0.9800523
## O00635_TRIM38 4.142940e-03 4.726029 0.024224988 0.9813140 0.9829566
## Q2LD37_BLTP1 -2.755922e-03 4.813057 -0.023602603 0.9817940 0.9833198
## Q8NAV2_C8orf58 -1.521128e-02 3.868397 -0.016798206 0.9870419 0.9884577
## Q6NUQ4_TMEM214 7.603888e-04 6.295603 0.015826534 0.9877914 0.9890900
## Q96CM8_ACSF2 2.351436e-03 4.971178 0.015254330 0.9882327 0.9894137
## P12111_COL6A3 7.549377e-04 5.381510 0.014678654 0.9886768 0.9897400
## Q96D70_R3HDM4 -8.110338e-03 3.415436 -0.013702288 0.9894299 0.9903756
## Q9NYV4_CDK12 7.593327e-04 5.928985 0.011338632 0.9912532 0.9918903
## Q9NQZ8_ZNF71 -1.071753e-02 4.024566 -0.011293373 0.9912881 0.9918903
## O96020_CCNE2 7.859905e-03 4.095990 0.011280058 0.9912983 0.9918903
## Q969S2_NEIL2 4.262250e-04 5.558741 0.006666446 0.9948573 0.9953325
## Q9Y2Q9_MRPS28 -3.304611e-04 6.261854 -0.006098967 0.9952951 0.9956516
## Q8NHP8_PLBD2 -3.882499e-04 5.600108 -0.004702794 0.9963721 0.9966100
## P61916_NPC2 -2.344171e-04 5.722237 -0.002688003 0.9979264 0.9980455
## P19634_SLC9A1 -5.169494e-05 5.577038 -0.001037880 0.9991993 0.9991993
## B
## Q9Y4X4_KLF12 -7.995767
## Q12834_CDC20 -7.995854
## Q96MW1_CCDC43 -7.995877
## Q14651_PLS1 -7.996074
## Q9NVN8_GNL3L -7.996124
## O00635_TRIM38 -7.996240
## Q2LD37_BLTP1 -7.996257
## Q8NAV2_C8orf58 -7.996413
## Q6NUQ4_TMEM214 -7.996431
## Q96CM8_ACSF2 -7.996441
## P12111_COL6A3 -7.996451
## Q96D70_R3HDM4 -7.996466
## Q9NYV4_CDK12 -7.996500
## Q9NQZ8_ZNF71 -7.996501
## O96020_CCNE2 -7.996501
## Q969S2_NEIL2 -7.996548
## Q9Y2Q9_MRPS28 -7.996552
## Q8NHP8_PLBD2 -7.996560
## P61916_NPC2 -7.996569
## P19634_SLC9A1 -7.996572
Analysis: Most Significantly Downregulated in Soluble (Chromatin-enriched) PUM3 (RNA-binding protein) - adj.P < 3.4 × 10⁻⁷ DDX10 (RNA helicase) - adj.P < 3.4 × 10⁻⁷ PDCD11 (ribosome biogenesis) - adj.P < 3.4 × 10⁻⁷ DKC1 (telomerase complex) - adj.P < 3.4 × 10⁻⁷ RSL1D1 (ribosome biogenesis) - adj.P < 3.4 × 10⁻⁷
Minimal Changes (P-values > 0.97):
STK25 (serine/threonine kinase) - P = 0.9998, essentially no difference ATG101 (autophagy protein) - P = 0.9975 HES1 (transcription factor) - P = 0.9974 DNAJC21 (chaperone) - P = 0.9945
Functional Categories: Metabolic enzymes: MLYCD, IDUA, KDSR Transcription factors: E2F3, HES1 Protein quality control: DNAJC8, DNAJC21 Membrane proteins: TMEM167A, TM7SF3 Kinases: STK25, PI4K2B
# Significant proteins
upregulated <- results[results$logFC > 0 & results$adj.P.Val < 0.05, ]
downregulated <- results[results$logFC < 0 & results$adj.P.Val < 0.05, ]
# Count significant proteins
sig_up <- nrow(upregulated)
sig_down <- nrow(downregulated)
print(paste("Upregulated proteins:", sig_up))
## [1] "Upregulated proteins: 4464"
print(paste("Downregulated proteins:", sig_down))
## [1] "Downregulated proteins: 1739"
# Extract histone proteins
histones <- results[grepl("_H1|_H2A|_H2B|_H3|_H4", rownames(results)), ]
print(histones)
## logFC AveExpr t P.Value adj.P.Val
## P68431_H3C1 -2.1968801 7.950614 -37.887939 9.676479e-10 4.805913e-07
## Q02539_H1-1 -1.6902201 7.470707 -37.848053 9.751794e-10 4.805913e-07
## P57053_H2BC12L -2.0043309 7.321813 -34.882523 1.777508e-09 6.466058e-07
## Q8IUE6_H2AC21 -2.6037838 6.826629 -33.385561 2.454023e-09 6.466058e-07
## Q6DN03_H2BC20P -2.3065859 7.062474 -29.279134 6.434852e-09 9.183603e-07
## P62805_H4C1 -2.2949850 8.005607 -27.784805 9.449688e-09 1.055593e-06
## Q96A08_H2BC1 -2.0658741 8.113394 -27.410418 1.043782e-08 1.095349e-06
## P07305_H1-0 -2.1796681 7.075728 -26.855498 1.212574e-08 1.195170e-06
## P0C0S5_H2AZ1 -2.0093734 8.190469 -24.899488 2.109558e-08 1.578025e-06
## P16403_H1-2 -1.8277637 7.809982 -23.432447 3.288708e-08 2.000899e-06
## Q92522_H1-10 -1.0591056 6.711939 -12.516769 3.046222e-06 2.356533e-05
## P0C0S8_H2AC11 -1.4248420 6.937304 -10.806930 8.580781e-06 4.723376e-05
## Q93077_H2AC6 -2.0369642 5.954004 -10.674812 9.351922e-06 5.036300e-05
## Q5TEC6_H3-7 -4.1034995 6.445711 -5.456591 7.893883e-04 1.617387e-03
## P22492_H1-6 -0.6580821 4.531978 -4.476183 2.513077e-03 4.435340e-03
## B
## P68431_H3C1 13.319126
## Q02539_H1-1 13.311896
## P57053_H2BC12L 12.744176
## Q8IUE6_H2AC21 12.433115
## Q6DN03_H2BC20P 11.480934
## P62805_H4C1 11.093032
## Q96A08_H2BC1 10.991934
## P07305_H1-0 10.839042
## P0C0S5_H2AZ1 10.269140
## P16403_H1-2 9.806845
## Q92522_H1-10 4.915384
## P0C0S8_H2AC11 3.771109
## Q93077_H2AC6 3.675828
## Q5TEC6_H3-7 -1.237812
## P22492_H1-6 -2.506870
plot(results$logFC, -log10(results$P.Value),
xlab = "log2(Fold Change)", ylab = "-log10(P-value)",
main = "Volcano Plot: Soluble vs Chromatin",
col = "black", pch = 16)
# Add significance threshold line
abline(h = -log10(0.05), col = "gray", lty = 2)
abline(v = 0, col = "gray", lty = 2)
# Identify and highlight significant points
sig <- subset(results, P.Value < 0.05)
points(sig$logFC, -log10(sig$P.Value), col = "blue", pch = 16)
# Extract gene symbols
upregulated_genes <- gsub(".*_", "", rownames(upregulated))
downregulated_genes <- gsub(".*_", "", rownames(downregulated))
head(upregulated_genes, 10)
## [1] "ACADS" "MED18" "STK39" "AGL" "PGD" "XPNPEP1" "KATNB1"
## [8] "PLEKHF1" "ACADVL" "CPNE1"
head(downregulated_genes, 10)
## [1] "PUM3" "RSL1D1" "PDCD11" "DDX10" "DKC1" "AATF" "BAZ1B" "RPF1"
## [9] "NOC3L" "MELTF"
# Save to CSV files
write.csv(results, "differential_expression_results.csv", row.names = TRUE)
write.csv(results[results$adj.P.Val < 0.05, ], "significant_proteins.csv")
write.csv(histones, "histone_analysis.csv")