Source: https://github.com/markziemann/paula_proteomics
library("limma")
library("kableExtra")
library("vioplot")
library("beeswarm")
library("gplots")
library("openxlsx")
library("eulerr")
library("stringr")
We want to properly annotate these proteins with names and conduct differential expression irrespective of the strain/isolate.
We are running 6 different protein expression contrasts. All of these are looking at the effect of lactic acid, but with different inclusion.
L. crispatus both strains
L. crispatus strain D
L. crispatus strain A
L. iners both strains
L. iners strain S/7
L. iners strain A
It requires a lot of cleaning. Here is what the data looks like to start with.
x <- read.table("20240611_192029_P24_0717_E1_PaluaE_Updated_LC_ATCC_Report.tsv",sep="\t",header=TRUE)
head(x,6) %>% kbl(caption = "Input data from 20240611_192029_P24_0717_E1_PaluaE_Updated_LC_ATCC_Report.tsv") %>% kable_paper("hover", full_width = F)
PG.ProteinGroups | PG.ProteinAccessions | PG.Genes | PG.ProteinDescriptions | PG.UniProtIds | PG.FastaHeaders | X.1..AST1_JS_20240405_P24_0717_E1_S02.raw.PG.NrOfPrecursorsMeasured | X.2..AST1_JS_20240405_P24_0717_E1_S10.raw.PG.NrOfPrecursorsMeasured | X.3..AST1_JS_20240405_P24_0717_E1_S01.raw.PG.NrOfPrecursorsMeasured | X.4..AST1_JS_20240405_P24_0717_E1_S09.raw.PG.NrOfPrecursorsMeasured | X.5..AST1_JS_20240405_P24_0717_E1_S04.raw.PG.NrOfPrecursorsMeasured | X.6..AST1_JS_20240405_P24_0717_E1_S12.raw.PG.NrOfPrecursorsMeasured | X.7..AST1_JS_20240405_P24_0717_E1_S11.raw.PG.NrOfPrecursorsMeasured | X.8..AST1_JS_20240405_P24_0717_E1_S03_20240426013646.raw.PG.NrOfPrecursorsMeasured | X.1..AST1_JS_20240405_P24_0717_E1_S02.raw.PG.Quantity | X.2..AST1_JS_20240405_P24_0717_E1_S10.raw.PG.Quantity | X.3..AST1_JS_20240405_P24_0717_E1_S01.raw.PG.Quantity | X.4..AST1_JS_20240405_P24_0717_E1_S09.raw.PG.Quantity | X.5..AST1_JS_20240405_P24_0717_E1_S04.raw.PG.Quantity | X.6..AST1_JS_20240405_P24_0717_E1_S12.raw.PG.Quantity | X.7..AST1_JS_20240405_P24_0717_E1_S11.raw.PG.Quantity | X.8..AST1_JS_20240405_P24_0717_E1_S03_20240426013646.raw.PG.Quantity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Cont_A0A3Q1M3L6 | Cont_A0A3Q1M3L6 | Uncharacterized protein | Cont_A0A3Q1M3L6 | >tr|Cont_A0A3Q1M3L6|A0A3Q1M3L6_BOVIN Uncharacterized protein OS=Bos taurus OX=9913 PE=1 SV=1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 95.97440 | 109.55053 | 16.73227 | NaN | 24.11901 | 46.19556 | NaN | NaN | |
Cont_A0A3Q1MJB2 | Cont_A0A3Q1MJB2 | FBLN1 | Fibulin-1 | Cont_A0A3Q1MJB2 | >tr|Cont_A0A3Q1MJB2|A0A3Q1MJB2_BOVIN Fibulin-1 OS=Bos taurus OX=9913 GN=FBLN1 PE=1 SV=1 | 17 | 17 | 17 | 17 | 17 | 17 | 17 | 17 | 19.75845 | 37.81480 | NaN | NaN | 13.18379 | 23.32178 | NaN | 8.519665 |
Cont_A2I7N0 | Cont_A2I7N0 | SERPINA3-4 | Serpin A3-4 | Cont_A2I7N0 | >sp|Cont_A2I7N0|SPA34_BOVIN Serpin A3-4 OS=Bos taurus OX=9913 GN=SERPINA3-4 PE=3 SV=1 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 70.93400 | 75.30853 | NaN | NaN | 47.79508 | 60.18491 | 44.21424 | 78.915138 |
Cont_A2I7N3 | Cont_A2I7N3 | SERPINA3-7 | Serpin A3-7 | Cont_A2I7N3 | >sp|Cont_A2I7N3|SPA37_BOVIN Serpin A3-7 OS=Bos taurus OX=9913 GN=SERPINA3-7 PE=3 SV=1 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 265.34586 | 415.20547 | 91.61462 | 22.31066 | 117.20869 | 206.51183 | NaN | NaN |
Cont_A7E3W2 | Cont_A7E3W2 | LGALS3BP | Galectin-3-binding protein | Cont_A7E3W2 | >sp|Cont_A7E3W2|LG3BP_BOVIN Galectin-3-binding protein OS=Bos taurus OX=9913 GN=LGALS3BP PE=1 SV=1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 16.56437 | 21.59553 | NaN | NaN | 16.88817 | 32.42205 | 12.57231 | NaN |
Cont_G5E513 | Cont_G5E513 | Uncharacterized protein | Cont_G5E513 | >tr|Cont_G5E513|G5E513_BOVIN Uncharacterized protein OS=Bos taurus OX=9913 PE=1 SV=2 | 16 | 16 | 16 | 16 | 16 | 16 | 16 | 16 | 163.90485 | 266.43649 | 132.05286 | NaN | 103.07031 | 145.92206 | NaN | NaN |
# filter the data
q <- x[,c(1:6,15:ncol(x))]
q2 <- q[grep("UPI",q[,1]),]
# clean
colnames(q2) <- gsub("_2024042",".raw",colnames(q2))
colnames(q2) <- gsub(".raw"," ",colnames(q2))
colnames(q2) <- gsub("_E1_"," ",colnames(q2))
colnames(q2) <- str_sub(colnames(q2),4,)
colnames(q2) <- gsub("..AST1_JS_20240405_P24_0717 "," ",colnames(q2))
colnames(q2) <- gsub("PG.","PG ",colnames(q2))
colnames(q2) <- gsub("^ ","",colnames(q2))
colnames(q2) <- sapply(strsplit(colnames(q2)," "),"[[",1)
message("number of rows and columns in the data")
## number of rows and columns in the data
dim(q2)
## [1] 1572 14
head(q2,6) %>% kbl(caption = "after cleaning it looks like this") %>% kable_paper("hover", full_width = F)
ProteinGroups | ProteinAccessions | Genes | ProteinDescriptions | UniProtIds | FastaHeaders | S02 | S10 | S01 | S09 | S04 | S12 | S11 | S03 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
100 | UPI000045224F | UPI000045224F | status=active | >UPI000045224F status=active | 604.57123 | 575.3896 | 331.45837 | 429.70279 | 253.03282 | 195.8584 | 245.82193 | 169.17685 | ||
101 | UPI0000452250 | UPI0000452250 | status=active | >UPI0000452250 status=active | 53.33073 | 51.8002 | 58.16257 | 62.12326 | 18.19687 | 17.6302 | 12.94107 | 13.71563 | ||
102 | UPI00004B2864 | UPI00004B2864 | status=active | >UPI00004B2864 status=active | 5987.67871 | 6332.9077 | 4586.60986 | 4949.33350 | 4437.26953 | 4903.6855 | 4003.72314 | 3560.29736 | ||
103 | UPI00004C6804 | UPI00004C6804 | status=active | >UPI00004C6804 status=active | 1228.69580 | 1015.1328 | 927.17303 | 874.80890 | 1186.76379 | 1064.7327 | 922.82410 | 976.92743 | ||
104 | UPI00004C690C | UPI00004C690C | status=active | >UPI00004C690C status=active | 3470.40405 | 3655.9775 | 4382.52979 | 4565.18848 | 4038.15283 | 4117.3325 | 4461.77148 | 4963.70801 | ||
105 | UPI00004C6915 | UPI00004C6915 | status=active | >UPI00004C6915 status=active | 2973.98657 | 3171.4775 | 3307.13892 | 3571.12085 | 4827.39209 | 4384.8164 | 4490.04590 | 4598.70557 |
Now we can fetch annotations from UniProt batch tool. The full data set is available as an Excel file called “quantifications_crisp.xlsx”.
Also need to log transform the data in order to get sane values of logFC.
y <- read.csv("idmapping_2024_06_27.tsv.gz",sep="\t",header=TRUE)
y2 <- y[!duplicated(y$From), ]
m <- merge(y2,q2,by.x="From",by.y="ProteinGroups",all.y=TRUE)
rownames(m) <- paste(m$ProteinAccessions,m$Protein.names)
m2 <- m[,grep("S",colnames(m))]
head(m2,6) %>% kbl(caption = "Now the proteins have a proper name (if one is available)") %>% kable_paper("hover", full_width = F)
S02 | S10 | S01 | S09 | S04 | S12 | S11 | S03 | |
---|---|---|---|---|---|---|---|---|
UPI000045224F Histidine phosphatase family protein (Phosphoglycerate mutase) | 604.57123 | 575.3896 | 331.45837 | 429.70279 | 253.03282 | 195.8584 | 245.82193 | 169.17685 |
UPI0000452250 Histidine phosphatase family protein (Phosphoglycerate mutase) | 53.33073 | 51.8002 | 58.16257 | 62.12326 | 18.19687 | 17.6302 | 12.94107 | 13.71563 |
UPI00004B2864 Glyceraldehyde-3-phosphate dehydrogenase (EC 1.2.1.-) | 5987.67871 | 6332.9077 | 4586.60986 | 4949.33350 | 4437.26953 | 4903.6855 | 4003.72314 | 3560.29736 |
UPI00004C6804 Small ribosomal subunit protein bS18 | 1228.69580 | 1015.1328 | 927.17303 | 874.80890 | 1186.76379 | 1064.7327 | 922.82410 | 976.92743 |
UPI00004C690C Small ribosomal subunit protein uS10 | 3470.40405 | 3655.9775 | 4382.52979 | 4565.18848 | 4038.15283 | 4117.3325 | 4461.77148 | 4963.70801 |
UPI00004C6915 Large ribosomal subunit protein uL29 | 2973.98657 | 3171.4775 | 3307.13892 | 3571.12085 | 4827.39209 | 4384.8164 | 4490.04590 | 4598.70557 |
m3 <- m2
m3$UniParc <- rownames(m3)
m3 <- m3[,c(ncol(m3),1:ncol(m3)-1)]
write.xlsx(m2, "quantifications_crisp.xlsx")
message("number of NA values per row")
## number of NA values per row
table(unname(apply(m2,1,function(x) { length(which(is.na(x))) } )))
##
## 0 1 2 3 4 5 6 7
## 1260 38 44 69 129 10 16 6
m2 <- m2[!is.na(rowSums(m2)),]
m2 <- log(m2)
Define the control and lactate groups and the strain.
ss <- read.table("samplesheet.tsv",header=TRUE,row.names=1)
ss$LA <- factor(grepl("LA",ss$condition))
ss$strain <- factor(grepl("_D_",ss$condition))
ss %>% kbl(caption = "Sample sheet") %>% kable_paper("hover", full_width = F)
condition | replicate | LA | strain | |
---|---|---|---|---|
S02 | LC_A_LA | 1 | TRUE | FALSE |
S10 | LC_A_LA | 2 | TRUE | FALSE |
S01 | LC_A_UT | 1 | FALSE | FALSE |
S09 | LC_A_UT | 2 | FALSE | FALSE |
S04 | LC_D_LA | 1 | TRUE | TRUE |
S12 | LC_D_LA | 2 | TRUE | TRUE |
S11 | LC_D_UT | 1 | FALSE | TRUE |
S03 | LC_D_UT | 2 | FALSE | TRUE |
This helps to see the similarities and differences between samples.
mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss$LA))))
plot(cmdscale(dist(t(m2))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=20-as.numeric(ss$strain), cex=4,
col=mycols)
text(cmdscale(dist(t(m2))), labels=colnames(m2) )
legend("right", legend=c("lac", "ctl"),
fill=c("pink","lightblue"), cex=1)
mtext("Circle='A', Diamond='D'")
Here we are using limma, which was originally designed for microarray data, but should work okay if the data are normally distributed.
We are looking for proteins whose abundance is changed by the treatment: lactic acid versus control. Cancel the effect of strain.
The full differential expression results are available as a Excel file called “differentialexpression.xlsx”.
design <- model.matrix(~ ss$strain + ss$LA)
design
## (Intercept) ss$strainTRUE ss$LATRUE
## 1 1 0 1
## 2 1 0 1
## 3 1 0 0
## 4 1 0 0
## 5 1 1 1
## 6 1 1 1
## 7 1 1 0
## 8 1 1 0
## attr(,"assign")
## [1] 0 1 2
## attr(,"contrasts")
## attr(,"contrasts")$`ss$strain`
## [1] "contr.treatment"
##
## attr(,"contrasts")$`ss$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(m2,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) ss$strainTRUE ss$LATRUE
## Down 0 435 217
## NotSig 1 444 823
## Up 1259 381 220
dm <- topTable(fit.reduced,coef=3, number = Inf)
head(dm,50) %>% kbl(caption = "Top 50 proteins after differential analysis") %>% kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
UPI0003C50192 Uncharacterized protein | 1.2185533 | 3.632099 | 21.371661 | 1.00e-07 | 0.0001090 | 8.739686 |
UPI00019F9AD9 Uncharacterized protein | 2.8352143 | 5.575400 | 17.302665 | 4.00e-07 | 0.0001693 | 7.386957 |
UPI0006EFB04E D-2-hydroxyacid dehydrogenase (Lactate dehydrogenase) | 0.8960643 | 8.207817 | 17.200591 | 4.00e-07 | 0.0001693 | 7.347658 |
UPI0006EEA493 LysR family transcriptional regulator | -1.7368811 | 3.308481 | -15.622743 | 8.00e-07 | 0.0002502 | 6.699798 |
UPI0006EEB579 SCP domain-containing protein | 0.8042276 | 6.248607 | 14.582890 | 1.30e-06 | 0.0002929 | 6.227041 |
UPI000280B884 CHAP domain-containing protein (Capsid protein) | 0.7848850 | 5.120403 | 14.238354 | 1.50e-06 | 0.0002929 | 6.061445 |
UPI0006EFEF39 Acetate kinase (EC 2.7.2.1) (Acetokinase) | 0.9072964 | 6.394221 | 14.003487 | 1.70e-06 | 0.0002929 | 5.945839 |
UPI0006D1F1A0 Antibiotic biosynthesis monooxygenase | 0.7786892 | 6.325596 | 13.836296 | 1.90e-06 | 0.0002929 | 5.862160 |
UPI0006EFBCD1 NA | 0.9605729 | 5.193541 | 13.308989 | 2.40e-06 | 0.0003414 | 5.590410 |
UPI0006F0ADA9 ABC transporter ATP-binding protein | -0.9690016 | 5.678968 | -12.656101 | 3.50e-06 | 0.0003527 | 5.236508 |
UPI0006F0CAFE Cell surface protein | 0.9590548 | 7.054032 | 12.655330 | 3.50e-06 | 0.0003527 | 5.236078 |
UPI0006EEE1EA DUF2207 domain-containing protein | -0.6803517 | 4.394857 | -12.620154 | 3.50e-06 | 0.0003527 | 5.216432 |
UPI00019F9E66 6-phospho-alpha-glucosidase | 1.0065759 | 4.583708 | 12.564307 | 3.60e-06 | 0.0003527 | 5.185118 |
UPI000280CD19 Bacteriocin biosynthesis cyclodehydratase, SagC family | -1.2083029 | 4.328382 | -12.149713 | 4.60e-06 | 0.0003866 | 4.947748 |
UPI0001D109E5 ABC transporter ATP-binding protein | -0.8140890 | 5.923749 | -12.145174 | 4.60e-06 | 0.0003866 | 4.945100 |
UPI0006EEFD55 NA | -1.3570985 | 3.592978 | -11.933159 | 5.20e-06 | 0.0003930 | 4.820222 |
UPI0006F1480A Adaptor protein MecA | -0.9166124 | 5.181364 | -11.821689 | 5.50e-06 | 0.0003930 | 4.753600 |
UPI0001D10921 FeS cluster biogenesis domain-containing protein | 1.1236718 | 5.823662 | 11.723933 | 5.90e-06 | 0.0003930 | 4.694615 |
UPI0006EE8744 Glycerol kinase (EC 2.7.1.30) (ATP:glycerol 3-phosphotransferase) (Glycerokinase) (GK) | 0.8760640 | 6.015547 | 11.685472 | 6.00e-06 | 0.0003930 | 4.671264 |
UPI00019F9BA2 RNA polymerase sigma factor SigA | -0.6272685 | 7.147367 | -11.621568 | 6.20e-06 | 0.0003930 | 4.632283 |
UPI0001D10C9D Carbamoyl phosphate synthase small chain (EC 6.3.5.5) (Carbamoyl phosphate synthetase glutamine chain) | 1.1182979 | 4.832018 | 11.182474 | 8.10e-06 | 0.0004878 | 4.358132 |
UPI0006EF6A19 NA | 0.7923608 | 5.513031 | 10.946746 | 9.40e-06 | 0.0005389 | 4.206252 |
UPI00019F9AC3 Single-stranded DNA-binding protein | 1.6082144 | 6.980588 | 10.803748 | 1.03e-05 | 0.0005472 | 4.112454 |
UPI0003C4FF63 Glycosidase | 0.5851856 | 7.216755 | 10.784182 | 1.04e-05 | 0.0005472 | 4.099521 |
UPI0006F0D43B NA | -0.8527692 | 7.931381 | -10.549601 | 1.21e-05 | 0.0006044 | 3.942555 |
UPI0006EEC4E7 NA | 0.5580260 | 6.825247 | 10.460407 | 1.28e-05 | 0.0006044 | 3.881935 |
UPI0003C4FE5C 5’-nucleotidase/2’,3’-cyclic phosphodiesterase | 0.5569871 | 5.981301 | 10.446509 | 1.30e-05 | 0.0006044 | 3.872443 |
UPI0006CF5989 Choloylglycine hydrolase (EC 3.5.1.24) (Linear amide C-N hydrolase) | 0.7759976 | 6.378880 | 10.347142 | 1.38e-05 | 0.0006221 | 3.804197 |
UPI00019F979E NCS2 family permease | -0.6511438 | 5.175233 | -10.207719 | 1.52e-05 | 0.0006480 | 3.707317 |
UPI0001EC2A56 NA | -0.4984581 | 5.902387 | -10.181465 | 1.54e-05 | 0.0006480 | 3.688925 |
UPI0001D10ACA DUF1827 family protein | 0.4683423 | 6.754028 | 9.619257 | 2.27e-05 | 0.0008702 | 3.283367 |
UPI00019F9F6D Co-chaperonin GroES (10 kDa chaperonin) (Chaperonin-10) (Cpn10) | 0.6702769 | 7.552614 | 9.614350 | 2.27e-05 | 0.0008702 | 3.279727 |
UPI0006F16580 Prolyl oligopeptidase family serine peptidase | 0.5693419 | 5.106987 | 9.611432 | 2.28e-05 | 0.0008702 | 3.277560 |
UPI0006F13160 NA | -0.5433226 | 6.081711 | -9.537122 | 2.40e-05 | 0.0008900 | 3.222179 |
UPI0006D25EFF Alpha/beta hydrolase | 0.8975537 | 6.262152 | 9.370095 | 2.71e-05 | 0.0009739 | 3.096159 |
UPI0006F00364 23S rRNA (Uracil(1939)-C(5))-methyltransferase RlmD (EC 2.1.1.190) | -0.5044046 | 6.341746 | -9.272998 | 2.90e-05 | 0.0010155 | 3.021904 |
UPI0006EE72EA NA | 0.8466036 | 4.746372 | 9.112451 | 3.26e-05 | 0.0011108 | 2.897481 |
UPI0006D290CD NUDIX hydrolase | 0.6496231 | 5.667444 | 8.816007 | 4.07e-05 | 0.0013492 | 2.662202 |
UPI0006D05E86 Carbamoyl phosphate synthase large chain (EC 6.3.4.16) (EC 6.3.5.5) (Carbamoyl phosphate synthetase ammonia chain) | 0.7790395 | 4.667275 | 8.673334 | 4.54e-05 | 0.0014035 | 2.546323 |
UPI00019F9F6C Chaperonin GroEL (EC 5.6.1.7) (60 kDa chaperonin) (Chaperonin-60) (Cpn60) | 0.6472228 | 8.459526 | 8.645810 | 4.63e-05 | 0.0014035 | 2.523766 |
UPI0006D0F543 [Citrate [pro-3S]-lyase] ligase (EC 6.2.1.22) | 0.8549000 | 6.476774 | 8.637783 | 4.66e-05 | 0.0014035 | 2.517175 |
UPI00019F9CF0 UPF0342 protein CP352_03820 | -0.6682714 | 6.782959 | -8.633198 | 4.68e-05 | 0.0014035 | 2.513408 |
UPI00019D0189 Protein-export membrane protein SecG | -0.9254545 | 3.978692 | -8.581272 | 4.87e-05 | 0.0014269 | 2.470614 |
UPI00019F9CBD Heat-inducible transcription repressor HrcA | 0.5255174 | 5.795517 | 8.481382 | 5.26e-05 | 0.0015071 | 2.387619 |
UPI0006EEC3E6 Glycerate kinase | 0.7873579 | 4.127738 | 8.441904 | 5.43e-05 | 0.0015199 | 2.354573 |
UPI00019F97D8 GTPase Der (GTP-binding protein EngA) | -0.4550549 | 6.854532 | -8.345894 | 5.85e-05 | 0.0015777 | 2.273615 |
UPI0006EFA65A ATP-binding cassette domain-containing protein (Sulfate ABC transporter ATP-binding protein) | -0.4872194 | 5.244354 | -8.304732 | 6.05e-05 | 0.0015777 | 2.238649 |
UPI0001B29D61 UvrABC system protein B (Protein UvrB) (Excinuclease ABC subunit B) | -0.4979885 | 5.777026 | -8.272971 | 6.20e-05 | 0.0015777 | 2.211561 |
UPI0006D25A56 Amino acid permease | -0.6513254 | 5.344608 | -8.262735 | 6.26e-05 | 0.0015777 | 2.202812 |
UPI0001B29DE0 N-acetylmuramidase | 1.4311514 | 6.817274 | 8.261628 | 6.26e-05 | 0.0015777 | 2.201865 |
dmo <- dm
dmo$UniParc <- rownames(dmo)
dmo <- dmo[,c(ncol(dmo),1:ncol(dmo)-1)]
write.xlsx(dmo, "differentialexpression1_lcrisp_bothstrains.xlsx")
Volcano chart
TOT=nrow(dm)
sig <- subset(dm,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm <- dm[!is.na(dm$P.Value),]
plot(dm$logFC,-log10(dm$P.Value),pch=19,
xlab="Log2 fold change", ylab="p-value")
points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")
Heatmap of top hits
top <- rownames(head(dm,40))
mx <- m2[rownames(m2) %in% top,]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
ColSideColors=mycols , col=my_palette, main="top 20 proteins")
Strain D samples only.
ss2 <- ss[which(ss$strain=="TRUE"),]
m22 <- m2[,which(colnames(m2) %in% rownames(ss2))]
m22f <- m22[which(unname(apply(m22,1,function(x) { length(which(is.na(x))) } )) <= 1),]
dim(m22)
## [1] 1260 4
dim(m22f)
## [1] 1260 4
design <- model.matrix(~ ss2$LA)
design
## (Intercept) ss2$LATRUE
## 1 1 1
## 2 1 1
## 3 1 0
## 4 1 0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`ss2$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(m22,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) ss2$LATRUE
## Down 1 156
## NotSig 0 883
## Up 1259 221
dm2 <- topTable(fit.reduced,coef=2, number = Inf)
head(dm2,50) %>% kbl(caption = "Top 50 proteins after differential analysis in strain D") %>% kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
UPI0006EFD634 Addiction module toxin RelE | 1.5775279 | 2.705036 | 24.88410 | 0.0000125 | 0.0044616 | 4.401738 |
UPI0006EE62B5 FAD-binding oxidoreductase | 2.2380965 | 4.570619 | 24.08998 | 0.0000142 | 0.0044616 | 4.282673 |
UPI00019F9AD9 Uncharacterized protein | 2.4537498 | 5.468400 | 23.09282 | 0.0000169 | 0.0044616 | 4.124448 |
UPI0006EEA493 LysR family transcriptional regulator | -1.6835305 | 3.718481 | -22.69018 | 0.0000181 | 0.0044616 | 4.057635 |
UPI00019F9F24 Uncharacterized protein | -1.3698080 | 4.614358 | -22.45381 | 0.0000189 | 0.0044616 | 4.017592 |
UPI00019F9974 Aggregation promoting protein | 1.6393521 | 6.492835 | 21.48404 | 0.0000227 | 0.0044616 | 3.846647 |
UPI00019F9AC3 Single-stranded DNA-binding protein | 1.9690084 | 7.237968 | 21.01575 | 0.0000248 | 0.0044616 | 3.760074 |
UPI0006EE6481 NA | -1.4682391 | 4.442939 | -19.77363 | 0.0000318 | 0.0050021 | 3.516666 |
UPI0006EEFCA3 Iron export ABC transporter permease subunit FetB | 1.2286387 | 4.819856 | 18.63024 | 0.0000405 | 0.0056643 | 3.273265 |
UPI0006EFFAEF NA | -1.0836296 | 3.616578 | -17.73148 | 0.0000495 | 0.0058635 | 3.067469 |
UPI0001D10BCD Acetate kinase (EC 2.7.2.1) (Acetokinase) | 1.2830067 | 7.233350 | 17.58140 | 0.0000512 | 0.0058635 | 3.031768 |
UPI0006D00323 Cation transporter | -1.0577670 | 4.424871 | -17.19590 | 0.0000560 | 0.0058807 | 2.938229 |
UPI00019F989F DNA-entry nuclease (DNA/RNA non-specific endonuclease) | 1.5962839 | 5.033110 | 16.67168 | 0.0000635 | 0.0061542 | 2.806613 |
UPI0006F0ADA9 ABC transporter ATP-binding protein | -1.0093253 | 6.476739 | -16.06200 | 0.0000738 | 0.0063770 | 2.646786 |
UPI0006EF675B Surface layer protein A domain-containing protein | 3.5463467 | 2.860442 | 15.84365 | 0.0000780 | 0.0063770 | 2.587686 |
UPI0006EEC3E6 Glycerate kinase | 0.9797904 | 4.106890 | 15.56909 | 0.0000838 | 0.0063770 | 2.511926 |
UPI0006D0A27A Uncharacterized protein | 1.9741167 | 6.490131 | 15.25429 | 0.0000910 | 0.0063770 | 2.423011 |
UPI0001CA7E10 Site-specific integrase (Tyrosine-type recombinase/integrase) | 1.0058834 | 3.855084 | 15.09073 | 0.0000950 | 0.0063770 | 2.375924 |
UPI0001D109BD SEC10/PgrA surface exclusion domain-containing protein | 1.7751133 | 3.684581 | 15.04665 | 0.0000962 | 0.0063770 | 2.363126 |
UPI0003C50192 Uncharacterized protein | 1.2096006 | 3.347472 | 14.58808 | 0.0001090 | 0.0066355 | 2.227256 |
UPI000280AF2C Uncharacterized protein | -0.9435167 | 6.089776 | -14.46334 | 0.0001128 | 0.0066355 | 2.189405 |
UPI0001B29848 Polysaccharide biosynthesis protein | 0.8855193 | 2.693559 | 14.25752 | 0.0001195 | 0.0066355 | 2.126091 |
UPI000280CD19 Bacteriocin biosynthesis cyclodehydratase, SagC family | -1.0083906 | 5.231619 | -14.11873 | 0.0001243 | 0.0066355 | 2.082784 |
UPI0001B29DE0 N-acetylmuramidase | 1.8142933 | 7.168071 | 14.06155 | 0.0001264 | 0.0066355 | 2.064795 |
UPI000280B884 CHAP domain-containing protein (Capsid protein) | 0.8571945 | 4.998360 | 13.61728 | 0.0001438 | 0.0067476 | 1.922024 |
UPI0006F0683A ABC-F type ribosomal protection protein | 0.9249169 | 3.572097 | 13.31941 | 0.0001572 | 0.0067476 | 1.823221 |
UPI0006F10FA9 FAD-dependent oxidoreductase | 1.0857286 | 6.994510 | 13.30546 | 0.0001579 | 0.0067476 | 1.818532 |
UPI00019F9838 Lysin | 1.0991145 | 6.436472 | 13.29654 | 0.0001583 | 0.0067476 | 1.815529 |
UPI00019F994B Large ribosomal subunit protein bL35 | -1.5073564 | 5.012247 | -13.21458 | 0.0001623 | 0.0067476 | 1.787838 |
UPI00019F9988 Cell wall-active antibiotics response protein | -1.3164237 | 1.849238 | -13.16499 | 0.0001648 | 0.0067476 | 1.770987 |
UPI0001D10AE8 citrate lyase holo-[acyl-carrier protein] synthase (EC 2.7.7.61) | -1.0480848 | 2.713043 | -13.04826 | 0.0001708 | 0.0067476 | 1.731036 |
UPI0006F0FF2E HlyC/CorC family transporter | 0.8113734 | 5.967212 | 13.03771 | 0.0001714 | 0.0067476 | 1.727403 |
UPI0003C4ED4B Maltose/maltodextrin transport system permease protein | 0.9206299 | 5.899261 | 12.83499 | 0.0001825 | 0.0069686 | 1.656970 |
UPI0001D10A52 GatB/YqeY domain-containing protein | 0.9925822 | 7.166126 | 12.70414 | 0.0001902 | 0.0070452 | 1.610830 |
UPI0006EFB04E D-2-hydroxyacid dehydrogenase (Lactate dehydrogenase) | 0.8498222 | 7.949839 | 12.61401 | 0.0001957 | 0.0070452 | 1.578733 |
UPI0006EEB579 SCP domain-containing protein | 0.8701531 | 6.155717 | 12.38305 | 0.0002108 | 0.0072987 | 1.495289 |
UPI0006EFEF39 Acetate kinase (EC 2.7.2.1) (Acetokinase) | 0.9810643 | 6.319723 | 12.29672 | 0.0002168 | 0.0072987 | 1.463647 |
UPI00019F9A03 Integral membrane protein | -0.8695559 | 3.613189 | -12.21427 | 0.0002227 | 0.0072987 | 1.433194 |
UPI0006EF99FA NA | 0.9811628 | 6.016539 | 12.12911 | 0.0002290 | 0.0072987 | 1.401498 |
UPI00019F9D2F ABC transporter permease (Efflux ABC transporter, permease protein) (FtsX-like permease family protein) | 1.0766064 | 4.813435 | 12.03671 | 0.0002362 | 0.0072987 | 1.366827 |
UPI0006D2A7D6 Beta-phosphoglucomutase (EC 5.4.2.6) | 1.2282356 | 6.177199 | 12.00608 | 0.0002386 | 0.0072987 | 1.355269 |
UPI0006D191CC Beta-lactamase family protein (Serine hydrolase) | 1.5665935 | 2.125130 | 11.94778 | 0.0002433 | 0.0072987 | 1.333175 |
UPI0006EE9FBF NA | 0.7248417 | 6.022945 | 11.82255 | 0.0002538 | 0.0074363 | 1.285314 |
UPI0006EE6EBB Cell surface protein | 1.0454276 | 5.794653 | 11.66150 | 0.0002681 | 0.0075429 | 1.222936 |
UPI0001D10B75 ABC transporter ATP-binding protein | 0.9575419 | 6.673853 | 11.60016 | 0.0002738 | 0.0075429 | 1.198927 |
UPI0001B29889 Endonuclease III (EC 4.2.99.18) (DNA-(apurinic or apyrimidinic site) lyase) | 0.8249882 | 5.560359 | 11.58371 | 0.0002754 | 0.0075429 | 1.192464 |
UPI0006F1480A Adaptor protein MecA | -0.7818784 | 5.465943 | -11.40528 | 0.0002930 | 0.0076761 | 1.121721 |
UPI0001D10CD4 Cof-type HAD-IIB family hydrolase | -1.0136306 | 3.777249 | -11.38672 | 0.0002949 | 0.0076761 | 1.114289 |
UPI0006F0893B NA | 1.8324976 | 4.344964 | 11.34541 | 0.0002992 | 0.0076761 | 1.097708 |
UPI0006D0F543 [Citrate [pro-3S]-lyase] ligase (EC 6.2.1.22) | 1.0028106 | 6.987480 | 11.24916 | 0.0003096 | 0.0076761 | 1.058820 |
dm2o <- dm2
dm2o$UniParc <- rownames(dm2o)
dm2o <- dm2o[,c(ncol(dm2o),1:ncol(dm2o)-1)]
write.xlsx(dm2o, "differentialexpression2_lcrisp_strainD.xlsx")
Volcano chart
TOT=nrow(dm2)
sig <- subset(dm2,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm2 <- dm2[!is.na(dm2$P.Value),]
mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss2$LA))))
plot(dm2$logFC,-log10(dm2$P.Value),pch=19,
xlab="Log2 fold change", ylab="p-value")
points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")
Heatmap of top hits
top <- rownames(head(dm2,40))
mx <- m22f[rownames(m22f) %in% top,]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
ColSideColors=mycols , col=my_palette, main="top 20 proteins in strain D")
Strain A samples only.
ss3 <- ss[which(ss$strain=="FALSE"),]
m23 <- m2[,which(colnames(m2) %in% rownames(ss3))]
m23f <- m23[which(unname(apply(m23,1,function(x) { length(which(is.na(x))) } )) <= 1),]
dim(m23)
## [1] 1260 4
dim(m23f)
## [1] 1260 4
design <- model.matrix(~ ss3$LA)
design
## (Intercept) ss3$LATRUE
## 1 1 1
## 2 1 1
## 3 1 0
## 4 1 0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`ss3$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(m23,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) ss3$LATRUE
## Down 0 226
## NotSig 1 808
## Up 1259 226
dm3 <- topTable(fit.reduced,coef=2, number = Inf)
head(dm3,50) %>% kbl(caption = "Top 50 proteins after differential analysis in strain A") %>% kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
UPI00019F9AD9 Uncharacterized protein | 3.2166788 | 5.682400 | 30.04365 | 0.0000021 | 0.0014603 | 6.015127 |
UPI0006EF450B Large ribosomal subunit protein bL33 | 1.8487318 | 4.993582 | 27.46555 | 0.0000032 | 0.0014603 | 5.661758 |
UPI000312EB68 ABC transporter, ATP-binding protein | -2.6268776 | 4.725038 | -25.77893 | 0.0000043 | 0.0014603 | 5.401901 |
UPI0006D0B4B4 NAD(P)H-binding protein | 2.0050191 | 5.864568 | 24.46485 | 0.0000054 | 0.0014603 | 5.181434 |
UPI0001B298F1 Bacteriocin biosynthesis docking scaffold, SagD family (Streptolysin associated protein SagD) | -1.5407203 | 4.422143 | -23.24604 | 0.0000068 | 0.0014603 | 4.961245 |
UPI0006D1560C NUDIX domain-containing protein | 1.4126586 | 4.025241 | 22.64043 | 0.0000077 | 0.0014603 | 4.845730 |
UPI0006EEFD55 NA | -1.4050667 | 3.005881 | -22.34940 | 0.0000081 | 0.0014603 | 4.788687 |
UPI0006F08471 Bifunctional metallophosphatase/5’-nucleotidase | -1.8300559 | 5.380552 | -19.81708 | 0.0000139 | 0.0017276 | 4.246134 |
UPI00019F9D2D UPF0122 protein AEL95_04395 | -1.6118108 | 4.487226 | -19.64729 | 0.0000145 | 0.0017276 | 4.206515 |
UPI0006F070A6 Glycosyltransferase family 2 protein | -1.4613312 | 4.398040 | -19.50830 | 0.0000150 | 0.0017276 | 4.173753 |
UPI0001B29C39 Multicopper oxidase | 1.4883553 | 6.472084 | 19.47177 | 0.0000151 | 0.0017276 | 4.165092 |
UPI0003C50192 Uncharacterized protein | 1.2275060 | 3.916727 | 18.28665 | 0.0000200 | 0.0020995 | 3.872375 |
UPI00019F99E2 Transcriptional regulator | -1.3615736 | 3.707602 | -17.80118 | 0.0000226 | 0.0021236 | 3.745503 |
UPI0006F18862 LacI family transcriptional regulator | -1.1175177 | 4.008912 | -17.52154 | 0.0000242 | 0.0021236 | 3.670463 |
UPI00019F9C5D Cell division protein SepF | -1.2462487 | 6.063781 | -17.20694 | 0.0000263 | 0.0021236 | 3.584266 |
UPI0001D10921 FeS cluster biogenesis domain-containing protein | 1.1867401 | 6.073076 | 17.10660 | 0.0000270 | 0.0021236 | 3.556369 |
UPI00019F9AC3 Single-stranded DNA-binding protein | 1.2474204 | 6.723209 | 16.70320 | 0.0000300 | 0.0021243 | 3.442177 |
UPI0006EE9387 Abasic site processing protein (EC 3.4.-.-) | 1.0964979 | 4.123204 | 16.66117 | 0.0000303 | 0.0021243 | 3.430085 |
UPI0006F07C1D NCS2 family permease | 1.0872432 | 2.719199 | 16.18323 | 0.0000346 | 0.0022925 | 3.289994 |
UPI0006F0CAFE Cell surface protein | 1.0240510 | 7.525658 | 15.83775 | 0.0000381 | 0.0023780 | 3.185623 |
UPI0003C4DFF9 Uncharacterized protein | 1.0672302 | 3.807732 | 15.50272 | 0.0000419 | 0.0023780 | 3.081816 |
UPI0001B29D0C Uncharacterized protein | -1.0288092 | 5.893181 | -15.48604 | 0.0000421 | 0.0023780 | 3.076576 |
UPI0003C51696 Integral membrane protein | 0.9557050 | 4.431694 | 15.24233 | 0.0000452 | 0.0023780 | 2.999303 |
UPI0006EE6636 Sucrose-6-phosphate hydrolase (EC 3.2.1.26) (Invertase) | -1.3261034 | 4.615558 | -15.23366 | 0.0000453 | 0.0023780 | 2.996528 |
UPI0006EFB04E D-2-hydroxyacid dehydrogenase (Lactate dehydrogenase) | 0.9423065 | 8.465795 | 14.89366 | 0.0000501 | 0.0025248 | 2.886220 |
UPI000280CD19 Bacteriocin biosynthesis cyclodehydratase, SagC family | -1.4082152 | 3.425146 | -14.33902 | 0.0000593 | 0.0028753 | 2.699908 |
UPI0006F14A06 phosphomevalonate kinase (EC 2.7.4.2) | -0.9542321 | 3.766227 | -13.99565 | 0.0000661 | 0.0030844 | 2.580405 |
UPI0006F0D43B NA | -0.9936575 | 7.599597 | -13.86314 | 0.0000690 | 0.0031029 | 2.533402 |
UPI00004C6CBB DNA-directed RNA polymerase subunit omega (RNAP omega subunit) (EC 2.7.7.6) (RNA polymerase omega subunit) (Transcriptase subunit omega) | -1.1693329 | 6.197811 | -13.73136 | 0.0000719 | 0.0031259 | 2.486156 |
UPI0001EF562B Hd superfamily hydrolase | 0.8197912 | 6.979256 | 13.41768 | 0.0000797 | 0.0032340 | 2.371621 |
UPI0003C50430 Appr-1-p processing domain protein | 0.9996051 | 4.854561 | 13.39465 | 0.0000803 | 0.0032340 | 2.363095 |
UPI00019F9E66 6-phospho-alpha-glucosidase | 1.0624082 | 5.210872 | 13.24740 | 0.0000844 | 0.0032340 | 2.308194 |
UPI0006F1BB34 Aminopeptidase P family protein | 0.8736938 | 5.539827 | 13.21997 | 0.0000852 | 0.0032340 | 2.297890 |
UPI0006EF73D7 AAA domain-containing protein (ATP-dependent Clp protease ATP-binding subunit) | 1.3235494 | 6.849573 | 13.14762 | 0.0000873 | 0.0032340 | 2.270605 |
UPI0006D1F499 Uncharacterized protein | 1.5986464 | 3.425981 | 12.65596 | 0.0001033 | 0.0036856 | 2.080706 |
UPI0003C4F885 Nucleoside-diphosphate-sugar epimerase | 1.1632576 | 4.471481 | 12.60242 | 0.0001053 | 0.0036856 | 2.059545 |
UPI0006F1480A Adaptor protein MecA | -1.0513464 | 4.896785 | -12.31614 | 0.0001166 | 0.0039702 | 1.944699 |
UPI0006EEB579 SCP domain-containing protein | 0.7383021 | 6.341498 | 12.12000 | 0.0001252 | 0.0041504 | 1.864343 |
UPI0001D1087D Tyrosine recombinase XerD | -0.8564454 | 5.426426 | -11.77940 | 0.0001420 | 0.0045869 | 1.721437 |
UPI00019F979E NCS2 family permease | -0.7395568 | 5.551160 | -11.66332 | 0.0001483 | 0.0046721 | 1.671728 |
UPI0006D1F1A0 Antibiotic biosynthesis monooxygenase | 0.7354339 | 6.650952 | 11.45713 | 0.0001605 | 0.0047642 | 1.582124 |
UPI0006CFE894 MgtC/SapB family protein | 0.8257886 | 3.340397 | 11.44257 | 0.0001614 | 0.0047642 | 1.575737 |
UPI0001B29C41 NA | -1.0330207 | 4.934275 | -11.39417 | 0.0001644 | 0.0047642 | 1.554429 |
UPI0006EFBCD1 NA | 0.9611916 | 4.743661 | 11.36366 | 0.0001664 | 0.0047642 | 1.540948 |
UPI0006EFEF39 Acetate kinase (EC 2.7.2.1) (Acetokinase) | 0.8335285 | 6.468718 | 11.12373 | 0.0001828 | 0.0051175 | 1.433598 |
UPI0001D109E5 ABC transporter ATP-binding protein | -0.8022680 | 5.060041 | -11.06072 | 0.0001874 | 0.0051330 | 1.405006 |
UPI000280B884 CHAP domain-containing protein (Capsid protein) | 0.7125756 | 5.242446 | 11.00187 | 0.0001918 | 0.0051431 | 1.378150 |
UPI0006CFD7B7 Aquaporin (MIP family channel protein) | -1.0192124 | 2.918010 | -10.91104 | 0.0001990 | 0.0051492 | 1.336403 |
UPI00019F9D22 LexA repressor (EC 3.4.21.88) | -1.2120114 | 5.374712 | -10.89515 | 0.0002002 | 0.0051492 | 1.329061 |
UPI0006EE903B Amino acid ABC transporter permease | -1.3003315 | 4.846551 | -10.80114 | 0.0002080 | 0.0051638 | 1.285407 |
dm3o <- dm3
dm3o$UniParc <- rownames(dm3o)
dm3o <- dm3o[,c(ncol(dm3o),1:ncol(dm3o)-1)]
write.xlsx(dm3o, "differentialexpression3_lcrisp_strainA.xlsx")
Volcano chart
TOT=nrow(dm3)
sig <- subset(dm3,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm3 <- dm3[!is.na(dm2$P.Value),]
mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss3$LA))))
plot(dm3$logFC,-log10(dm3$P.Value),pch=19,
xlab="Log2 fold change", ylab="p-value")
points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")
Heatmap of top hits
top <- rownames(head(dm3,40))
mx <- m23f[rownames(m23f) %in% top,]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
ColSideColors=mycols , col=my_palette, main="top 20 proteins in strain A")
Make a Euler diagram of the genes.
dm2up <- rownames(subset(dm2,adj.P.Val<0.05 & logFC > 0))
dm2dn <- rownames(subset(dm2,adj.P.Val<0.05 & logFC < 0))
dm3up <- rownames(subset(dm3,adj.P.Val<0.05 & logFC > 0))
dm3dn <- rownames(subset(dm3,adj.P.Val<0.05 & logFC < 0))
v1 <- list("Strain D up"=dm2up, "Strain D down"=dm2dn,
"Strain A up"=dm3up,"Strain A down"=dm3dn)
plot(euler(v1),quantities = TRUE)
Make a scatter plotr of the ranks.
While the results are not completely concoordant between the strains, there is a lot of similarity.
mg <- merge(dm2,dm3,by=0)
rownames(mg) <- mg$Row.names
mg <- mg[,c("t.x","t.y")]
head(mg)
## t.x
## UPI000045224F Histidine phosphatase family protein (Phosphoglycerate mutase) 0.5172830
## UPI0000452250 Histidine phosphatase family protein (Phosphoglycerate mutase) 4.5393027
## UPI00004B2864 Glyceraldehyde-3-phosphate dehydrogenase (EC 1.2.1.-) 2.6003691
## UPI00004C6804 Small ribosomal subunit protein bS18 2.2663724
## UPI00004C690C Small ribosomal subunit protein uS10 -1.9979086
## UPI00004C6915 Large ribosomal subunit protein uL29 0.1770106
## t.y
## UPI000045224F Histidine phosphatase family protein (Phosphoglycerate mutase) 4.181907
## UPI0000452250 Histidine phosphatase family protein (Phosphoglycerate mutase) -2.052452
## UPI00004B2864 Glyceraldehyde-3-phosphate dehydrogenase (EC 1.2.1.-) 3.745995
## UPI00004C6804 Small ribosomal subunit protein bS18 2.391652
## UPI00004C690C Small ribosomal subunit protein uS10 -3.518867
## UPI00004C6915 Large ribosomal subunit protein uL29 -1.621014
plot(mg,pch=19,cex=0.4,main="limma t-stat",xlab="Strain D",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)
mylm <- lm(mg)
abline(mylm,lty=2,col="red",lwd=2)
mylm
##
## Call:
## lm(formula = mg)
##
## Coefficients:
## (Intercept) t.y
## 0.4057 0.4048
cor.test(mg[,1],mg[,2])
##
## Pearson's product-moment correlation
##
## data: mg[, 1] and mg[, 2]
## t = 16.523, df = 1258, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3758233 0.4666273
## sample estimates:
## cor
## 0.4222841
rmg <- apply(mg,2, function(x) {
rnk <- rank(x)
NUMNEG=length(which(x<0))
rnk <- rnk - NUMNEG
return(rnk)
})
plot(rmg,pch=19,cex=0.4,main="rank of limma t-stat",xlab="Strain D",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)
mylm <- lm(as.data.frame(rmg))
abline(mylm,lty=2,col="red",lwd=2)
mylm
##
## Call:
## lm(formula = as.data.frame(rmg))
##
## Coefficients:
## (Intercept) t.y
## 23.0023 0.4469
cor.test(rmg[,1],rmg[,2])
##
## Pearson's product-moment correlation
##
## data: rmg[, 1] and rmg[, 2]
## t = 17.719, df = 1258, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4015929 0.4900377
## sample estimates:
## cor
## 0.4469067
tr <- nrow(subset(mg,t.x>0 & t.y>0))
br <- nrow(subset(mg,t.x>0 & t.y<0))
tl <- nrow(subset(mg,t.x<0 & t.y>0))
bl <- nrow(subset(mg,t.x<0 & t.y<0))
tr
## [1] 402
br
## [1] 240
tl
## [1] 204
bl
## [1] 414
concordant = tr + bl
message(paste("concordant proteins:",concordant))
## concordant proteins: 816
discordant = br + tl
message(paste("discordant proteins:",discordant))
## discordant proteins: 444
x2 <- read.table("20240611_164801_P24_0717_E1_PaluaE_Updated_Report_iners_only_V2.tsv",
header=TRUE,fill=TRUE,sep="\t",quote = "")
rownames(x2) <- paste(x2$PG.ProteinGroups,x2$PG.ProteinDescriptions,x2$PG.Genes,sep="|")
head(x2,2)
## PG.ProteinGroups PG.ProteinAccessions
## A0A6G5W5S4|pullulanase|glgUi A0A6G5W5S4 A0A6G5W5S4
## A0A6G5W607|pullulanase (Fragment)|glgUi A0A6G5W607 A0A6G5W607
## PG.Genes PG.ProteinDescriptions
## A0A6G5W5S4|pullulanase|glgUi glgUi pullulanase
## A0A6G5W607|pullulanase (Fragment)|glgUi glgUi pullulanase (Fragment)
## PG.UniProtIds
## A0A6G5W5S4|pullulanase|glgUi A0A6G5W5S4
## A0A6G5W607|pullulanase (Fragment)|glgUi A0A6G5W607
## PG.FastaHeaders
## A0A6G5W5S4|pullulanase|glgUi >tr|A0A6G5W5S4|A0A6G5W5S4_9LACO pullulanase OS=Lactobacillus iners OX=147802 GN=glgUi PE=3 SV=1
## A0A6G5W607|pullulanase (Fragment)|glgUi >tr|A0A6G5W607|A0A6G5W607_9LACO pullulanase (Fragment) OS=Lactobacillus iners OX=147802 GN=glgUi PE=3 SV=1
## X.1..AST1_JS_20240405_P24_0717_E1_S08.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.2..AST1_JS_20240405_P24_0717_E1_S16.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.3..AST1_JS_20240405_P24_0717_E1_S07.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.4..AST1_JS_20240405_P24_0717_E1_S15.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.5..AST1_JS_20240405_P24_0717_E1_S06_20240426003324.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.6..AST1_JS_20240405_P24_0717_E1_S14.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.7..AST1_JS_20240405_P24_0717_E1_S05.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.8..AST1_JS_20240405_P24_0717_E1_S13.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi 14
## A0A6G5W607|pullulanase (Fragment)|glgUi 166
## X.1..AST1_JS_20240405_P24_0717_E1_S08.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 280.6738
## A0A6G5W607|pullulanase (Fragment)|glgUi 1004.2641
## X.2..AST1_JS_20240405_P24_0717_E1_S16.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 933.0865
## A0A6G5W607|pullulanase (Fragment)|glgUi 2914.1511
## X.3..AST1_JS_20240405_P24_0717_E1_S07.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 270.0818
## A0A6G5W607|pullulanase (Fragment)|glgUi 895.5801
## X.4..AST1_JS_20240405_P24_0717_E1_S15.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 1255.454
## A0A6G5W607|pullulanase (Fragment)|glgUi 3614.425
## X.5..AST1_JS_20240405_P24_0717_E1_S06_20240426003324.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 576.503
## A0A6G5W607|pullulanase (Fragment)|glgUi 3977.755
## X.6..AST1_JS_20240405_P24_0717_E1_S14.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 708.0147
## A0A6G5W607|pullulanase (Fragment)|glgUi 3197.5947
## X.7..AST1_JS_20240405_P24_0717_E1_S05.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 1176.233
## A0A6G5W607|pullulanase (Fragment)|glgUi 3996.819
## X.8..AST1_JS_20240405_P24_0717_E1_S13.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi 785.5422
## A0A6G5W607|pullulanase (Fragment)|glgUi 3855.5637
x3 <- x2[,15:22]
colnames(x3) <- sapply(strsplit(gsub("_E1_"," ",colnames(x3))," "),"[[",2)
colnames(x3) <- substr(colnames(x3), 0, 3)
head(x3,2)
## S08 S16 S07 S15
## A0A6G5W5S4|pullulanase|glgUi 280.6738 933.0865 270.0818 1255.454
## A0A6G5W607|pullulanase (Fragment)|glgUi 1004.2641 2914.1511 895.5801 3614.425
## S06 S14 S05 S13
## A0A6G5W5S4|pullulanase|glgUi 576.503 708.0147 1176.233 785.5422
## A0A6G5W607|pullulanase (Fragment)|glgUi 3977.755 3197.5947 3996.819 3855.5637
x3 <- log(x3)
head(x3,2)
## S08 S16 S07 S15
## A0A6G5W5S4|pullulanase|glgUi 5.637193 6.838498 5.598725 7.135253
## A0A6G5W607|pullulanase (Fragment)|glgUi 6.912010 7.977334 6.797472 8.192688
## S06 S14 S05 S13
## A0A6G5W5S4|pullulanase|glgUi 6.356981 6.562465 7.070072 6.666374
## A0A6G5W607|pullulanase (Fragment)|glgUi 8.288473 8.070154 8.293254 8.257273
message("Number of NA values in each row")
## Number of NA values in each row
table(unname(apply(x3,1,function(x) { length(which(is.na(x))) } )))
##
## 0 1 2 3 4 5 6 7 8
## 1014 90 60 74 263 84 67 39 1
message("Number of NA values in each column")
## Number of NA values in each column
apply(x3,2,function(x) { length(which(is.na(x))) } )
## S08 S16 S07 S15 S06 S14 S05 S13
## 314 318 205 201 460 454 322 313
message("Remove rows with any NA values")
## Remove rows with any NA values
x3 <- x3[apply(x3,1,function(x) { length(which(is.na(x)))<1 } ),]
dim(x3)
## [1] 1014 8
# remove human proteins with "Cont_" in the name
x3 <- x3[grep("Cont_",rownames(x3),invert=TRUE),]
write.xlsx(m3, "quantifications_liners.xlsx")
ss2 <- read.table("samplesheet2.tsv")
ss2 %>% kbl(caption = "Sample sheet for iners") %>% kable_paper("hover", full_width = F)
condition | replicate | LA | strain | |
---|---|---|---|---|
S08 | LI_S_LA | 1 | TRUE | FALSE |
S16 | Li_S_LA | 2 | TRUE | FALSE |
S07 | Li_S_UT | 1 | FALSE | FALSE |
S15 | Li_S_UT | 2 | FALSE | FALSE |
S06 | Li_A_LA | 1 | TRUE | TRUE |
S14 | Li_A_LA | 2 | TRUE | TRUE |
S05 | Li_A_UT | 1 | FALSE | TRUE |
S13 | Li_A_UT | 2 | FALSE | TRUE |
This helps to see the similarities and differences between samples.
mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss2$LA)+1)))
plot(cmdscale(dist(t(x3))), xlab="Coordinate 1", ylab="Coordinate 2",
type = "p",bty="n",pch=20-(as.numeric(ss2$strain)+1), cex=4,
col=mycols,main="L. iners")
text(cmdscale(dist(t(x3))), labels=colnames(x3) )
legend("bottomright", legend=c("lac", "ctl"),
fill=c("pink","lightblue"), cex=1)
mtext("Circle='S/7', Diamond='A'")
Here we are using limma, which was originally designed for microarray data, but should work okay if the data are normally distributed.
We are looking for proteins whose abundance is changed by the treatment: lactic acid versus control. Cancel the effect of strain.
The full differential expression results are available as a Excel file called “differentialexpression.xlsx”.
design <- model.matrix(~ ss2$strain + ss2$LA)
design
## (Intercept) ss2$strainTRUE ss2$LATRUE
## 1 1 0 1
## 2 1 0 1
## 3 1 0 0
## 4 1 0 0
## 5 1 1 1
## 6 1 1 1
## 7 1 1 0
## 8 1 1 0
## attr(,"assign")
## [1] 0 1 2
## attr(,"contrasts")
## attr(,"contrasts")$`ss2$strain`
## [1] "contr.treatment"
##
## attr(,"contrasts")$`ss2$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(x3,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) ss2$strainTRUE ss2$LATRUE
## Down 0 124 0
## NotSig 0 718 942
## Up 946 104 4
dm <- topTable(fit.reduced,coef=3, number = Inf)
head(dm,50) %>% kbl(caption = "L. iners Top 50 proteins after differential analysis") %>% kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
A0A6G7BAD8;C8PBF9;E1NP85;E1NU18|Large ribosomal subunit protein bL17|rplQ | 0.6084891 | 9.658645 | 8.779976 | 0.0000426 | 0.0322545 | 2.6574575 |
E1NTJ7|ATPase family associated with various cellular activities (AAA)|tlyB | 1.1235433 | 6.859346 | 8.017863 | 0.0000775 | 0.0322545 | 2.1328950 |
C8PBD4|Large ribosomal subunit protein uL23|rplW | 0.5468029 | 9.969843 | 7.495354 | 0.0001201 | 0.0322545 | 1.7379522 |
iRT_Peptides_Fusion|| | 1.3133707 | 9.592667 | 7.349541 | 0.0001364 | 0.0322545 | 1.6221915 |
E1NQ38|L-lactate dehydrogenase|ldh | 0.5238036 | 10.219760 | 6.416689 | 0.0003225 | 0.0610236 | 0.8190545 |
C8PAK4|ATP-dependent helicase/nuclease subunit A|addA | -0.6370563 | 5.890215 | -5.967885 | 0.0005047 | 0.0682333 | 0.3908851 |
A0A6G7BA31;E1NPA9;E1NU42|Large ribosomal subunit protein uL2|rplB | 0.5498280 | 9.858048 | 5.490973 | 0.0008343 | 0.0682333 | -0.0966346 |
A0A6G7B7J6;E1NP20|DNA replication initiation control protein YabA;Initiation-control protein YabA|yabA;HMPREF9212_0021 | 0.8275117 | 7.026795 | 5.377919 | 0.0009439 | 0.0682333 | -0.2173414 |
A0A6G7BAK1;C8PDK6;E1NN69;E1NU70|Small ribosomal subunit protein uS4|rpsD | 0.4734114 | 10.053833 | 5.360036 | 0.0009627 | 0.0682333 | -0.2366188 |
A0A6G7B9K9;C8PDM0;E1NNA3;E1NSY3|ATP synthase subunit c|atpE | 0.6389176 | 9.091449 | 5.319976 | 0.0010062 | 0.0682333 | -0.2799886 |
A0A6G7B7L2;C8PCC1;E1NQZ6;E1NTA8|Glycerol-3-phosphate acyltransferase|plsY | 1.5830470 | 6.119509 | 5.243050 | 0.0010961 | 0.0682333 | -0.3639854 |
A0A6G7B8A1;C8PD20;E1NMV0;E1NTM3|Probable transcriptional regulatory protein G6Z83_02215;Probable transcriptional regulatory protein HMPREF0520_0990;Probable transcriptional regulatory protein HMPREF9212_1469;Probable transcriptional regulatory protein HMPREF9211_1381|G6Z83_02215;HMPREF0520_0990;HMPREF9212_1469;HMPREF9211_1381 | 0.5924151 | 8.842778 | 5.204975 | 0.0011438 | 0.0682333 | -0.4059101 |
A0A6G7B9V3;C8PBD2;E1NPB2;E1NU45|Large ribosomal subunit protein uL3|rplC | 0.4220668 | 9.786631 | 5.201545 | 0.0011483 | 0.0682333 | -0.4096988 |
A0A6G7B9J2|YSIRK-type signal peptide-containing protein|G6Z83_05695 | 0.5993022 | 10.063905 | 5.199370 | 0.0011511 | 0.0682333 | -0.4121015 |
A0A6G7BAH4|YSIRK-type signal peptide-containing protein|G6Z83_05700 | 0.6804659 | 10.195850 | 5.198324 | 0.0011524 | 0.0682333 | -0.4132575 |
A0A6G7BA20;C8PBE7;E1NP97;E1NU30|Large ribosomal subunit protein uL6|rplF | 0.3375948 | 10.288888 | 5.159023 | 0.0012046 | 0.0682333 | -0.4568189 |
A0A6G7B6W1;C8PB11;E1NM32;E1NSA1|Small ribosomal subunit protein bS6|rpsF | 0.4731390 | 8.926827 | 5.143283 | 0.0012262 | 0.0682333 | -0.4743350 |
A0A6G7BA57|Lysozyme|G6Z83_05710 | -0.9404139 | 6.806758 | -5.028137 | 0.0013980 | 0.0734725 | -0.6036964 |
C8PBS7;E1NPZ2|PTS sorbitol transporter subunit IIA;PTS system glucitol/sorbitol-specific IIA component|HMPREF0520_0547;HMPREF9212_0324 | 0.4341482 | 7.442156 | 4.850936 | 0.0017168 | 0.0838591 | -0.8070027 |
A0A6G7BAS1;C8PBE1;E1NPA3;E1NU36|Small ribosomal subunit protein uS17|rpsQ | 0.5430052 | 7.602969 | 4.815873 | 0.0017890 | 0.0838591 | -0.8478424 |
A0A6G7B8R5;E1NTM4|Fic family protein;Death-on-curing family protein|G6Z83_02210;HMPREF9211_1382 | 0.3945682 | 6.146693 | 4.722650 | 0.0019977 | 0.0838591 | -0.9574121 |
C8PB12|DNA gyrase subunit A|gyrA | 1.4918476 | 4.801755 | 4.714146 | 0.0020180 | 0.0838591 | -0.9674787 |
A0A6G7BAQ9|RNA methyltransferase|G6Z83_04110 | -0.3641053 | 6.593073 | -4.705532 | 0.0020389 | 0.0838591 | -0.9776876 |
A0A6G7B9M4;C8PD80;E1NUD7|Uncharacterized protein|G6Z83_02730;HMPREF0520_1050;HMPREF9211_0800 | 0.8755442 | 5.438394 | 4.576239 | 0.0023817 | 0.0928538 | -1.1323990 |
A0A6G7BB71;C8PBB0;E1NM82;E1NTT7|Transcription elongation factor GreA|greA | 0.4991568 | 6.834250 | 4.551655 | 0.0024539 | 0.0928538 | -1.1621307 |
A0A6G7B973;C8PDI4;E1NN42;E1NS17|Cell division protein SepF|sepF | 0.4572304 | 8.112222 | 4.453854 | 0.0027655 | 0.1006214 | -1.2814001 |
A0A6G7B9N0;C8PBW1;E1NLZ6|Phosphoglycerate kinase|pgk | 0.4365787 | 10.203958 | 4.419923 | 0.0028836 | 0.1010317 | -1.3231486 |
A0A6G7BB56;C8PBG4;E1NP80;E1NU13|Large ribosomal subunit protein uL13|rplM | 0.5070600 | 10.259201 | 4.379138 | 0.0030329 | 0.1024679 | -1.3735841 |
C8PAS5|Trigger factor|tig | 0.3167440 | 10.045474 | 4.337782 | 0.0031930 | 0.1030167 | -1.4250057 |
A0A6G7B750;C8PCC5;E1NQZ1;E1NTB2|PAS domain-containing protein;PAS domain S-box protein;PAS domain S-box protein;PAS domain S-box protein|G6Z83_00585;HMPREF0520_0745;HMPREF9212_0181;HMPREF9211_0053 | 0.5815868 | 5.890258 | 4.319443 | 0.0032669 | 0.1030167 | -1.4478984 |
A0A6G7B9W6;C8PC06;E1NMS9;E1NV33|Na+/H+ antiporter NhaC|nhaC | 0.4360909 | 5.913365 | 4.279854 | 0.0034330 | 0.1047622 | -1.4975065 |
A0A6G7B8M0;C8PAQ7;E1NQA0;E1NSL0|Pyruvate kinase|pyk | 0.3755278 | 10.003065 | 4.164135 | 0.0039739 | 0.1112275 | -1.6439838 |
A0A6G7B8K3;C8PAS6;E1NQC3;E1NSN2|Elongation factor Tu|tuf | 0.3852240 | 9.994231 | 4.154819 | 0.0040213 | 0.1112275 | -1.6558714 |
A0A6G7B825;E1NQZ0;E1NSK5|2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase|gpmA | 0.5031857 | 9.785226 | 4.139189 | 0.0041023 | 0.1112275 | -1.6758476 |
A0A6G7BAN1|HIT family protein|G6Z83_06610 | 1.0083528 | 5.760342 | 4.120785 | 0.0041999 | 0.1112275 | -1.6994192 |
E1NUI0|Alpha amylase, catalytic domain protein|dexB | -0.3473133 | 7.378849 | -4.114695 | 0.0042328 | 0.1112275 | -1.7072311 |
A0A6G7B7N2;C8PCE4;E1NQX2;E1NSI6|Small ribosomal subunit protein uS7|rpsG | 0.4094128 | 9.451998 | 4.088877 | 0.0043752 | 0.1115655 | -1.7404162 |
E1NUT2|GA module|HMPREF9211_0601 | 1.8476468 | 8.013496 | 4.070212 | 0.0044815 | 0.1115655 | -1.7644741 |
E1NMM8|X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain protein|HMPREF9212_0748 | 0.7975769 | 6.119571 | 4.025690 | 0.0047465 | 0.1132871 | -1.8220858 |
E1NLW5;E1NV59|UbiC transcription regulator-associated domain protein|HMPREF9212_0654;HMPREF9211_0682 | 0.3865731 | 6.967498 | 4.018615 | 0.0047902 | 0.1132871 | -1.8312699 |
C8PAY8|6-phosphogluconate dehydrogenase, decarboxylating|gnd | -0.5328312 | 7.810156 | -3.917734 | 0.0054627 | 0.1260418 | -1.9630915 |
A0A6G7BHU2;C8PBF1;E1NP93;E1NU26|Large ribosomal subunit protein uL15|rplO | 0.5058773 | 9.404662 | 3.882563 | 0.0057208 | 0.1274243 | -2.0094241 |
A0A6G7BB47;C8PBI5|Large ribosomal subunit protein bL33|rpmG | -0.5143169 | 7.192931 | -3.873159 | 0.0057920 | 0.1274243 | -2.0218467 |
A0A6G7BA62;C8PB55;E1NM62|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_06665;HMPREF0520_0325;HMPREF9212_0412 | -0.5103978 | 6.765337 | -3.853956 | 0.0059405 | 0.1277202 | -2.0472530 |
A0A6G7B8M6;C8PAJ9|Isopentenyl-diphosphate delta-isomerase|fni | 0.9213771 | 5.284632 | 3.804495 | 0.0063422 | 0.1333277 | -2.1129545 |
A0A6G7BAE9;C8PBE9;E1NP95;E1NU28|Small ribosomal subunit protein uS5|rpsE | 0.4204507 | 9.926902 | 3.773670 | 0.0066075 | 0.1358837 | -2.1540903 |
A0A6G7B7H4|PTS mannose/fructose/sorbose transporter family subunit IID|G6Z83_00375 | 0.4022195 | 9.734485 | 3.738419 | 0.0069256 | 0.1393956 | -2.2013074 |
A0A6G7B9U7;C8PBX7;E1NLX2|Peptidoglycan-binding protein LysM;Uncharacterized protein;Uncharacterized protein|G6Z83_05375;HMPREF0520_0597;HMPREF9212_0664 | -0.7078563 | 5.028171 | -3.709912 | 0.0071950 | 0.1418008 | -2.2396265 |
A0A6G7B9W9;C8PBW0;E1NLZ7;E1NTI0|Glyceraldehyde-3-phosphate dehydrogenase|gap | 0.5066283 | 9.603459 | 3.661947 | 0.0076742 | 0.1469933 | -2.3043733 |
C8PAF4|E1-E2 ATPase|HMPREF0520_0074 | -0.4488300 | 5.967501 | -3.644885 | 0.0078529 | 0.1469933 | -2.3274864 |
dm4 <- dm
write.xlsx(dm4, "differentialexpression4_liners_bothstrains.xlsx")
Volcano chart
TOT=nrow(dm)
sig <- subset(dm,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm <- dm[!is.na(dm$P.Value),]
plot(dm$logFC,-log10(dm$P.Value),pch=19,
xlab="Log2 fold change", ylab="p-value")
points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")
Heatmap of top hits
top <- rownames(head(dm,40))
mx <- x3[rownames(x3) %in% top,]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
ColSideColors=mycols , col=my_palette, main="top 20 proteins")
Strain S/7 samples only.
sx2 <- ss2[which(ss2$strain=="FALSE"),]
x32 <- x3[,which(colnames(x3) %in% rownames(sx2))]
design <- model.matrix(~ sx2$LA)
design
## (Intercept) sx2$LATRUE
## 1 1 1
## 2 1 1
## 3 1 0
## 4 1 0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`sx2$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(x32,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sx2$LATRUE
## Down 0 0
## NotSig 0 946
## Up 946 0
dm5 <- topTable(fit.reduced,coef=2, number = Inf)
head(dm5,50) %>% kbl(caption = "L. iners Top 50 proteins after differential analysis in strain S") %>% kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
A0A6G7B883;C8PCJ3;E1NMC3|UPF0297 protein G6Z83_01885;UPF0297 protein HMPREF0520_0813;UPF0297 protein HMPREF9212_1332|G6Z83_01885;HMPREF0520_0813;HMPREF9212_1332 | 1.6588334 | 5.324599 | 8.231884 | 0.0012163 | 0.710556 | -3.966573 |
A0A6G7B973;C8PDI4;E1NN42;E1NS17|Cell division protein SepF|sepF | 0.6596861 | 8.111005 | 5.723858 | 0.0046905 | 0.710556 | -4.018834 |
A0A6G7B7L2;C8PCC1;E1NQZ6;E1NTA8|Glycerol-3-phosphate acyltransferase|plsY | 1.0450400 | 6.207543 | 5.405260 | 0.0057639 | 0.710556 | -4.030131 |
A0A6G7BA56;C8PBJ1;E1NTX9|Large ribosomal subunit protein bL12|rplL | 0.6592185 | 9.618330 | 5.125944 | 0.0069620 | 0.710556 | -4.041461 |
A0A6G7BAD8;C8PBF9;E1NP85;E1NU18|Large ribosomal subunit protein bL17|rplQ | 0.5629366 | 9.660954 | 4.932765 | 0.0079715 | 0.710556 | -4.050202 |
A0A6G7BH83|Uncharacterized protein|G6Z83_05035 | 1.3202032 | 5.661840 | 4.705422 | 0.0093983 | 0.710556 | -4.061572 |
E1NTJ7|ATPase family associated with various cellular activities (AAA)|tlyB | 0.9606188 | 7.585307 | 4.695050 | 0.0094705 | 0.710556 | -4.062122 |
C8PAK4|ATP-dependent helicase/nuclease subunit A|addA | -0.7805417 | 5.749606 | -4.591513 | 0.0102297 | 0.710556 | -4.067759 |
A0A6G7B8M6;C8PAJ9|Isopentenyl-diphosphate delta-isomerase|fni | 1.3708782 | 5.322028 | 4.256452 | 0.0132486 | 0.710556 | -4.088122 |
A0A6G7B9E7;C8PAA6;E1NQF2;E1NSV3|Transcriptional repressor NrdR|nrdR | 0.4808854 | 6.695247 | 4.239371 | 0.0134296 | 0.710556 | -4.089255 |
C8PB12|DNA gyrase subunit A|gyrA | 1.2194640 | 4.929902 | 4.212096 | 0.0137248 | 0.710556 | -4.091085 |
C8PBD4|Large ribosomal subunit protein uL23|rplW | 0.4609073 | 9.955422 | 4.155785 | 0.0143595 | 0.710556 | -4.094944 |
A0A6G7B4A7|Uncharacterized protein|G6Z83_01810 | 2.0419336 | 5.210420 | 4.060867 | 0.0155116 | 0.710556 | -4.101709 |
C8PBT3|Peptidase, M16 family|HMPREF0520_0553 | 0.5473418 | 6.047777 | 4.046227 | 0.0156991 | 0.710556 | -4.102782 |
A0A6G7B9K9;C8PDM0;E1NNA3;E1NSY3|ATP synthase subunit c|atpE | 0.6151739 | 9.283886 | 3.907242 | 0.0176218 | 0.710556 | -4.113388 |
A0A6G7BB71;C8PBB0;E1NM82;E1NTT7|Transcription elongation factor GreA|greA | 0.6165167 | 7.070780 | 3.865705 | 0.0182505 | 0.710556 | -4.116710 |
iRT_Peptides_Fusion|| | 1.0806311 | 9.669933 | 3.698586 | 0.0210686 | 0.710556 | -4.130838 |
E1NN99|ATP synthase subunit delta|atpH | 1.4109078 | 5.362340 | 3.667325 | 0.0216520 | 0.710556 | -4.133622 |
A0A6G7B6W1;C8PB11;E1NM32;E1NSA1|Small ribosomal subunit protein bS6|rpsF | 0.4235001 | 8.763806 | 3.651958 | 0.0219459 | 0.710556 | -4.135008 |
A0A6G7BA57|Lysozyme|G6Z83_05710 | -1.1404348 | 6.324888 | -3.594832 | 0.0230811 | 0.710556 | -4.140261 |
A0A6G7B927;C8PAI7;E1NUN8|ABC-2 transporter permease;Uncharacterized protein;Uncharacterized protein|G6Z83_03795;HMPREF0520_0107;HMPREF9211_1507 | -0.9107605 | 6.857402 | -3.517841 | 0.0247239 | 0.710556 | -4.147599 |
E1NQ38|L-lactate dehydrogenase|ldh | 0.4118918 | 10.201247 | 3.504271 | 0.0250277 | 0.710556 | -4.148924 |
C8PB77|HTH merR-type domain-containing protein|HMPREF0520_0347 | 0.5619474 | 6.585859 | 3.489783 | 0.0253570 | 0.710556 | -4.150350 |
A0A6G7B8A1;C8PD20;E1NMV0;E1NTM3|Probable transcriptional regulatory protein G6Z83_02215;Probable transcriptional regulatory protein HMPREF0520_0990;Probable transcriptional regulatory protein HMPREF9212_1469;Probable transcriptional regulatory protein HMPREF9211_1381|G6Z83_02215;HMPREF0520_0990;HMPREF9212_1469;HMPREF9211_1381 | 0.6767054 | 8.785173 | 3.331598 | 0.0293093 | 0.710556 | -4.166660 |
A0A6G7BAC2;C8PAB2;E1NQE6;E1NSU3|Large ribosomal subunit protein bL20|rplT | 0.3920754 | 9.634431 | 3.291841 | 0.0304149 | 0.710556 | -4.170983 |
A0A6G7BA31;E1NPA9;E1NU42|Large ribosomal subunit protein uL2|rplB | 0.3703785 | 9.819932 | 3.291426 | 0.0304267 | 0.710556 | -4.171029 |
A0A6G7B6Y1;C8PAY5;E1NMJ5;E1NSB9|NCS2 family permease;Putative permease;Putative permease;Permease|G6Z83_00155;yieG;pbuG;HMPREF9211_1272 | -0.6997797 | 6.075260 | -3.171551 | 0.0340734 | 0.710556 | -4.184644 |
C8PD88|Dihydropteroate synthase|folP | -0.6053520 | 5.889066 | -3.168543 | 0.0341713 | 0.710556 | -4.184997 |
A0A6G7B9S9|ATP-dependent DNA helicase|pcrA | 0.3493146 | 8.044538 | 3.138916 | 0.0351539 | 0.710556 | -4.188506 |
A0A6G7BHR8|Glutamate–tRNA ligase|gltX | 0.3522573 | 8.671349 | 3.132587 | 0.0353682 | 0.710556 | -4.189263 |
A0A6G7B7J6;E1NP20|DNA replication initiation control protein YabA;Initiation-control protein YabA|yabA;HMPREF9212_0021 | 0.5052118 | 7.117279 | 3.131884 | 0.0353920 | 0.710556 | -4.189348 |
A0A6G7BA62;C8PB55;E1NM62|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_06665;HMPREF0520_0325;HMPREF9212_0412 | -0.5096376 | 6.825266 | -3.109255 | 0.0361712 | 0.710556 | -4.192076 |
A0A6G7BA67;C8PBI7;E1NNP9;E1NTY3|Transcription termination/antitermination protein NusG|nusG | 0.3875250 | 7.932463 | 3.093745 | 0.0367169 | 0.710556 | -4.193966 |
A0A6G7B970|Xaa-Pro dipeptidyl-peptidase|G6Z83_04040 | 0.4915151 | 7.508535 | 3.091378 | 0.0368010 | 0.710556 | -4.194256 |
E1NRB2|Uncharacterized protein|HMPREF9211_0159 | 0.5581602 | 5.779447 | 3.090678 | 0.0368260 | 0.710556 | -4.194342 |
A0A6G7B1X1;E1NUD2|GTP cyclohydrolase 1|folE | 0.4384387 | 6.765608 | 3.025986 | 0.0392173 | 0.710556 | -4.202410 |
A0A6G7B8T2;C8PAT1|DNA polymerase III subunit delta;DNA polymerase III, delta subunit|holA | 0.5197183 | 5.907468 | 3.025610 | 0.0392318 | 0.710556 | -4.202458 |
A0A6G7B9V3;C8PBD2;E1NPB2;E1NU45|Large ribosomal subunit protein uL3|rplC | 0.4265643 | 9.749148 | 2.980407 | 0.0410119 | 0.710556 | -4.208266 |
A0A6G7B871;C8PCZ1;E1NTR1|Amino acid permease|G6Z83_02055;HMPREF0520_0961;HMPREF9211_1419 | 0.4364796 | 5.360959 | 2.962592 | 0.0417393 | 0.710556 | -4.210595 |
E1NLW5;E1NV59|UbiC transcription regulator-associated domain protein|HMPREF9212_0654;HMPREF9211_0682 | 0.4176203 | 7.157107 | 2.959469 | 0.0418684 | 0.710556 | -4.211005 |
C8PAQ4;E1NQ97;E1NRR7|Protein RibT|HMPREF0520_0174;HMPREF9212_0787;HMPREF9211_0564 | 0.7826915 | 5.636071 | 2.956516 | 0.0419909 | 0.710556 | -4.211394 |
A0A6G7B9T6;C8PAV5;E1NQN4;E1NRX6|Chaperone protein DnaK|dnaK | 0.4336780 | 10.961888 | 2.916650 | 0.0436860 | 0.710556 | -4.216704 |
A0A6G7B993|Lipoprotein|G6Z83_05110 | -0.6210108 | 6.832911 | -2.900466 | 0.0443968 | 0.710556 | -4.218892 |
A0A6G7B9V7;C8PBW4;E1NLZ3;E1NTI5|Protein-export membrane protein SecG|secG | 0.6199192 | 5.477796 | 2.897314 | 0.0445369 | 0.710556 | -4.219320 |
C8PAP1|CCA-adding enzyme|papS | -0.3318399 | 7.171062 | -2.870511 | 0.0457485 | 0.710556 | -4.222992 |
A0A6G7BA49;C8PBD3;E1NPB1;E1NU44|Large ribosomal subunit protein uL4|rplD | 0.3548646 | 10.997198 | 2.870045 | 0.0457699 | 0.710556 | -4.223057 |
A0A6G7BA27|Ribosome hibernation promoting factor|raiA | 0.4731349 | 9.752917 | 2.838531 | 0.0472447 | 0.710556 | -4.227443 |
A0A6G7B9V1|Transcription-repair-coupling factor|mfd | -0.3121057 | 5.657076 | -2.822571 | 0.0480128 | 0.710556 | -4.229693 |
C8PBZ3|Putative potassium/sodium efflux P-type ATPase, fungal-type|pacL | -0.3455378 | 7.889072 | -2.808562 | 0.0486988 | 0.710556 | -4.231684 |
A0A6G7BAK1;C8PDK6;E1NN69;E1NU70|Small ribosomal subunit protein uS4|rpsD | 0.3281767 | 10.069653 | 2.804414 | 0.0489042 | 0.710556 | -4.232277 |
write.xlsx(dm5, "differentialexpression5_liners_strainS7.xlsx")
Volcano chart
TOT=nrow(dm5)
sig <- subset(dm5,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(sx2$LA)+1)))
plot(dm5$logFC,-log10(dm5$P.Value),pch=19,
xlab="Log2 fold change", ylab="p-value")
points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")
Heatmap of top hits
top <- rownames(head(dm5,40))
mx <- x32[rownames(x32) %in% top,]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
ColSideColors=mycols , col=my_palette, main="top 20 proteins in strain S/7")
Strain A samples only.
sx3 <- ss2[which(ss2$strain=="TRUE"),]
x33 <- x3[,which(colnames(x3) %in% rownames(sx3))]
dim(x33)
## [1] 946 4
design <- model.matrix(~ sx3$LA)
design
## (Intercept) sx3$LATRUE
## 1 1 1
## 2 1 1
## 3 1 0
## 4 1 0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`sx3$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(x33,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sx3$LATRUE
## Down 0 8
## NotSig 0 931
## Up 946 7
dm6 <- topTable(fit.reduced,coef=2, number = Inf)
head(dm6,50) %>% kbl(caption = "L. iners Top 50 proteins after differential analysis in strain A") %>% kable_paper("hover", full_width = F)
logFC | AveExpr | t | P.Value | adj.P.Val | B | |
---|---|---|---|---|---|---|
iRT_Peptides_Fusion|| | 1.5461103 | 9.515401 | 13.549429 | 0.0000849 | 0.0454905 | 2.2998729 |
E1NP53|pullulanase|pulA | 3.1908412 | 5.871560 | 12.882786 | 0.0001059 | 0.0454905 | 2.1219257 |
A0A6G7BB59;E1NP90;E1NU23|Translation initiation factor IF-1|infA | -1.6211342 | 7.769874 | -11.233670 | 0.0001922 | 0.0454905 | 1.6128024 |
C8PAR7;E1NQB3;E1NSM2|Inosine-uridine preferring nucleoside hydrolase|HMPREF0520_0187;HMPREF9212_0803;HMPREF9211_0318 | -1.1776902 | 7.716570 | -10.561476 | 0.0002511 | 0.0454905 | 1.3723130 |
A0A6G7B9J9;C8PDL0;E1NN91;E1NSX2|DUF2969 domain-containing protein|G6Z83_04760;HMPREF0520_1180;HMPREF9212_0951;HMPREF9211_0361 | -1.4356109 | 6.979445 | -10.156581 | 0.0002973 | 0.0454905 | 1.2167896 |
A0A6G7B7F6;E1NP47;E1NTJ2|Nucleoside 2-deoxyribosyltransferase;Nucleoside deoxyribosyltransferase;Nucleoside deoxyribosyltransferase|G6Z83_01315;ntd;ntd | -1.4576278 | 8.371740 | -10.030792 | 0.0003136 | 0.0454905 | 1.1667272 |
A0A6G7B919;C8PDI1;E1NN39;E1NS15|DNA-directed RNA polymerase subunit omega|rpoZ | -1.4529529 | 7.812049 | -9.395463 | 0.0004154 | 0.0454905 | 0.9003475 |
E1NMM8|X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain protein|HMPREF9212_0748 | 1.1581023 | 6.045063 | 9.030851 | 0.0004920 | 0.0454905 | 0.7365773 |
A0A6G7B194;C8PCI9;E1NMC7|Thioredoxin|trxA | -1.8455992 | 7.740627 | -8.949855 | 0.0005113 | 0.0454905 | 0.6990473 |
A0A6G7B7J6;E1NP20|DNA replication initiation control protein YabA;Initiation-control protein YabA|yabA;HMPREF9212_0021 | 1.1498115 | 6.936311 | 8.815781 | 0.0005453 | 0.0454905 | 0.6359724 |
A0A6G7B7H9;C8PCY0;E1NQ76;E1NT48|Sugar aldolase;Class II Aldolase and Adducin domain protein;Class II Aldolase and Adducin domain protein;Class II Aldolase and Adducin domain protein|G6Z83_01090;HMPREF0520_0950;HMPREF9212_0109;HMPREF9211_0073 | 1.4006240 | 5.388047 | 8.784767 | 0.0005536 | 0.0454905 | 0.6212101 |
A0A6G7BAN1|HIT family protein|G6Z83_06610 | 1.5613218 | 5.604153 | 8.548712 | 0.0006216 | 0.0454905 | 0.5066903 |
A0A6G7B9D2;E1NSF7|Ribokinase|rbsK | -1.0469324 | 6.936908 | -8.537229 | 0.0006251 | 0.0454905 | 0.5010203 |
C8PAB4;E1NQE3|Tagatose-6-phosphate kinase|pfkB | -1.4991333 | 6.902144 | -8.315335 | 0.0006990 | 0.0463882 | 0.3895926 |
E1NTJ7|ATPase family associated with various cellular activities (AAA)|tlyB | 1.2864677 | 6.133384 | 8.215924 | 0.0007355 | 0.0463882 | 0.3384996 |
A0A6G7B9V5;C8PAT5;E1NQQ9|Phosphopantetheine adenylyltransferase|coaD | -1.2055292 | 5.964346 | -7.602228 | 0.0010202 | 0.0580304 | 0.0060492 |
A0A6G7BK52;E1NMN3;E1NUB8|PTS sugar transporter subunit IIC;PTS system sorbose-specific iic component;PTS system sorbose-specific iic component|G6Z83_04060;HMPREF9212_0753;HMPREF9211_0464 | 0.9195288 | 7.418822 | 7.562468 | 0.0010428 | 0.0580304 | -0.0165511 |
A0A6G7B3J8;C8PB26|DegV family protein;EDD domain protein, DegV family|G6Z83_06805;HMPREF0520_0296 | 0.7641276 | 8.137844 | 6.966521 | 0.0014685 | 0.0726407 | -0.3720761 |
E1NQ07|Gram-positive signal peptide protein, YSIRK family|HMPREF9212_0339 | 0.8215463 | 9.607226 | 6.917106 | 0.0015124 | 0.0726407 | -0.4030337 |
A0A6G7B9J2|YSIRK-type signal peptide-containing protein|G6Z83_05695 | 0.7863908 | 10.073410 | 6.891616 | 0.0015357 | 0.0726407 | -0.4190952 |
E1NUH9|Bacteriophage peptidoglycan hydrolase|HMPREF9211_0844 | -1.5214910 | 7.956728 | -6.742151 | 0.0016815 | 0.0740238 | -0.5145592 |
A0A6G7B825;E1NQZ0;E1NSK5|2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase|gpmA | 0.7545887 | 9.779730 | 6.696858 | 0.0017289 | 0.0740238 | -0.5439275 |
A0A6G7BAH4|YSIRK-type signal peptide-containing protein|G6Z83_05700 | 0.8744591 | 10.012309 | 6.631877 | 0.0017997 | 0.0740238 | -0.5864265 |
A0A6G7B8J9;C8PC89;E1NMH1;E1NT71|DNA-entry nuclease;Putative DNA/RNA non-specific endonuclease;Putative DNA/RNA non-specific endonuclease;Putative DNA/RNA non-specific endonuclease|G6Z83_00410;endA;HMPREF9212_0480;HMPREF9211_0011 | 1.3952953 | 5.221992 | 6.390779 | 0.0020952 | 0.0807155 | -0.7479428 |
A0A6G7BB56;C8PBG4;E1NP80;E1NU13|Large ribosomal subunit protein uL13|rplM | 0.7495095 | 10.255969 | 6.344798 | 0.0021580 | 0.0807155 | -0.7794489 |
A0A6G7B7R3|Histidine phosphatase family protein|G6Z83_01060 | -0.7982777 | 7.098486 | -6.302076 | 0.0022184 | 0.0807155 | -0.8089285 |
A0A6G7B9U4;C8PDF6;E1NSF6|Ribose-5-phosphate isomerase A|rpiA | -0.7160048 | 7.482566 | -6.165382 | 0.0024258 | 0.0849922 | -0.9046053 |
A0A6G7BA57|Lysozyme|G6Z83_05710 | -0.7403929 | 7.288629 | -5.933773 | 0.0028331 | 0.0946491 | -1.0715609 |
A0A6G7B948;E1NSG1|DNA starvation/stationary phase protection protein;Ferritin-like protein|G6Z83_04385;HMPREF9211_1021 | -1.5380654 | 9.026246 | -5.789784 | 0.0031279 | 0.0946491 | -1.1785343 |
A0A6G7BI36;E1NS53|Iron-sulfur cluster biosynthesis family protein;FeS cluster biogenesis domain-containing protein|G6Z83_06680;HMPREF9211_1203 | -0.7886301 | 7.828368 | -5.710088 | 0.0033069 | 0.0946491 | -1.2388235 |
C8PCW8|Serine-type D-Ala-D-Ala carboxypeptidase|dacA | 0.8793457 | 5.604185 | 5.707939 | 0.0033119 | 0.0946491 | -1.2404597 |
A0A6G7BAD8;C8PBF9;E1NP85;E1NU18|Large ribosomal subunit protein bL17|rplQ | 0.6540416 | 9.656336 | 5.655781 | 0.0034359 | 0.0946491 | -1.2803546 |
A0A6G7BIS7;E1NP71|Uncharacterized protein|G6Z83_01205;HMPREF9212_0073 | -2.0562275 | 6.804168 | -5.646155 | 0.0034594 | 0.0946491 | -1.2877543 |
A0A6G7B7L2;C8PCC1;E1NQZ6;E1NTA8|Glycerol-3-phosphate acyltransferase|plsY | 2.1210540 | 6.031474 | 5.644508 | 0.0034634 | 0.0946491 | -1.2890219 |
A0A6G7BA31;E1NPA9;E1NU42|Large ribosomal subunit protein uL2|rplB | 0.7292774 | 9.896163 | 5.588147 | 0.0036051 | 0.0946491 | -1.3325935 |
E1NMT2;E1NV37|Putative lipid kinase|HMPREF9212_1126;HMPREF9211_0659 | -0.8223935 | 7.201478 | -5.556455 | 0.0036879 | 0.0946491 | -1.3572708 |
C8PBD4|Large ribosomal subunit protein uL23|rplW | 0.6326985 | 9.984263 | 5.483257 | 0.0038878 | 0.0946491 | -1.4147585 |
A0A6G7B797|D-alanyl-lipoteichoic acid biosynthesis protein DltD|dltD | 0.9295961 | 6.003996 | 5.482801 | 0.0038891 | 0.0946491 | -1.4151182 |
A0A6G7B9U7;C8PBX7;E1NLX2|Peptidoglycan-binding protein LysM;Uncharacterized protein;Uncharacterized protein|G6Z83_05375;HMPREF0520_0597;HMPREF9212_0664 | -0.8701541 | 5.081611 | -5.478220 | 0.0039020 | 0.0946491 | -1.4187396 |
E1NQ38|L-lactate dehydrogenase|ldh | 0.6357153 | 10.238272 | 5.376131 | 0.0042043 | 0.0968069 | -1.5001430 |
A0A6G7B750;C8PCC5;E1NQZ1;E1NTB2|PAS domain-containing protein;PAS domain S-box protein;PAS domain S-box protein;PAS domain S-box protein|G6Z83_00585;HMPREF0520_0745;HMPREF9212_0181;HMPREF9211_0053 | 0.8245465 | 5.862766 | 5.344758 | 0.0043028 | 0.0968069 | -1.5254344 |
A0A6G7BH65;C8PDH4;E1NN27;E1NS05|ribulose-phosphate 3-epimerase;Ribulose-phosphate 3-epimerase;ribulose-phosphate 3-epimerase;ribulose-phosphate 3-epimerase|rpe;rpe;HMPREF9212_0886;HMPREF9211_0930 | -0.8276356 | 5.867281 | -5.330348 | 0.0043489 | 0.0968069 | -1.5370944 |
A0A6G7B7W0;C8PCJ1;E1NMC5;E1NR60|UPF0473 protein G6Z83_01895;UPF0473 protein HMPREF0520_0811;UPF0473 protein HMPREF9212_1334;UPF0473 protein HMPREF9211_0420|G6Z83_01895;HMPREF0520_0811;HMPREF9212_1334;HMPREF9211_0420 | -0.7474805 | 6.561477 | -5.314515 | 0.0044003 | 0.0968069 | -1.5499381 |
A0A6G7BAK1;C8PDK6;E1NN69;E1NU70|Small ribosomal subunit protein uS4|rpsD | 0.6186460 | 10.038013 | 5.244074 | 0.0046380 | 0.0997159 | -1.6074877 |
A0A6G7B957;C8PAF6;E1NNX5;E1NRJ0|Enolase|eno | 0.7706141 | 10.069214 | 5.127028 | 0.0050676 | 0.1065332 | -1.7046038 |
E1NUT2|GA module|HMPREF9211_0601 | 2.5997715 | 7.690821 | 5.071889 | 0.0052865 | 0.1087173 | -1.7510088 |
A0A6G7B9W9;C8PBW0;E1NLZ7;E1NTI0|Glyceraldehyde-3-phosphate dehydrogenase|gap | 0.6284955 | 9.580014 | 5.010529 | 0.0055434 | 0.1104289 | -1.8031495 |
C8PDF5|Inosine-uridine preferring nucleoside hydrolase|rihA | -0.5882678 | 7.561041 | -4.996732 | 0.0056032 | 0.1104289 | -1.8149471 |
C8PDJ1|Penicillin-binding protein, transpeptidase domain protein|ftsI | -0.6765870 | 7.201216 | -4.863581 | 0.0062216 | 0.1180702 | -1.9301961 |
A0A6G7BAS1;C8PBE1;E1NPA3;E1NU36|Small ribosomal subunit protein uS17|rpsQ | 0.6932880 | 7.589185 | 4.859764 | 0.0062405 | 0.1180702 | -1.9335381 |
write.xlsx(dm6, "differentialexpression6_liners_strainA.xlsx")
Volcano chart
TOT=nrow(dm6)
sig <- subset(dm6,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(sx3$LA)+1)))
plot(dm6$logFC,-log10(dm6$P.Value),pch=19,
xlab="Log2 fold change", ylab="p-value")
points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")
Heatmap of top hits
top <- rownames(head(dm6,40))
mx <- x33[rownames(x33) %in% top,]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
ColSideColors=mycols , col=my_palette, main="top 20 proteins in strain A")
Make a Euler diagram of the genes.
dm5up <- rownames(subset(dm5,adj.P.Val<0.05 & logFC > 0))
dm5dn <- rownames(subset(dm5,adj.P.Val<0.05 & logFC < 0))
dm6up <- rownames(subset(dm6,adj.P.Val<0.05 & logFC > 0))
dm6dn <- rownames(subset(dm6,adj.P.Val<0.05 & logFC < 0))
v1 <- list("Strain S up"=dm5up, "Strain S down"=dm5dn,
"Strain A up"=dm6up,"Strain A down"=dm6dn)
plot(euler(v1),quantities = TRUE)
Make a scatter plotr of the ranks.
While the results are not completely concoordant between the strains, there is a lot of similarity.
mg <- merge(dm5,dm6,by=0)
rownames(mg) <- mg$Row.names
mg <- mg[,c("t.x","t.y")]
head(mg)
## t.x
## A0A6G5W5S4|pullulanase|glgUi -0.18443758
## A0A6G5W607|pullulanase (Fragment)|glgUi -0.07975566
## A0A6G7B0K0;E1NMG4;E1NT78|Alpha-glycosidase;Alpha amylase, catalytic domain protein;Alpha amylase, catalytic domain protein|G6Z83_00440;nplT;nplT 1.17334663
## A0A6G7B0K8;E1NQ85|ADP-dependent (S)-NAD(P)H-hydrate dehydratase|nnrD 1.52542062
## A0A6G7B0L8;C8PCV9|Foldase protein PrsA|prsA -0.10795115
## A0A6G7B0M7;C8PAW2;E1NMK7;E1NSE6|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_00265;ylbA;HMPREF9212_0258;HMPREF9211_1299 1.75459600
## t.y
## A0A6G5W5S4|pullulanase|glgUi -2.1838245
## A0A6G5W607|pullulanase (Fragment)|glgUi -0.7253266
## A0A6G7B0K0;E1NMG4;E1NT78|Alpha-glycosidase;Alpha amylase, catalytic domain protein;Alpha amylase, catalytic domain protein|G6Z83_00440;nplT;nplT 2.0925056
## A0A6G7B0K8;E1NQ85|ADP-dependent (S)-NAD(P)H-hydrate dehydratase|nnrD -0.9936822
## A0A6G7B0L8;C8PCV9|Foldase protein PrsA|prsA 2.4313204
## A0A6G7B0M7;C8PAW2;E1NMK7;E1NSE6|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_00265;ylbA;HMPREF9212_0258;HMPREF9211_1299 2.7927192
plot(mg,pch=19,cex=0.4,main="limma t-stat",xlab="Strain S/7",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)
mylm <- lm(mg)
abline(mylm,lty=2,col="red",lwd=2)
mylm
##
## Call:
## lm(formula = mg)
##
## Coefficients:
## (Intercept) t.y
## 0.1785 0.2381
cor.test(mg[,1],mg[,2])
##
## Pearson's product-moment correlation
##
## data: mg[, 1] and mg[, 2]
## t = 13.375, df = 944, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3441682 0.4514052
## sample estimates:
## cor
## 0.3991508
rmg <- apply(mg,2, function(x) {
rnk <- rank(x)
NUMNEG=length(which(x<0))
rnk <- rnk - NUMNEG
return(rnk)
})
plot(rmg,pch=19,cex=0.4,main="rank of limma t-stat",xlab="Strain D",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)
mylm <- lm(as.data.frame(rmg))
abline(mylm,lty=2,col="red",lwd=2)
mylm
##
## Call:
## lm(formula = as.data.frame(rmg))
##
## Coefficients:
## (Intercept) t.y
## 34.1758 0.4226
cor.test(rmg[,1],rmg[,2])
##
## Pearson's product-moment correlation
##
## data: rmg[, 1] and rmg[, 2]
## t = 14.325, df = 944, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3687669 0.4735569
## sample estimates:
## cor
## 0.4225731
tr <- nrow(subset(mg,t.x>0 & t.y>0))
br <- nrow(subset(mg,t.x>0 & t.y<0))
tl <- nrow(subset(mg,t.x<0 & t.y>0))
bl <- nrow(subset(mg,t.x<0 & t.y<0))
tr
## [1] 324
br
## [1] 185
tl
## [1] 154
bl
## [1] 283
concordant = tr + bl
message(paste("concordant proteins:",concordant))
## concordant proteins: 607
discordant = br + tl
message(paste("discordant proteins:",discordant))
## discordant proteins: 339
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stringr_1.5.1 eulerr_7.0.2 openxlsx_4.2.5.2 gplots_3.1.3.1
## [5] beeswarm_0.4.0 vioplot_0.4.0 zoo_1.8-12 sm_2.2-6.0
## [9] kableExtra_1.4.0 limma_3.60.0
##
## loaded via a namespace (and not attached):
## [1] jsonlite_1.8.8 highr_0.11 compiler_4.4.1 gtools_3.9.5
## [5] zip_2.3.1 Rcpp_1.0.12 xml2_1.3.6 bitops_1.0-7
## [9] jquerylib_0.1.4 systemfonts_1.1.0 scales_1.3.0 yaml_2.3.8
## [13] fastmap_1.2.0 statmod_1.5.0 lattice_0.22-6 R6_2.5.1
## [17] knitr_1.47 polyclip_1.10-6 munsell_0.5.1 svglite_2.1.3
## [21] polylabelr_0.2.0 bslib_0.7.0 rlang_1.1.4 cachem_1.1.0
## [25] stringi_1.8.4 xfun_0.45 caTools_1.18.2 sass_0.4.9
## [29] viridisLite_0.4.2 cli_3.6.3 magrittr_2.0.3 digest_0.6.36
## [33] grid_4.4.1 rstudioapi_0.16.0 lifecycle_1.0.4 vctrs_0.6.5
## [37] KernSmooth_2.23-24 evaluate_0.24.0 glue_1.7.0 colorspace_2.1-0
## [41] rmarkdown_2.27 tools_4.4.1 htmltools_0.5.8.1