Source: https://github.com/markziemann/paula_proteomics

library("limma")
library("kableExtra")
library("vioplot")
library("beeswarm")
library("gplots")
library("openxlsx")
library("eulerr")
library("stringr")

Intro

We want to properly annotate these proteins with names and conduct differential expression irrespective of the strain/isolate.

We are running 6 different protein expression contrasts. All of these are looking at the effect of lactic acid, but with different inclusion.

  1. L. crispatus both strains

  2. L. crispatus strain D

  3. L. crispatus strain A

  4. L. iners both strains

  5. L. iners strain S/7

  6. L. iners strain A

Lactobacillus crispatus

Read in data

It requires a lot of cleaning. Here is what the data looks like to start with.

x <- read.table("20240611_192029_P24_0717_E1_PaluaE_Updated_LC_ATCC_Report.tsv",sep="\t",header=TRUE)

head(x,6) %>% kbl(caption = "Input data from 20240611_192029_P24_0717_E1_PaluaE_Updated_LC_ATCC_Report.tsv") %>% kable_paper("hover", full_width = F)
Input data from 20240611_192029_P24_0717_E1_PaluaE_Updated_LC_ATCC_Report.tsv
PG.ProteinGroups PG.ProteinAccessions PG.Genes PG.ProteinDescriptions PG.UniProtIds PG.FastaHeaders X.1..AST1_JS_20240405_P24_0717_E1_S02.raw.PG.NrOfPrecursorsMeasured X.2..AST1_JS_20240405_P24_0717_E1_S10.raw.PG.NrOfPrecursorsMeasured X.3..AST1_JS_20240405_P24_0717_E1_S01.raw.PG.NrOfPrecursorsMeasured X.4..AST1_JS_20240405_P24_0717_E1_S09.raw.PG.NrOfPrecursorsMeasured X.5..AST1_JS_20240405_P24_0717_E1_S04.raw.PG.NrOfPrecursorsMeasured X.6..AST1_JS_20240405_P24_0717_E1_S12.raw.PG.NrOfPrecursorsMeasured X.7..AST1_JS_20240405_P24_0717_E1_S11.raw.PG.NrOfPrecursorsMeasured X.8..AST1_JS_20240405_P24_0717_E1_S03_20240426013646.raw.PG.NrOfPrecursorsMeasured X.1..AST1_JS_20240405_P24_0717_E1_S02.raw.PG.Quantity X.2..AST1_JS_20240405_P24_0717_E1_S10.raw.PG.Quantity X.3..AST1_JS_20240405_P24_0717_E1_S01.raw.PG.Quantity X.4..AST1_JS_20240405_P24_0717_E1_S09.raw.PG.Quantity X.5..AST1_JS_20240405_P24_0717_E1_S04.raw.PG.Quantity X.6..AST1_JS_20240405_P24_0717_E1_S12.raw.PG.Quantity X.7..AST1_JS_20240405_P24_0717_E1_S11.raw.PG.Quantity X.8..AST1_JS_20240405_P24_0717_E1_S03_20240426013646.raw.PG.Quantity
Cont_A0A3Q1M3L6 Cont_A0A3Q1M3L6 Uncharacterized protein Cont_A0A3Q1M3L6 >tr|Cont_A0A3Q1M3L6|A0A3Q1M3L6_BOVIN Uncharacterized protein OS=Bos taurus OX=9913 PE=1 SV=1 6 6 6 6 6 6 6 6 95.97440 109.55053 16.73227 NaN 24.11901 46.19556 NaN NaN
Cont_A0A3Q1MJB2 Cont_A0A3Q1MJB2 FBLN1 Fibulin-1 Cont_A0A3Q1MJB2 >tr|Cont_A0A3Q1MJB2|A0A3Q1MJB2_BOVIN Fibulin-1 OS=Bos taurus OX=9913 GN=FBLN1 PE=1 SV=1 17 17 17 17 17 17 17 17 19.75845 37.81480 NaN NaN 13.18379 23.32178 NaN 8.519665
Cont_A2I7N0 Cont_A2I7N0 SERPINA3-4 Serpin A3-4 Cont_A2I7N0 >sp|Cont_A2I7N0|SPA34_BOVIN Serpin A3-4 OS=Bos taurus OX=9913 GN=SERPINA3-4 PE=3 SV=1 7 7 7 7 7 7 7 7 70.93400 75.30853 NaN NaN 47.79508 60.18491 44.21424 78.915138
Cont_A2I7N3 Cont_A2I7N3 SERPINA3-7 Serpin A3-7 Cont_A2I7N3 >sp|Cont_A2I7N3|SPA37_BOVIN Serpin A3-7 OS=Bos taurus OX=9913 GN=SERPINA3-7 PE=3 SV=1 5 5 5 5 5 5 5 5 265.34586 415.20547 91.61462 22.31066 117.20869 206.51183 NaN NaN
Cont_A7E3W2 Cont_A7E3W2 LGALS3BP Galectin-3-binding protein Cont_A7E3W2 >sp|Cont_A7E3W2|LG3BP_BOVIN Galectin-3-binding protein OS=Bos taurus OX=9913 GN=LGALS3BP PE=1 SV=1 2 2 2 2 2 2 2 2 16.56437 21.59553 NaN NaN 16.88817 32.42205 12.57231 NaN
Cont_G5E513 Cont_G5E513 Uncharacterized protein Cont_G5E513 >tr|Cont_G5E513|G5E513_BOVIN Uncharacterized protein OS=Bos taurus OX=9913 PE=1 SV=2 16 16 16 16 16 16 16 16 163.90485 266.43649 132.05286 NaN 103.07031 145.92206 NaN NaN
# filter the data
q <- x[,c(1:6,15:ncol(x))]
q2 <- q[grep("UPI",q[,1]),]

# clean
colnames(q2) <- gsub("_2024042",".raw",colnames(q2))
colnames(q2) <- gsub(".raw"," ",colnames(q2))
colnames(q2) <- gsub("_E1_"," ",colnames(q2))
colnames(q2) <- str_sub(colnames(q2),4,)
colnames(q2) <- gsub("..AST1_JS_20240405_P24_0717 "," ",colnames(q2))
colnames(q2) <- gsub("PG.","PG ",colnames(q2))
colnames(q2) <- gsub("^ ","",colnames(q2))
colnames(q2) <- sapply(strsplit(colnames(q2)," "),"[[",1)

message("number of rows and columns in the data")
## number of rows and columns in the data
dim(q2)
## [1] 1572   14
head(q2,6) %>% kbl(caption = "after cleaning it looks like this") %>% kable_paper("hover", full_width = F)
after cleaning it looks like this
ProteinGroups ProteinAccessions Genes ProteinDescriptions UniProtIds FastaHeaders S02 S10 S01 S09 S04 S12 S11 S03
100 UPI000045224F UPI000045224F status=active >UPI000045224F status=active 604.57123 575.3896 331.45837 429.70279 253.03282 195.8584 245.82193 169.17685
101 UPI0000452250 UPI0000452250 status=active >UPI0000452250 status=active 53.33073 51.8002 58.16257 62.12326 18.19687 17.6302 12.94107 13.71563
102 UPI00004B2864 UPI00004B2864 status=active >UPI00004B2864 status=active 5987.67871 6332.9077 4586.60986 4949.33350 4437.26953 4903.6855 4003.72314 3560.29736
103 UPI00004C6804 UPI00004C6804 status=active >UPI00004C6804 status=active 1228.69580 1015.1328 927.17303 874.80890 1186.76379 1064.7327 922.82410 976.92743
104 UPI00004C690C UPI00004C690C status=active >UPI00004C690C status=active 3470.40405 3655.9775 4382.52979 4565.18848 4038.15283 4117.3325 4461.77148 4963.70801
105 UPI00004C6915 UPI00004C6915 status=active >UPI00004C6915 status=active 2973.98657 3171.4775 3307.13892 3571.12085 4827.39209 4384.8164 4490.04590 4598.70557

Attach some functional information from Uniprot

Now we can fetch annotations from UniProt batch tool. The full data set is available as an Excel file called “quantifications_crisp.xlsx”.

Also need to log transform the data in order to get sane values of logFC.

y <- read.csv("idmapping_2024_06_27.tsv.gz",sep="\t",header=TRUE)
y2 <- y[!duplicated(y$From), ]
m <- merge(y2,q2,by.x="From",by.y="ProteinGroups",all.y=TRUE)
rownames(m) <- paste(m$ProteinAccessions,m$Protein.names)
m2 <- m[,grep("S",colnames(m))]

head(m2,6) %>% kbl(caption = "Now the proteins have a proper name (if one is available)") %>% kable_paper("hover", full_width = F)
Now the proteins have a proper name (if one is available)
S02 S10 S01 S09 S04 S12 S11 S03
UPI000045224F Histidine phosphatase family protein (Phosphoglycerate mutase) 604.57123 575.3896 331.45837 429.70279 253.03282 195.8584 245.82193 169.17685
UPI0000452250 Histidine phosphatase family protein (Phosphoglycerate mutase) 53.33073 51.8002 58.16257 62.12326 18.19687 17.6302 12.94107 13.71563
UPI00004B2864 Glyceraldehyde-3-phosphate dehydrogenase (EC 1.2.1.-) 5987.67871 6332.9077 4586.60986 4949.33350 4437.26953 4903.6855 4003.72314 3560.29736
UPI00004C6804 Small ribosomal subunit protein bS18 1228.69580 1015.1328 927.17303 874.80890 1186.76379 1064.7327 922.82410 976.92743
UPI00004C690C Small ribosomal subunit protein uS10 3470.40405 3655.9775 4382.52979 4565.18848 4038.15283 4117.3325 4461.77148 4963.70801
UPI00004C6915 Large ribosomal subunit protein uL29 2973.98657 3171.4775 3307.13892 3571.12085 4827.39209 4384.8164 4490.04590 4598.70557
m3 <- m2
m3$UniParc <- rownames(m3)
m3 <- m3[,c(ncol(m3),1:ncol(m3)-1)]
write.xlsx(m2, "quantifications_crisp.xlsx")

message("number of NA values per row")
## number of NA values per row
table(unname(apply(m2,1,function(x) { length(which(is.na(x))) } )))
## 
##    0    1    2    3    4    5    6    7 
## 1260   38   44   69  129   10   16    6
m2 <- m2[!is.na(rowSums(m2)),]

m2 <- log(m2)

Curate the sample sheet

Define the control and lactate groups and the strain.

ss <- read.table("samplesheet.tsv",header=TRUE,row.names=1)

ss$LA <- factor(grepl("LA",ss$condition))

ss$strain <- factor(grepl("_D_",ss$condition))

ss %>% kbl(caption = "Sample sheet") %>% kable_paper("hover", full_width = F)
Sample sheet
condition replicate LA strain
S02 LC_A_LA 1 TRUE FALSE
S10 LC_A_LA 2 TRUE FALSE
S01 LC_A_UT 1 FALSE FALSE
S09 LC_A_UT 2 FALSE FALSE
S04 LC_D_LA 1 TRUE TRUE
S12 LC_D_LA 2 TRUE TRUE
S11 LC_D_UT 1 FALSE TRUE
S03 LC_D_UT 2 FALSE TRUE

MDS analysis

This helps to see the similarities and differences between samples.

mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss$LA))))

plot(cmdscale(dist(t(m2))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=20-as.numeric(ss$strain), cex=4,
  col=mycols)

text(cmdscale(dist(t(m2))), labels=colnames(m2) )

legend("right", legend=c("lac", "ctl"),
       fill=c("pink","lightblue"),  cex=1)

mtext("Circle='A', Diamond='D'")

Differential expression 1 - effect of lactic acid, both strains

Here we are using limma, which was originally designed for microarray data, but should work okay if the data are normally distributed.

We are looking for proteins whose abundance is changed by the treatment: lactic acid versus control. Cancel the effect of strain.

The full differential expression results are available as a Excel file called “differentialexpression.xlsx”.

design <- model.matrix(~ ss$strain + ss$LA)
design
##   (Intercept) ss$strainTRUE ss$LATRUE
## 1           1             0         1
## 2           1             0         1
## 3           1             0         0
## 4           1             0         0
## 5           1             1         1
## 6           1             1         1
## 7           1             1         0
## 8           1             1         0
## attr(,"assign")
## [1] 0 1 2
## attr(,"contrasts")
## attr(,"contrasts")$`ss$strain`
## [1] "contr.treatment"
## 
## attr(,"contrasts")$`ss$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(m2,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
##        (Intercept) ss$strainTRUE ss$LATRUE
## Down             0           435       217
## NotSig           1           444       823
## Up            1259           381       220
dm <- topTable(fit.reduced,coef=3, number = Inf)

head(dm,50) %>% kbl(caption = "Top 50 proteins after differential analysis") %>% kable_paper("hover", full_width = F)
Top 50 proteins after differential analysis
logFC AveExpr t P.Value adj.P.Val B
UPI0003C50192 Uncharacterized protein 1.2185533 3.632099 21.371661 1.00e-07 0.0001090 8.739686
UPI00019F9AD9 Uncharacterized protein 2.8352143 5.575400 17.302665 4.00e-07 0.0001693 7.386957
UPI0006EFB04E D-2-hydroxyacid dehydrogenase (Lactate dehydrogenase) 0.8960643 8.207817 17.200591 4.00e-07 0.0001693 7.347658
UPI0006EEA493 LysR family transcriptional regulator -1.7368811 3.308481 -15.622743 8.00e-07 0.0002502 6.699798
UPI0006EEB579 SCP domain-containing protein 0.8042276 6.248607 14.582890 1.30e-06 0.0002929 6.227041
UPI000280B884 CHAP domain-containing protein (Capsid protein) 0.7848850 5.120403 14.238354 1.50e-06 0.0002929 6.061445
UPI0006EFEF39 Acetate kinase (EC 2.7.2.1) (Acetokinase) 0.9072964 6.394221 14.003487 1.70e-06 0.0002929 5.945839
UPI0006D1F1A0 Antibiotic biosynthesis monooxygenase 0.7786892 6.325596 13.836296 1.90e-06 0.0002929 5.862160
UPI0006EFBCD1 NA 0.9605729 5.193541 13.308989 2.40e-06 0.0003414 5.590410
UPI0006F0ADA9 ABC transporter ATP-binding protein -0.9690016 5.678968 -12.656101 3.50e-06 0.0003527 5.236508
UPI0006F0CAFE Cell surface protein 0.9590548 7.054032 12.655330 3.50e-06 0.0003527 5.236078
UPI0006EEE1EA DUF2207 domain-containing protein -0.6803517 4.394857 -12.620154 3.50e-06 0.0003527 5.216432
UPI00019F9E66 6-phospho-alpha-glucosidase 1.0065759 4.583708 12.564307 3.60e-06 0.0003527 5.185118
UPI000280CD19 Bacteriocin biosynthesis cyclodehydratase, SagC family -1.2083029 4.328382 -12.149713 4.60e-06 0.0003866 4.947748
UPI0001D109E5 ABC transporter ATP-binding protein -0.8140890 5.923749 -12.145174 4.60e-06 0.0003866 4.945100
UPI0006EEFD55 NA -1.3570985 3.592978 -11.933159 5.20e-06 0.0003930 4.820222
UPI0006F1480A Adaptor protein MecA -0.9166124 5.181364 -11.821689 5.50e-06 0.0003930 4.753600
UPI0001D10921 FeS cluster biogenesis domain-containing protein 1.1236718 5.823662 11.723933 5.90e-06 0.0003930 4.694615
UPI0006EE8744 Glycerol kinase (EC 2.7.1.30) (ATP:glycerol 3-phosphotransferase) (Glycerokinase) (GK) 0.8760640 6.015547 11.685472 6.00e-06 0.0003930 4.671264
UPI00019F9BA2 RNA polymerase sigma factor SigA -0.6272685 7.147367 -11.621568 6.20e-06 0.0003930 4.632283
UPI0001D10C9D Carbamoyl phosphate synthase small chain (EC 6.3.5.5) (Carbamoyl phosphate synthetase glutamine chain) 1.1182979 4.832018 11.182474 8.10e-06 0.0004878 4.358132
UPI0006EF6A19 NA 0.7923608 5.513031 10.946746 9.40e-06 0.0005389 4.206252
UPI00019F9AC3 Single-stranded DNA-binding protein 1.6082144 6.980588 10.803748 1.03e-05 0.0005472 4.112454
UPI0003C4FF63 Glycosidase 0.5851856 7.216755 10.784182 1.04e-05 0.0005472 4.099521
UPI0006F0D43B NA -0.8527692 7.931381 -10.549601 1.21e-05 0.0006044 3.942555
UPI0006EEC4E7 NA 0.5580260 6.825247 10.460407 1.28e-05 0.0006044 3.881935
UPI0003C4FE5C 5’-nucleotidase/2’,3’-cyclic phosphodiesterase 0.5569871 5.981301 10.446509 1.30e-05 0.0006044 3.872443
UPI0006CF5989 Choloylglycine hydrolase (EC 3.5.1.24) (Linear amide C-N hydrolase) 0.7759976 6.378880 10.347142 1.38e-05 0.0006221 3.804197
UPI00019F979E NCS2 family permease -0.6511438 5.175233 -10.207719 1.52e-05 0.0006480 3.707317
UPI0001EC2A56 NA -0.4984581 5.902387 -10.181465 1.54e-05 0.0006480 3.688925
UPI0001D10ACA DUF1827 family protein 0.4683423 6.754028 9.619257 2.27e-05 0.0008702 3.283367
UPI00019F9F6D Co-chaperonin GroES (10 kDa chaperonin) (Chaperonin-10) (Cpn10) 0.6702769 7.552614 9.614350 2.27e-05 0.0008702 3.279727
UPI0006F16580 Prolyl oligopeptidase family serine peptidase 0.5693419 5.106987 9.611432 2.28e-05 0.0008702 3.277560
UPI0006F13160 NA -0.5433226 6.081711 -9.537122 2.40e-05 0.0008900 3.222179
UPI0006D25EFF Alpha/beta hydrolase 0.8975537 6.262152 9.370095 2.71e-05 0.0009739 3.096159
UPI0006F00364 23S rRNA (Uracil(1939)-C(5))-methyltransferase RlmD (EC 2.1.1.190) -0.5044046 6.341746 -9.272998 2.90e-05 0.0010155 3.021904
UPI0006EE72EA NA 0.8466036 4.746372 9.112451 3.26e-05 0.0011108 2.897481
UPI0006D290CD NUDIX hydrolase 0.6496231 5.667444 8.816007 4.07e-05 0.0013492 2.662202
UPI0006D05E86 Carbamoyl phosphate synthase large chain (EC 6.3.4.16) (EC 6.3.5.5) (Carbamoyl phosphate synthetase ammonia chain) 0.7790395 4.667275 8.673334 4.54e-05 0.0014035 2.546323
UPI00019F9F6C Chaperonin GroEL (EC 5.6.1.7) (60 kDa chaperonin) (Chaperonin-60) (Cpn60) 0.6472228 8.459526 8.645810 4.63e-05 0.0014035 2.523766
UPI0006D0F543 [Citrate [pro-3S]-lyase] ligase (EC 6.2.1.22) 0.8549000 6.476774 8.637783 4.66e-05 0.0014035 2.517175
UPI00019F9CF0 UPF0342 protein CP352_03820 -0.6682714 6.782959 -8.633198 4.68e-05 0.0014035 2.513408
UPI00019D0189 Protein-export membrane protein SecG -0.9254545 3.978692 -8.581272 4.87e-05 0.0014269 2.470614
UPI00019F9CBD Heat-inducible transcription repressor HrcA 0.5255174 5.795517 8.481382 5.26e-05 0.0015071 2.387619
UPI0006EEC3E6 Glycerate kinase 0.7873579 4.127738 8.441904 5.43e-05 0.0015199 2.354573
UPI00019F97D8 GTPase Der (GTP-binding protein EngA) -0.4550549 6.854532 -8.345894 5.85e-05 0.0015777 2.273615
UPI0006EFA65A ATP-binding cassette domain-containing protein (Sulfate ABC transporter ATP-binding protein) -0.4872194 5.244354 -8.304732 6.05e-05 0.0015777 2.238649
UPI0001B29D61 UvrABC system protein B (Protein UvrB) (Excinuclease ABC subunit B) -0.4979885 5.777026 -8.272971 6.20e-05 0.0015777 2.211561
UPI0006D25A56 Amino acid permease -0.6513254 5.344608 -8.262735 6.26e-05 0.0015777 2.202812
UPI0001B29DE0 N-acetylmuramidase 1.4311514 6.817274 8.261628 6.26e-05 0.0015777 2.201865
dmo <- dm
dmo$UniParc <- rownames(dmo)
dmo <- dmo[,c(ncol(dmo),1:ncol(dmo)-1)]
write.xlsx(dmo, "differentialexpression1_lcrisp_bothstrains.xlsx")

Volcano chart

TOT=nrow(dm)
sig <- subset(dm,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm <- dm[!is.na(dm$P.Value),]

plot(dm$logFC,-log10(dm$P.Value),pch=19,
  xlab="Log2 fold change", ylab="p-value")

points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")

Heatmap of top hits

top <- rownames(head(dm,40))

mx <- m2[rownames(m2) %in% top,]

my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
    ColSideColors=mycols ,  col=my_palette, main="top 20 proteins")

Differential expression 2 - effect of lactic acid in strain D

Strain D samples only.

ss2 <- ss[which(ss$strain=="TRUE"),]

m22 <- m2[,which(colnames(m2) %in% rownames(ss2))]
m22f <- m22[which(unname(apply(m22,1,function(x) { length(which(is.na(x))) } )) <= 1),]
dim(m22)
## [1] 1260    4
dim(m22f)
## [1] 1260    4
design <- model.matrix(~ ss2$LA)
design
##   (Intercept) ss2$LATRUE
## 1           1          1
## 2           1          1
## 3           1          0
## 4           1          0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`ss2$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(m22,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
##        (Intercept) ss2$LATRUE
## Down             1        156
## NotSig           0        883
## Up            1259        221
dm2 <- topTable(fit.reduced,coef=2, number = Inf)

head(dm2,50) %>% kbl(caption = "Top 50 proteins after differential analysis in strain D") %>% kable_paper("hover", full_width = F)
Top 50 proteins after differential analysis in strain D
logFC AveExpr t P.Value adj.P.Val B
UPI0006EFD634 Addiction module toxin RelE 1.5775279 2.705036 24.88410 0.0000125 0.0044616 4.401738
UPI0006EE62B5 FAD-binding oxidoreductase 2.2380965 4.570619 24.08998 0.0000142 0.0044616 4.282673
UPI00019F9AD9 Uncharacterized protein 2.4537498 5.468400 23.09282 0.0000169 0.0044616 4.124448
UPI0006EEA493 LysR family transcriptional regulator -1.6835305 3.718481 -22.69018 0.0000181 0.0044616 4.057635
UPI00019F9F24 Uncharacterized protein -1.3698080 4.614358 -22.45381 0.0000189 0.0044616 4.017592
UPI00019F9974 Aggregation promoting protein 1.6393521 6.492835 21.48404 0.0000227 0.0044616 3.846647
UPI00019F9AC3 Single-stranded DNA-binding protein 1.9690084 7.237968 21.01575 0.0000248 0.0044616 3.760074
UPI0006EE6481 NA -1.4682391 4.442939 -19.77363 0.0000318 0.0050021 3.516666
UPI0006EEFCA3 Iron export ABC transporter permease subunit FetB 1.2286387 4.819856 18.63024 0.0000405 0.0056643 3.273265
UPI0006EFFAEF NA -1.0836296 3.616578 -17.73148 0.0000495 0.0058635 3.067469
UPI0001D10BCD Acetate kinase (EC 2.7.2.1) (Acetokinase) 1.2830067 7.233350 17.58140 0.0000512 0.0058635 3.031768
UPI0006D00323 Cation transporter -1.0577670 4.424871 -17.19590 0.0000560 0.0058807 2.938229
UPI00019F989F DNA-entry nuclease (DNA/RNA non-specific endonuclease) 1.5962839 5.033110 16.67168 0.0000635 0.0061542 2.806613
UPI0006F0ADA9 ABC transporter ATP-binding protein -1.0093253 6.476739 -16.06200 0.0000738 0.0063770 2.646786
UPI0006EF675B Surface layer protein A domain-containing protein 3.5463467 2.860442 15.84365 0.0000780 0.0063770 2.587686
UPI0006EEC3E6 Glycerate kinase 0.9797904 4.106890 15.56909 0.0000838 0.0063770 2.511926
UPI0006D0A27A Uncharacterized protein 1.9741167 6.490131 15.25429 0.0000910 0.0063770 2.423011
UPI0001CA7E10 Site-specific integrase (Tyrosine-type recombinase/integrase) 1.0058834 3.855084 15.09073 0.0000950 0.0063770 2.375924
UPI0001D109BD SEC10/PgrA surface exclusion domain-containing protein 1.7751133 3.684581 15.04665 0.0000962 0.0063770 2.363126
UPI0003C50192 Uncharacterized protein 1.2096006 3.347472 14.58808 0.0001090 0.0066355 2.227256
UPI000280AF2C Uncharacterized protein -0.9435167 6.089776 -14.46334 0.0001128 0.0066355 2.189405
UPI0001B29848 Polysaccharide biosynthesis protein 0.8855193 2.693559 14.25752 0.0001195 0.0066355 2.126091
UPI000280CD19 Bacteriocin biosynthesis cyclodehydratase, SagC family -1.0083906 5.231619 -14.11873 0.0001243 0.0066355 2.082784
UPI0001B29DE0 N-acetylmuramidase 1.8142933 7.168071 14.06155 0.0001264 0.0066355 2.064795
UPI000280B884 CHAP domain-containing protein (Capsid protein) 0.8571945 4.998360 13.61728 0.0001438 0.0067476 1.922024
UPI0006F0683A ABC-F type ribosomal protection protein 0.9249169 3.572097 13.31941 0.0001572 0.0067476 1.823221
UPI0006F10FA9 FAD-dependent oxidoreductase 1.0857286 6.994510 13.30546 0.0001579 0.0067476 1.818532
UPI00019F9838 Lysin 1.0991145 6.436472 13.29654 0.0001583 0.0067476 1.815529
UPI00019F994B Large ribosomal subunit protein bL35 -1.5073564 5.012247 -13.21458 0.0001623 0.0067476 1.787838
UPI00019F9988 Cell wall-active antibiotics response protein -1.3164237 1.849238 -13.16499 0.0001648 0.0067476 1.770987
UPI0001D10AE8 citrate lyase holo-[acyl-carrier protein] synthase (EC 2.7.7.61) -1.0480848 2.713043 -13.04826 0.0001708 0.0067476 1.731036
UPI0006F0FF2E HlyC/CorC family transporter 0.8113734 5.967212 13.03771 0.0001714 0.0067476 1.727403
UPI0003C4ED4B Maltose/maltodextrin transport system permease protein 0.9206299 5.899261 12.83499 0.0001825 0.0069686 1.656970
UPI0001D10A52 GatB/YqeY domain-containing protein 0.9925822 7.166126 12.70414 0.0001902 0.0070452 1.610830
UPI0006EFB04E D-2-hydroxyacid dehydrogenase (Lactate dehydrogenase) 0.8498222 7.949839 12.61401 0.0001957 0.0070452 1.578733
UPI0006EEB579 SCP domain-containing protein 0.8701531 6.155717 12.38305 0.0002108 0.0072987 1.495289
UPI0006EFEF39 Acetate kinase (EC 2.7.2.1) (Acetokinase) 0.9810643 6.319723 12.29672 0.0002168 0.0072987 1.463647
UPI00019F9A03 Integral membrane protein -0.8695559 3.613189 -12.21427 0.0002227 0.0072987 1.433194
UPI0006EF99FA NA 0.9811628 6.016539 12.12911 0.0002290 0.0072987 1.401498
UPI00019F9D2F ABC transporter permease (Efflux ABC transporter, permease protein) (FtsX-like permease family protein) 1.0766064 4.813435 12.03671 0.0002362 0.0072987 1.366827
UPI0006D2A7D6 Beta-phosphoglucomutase (EC 5.4.2.6) 1.2282356 6.177199 12.00608 0.0002386 0.0072987 1.355269
UPI0006D191CC Beta-lactamase family protein (Serine hydrolase) 1.5665935 2.125130 11.94778 0.0002433 0.0072987 1.333175
UPI0006EE9FBF NA 0.7248417 6.022945 11.82255 0.0002538 0.0074363 1.285314
UPI0006EE6EBB Cell surface protein 1.0454276 5.794653 11.66150 0.0002681 0.0075429 1.222936
UPI0001D10B75 ABC transporter ATP-binding protein 0.9575419 6.673853 11.60016 0.0002738 0.0075429 1.198927
UPI0001B29889 Endonuclease III (EC 4.2.99.18) (DNA-(apurinic or apyrimidinic site) lyase) 0.8249882 5.560359 11.58371 0.0002754 0.0075429 1.192464
UPI0006F1480A Adaptor protein MecA -0.7818784 5.465943 -11.40528 0.0002930 0.0076761 1.121721
UPI0001D10CD4 Cof-type HAD-IIB family hydrolase -1.0136306 3.777249 -11.38672 0.0002949 0.0076761 1.114289
UPI0006F0893B NA 1.8324976 4.344964 11.34541 0.0002992 0.0076761 1.097708
UPI0006D0F543 [Citrate [pro-3S]-lyase] ligase (EC 6.2.1.22) 1.0028106 6.987480 11.24916 0.0003096 0.0076761 1.058820
dm2o <- dm2
dm2o$UniParc <- rownames(dm2o)
dm2o <- dm2o[,c(ncol(dm2o),1:ncol(dm2o)-1)]
write.xlsx(dm2o, "differentialexpression2_lcrisp_strainD.xlsx")

Volcano chart

TOT=nrow(dm2)
sig <- subset(dm2,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm2 <- dm2[!is.na(dm2$P.Value),]

mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss2$LA))))


plot(dm2$logFC,-log10(dm2$P.Value),pch=19,
  xlab="Log2 fold change", ylab="p-value")

points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")

Heatmap of top hits

top <- rownames(head(dm2,40))

mx <- m22f[rownames(m22f) %in% top,]

my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
    ColSideColors=mycols ,  col=my_palette, main="top 20 proteins in strain D")

Differential expression 3 - effect of lactic acid in strain A

Strain A samples only.

ss3 <- ss[which(ss$strain=="FALSE"),]

m23 <- m2[,which(colnames(m2) %in% rownames(ss3))]
m23f <- m23[which(unname(apply(m23,1,function(x) { length(which(is.na(x))) } )) <= 1),]
dim(m23)
## [1] 1260    4
dim(m23f)
## [1] 1260    4
design <- model.matrix(~ ss3$LA)
design
##   (Intercept) ss3$LATRUE
## 1           1          1
## 2           1          1
## 3           1          0
## 4           1          0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`ss3$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(m23,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
##        (Intercept) ss3$LATRUE
## Down             0        226
## NotSig           1        808
## Up            1259        226
dm3 <- topTable(fit.reduced,coef=2, number = Inf)

head(dm3,50) %>% kbl(caption = "Top 50 proteins after differential analysis in strain A") %>% kable_paper("hover", full_width = F)
Top 50 proteins after differential analysis in strain A
logFC AveExpr t P.Value adj.P.Val B
UPI00019F9AD9 Uncharacterized protein 3.2166788 5.682400 30.04365 0.0000021 0.0014603 6.015127
UPI0006EF450B Large ribosomal subunit protein bL33 1.8487318 4.993582 27.46555 0.0000032 0.0014603 5.661758
UPI000312EB68 ABC transporter, ATP-binding protein -2.6268776 4.725038 -25.77893 0.0000043 0.0014603 5.401901
UPI0006D0B4B4 NAD(P)H-binding protein 2.0050191 5.864568 24.46485 0.0000054 0.0014603 5.181434
UPI0001B298F1 Bacteriocin biosynthesis docking scaffold, SagD family (Streptolysin associated protein SagD) -1.5407203 4.422143 -23.24604 0.0000068 0.0014603 4.961245
UPI0006D1560C NUDIX domain-containing protein 1.4126586 4.025241 22.64043 0.0000077 0.0014603 4.845730
UPI0006EEFD55 NA -1.4050667 3.005881 -22.34940 0.0000081 0.0014603 4.788687
UPI0006F08471 Bifunctional metallophosphatase/5’-nucleotidase -1.8300559 5.380552 -19.81708 0.0000139 0.0017276 4.246134
UPI00019F9D2D UPF0122 protein AEL95_04395 -1.6118108 4.487226 -19.64729 0.0000145 0.0017276 4.206515
UPI0006F070A6 Glycosyltransferase family 2 protein -1.4613312 4.398040 -19.50830 0.0000150 0.0017276 4.173753
UPI0001B29C39 Multicopper oxidase 1.4883553 6.472084 19.47177 0.0000151 0.0017276 4.165092
UPI0003C50192 Uncharacterized protein 1.2275060 3.916727 18.28665 0.0000200 0.0020995 3.872375
UPI00019F99E2 Transcriptional regulator -1.3615736 3.707602 -17.80118 0.0000226 0.0021236 3.745503
UPI0006F18862 LacI family transcriptional regulator -1.1175177 4.008912 -17.52154 0.0000242 0.0021236 3.670463
UPI00019F9C5D Cell division protein SepF -1.2462487 6.063781 -17.20694 0.0000263 0.0021236 3.584266
UPI0001D10921 FeS cluster biogenesis domain-containing protein 1.1867401 6.073076 17.10660 0.0000270 0.0021236 3.556369
UPI00019F9AC3 Single-stranded DNA-binding protein 1.2474204 6.723209 16.70320 0.0000300 0.0021243 3.442177
UPI0006EE9387 Abasic site processing protein (EC 3.4.-.-) 1.0964979 4.123204 16.66117 0.0000303 0.0021243 3.430085
UPI0006F07C1D NCS2 family permease 1.0872432 2.719199 16.18323 0.0000346 0.0022925 3.289994
UPI0006F0CAFE Cell surface protein 1.0240510 7.525658 15.83775 0.0000381 0.0023780 3.185623
UPI0003C4DFF9 Uncharacterized protein 1.0672302 3.807732 15.50272 0.0000419 0.0023780 3.081816
UPI0001B29D0C Uncharacterized protein -1.0288092 5.893181 -15.48604 0.0000421 0.0023780 3.076576
UPI0003C51696 Integral membrane protein 0.9557050 4.431694 15.24233 0.0000452 0.0023780 2.999303
UPI0006EE6636 Sucrose-6-phosphate hydrolase (EC 3.2.1.26) (Invertase) -1.3261034 4.615558 -15.23366 0.0000453 0.0023780 2.996528
UPI0006EFB04E D-2-hydroxyacid dehydrogenase (Lactate dehydrogenase) 0.9423065 8.465795 14.89366 0.0000501 0.0025248 2.886220
UPI000280CD19 Bacteriocin biosynthesis cyclodehydratase, SagC family -1.4082152 3.425146 -14.33902 0.0000593 0.0028753 2.699908
UPI0006F14A06 phosphomevalonate kinase (EC 2.7.4.2) -0.9542321 3.766227 -13.99565 0.0000661 0.0030844 2.580405
UPI0006F0D43B NA -0.9936575 7.599597 -13.86314 0.0000690 0.0031029 2.533402
UPI00004C6CBB DNA-directed RNA polymerase subunit omega (RNAP omega subunit) (EC 2.7.7.6) (RNA polymerase omega subunit) (Transcriptase subunit omega) -1.1693329 6.197811 -13.73136 0.0000719 0.0031259 2.486156
UPI0001EF562B Hd superfamily hydrolase 0.8197912 6.979256 13.41768 0.0000797 0.0032340 2.371621
UPI0003C50430 Appr-1-p processing domain protein 0.9996051 4.854561 13.39465 0.0000803 0.0032340 2.363095
UPI00019F9E66 6-phospho-alpha-glucosidase 1.0624082 5.210872 13.24740 0.0000844 0.0032340 2.308194
UPI0006F1BB34 Aminopeptidase P family protein 0.8736938 5.539827 13.21997 0.0000852 0.0032340 2.297890
UPI0006EF73D7 AAA domain-containing protein (ATP-dependent Clp protease ATP-binding subunit) 1.3235494 6.849573 13.14762 0.0000873 0.0032340 2.270605
UPI0006D1F499 Uncharacterized protein 1.5986464 3.425981 12.65596 0.0001033 0.0036856 2.080706
UPI0003C4F885 Nucleoside-diphosphate-sugar epimerase 1.1632576 4.471481 12.60242 0.0001053 0.0036856 2.059545
UPI0006F1480A Adaptor protein MecA -1.0513464 4.896785 -12.31614 0.0001166 0.0039702 1.944699
UPI0006EEB579 SCP domain-containing protein 0.7383021 6.341498 12.12000 0.0001252 0.0041504 1.864343
UPI0001D1087D Tyrosine recombinase XerD -0.8564454 5.426426 -11.77940 0.0001420 0.0045869 1.721437
UPI00019F979E NCS2 family permease -0.7395568 5.551160 -11.66332 0.0001483 0.0046721 1.671728
UPI0006D1F1A0 Antibiotic biosynthesis monooxygenase 0.7354339 6.650952 11.45713 0.0001605 0.0047642 1.582124
UPI0006CFE894 MgtC/SapB family protein 0.8257886 3.340397 11.44257 0.0001614 0.0047642 1.575737
UPI0001B29C41 NA -1.0330207 4.934275 -11.39417 0.0001644 0.0047642 1.554429
UPI0006EFBCD1 NA 0.9611916 4.743661 11.36366 0.0001664 0.0047642 1.540948
UPI0006EFEF39 Acetate kinase (EC 2.7.2.1) (Acetokinase) 0.8335285 6.468718 11.12373 0.0001828 0.0051175 1.433598
UPI0001D109E5 ABC transporter ATP-binding protein -0.8022680 5.060041 -11.06072 0.0001874 0.0051330 1.405006
UPI000280B884 CHAP domain-containing protein (Capsid protein) 0.7125756 5.242446 11.00187 0.0001918 0.0051431 1.378150
UPI0006CFD7B7 Aquaporin (MIP family channel protein) -1.0192124 2.918010 -10.91104 0.0001990 0.0051492 1.336403
UPI00019F9D22 LexA repressor (EC 3.4.21.88) -1.2120114 5.374712 -10.89515 0.0002002 0.0051492 1.329061
UPI0006EE903B Amino acid ABC transporter permease -1.3003315 4.846551 -10.80114 0.0002080 0.0051638 1.285407
dm3o <- dm3
dm3o$UniParc <- rownames(dm3o)
dm3o <- dm3o[,c(ncol(dm3o),1:ncol(dm3o)-1)]
write.xlsx(dm3o, "differentialexpression3_lcrisp_strainA.xlsx")

Volcano chart

TOT=nrow(dm3)
sig <- subset(dm3,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm3 <- dm3[!is.na(dm2$P.Value),]

mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss3$LA))))

plot(dm3$logFC,-log10(dm3$P.Value),pch=19,
  xlab="Log2 fold change", ylab="p-value")

points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")

Heatmap of top hits

top <- rownames(head(dm3,40))

mx <- m23f[rownames(m23f) %in% top,]

my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
    ColSideColors=mycols ,  col=my_palette, main="top 20 proteins in strain A")

Comparison

Make a Euler diagram of the genes.

dm2up <- rownames(subset(dm2,adj.P.Val<0.05 & logFC > 0))
dm2dn <- rownames(subset(dm2,adj.P.Val<0.05 & logFC < 0))

dm3up <- rownames(subset(dm3,adj.P.Val<0.05 & logFC > 0))
dm3dn <- rownames(subset(dm3,adj.P.Val<0.05 & logFC < 0))

v1 <- list("Strain D up"=dm2up, "Strain D down"=dm2dn,
  "Strain A up"=dm3up,"Strain A down"=dm3dn)

plot(euler(v1),quantities = TRUE)

Make a scatter plotr of the ranks.

While the results are not completely concoordant between the strains, there is a lot of similarity.

mg <- merge(dm2,dm3,by=0)
rownames(mg) <- mg$Row.names
mg <- mg[,c("t.x","t.y")]
head(mg)
##                                                                                     t.x
## UPI000045224F Histidine phosphatase family protein (Phosphoglycerate mutase)  0.5172830
## UPI0000452250 Histidine phosphatase family protein (Phosphoglycerate mutase)  4.5393027
## UPI00004B2864 Glyceraldehyde-3-phosphate dehydrogenase (EC 1.2.1.-)           2.6003691
## UPI00004C6804 Small ribosomal subunit protein bS18                            2.2663724
## UPI00004C690C Small ribosomal subunit protein uS10                           -1.9979086
## UPI00004C6915 Large ribosomal subunit protein uL29                            0.1770106
##                                                                                    t.y
## UPI000045224F Histidine phosphatase family protein (Phosphoglycerate mutase)  4.181907
## UPI0000452250 Histidine phosphatase family protein (Phosphoglycerate mutase) -2.052452
## UPI00004B2864 Glyceraldehyde-3-phosphate dehydrogenase (EC 1.2.1.-)           3.745995
## UPI00004C6804 Small ribosomal subunit protein bS18                            2.391652
## UPI00004C690C Small ribosomal subunit protein uS10                           -3.518867
## UPI00004C6915 Large ribosomal subunit protein uL29                           -1.621014
plot(mg,pch=19,cex=0.4,main="limma t-stat",xlab="Strain D",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)

mylm <- lm(mg)
abline(mylm,lty=2,col="red",lwd=2)

mylm
## 
## Call:
## lm(formula = mg)
## 
## Coefficients:
## (Intercept)          t.y  
##      0.4057       0.4048
cor.test(mg[,1],mg[,2])
## 
##  Pearson's product-moment correlation
## 
## data:  mg[, 1] and mg[, 2]
## t = 16.523, df = 1258, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3758233 0.4666273
## sample estimates:
##       cor 
## 0.4222841
rmg <- apply(mg,2, function(x) {
  rnk <- rank(x)
  NUMNEG=length(which(x<0))
  rnk <- rnk - NUMNEG
  return(rnk)
})

plot(rmg,pch=19,cex=0.4,main="rank of limma t-stat",xlab="Strain D",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)

mylm <- lm(as.data.frame(rmg))

abline(mylm,lty=2,col="red",lwd=2)

mylm
## 
## Call:
## lm(formula = as.data.frame(rmg))
## 
## Coefficients:
## (Intercept)          t.y  
##     23.0023       0.4469
cor.test(rmg[,1],rmg[,2])
## 
##  Pearson's product-moment correlation
## 
## data:  rmg[, 1] and rmg[, 2]
## t = 17.719, df = 1258, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4015929 0.4900377
## sample estimates:
##       cor 
## 0.4469067
tr <- nrow(subset(mg,t.x>0 & t.y>0))
br <- nrow(subset(mg,t.x>0 & t.y<0))
tl <- nrow(subset(mg,t.x<0 & t.y>0))
bl <- nrow(subset(mg,t.x<0 & t.y<0))

tr
## [1] 402
br
## [1] 240
tl
## [1] 204
bl
## [1] 414
concordant = tr + bl
message(paste("concordant proteins:",concordant))
## concordant proteins: 816
discordant = br + tl
message(paste("discordant proteins:",discordant))
## discordant proteins: 444

Lactobacillus iners

Read in data

x2 <- read.table("20240611_164801_P24_0717_E1_PaluaE_Updated_Report_iners_only_V2.tsv",
  header=TRUE,fill=TRUE,sep="\t",quote = "")

rownames(x2) <- paste(x2$PG.ProteinGroups,x2$PG.ProteinDescriptions,x2$PG.Genes,sep="|")

head(x2,2)
##                                         PG.ProteinGroups PG.ProteinAccessions
## A0A6G5W5S4|pullulanase|glgUi                  A0A6G5W5S4           A0A6G5W5S4
## A0A6G5W607|pullulanase (Fragment)|glgUi       A0A6G5W607           A0A6G5W607
##                                         PG.Genes PG.ProteinDescriptions
## A0A6G5W5S4|pullulanase|glgUi               glgUi            pullulanase
## A0A6G5W607|pullulanase (Fragment)|glgUi    glgUi pullulanase (Fragment)
##                                         PG.UniProtIds
## A0A6G5W5S4|pullulanase|glgUi               A0A6G5W5S4
## A0A6G5W607|pullulanase (Fragment)|glgUi    A0A6G5W607
##                                                                                                                                    PG.FastaHeaders
## A0A6G5W5S4|pullulanase|glgUi                       >tr|A0A6G5W5S4|A0A6G5W5S4_9LACO pullulanase OS=Lactobacillus iners OX=147802 GN=glgUi PE=3 SV=1
## A0A6G5W607|pullulanase (Fragment)|glgUi >tr|A0A6G5W607|A0A6G5W607_9LACO pullulanase (Fragment) OS=Lactobacillus iners OX=147802 GN=glgUi PE=3 SV=1
##                                         X.1..AST1_JS_20240405_P24_0717_E1_S08.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.2..AST1_JS_20240405_P24_0717_E1_S16.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.3..AST1_JS_20240405_P24_0717_E1_S07.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.4..AST1_JS_20240405_P24_0717_E1_S15.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.5..AST1_JS_20240405_P24_0717_E1_S06_20240426003324.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                                            14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                                166
##                                         X.6..AST1_JS_20240405_P24_0717_E1_S14.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.7..AST1_JS_20240405_P24_0717_E1_S05.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.8..AST1_JS_20240405_P24_0717_E1_S13.raw.PG.NrOfPrecursorsMeasured
## A0A6G5W5S4|pullulanase|glgUi                                                                             14
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                 166
##                                         X.1..AST1_JS_20240405_P24_0717_E1_S08.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         280.6738
## A0A6G5W607|pullulanase (Fragment)|glgUi                                             1004.2641
##                                         X.2..AST1_JS_20240405_P24_0717_E1_S16.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         933.0865
## A0A6G5W607|pullulanase (Fragment)|glgUi                                             2914.1511
##                                         X.3..AST1_JS_20240405_P24_0717_E1_S07.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         270.0818
## A0A6G5W607|pullulanase (Fragment)|glgUi                                              895.5801
##                                         X.4..AST1_JS_20240405_P24_0717_E1_S15.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         1255.454
## A0A6G5W607|pullulanase (Fragment)|glgUi                                              3614.425
##                                         X.5..AST1_JS_20240405_P24_0717_E1_S06_20240426003324.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                                         576.503
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                             3977.755
##                                         X.6..AST1_JS_20240405_P24_0717_E1_S14.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         708.0147
## A0A6G5W607|pullulanase (Fragment)|glgUi                                             3197.5947
##                                         X.7..AST1_JS_20240405_P24_0717_E1_S05.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         1176.233
## A0A6G5W607|pullulanase (Fragment)|glgUi                                              3996.819
##                                         X.8..AST1_JS_20240405_P24_0717_E1_S13.raw.PG.Quantity
## A0A6G5W5S4|pullulanase|glgUi                                                         785.5422
## A0A6G5W607|pullulanase (Fragment)|glgUi                                             3855.5637
x3 <- x2[,15:22]

colnames(x3) <- sapply(strsplit(gsub("_E1_"," ",colnames(x3))," "),"[[",2)
colnames(x3) <- substr(colnames(x3), 0, 3)

head(x3,2)
##                                               S08       S16      S07      S15
## A0A6G5W5S4|pullulanase|glgUi             280.6738  933.0865 270.0818 1255.454
## A0A6G5W607|pullulanase (Fragment)|glgUi 1004.2641 2914.1511 895.5801 3614.425
##                                              S06       S14      S05       S13
## A0A6G5W5S4|pullulanase|glgUi             576.503  708.0147 1176.233  785.5422
## A0A6G5W607|pullulanase (Fragment)|glgUi 3977.755 3197.5947 3996.819 3855.5637
x3 <- log(x3)

head(x3,2)
##                                              S08      S16      S07      S15
## A0A6G5W5S4|pullulanase|glgUi            5.637193 6.838498 5.598725 7.135253
## A0A6G5W607|pullulanase (Fragment)|glgUi 6.912010 7.977334 6.797472 8.192688
##                                              S06      S14      S05      S13
## A0A6G5W5S4|pullulanase|glgUi            6.356981 6.562465 7.070072 6.666374
## A0A6G5W607|pullulanase (Fragment)|glgUi 8.288473 8.070154 8.293254 8.257273
message("Number of NA values in each row")
## Number of NA values in each row
table(unname(apply(x3,1,function(x) { length(which(is.na(x))) } )))
## 
##    0    1    2    3    4    5    6    7    8 
## 1014   90   60   74  263   84   67   39    1
message("Number of NA values in each column")
## Number of NA values in each column
apply(x3,2,function(x) { length(which(is.na(x))) } )
## S08 S16 S07 S15 S06 S14 S05 S13 
## 314 318 205 201 460 454 322 313
message("Remove rows with any NA values")
## Remove rows with any NA values
x3 <- x3[apply(x3,1,function(x) { length(which(is.na(x)))<1 } ),]
dim(x3)
## [1] 1014    8
# remove human proteins with "Cont_" in the name
x3 <- x3[grep("Cont_",rownames(x3),invert=TRUE),]

write.xlsx(m3, "quantifications_liners.xlsx")

Curate the sample sheet

ss2 <- read.table("samplesheet2.tsv")

ss2 %>% kbl(caption = "Sample sheet for iners") %>% kable_paper("hover", full_width = F)
Sample sheet for iners
condition replicate LA strain
S08 LI_S_LA 1 TRUE FALSE
S16 Li_S_LA 2 TRUE FALSE
S07 Li_S_UT 1 FALSE FALSE
S15 Li_S_UT 2 FALSE FALSE
S06 Li_A_LA 1 TRUE TRUE
S14 Li_A_LA 2 TRUE TRUE
S05 Li_A_UT 1 FALSE TRUE
S13 Li_A_UT 2 FALSE TRUE

MDS analysis

This helps to see the similarities and differences between samples.

mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(ss2$LA)+1)))

plot(cmdscale(dist(t(x3))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=20-(as.numeric(ss2$strain)+1), cex=4,
  col=mycols,main="L. iners")

text(cmdscale(dist(t(x3))), labels=colnames(x3) )

legend("bottomright", legend=c("lac", "ctl"),
       fill=c("pink","lightblue"),  cex=1)

mtext("Circle='S/7', Diamond='A'")

Differential expression 4 - effect of lactic acid, both strains

Here we are using limma, which was originally designed for microarray data, but should work okay if the data are normally distributed.

We are looking for proteins whose abundance is changed by the treatment: lactic acid versus control. Cancel the effect of strain.

The full differential expression results are available as a Excel file called “differentialexpression.xlsx”.

design <- model.matrix(~ ss2$strain + ss2$LA)
design
##   (Intercept) ss2$strainTRUE ss2$LATRUE
## 1           1              0          1
## 2           1              0          1
## 3           1              0          0
## 4           1              0          0
## 5           1              1          1
## 6           1              1          1
## 7           1              1          0
## 8           1              1          0
## attr(,"assign")
## [1] 0 1 2
## attr(,"contrasts")
## attr(,"contrasts")$`ss2$strain`
## [1] "contr.treatment"
## 
## attr(,"contrasts")$`ss2$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(x3,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
##        (Intercept) ss2$strainTRUE ss2$LATRUE
## Down             0            124          0
## NotSig           0            718        942
## Up             946            104          4
dm <- topTable(fit.reduced,coef=3, number = Inf)

head(dm,50) %>% kbl(caption = "L. iners Top 50 proteins after differential analysis") %>% kable_paper("hover", full_width = F)
L. iners Top 50 proteins after differential analysis
logFC AveExpr t P.Value adj.P.Val B
A0A6G7BAD8;C8PBF9;E1NP85;E1NU18|Large ribosomal subunit protein bL17|rplQ 0.6084891 9.658645 8.779976 0.0000426 0.0322545 2.6574575
E1NTJ7|ATPase family associated with various cellular activities (AAA)|tlyB 1.1235433 6.859346 8.017863 0.0000775 0.0322545 2.1328950
C8PBD4|Large ribosomal subunit protein uL23|rplW 0.5468029 9.969843 7.495354 0.0001201 0.0322545 1.7379522
iRT_Peptides_Fusion|| 1.3133707 9.592667 7.349541 0.0001364 0.0322545 1.6221915
E1NQ38|L-lactate dehydrogenase|ldh 0.5238036 10.219760 6.416689 0.0003225 0.0610236 0.8190545
C8PAK4|ATP-dependent helicase/nuclease subunit A|addA -0.6370563 5.890215 -5.967885 0.0005047 0.0682333 0.3908851
A0A6G7BA31;E1NPA9;E1NU42|Large ribosomal subunit protein uL2|rplB 0.5498280 9.858048 5.490973 0.0008343 0.0682333 -0.0966346
A0A6G7B7J6;E1NP20|DNA replication initiation control protein YabA;Initiation-control protein YabA|yabA;HMPREF9212_0021 0.8275117 7.026795 5.377919 0.0009439 0.0682333 -0.2173414
A0A6G7BAK1;C8PDK6;E1NN69;E1NU70|Small ribosomal subunit protein uS4|rpsD 0.4734114 10.053833 5.360036 0.0009627 0.0682333 -0.2366188
A0A6G7B9K9;C8PDM0;E1NNA3;E1NSY3|ATP synthase subunit c|atpE 0.6389176 9.091449 5.319976 0.0010062 0.0682333 -0.2799886
A0A6G7B7L2;C8PCC1;E1NQZ6;E1NTA8|Glycerol-3-phosphate acyltransferase|plsY 1.5830470 6.119509 5.243050 0.0010961 0.0682333 -0.3639854
A0A6G7B8A1;C8PD20;E1NMV0;E1NTM3|Probable transcriptional regulatory protein G6Z83_02215;Probable transcriptional regulatory protein HMPREF0520_0990;Probable transcriptional regulatory protein HMPREF9212_1469;Probable transcriptional regulatory protein HMPREF9211_1381|G6Z83_02215;HMPREF0520_0990;HMPREF9212_1469;HMPREF9211_1381 0.5924151 8.842778 5.204975 0.0011438 0.0682333 -0.4059101
A0A6G7B9V3;C8PBD2;E1NPB2;E1NU45|Large ribosomal subunit protein uL3|rplC 0.4220668 9.786631 5.201545 0.0011483 0.0682333 -0.4096988
A0A6G7B9J2|YSIRK-type signal peptide-containing protein|G6Z83_05695 0.5993022 10.063905 5.199370 0.0011511 0.0682333 -0.4121015
A0A6G7BAH4|YSIRK-type signal peptide-containing protein|G6Z83_05700 0.6804659 10.195850 5.198324 0.0011524 0.0682333 -0.4132575
A0A6G7BA20;C8PBE7;E1NP97;E1NU30|Large ribosomal subunit protein uL6|rplF 0.3375948 10.288888 5.159023 0.0012046 0.0682333 -0.4568189
A0A6G7B6W1;C8PB11;E1NM32;E1NSA1|Small ribosomal subunit protein bS6|rpsF 0.4731390 8.926827 5.143283 0.0012262 0.0682333 -0.4743350
A0A6G7BA57|Lysozyme|G6Z83_05710 -0.9404139 6.806758 -5.028137 0.0013980 0.0734725 -0.6036964
C8PBS7;E1NPZ2|PTS sorbitol transporter subunit IIA;PTS system glucitol/sorbitol-specific IIA component|HMPREF0520_0547;HMPREF9212_0324 0.4341482 7.442156 4.850936 0.0017168 0.0838591 -0.8070027
A0A6G7BAS1;C8PBE1;E1NPA3;E1NU36|Small ribosomal subunit protein uS17|rpsQ 0.5430052 7.602969 4.815873 0.0017890 0.0838591 -0.8478424
A0A6G7B8R5;E1NTM4|Fic family protein;Death-on-curing family protein|G6Z83_02210;HMPREF9211_1382 0.3945682 6.146693 4.722650 0.0019977 0.0838591 -0.9574121
C8PB12|DNA gyrase subunit A|gyrA 1.4918476 4.801755 4.714146 0.0020180 0.0838591 -0.9674787
A0A6G7BAQ9|RNA methyltransferase|G6Z83_04110 -0.3641053 6.593073 -4.705532 0.0020389 0.0838591 -0.9776876
A0A6G7B9M4;C8PD80;E1NUD7|Uncharacterized protein|G6Z83_02730;HMPREF0520_1050;HMPREF9211_0800 0.8755442 5.438394 4.576239 0.0023817 0.0928538 -1.1323990
A0A6G7BB71;C8PBB0;E1NM82;E1NTT7|Transcription elongation factor GreA|greA 0.4991568 6.834250 4.551655 0.0024539 0.0928538 -1.1621307
A0A6G7B973;C8PDI4;E1NN42;E1NS17|Cell division protein SepF|sepF 0.4572304 8.112222 4.453854 0.0027655 0.1006214 -1.2814001
A0A6G7B9N0;C8PBW1;E1NLZ6|Phosphoglycerate kinase|pgk 0.4365787 10.203958 4.419923 0.0028836 0.1010317 -1.3231486
A0A6G7BB56;C8PBG4;E1NP80;E1NU13|Large ribosomal subunit protein uL13|rplM 0.5070600 10.259201 4.379138 0.0030329 0.1024679 -1.3735841
C8PAS5|Trigger factor|tig 0.3167440 10.045474 4.337782 0.0031930 0.1030167 -1.4250057
A0A6G7B750;C8PCC5;E1NQZ1;E1NTB2|PAS domain-containing protein;PAS domain S-box protein;PAS domain S-box protein;PAS domain S-box protein|G6Z83_00585;HMPREF0520_0745;HMPREF9212_0181;HMPREF9211_0053 0.5815868 5.890258 4.319443 0.0032669 0.1030167 -1.4478984
A0A6G7B9W6;C8PC06;E1NMS9;E1NV33|Na+/H+ antiporter NhaC|nhaC 0.4360909 5.913365 4.279854 0.0034330 0.1047622 -1.4975065
A0A6G7B8M0;C8PAQ7;E1NQA0;E1NSL0|Pyruvate kinase|pyk 0.3755278 10.003065 4.164135 0.0039739 0.1112275 -1.6439838
A0A6G7B8K3;C8PAS6;E1NQC3;E1NSN2|Elongation factor Tu|tuf 0.3852240 9.994231 4.154819 0.0040213 0.1112275 -1.6558714
A0A6G7B825;E1NQZ0;E1NSK5|2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase|gpmA 0.5031857 9.785226 4.139189 0.0041023 0.1112275 -1.6758476
A0A6G7BAN1|HIT family protein|G6Z83_06610 1.0083528 5.760342 4.120785 0.0041999 0.1112275 -1.6994192
E1NUI0|Alpha amylase, catalytic domain protein|dexB -0.3473133 7.378849 -4.114695 0.0042328 0.1112275 -1.7072311
A0A6G7B7N2;C8PCE4;E1NQX2;E1NSI6|Small ribosomal subunit protein uS7|rpsG 0.4094128 9.451998 4.088877 0.0043752 0.1115655 -1.7404162
E1NUT2|GA module|HMPREF9211_0601 1.8476468 8.013496 4.070212 0.0044815 0.1115655 -1.7644741
E1NMM8|X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain protein|HMPREF9212_0748 0.7975769 6.119571 4.025690 0.0047465 0.1132871 -1.8220858
E1NLW5;E1NV59|UbiC transcription regulator-associated domain protein|HMPREF9212_0654;HMPREF9211_0682 0.3865731 6.967498 4.018615 0.0047902 0.1132871 -1.8312699
C8PAY8|6-phosphogluconate dehydrogenase, decarboxylating|gnd -0.5328312 7.810156 -3.917734 0.0054627 0.1260418 -1.9630915
A0A6G7BHU2;C8PBF1;E1NP93;E1NU26|Large ribosomal subunit protein uL15|rplO 0.5058773 9.404662 3.882563 0.0057208 0.1274243 -2.0094241
A0A6G7BB47;C8PBI5|Large ribosomal subunit protein bL33|rpmG -0.5143169 7.192931 -3.873159 0.0057920 0.1274243 -2.0218467
A0A6G7BA62;C8PB55;E1NM62|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_06665;HMPREF0520_0325;HMPREF9212_0412 -0.5103978 6.765337 -3.853956 0.0059405 0.1277202 -2.0472530
A0A6G7B8M6;C8PAJ9|Isopentenyl-diphosphate delta-isomerase|fni 0.9213771 5.284632 3.804495 0.0063422 0.1333277 -2.1129545
A0A6G7BAE9;C8PBE9;E1NP95;E1NU28|Small ribosomal subunit protein uS5|rpsE 0.4204507 9.926902 3.773670 0.0066075 0.1358837 -2.1540903
A0A6G7B7H4|PTS mannose/fructose/sorbose transporter family subunit IID|G6Z83_00375 0.4022195 9.734485 3.738419 0.0069256 0.1393956 -2.2013074
A0A6G7B9U7;C8PBX7;E1NLX2|Peptidoglycan-binding protein LysM;Uncharacterized protein;Uncharacterized protein|G6Z83_05375;HMPREF0520_0597;HMPREF9212_0664 -0.7078563 5.028171 -3.709912 0.0071950 0.1418008 -2.2396265
A0A6G7B9W9;C8PBW0;E1NLZ7;E1NTI0|Glyceraldehyde-3-phosphate dehydrogenase|gap 0.5066283 9.603459 3.661947 0.0076742 0.1469933 -2.3043733
C8PAF4|E1-E2 ATPase|HMPREF0520_0074 -0.4488300 5.967501 -3.644885 0.0078529 0.1469933 -2.3274864
dm4 <- dm
write.xlsx(dm4, "differentialexpression4_liners_bothstrains.xlsx")

Volcano chart

TOT=nrow(dm)
sig <- subset(dm,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")
dm <- dm[!is.na(dm$P.Value),]

plot(dm$logFC,-log10(dm$P.Value),pch=19,
  xlab="Log2 fold change", ylab="p-value")

points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")

Heatmap of top hits

top <- rownames(head(dm,40))

mx <- x3[rownames(x3) %in% top,]

my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
    ColSideColors=mycols ,  col=my_palette, main="top 20 proteins")

Differential expression 5 - effect of lactic acid in strain S (7)

Strain S/7 samples only.

sx2 <- ss2[which(ss2$strain=="FALSE"),]

x32 <- x3[,which(colnames(x3) %in% rownames(sx2))]

design <- model.matrix(~ sx2$LA)
design
##   (Intercept) sx2$LATRUE
## 1           1          1
## 2           1          1
## 3           1          0
## 4           1          0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`sx2$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(x32,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
##        (Intercept) sx2$LATRUE
## Down             0          0
## NotSig           0        946
## Up             946          0
dm5 <- topTable(fit.reduced,coef=2, number = Inf)

head(dm5,50) %>% kbl(caption = "L. iners Top 50 proteins after differential analysis in strain S") %>% kable_paper("hover", full_width = F)
L. iners Top 50 proteins after differential analysis in strain S
logFC AveExpr t P.Value adj.P.Val B
A0A6G7B883;C8PCJ3;E1NMC3|UPF0297 protein G6Z83_01885;UPF0297 protein HMPREF0520_0813;UPF0297 protein HMPREF9212_1332|G6Z83_01885;HMPREF0520_0813;HMPREF9212_1332 1.6588334 5.324599 8.231884 0.0012163 0.710556 -3.966573
A0A6G7B973;C8PDI4;E1NN42;E1NS17|Cell division protein SepF|sepF 0.6596861 8.111005 5.723858 0.0046905 0.710556 -4.018834
A0A6G7B7L2;C8PCC1;E1NQZ6;E1NTA8|Glycerol-3-phosphate acyltransferase|plsY 1.0450400 6.207543 5.405260 0.0057639 0.710556 -4.030131
A0A6G7BA56;C8PBJ1;E1NTX9|Large ribosomal subunit protein bL12|rplL 0.6592185 9.618330 5.125944 0.0069620 0.710556 -4.041461
A0A6G7BAD8;C8PBF9;E1NP85;E1NU18|Large ribosomal subunit protein bL17|rplQ 0.5629366 9.660954 4.932765 0.0079715 0.710556 -4.050202
A0A6G7BH83|Uncharacterized protein|G6Z83_05035 1.3202032 5.661840 4.705422 0.0093983 0.710556 -4.061572
E1NTJ7|ATPase family associated with various cellular activities (AAA)|tlyB 0.9606188 7.585307 4.695050 0.0094705 0.710556 -4.062122
C8PAK4|ATP-dependent helicase/nuclease subunit A|addA -0.7805417 5.749606 -4.591513 0.0102297 0.710556 -4.067759
A0A6G7B8M6;C8PAJ9|Isopentenyl-diphosphate delta-isomerase|fni 1.3708782 5.322028 4.256452 0.0132486 0.710556 -4.088122
A0A6G7B9E7;C8PAA6;E1NQF2;E1NSV3|Transcriptional repressor NrdR|nrdR 0.4808854 6.695247 4.239371 0.0134296 0.710556 -4.089255
C8PB12|DNA gyrase subunit A|gyrA 1.2194640 4.929902 4.212096 0.0137248 0.710556 -4.091085
C8PBD4|Large ribosomal subunit protein uL23|rplW 0.4609073 9.955422 4.155785 0.0143595 0.710556 -4.094944
A0A6G7B4A7|Uncharacterized protein|G6Z83_01810 2.0419336 5.210420 4.060867 0.0155116 0.710556 -4.101709
C8PBT3|Peptidase, M16 family|HMPREF0520_0553 0.5473418 6.047777 4.046227 0.0156991 0.710556 -4.102782
A0A6G7B9K9;C8PDM0;E1NNA3;E1NSY3|ATP synthase subunit c|atpE 0.6151739 9.283886 3.907242 0.0176218 0.710556 -4.113388
A0A6G7BB71;C8PBB0;E1NM82;E1NTT7|Transcription elongation factor GreA|greA 0.6165167 7.070780 3.865705 0.0182505 0.710556 -4.116710
iRT_Peptides_Fusion|| 1.0806311 9.669933 3.698586 0.0210686 0.710556 -4.130838
E1NN99|ATP synthase subunit delta|atpH 1.4109078 5.362340 3.667325 0.0216520 0.710556 -4.133622
A0A6G7B6W1;C8PB11;E1NM32;E1NSA1|Small ribosomal subunit protein bS6|rpsF 0.4235001 8.763806 3.651958 0.0219459 0.710556 -4.135008
A0A6G7BA57|Lysozyme|G6Z83_05710 -1.1404348 6.324888 -3.594832 0.0230811 0.710556 -4.140261
A0A6G7B927;C8PAI7;E1NUN8|ABC-2 transporter permease;Uncharacterized protein;Uncharacterized protein|G6Z83_03795;HMPREF0520_0107;HMPREF9211_1507 -0.9107605 6.857402 -3.517841 0.0247239 0.710556 -4.147599
E1NQ38|L-lactate dehydrogenase|ldh 0.4118918 10.201247 3.504271 0.0250277 0.710556 -4.148924
C8PB77|HTH merR-type domain-containing protein|HMPREF0520_0347 0.5619474 6.585859 3.489783 0.0253570 0.710556 -4.150350
A0A6G7B8A1;C8PD20;E1NMV0;E1NTM3|Probable transcriptional regulatory protein G6Z83_02215;Probable transcriptional regulatory protein HMPREF0520_0990;Probable transcriptional regulatory protein HMPREF9212_1469;Probable transcriptional regulatory protein HMPREF9211_1381|G6Z83_02215;HMPREF0520_0990;HMPREF9212_1469;HMPREF9211_1381 0.6767054 8.785173 3.331598 0.0293093 0.710556 -4.166660
A0A6G7BAC2;C8PAB2;E1NQE6;E1NSU3|Large ribosomal subunit protein bL20|rplT 0.3920754 9.634431 3.291841 0.0304149 0.710556 -4.170983
A0A6G7BA31;E1NPA9;E1NU42|Large ribosomal subunit protein uL2|rplB 0.3703785 9.819932 3.291426 0.0304267 0.710556 -4.171029
A0A6G7B6Y1;C8PAY5;E1NMJ5;E1NSB9|NCS2 family permease;Putative permease;Putative permease;Permease|G6Z83_00155;yieG;pbuG;HMPREF9211_1272 -0.6997797 6.075260 -3.171551 0.0340734 0.710556 -4.184644
C8PD88|Dihydropteroate synthase|folP -0.6053520 5.889066 -3.168543 0.0341713 0.710556 -4.184997
A0A6G7B9S9|ATP-dependent DNA helicase|pcrA 0.3493146 8.044538 3.138916 0.0351539 0.710556 -4.188506
A0A6G7BHR8|Glutamate–tRNA ligase|gltX 0.3522573 8.671349 3.132587 0.0353682 0.710556 -4.189263
A0A6G7B7J6;E1NP20|DNA replication initiation control protein YabA;Initiation-control protein YabA|yabA;HMPREF9212_0021 0.5052118 7.117279 3.131884 0.0353920 0.710556 -4.189348
A0A6G7BA62;C8PB55;E1NM62|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_06665;HMPREF0520_0325;HMPREF9212_0412 -0.5096376 6.825266 -3.109255 0.0361712 0.710556 -4.192076
A0A6G7BA67;C8PBI7;E1NNP9;E1NTY3|Transcription termination/antitermination protein NusG|nusG 0.3875250 7.932463 3.093745 0.0367169 0.710556 -4.193966
A0A6G7B970|Xaa-Pro dipeptidyl-peptidase|G6Z83_04040 0.4915151 7.508535 3.091378 0.0368010 0.710556 -4.194256
E1NRB2|Uncharacterized protein|HMPREF9211_0159 0.5581602 5.779447 3.090678 0.0368260 0.710556 -4.194342
A0A6G7B1X1;E1NUD2|GTP cyclohydrolase 1|folE 0.4384387 6.765608 3.025986 0.0392173 0.710556 -4.202410
A0A6G7B8T2;C8PAT1|DNA polymerase III subunit delta;DNA polymerase III, delta subunit|holA 0.5197183 5.907468 3.025610 0.0392318 0.710556 -4.202458
A0A6G7B9V3;C8PBD2;E1NPB2;E1NU45|Large ribosomal subunit protein uL3|rplC 0.4265643 9.749148 2.980407 0.0410119 0.710556 -4.208266
A0A6G7B871;C8PCZ1;E1NTR1|Amino acid permease|G6Z83_02055;HMPREF0520_0961;HMPREF9211_1419 0.4364796 5.360959 2.962592 0.0417393 0.710556 -4.210595
E1NLW5;E1NV59|UbiC transcription regulator-associated domain protein|HMPREF9212_0654;HMPREF9211_0682 0.4176203 7.157107 2.959469 0.0418684 0.710556 -4.211005
C8PAQ4;E1NQ97;E1NRR7|Protein RibT|HMPREF0520_0174;HMPREF9212_0787;HMPREF9211_0564 0.7826915 5.636071 2.956516 0.0419909 0.710556 -4.211394
A0A6G7B9T6;C8PAV5;E1NQN4;E1NRX6|Chaperone protein DnaK|dnaK 0.4336780 10.961888 2.916650 0.0436860 0.710556 -4.216704
A0A6G7B993|Lipoprotein|G6Z83_05110 -0.6210108 6.832911 -2.900466 0.0443968 0.710556 -4.218892
A0A6G7B9V7;C8PBW4;E1NLZ3;E1NTI5|Protein-export membrane protein SecG|secG 0.6199192 5.477796 2.897314 0.0445369 0.710556 -4.219320
C8PAP1|CCA-adding enzyme|papS -0.3318399 7.171062 -2.870511 0.0457485 0.710556 -4.222992
A0A6G7BA49;C8PBD3;E1NPB1;E1NU44|Large ribosomal subunit protein uL4|rplD 0.3548646 10.997198 2.870045 0.0457699 0.710556 -4.223057
A0A6G7BA27|Ribosome hibernation promoting factor|raiA 0.4731349 9.752917 2.838531 0.0472447 0.710556 -4.227443
A0A6G7B9V1|Transcription-repair-coupling factor|mfd -0.3121057 5.657076 -2.822571 0.0480128 0.710556 -4.229693
C8PBZ3|Putative potassium/sodium efflux P-type ATPase, fungal-type|pacL -0.3455378 7.889072 -2.808562 0.0486988 0.710556 -4.231684
A0A6G7BAK1;C8PDK6;E1NN69;E1NU70|Small ribosomal subunit protein uS4|rpsD 0.3281767 10.069653 2.804414 0.0489042 0.710556 -4.232277
write.xlsx(dm5, "differentialexpression5_liners_strainS7.xlsx")

Volcano chart

TOT=nrow(dm5)
sig <- subset(dm5,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")

mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(sx2$LA)+1)))


plot(dm5$logFC,-log10(dm5$P.Value),pch=19,
  xlab="Log2 fold change", ylab="p-value")

points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")

Heatmap of top hits

top <- rownames(head(dm5,40))

mx <- x32[rownames(x32) %in% top,]

my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
    ColSideColors=mycols ,  col=my_palette, main="top 20 proteins in strain S/7")

Differential expression 6 - effect of lactic acid in strain A

Strain A samples only.

sx3 <- ss2[which(ss2$strain=="TRUE"),]

x33 <- x3[,which(colnames(x3) %in% rownames(sx3))]
dim(x33)
## [1] 946   4
design <- model.matrix(~ sx3$LA)
design
##   (Intercept) sx3$LATRUE
## 1           1          1
## 2           1          1
## 3           1          0
## 4           1          0
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$`sx3$LA`
## [1] "contr.treatment"
fit.reduced <- lmFit(x33,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
##        (Intercept) sx3$LATRUE
## Down             0          8
## NotSig           0        931
## Up             946          7
dm6 <- topTable(fit.reduced,coef=2, number = Inf)

head(dm6,50) %>% kbl(caption = "L. iners Top 50 proteins after differential analysis in strain A") %>% kable_paper("hover", full_width = F)
L. iners Top 50 proteins after differential analysis in strain A
logFC AveExpr t P.Value adj.P.Val B
iRT_Peptides_Fusion|| 1.5461103 9.515401 13.549429 0.0000849 0.0454905 2.2998729
E1NP53|pullulanase|pulA 3.1908412 5.871560 12.882786 0.0001059 0.0454905 2.1219257
A0A6G7BB59;E1NP90;E1NU23|Translation initiation factor IF-1|infA -1.6211342 7.769874 -11.233670 0.0001922 0.0454905 1.6128024
C8PAR7;E1NQB3;E1NSM2|Inosine-uridine preferring nucleoside hydrolase|HMPREF0520_0187;HMPREF9212_0803;HMPREF9211_0318 -1.1776902 7.716570 -10.561476 0.0002511 0.0454905 1.3723130
A0A6G7B9J9;C8PDL0;E1NN91;E1NSX2|DUF2969 domain-containing protein|G6Z83_04760;HMPREF0520_1180;HMPREF9212_0951;HMPREF9211_0361 -1.4356109 6.979445 -10.156581 0.0002973 0.0454905 1.2167896
A0A6G7B7F6;E1NP47;E1NTJ2|Nucleoside 2-deoxyribosyltransferase;Nucleoside deoxyribosyltransferase;Nucleoside deoxyribosyltransferase|G6Z83_01315;ntd;ntd -1.4576278 8.371740 -10.030792 0.0003136 0.0454905 1.1667272
A0A6G7B919;C8PDI1;E1NN39;E1NS15|DNA-directed RNA polymerase subunit omega|rpoZ -1.4529529 7.812049 -9.395463 0.0004154 0.0454905 0.9003475
E1NMM8|X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain protein|HMPREF9212_0748 1.1581023 6.045063 9.030851 0.0004920 0.0454905 0.7365773
A0A6G7B194;C8PCI9;E1NMC7|Thioredoxin|trxA -1.8455992 7.740627 -8.949855 0.0005113 0.0454905 0.6990473
A0A6G7B7J6;E1NP20|DNA replication initiation control protein YabA;Initiation-control protein YabA|yabA;HMPREF9212_0021 1.1498115 6.936311 8.815781 0.0005453 0.0454905 0.6359724
A0A6G7B7H9;C8PCY0;E1NQ76;E1NT48|Sugar aldolase;Class II Aldolase and Adducin domain protein;Class II Aldolase and Adducin domain protein;Class II Aldolase and Adducin domain protein|G6Z83_01090;HMPREF0520_0950;HMPREF9212_0109;HMPREF9211_0073 1.4006240 5.388047 8.784767 0.0005536 0.0454905 0.6212101
A0A6G7BAN1|HIT family protein|G6Z83_06610 1.5613218 5.604153 8.548712 0.0006216 0.0454905 0.5066903
A0A6G7B9D2;E1NSF7|Ribokinase|rbsK -1.0469324 6.936908 -8.537229 0.0006251 0.0454905 0.5010203
C8PAB4;E1NQE3|Tagatose-6-phosphate kinase|pfkB -1.4991333 6.902144 -8.315335 0.0006990 0.0463882 0.3895926
E1NTJ7|ATPase family associated with various cellular activities (AAA)|tlyB 1.2864677 6.133384 8.215924 0.0007355 0.0463882 0.3384996
A0A6G7B9V5;C8PAT5;E1NQQ9|Phosphopantetheine adenylyltransferase|coaD -1.2055292 5.964346 -7.602228 0.0010202 0.0580304 0.0060492
A0A6G7BK52;E1NMN3;E1NUB8|PTS sugar transporter subunit IIC;PTS system sorbose-specific iic component;PTS system sorbose-specific iic component|G6Z83_04060;HMPREF9212_0753;HMPREF9211_0464 0.9195288 7.418822 7.562468 0.0010428 0.0580304 -0.0165511
A0A6G7B3J8;C8PB26|DegV family protein;EDD domain protein, DegV family|G6Z83_06805;HMPREF0520_0296 0.7641276 8.137844 6.966521 0.0014685 0.0726407 -0.3720761
E1NQ07|Gram-positive signal peptide protein, YSIRK family|HMPREF9212_0339 0.8215463 9.607226 6.917106 0.0015124 0.0726407 -0.4030337
A0A6G7B9J2|YSIRK-type signal peptide-containing protein|G6Z83_05695 0.7863908 10.073410 6.891616 0.0015357 0.0726407 -0.4190952
E1NUH9|Bacteriophage peptidoglycan hydrolase|HMPREF9211_0844 -1.5214910 7.956728 -6.742151 0.0016815 0.0740238 -0.5145592
A0A6G7B825;E1NQZ0;E1NSK5|2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase;2,3-bisphosphoglycerate-dependent phosphoglycerate mutase|gpmA 0.7545887 9.779730 6.696858 0.0017289 0.0740238 -0.5439275
A0A6G7BAH4|YSIRK-type signal peptide-containing protein|G6Z83_05700 0.8744591 10.012309 6.631877 0.0017997 0.0740238 -0.5864265
A0A6G7B8J9;C8PC89;E1NMH1;E1NT71|DNA-entry nuclease;Putative DNA/RNA non-specific endonuclease;Putative DNA/RNA non-specific endonuclease;Putative DNA/RNA non-specific endonuclease|G6Z83_00410;endA;HMPREF9212_0480;HMPREF9211_0011 1.3952953 5.221992 6.390779 0.0020952 0.0807155 -0.7479428
A0A6G7BB56;C8PBG4;E1NP80;E1NU13|Large ribosomal subunit protein uL13|rplM 0.7495095 10.255969 6.344798 0.0021580 0.0807155 -0.7794489
A0A6G7B7R3|Histidine phosphatase family protein|G6Z83_01060 -0.7982777 7.098486 -6.302076 0.0022184 0.0807155 -0.8089285
A0A6G7B9U4;C8PDF6;E1NSF6|Ribose-5-phosphate isomerase A|rpiA -0.7160048 7.482566 -6.165382 0.0024258 0.0849922 -0.9046053
A0A6G7BA57|Lysozyme|G6Z83_05710 -0.7403929 7.288629 -5.933773 0.0028331 0.0946491 -1.0715609
A0A6G7B948;E1NSG1|DNA starvation/stationary phase protection protein;Ferritin-like protein|G6Z83_04385;HMPREF9211_1021 -1.5380654 9.026246 -5.789784 0.0031279 0.0946491 -1.1785343
A0A6G7BI36;E1NS53|Iron-sulfur cluster biosynthesis family protein;FeS cluster biogenesis domain-containing protein|G6Z83_06680;HMPREF9211_1203 -0.7886301 7.828368 -5.710088 0.0033069 0.0946491 -1.2388235
C8PCW8|Serine-type D-Ala-D-Ala carboxypeptidase|dacA 0.8793457 5.604185 5.707939 0.0033119 0.0946491 -1.2404597
A0A6G7BAD8;C8PBF9;E1NP85;E1NU18|Large ribosomal subunit protein bL17|rplQ 0.6540416 9.656336 5.655781 0.0034359 0.0946491 -1.2803546
A0A6G7BIS7;E1NP71|Uncharacterized protein|G6Z83_01205;HMPREF9212_0073 -2.0562275 6.804168 -5.646155 0.0034594 0.0946491 -1.2877543
A0A6G7B7L2;C8PCC1;E1NQZ6;E1NTA8|Glycerol-3-phosphate acyltransferase|plsY 2.1210540 6.031474 5.644508 0.0034634 0.0946491 -1.2890219
A0A6G7BA31;E1NPA9;E1NU42|Large ribosomal subunit protein uL2|rplB 0.7292774 9.896163 5.588147 0.0036051 0.0946491 -1.3325935
E1NMT2;E1NV37|Putative lipid kinase|HMPREF9212_1126;HMPREF9211_0659 -0.8223935 7.201478 -5.556455 0.0036879 0.0946491 -1.3572708
C8PBD4|Large ribosomal subunit protein uL23|rplW 0.6326985 9.984263 5.483257 0.0038878 0.0946491 -1.4147585
A0A6G7B797|D-alanyl-lipoteichoic acid biosynthesis protein DltD|dltD 0.9295961 6.003996 5.482801 0.0038891 0.0946491 -1.4151182
A0A6G7B9U7;C8PBX7;E1NLX2|Peptidoglycan-binding protein LysM;Uncharacterized protein;Uncharacterized protein|G6Z83_05375;HMPREF0520_0597;HMPREF9212_0664 -0.8701541 5.081611 -5.478220 0.0039020 0.0946491 -1.4187396
E1NQ38|L-lactate dehydrogenase|ldh 0.6357153 10.238272 5.376131 0.0042043 0.0968069 -1.5001430
A0A6G7B750;C8PCC5;E1NQZ1;E1NTB2|PAS domain-containing protein;PAS domain S-box protein;PAS domain S-box protein;PAS domain S-box protein|G6Z83_00585;HMPREF0520_0745;HMPREF9212_0181;HMPREF9211_0053 0.8245465 5.862766 5.344758 0.0043028 0.0968069 -1.5254344
A0A6G7BH65;C8PDH4;E1NN27;E1NS05|ribulose-phosphate 3-epimerase;Ribulose-phosphate 3-epimerase;ribulose-phosphate 3-epimerase;ribulose-phosphate 3-epimerase|rpe;rpe;HMPREF9212_0886;HMPREF9211_0930 -0.8276356 5.867281 -5.330348 0.0043489 0.0968069 -1.5370944
A0A6G7B7W0;C8PCJ1;E1NMC5;E1NR60|UPF0473 protein G6Z83_01895;UPF0473 protein HMPREF0520_0811;UPF0473 protein HMPREF9212_1334;UPF0473 protein HMPREF9211_0420|G6Z83_01895;HMPREF0520_0811;HMPREF9212_1334;HMPREF9211_0420 -0.7474805 6.561477 -5.314515 0.0044003 0.0968069 -1.5499381
A0A6G7BAK1;C8PDK6;E1NN69;E1NU70|Small ribosomal subunit protein uS4|rpsD 0.6186460 10.038013 5.244074 0.0046380 0.0997159 -1.6074877
A0A6G7B957;C8PAF6;E1NNX5;E1NRJ0|Enolase|eno 0.7706141 10.069214 5.127028 0.0050676 0.1065332 -1.7046038
E1NUT2|GA module|HMPREF9211_0601 2.5997715 7.690821 5.071889 0.0052865 0.1087173 -1.7510088
A0A6G7B9W9;C8PBW0;E1NLZ7;E1NTI0|Glyceraldehyde-3-phosphate dehydrogenase|gap 0.6284955 9.580014 5.010529 0.0055434 0.1104289 -1.8031495
C8PDF5|Inosine-uridine preferring nucleoside hydrolase|rihA -0.5882678 7.561041 -4.996732 0.0056032 0.1104289 -1.8149471
C8PDJ1|Penicillin-binding protein, transpeptidase domain protein|ftsI -0.6765870 7.201216 -4.863581 0.0062216 0.1180702 -1.9301961
A0A6G7BAS1;C8PBE1;E1NPA3;E1NU36|Small ribosomal subunit protein uS17|rpsQ 0.6932880 7.589185 4.859764 0.0062405 0.1180702 -1.9335381
write.xlsx(dm6, "differentialexpression6_liners_strainA.xlsx")

Volcano chart

TOT=nrow(dm6)
sig <- subset(dm6,adj.P.Val<0.05)
SIG=nrow(sig)
UP=nrow(subset(sig,logFC>0))
DN=nrow(subset(sig,logFC<0))
HEADER=paste(TOT,"total proteins,",SIG,"@5% FDR,",UP,"up,",DN,"down")

mycols <- gsub("1","lightblue",gsub("2","pink",as.character(as.numeric(sx3$LA)+1)))

plot(dm6$logFC,-log10(dm6$P.Value),pch=19,
  xlab="Log2 fold change", ylab="p-value")

points(sig$logFC,-log10(sig$P.Value),col="red",pch=19)
mtext(HEADER)
abline(v=0,lty=2,lwd=2,col="blue")

Heatmap of top hits

top <- rownames(head(dm6,40))

mx <- x33[rownames(x33) %in% top,]

my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

heatmap.2(as.matrix(mx),scale="row",margin=c(5,28),cexRow=1,trace="none",cexCol=1,
    ColSideColors=mycols ,  col=my_palette, main="top 20 proteins in strain A")

Comparison

Make a Euler diagram of the genes.

dm5up <- rownames(subset(dm5,adj.P.Val<0.05 & logFC > 0))
dm5dn <- rownames(subset(dm5,adj.P.Val<0.05 & logFC < 0))

dm6up <- rownames(subset(dm6,adj.P.Val<0.05 & logFC > 0))
dm6dn <- rownames(subset(dm6,adj.P.Val<0.05 & logFC < 0))

v1 <- list("Strain S up"=dm5up, "Strain S down"=dm5dn,
  "Strain A up"=dm6up,"Strain A down"=dm6dn)

plot(euler(v1),quantities = TRUE)

Make a scatter plotr of the ranks.

While the results are not completely concoordant between the strains, there is a lot of similarity.

mg <- merge(dm5,dm6,by=0)
rownames(mg) <- mg$Row.names
mg <- mg[,c("t.x","t.y")]
head(mg)
##                                                                                                                                                                                                                                             t.x
## A0A6G5W5S4|pullulanase|glgUi                                                                                                                                                                                                        -0.18443758
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                                                                                                                                             -0.07975566
## A0A6G7B0K0;E1NMG4;E1NT78|Alpha-glycosidase;Alpha amylase, catalytic domain protein;Alpha amylase, catalytic domain protein|G6Z83_00440;nplT;nplT                                                                                     1.17334663
## A0A6G7B0K8;E1NQ85|ADP-dependent (S)-NAD(P)H-hydrate dehydratase|nnrD                                                                                                                                                                 1.52542062
## A0A6G7B0L8;C8PCV9|Foldase protein PrsA|prsA                                                                                                                                                                                         -0.10795115
## A0A6G7B0M7;C8PAW2;E1NMK7;E1NSE6|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_00265;ylbA;HMPREF9212_0258;HMPREF9211_1299  1.75459600
##                                                                                                                                                                                                                                            t.y
## A0A6G5W5S4|pullulanase|glgUi                                                                                                                                                                                                        -2.1838245
## A0A6G5W607|pullulanase (Fragment)|glgUi                                                                                                                                                                                             -0.7253266
## A0A6G7B0K0;E1NMG4;E1NT78|Alpha-glycosidase;Alpha amylase, catalytic domain protein;Alpha amylase, catalytic domain protein|G6Z83_00440;nplT;nplT                                                                                     2.0925056
## A0A6G7B0K8;E1NQ85|ADP-dependent (S)-NAD(P)H-hydrate dehydratase|nnrD                                                                                                                                                                -0.9936822
## A0A6G7B0L8;C8PCV9|Foldase protein PrsA|prsA                                                                                                                                                                                          2.4313204
## A0A6G7B0M7;C8PAW2;E1NMK7;E1NSE6|ABC transporter ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein;ABC transporter, ATP-binding protein|G6Z83_00265;ylbA;HMPREF9212_0258;HMPREF9211_1299  2.7927192
plot(mg,pch=19,cex=0.4,main="limma t-stat",xlab="Strain S/7",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)

mylm <- lm(mg)
abline(mylm,lty=2,col="red",lwd=2)

mylm
## 
## Call:
## lm(formula = mg)
## 
## Coefficients:
## (Intercept)          t.y  
##      0.1785       0.2381
cor.test(mg[,1],mg[,2])
## 
##  Pearson's product-moment correlation
## 
## data:  mg[, 1] and mg[, 2]
## t = 13.375, df = 944, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3441682 0.4514052
## sample estimates:
##       cor 
## 0.3991508
rmg <- apply(mg,2, function(x) {
  rnk <- rank(x)
  NUMNEG=length(which(x<0))
  rnk <- rnk - NUMNEG
  return(rnk)
})

plot(rmg,pch=19,cex=0.4,main="rank of limma t-stat",xlab="Strain D",ylab="strain A")
abline(h=0,v=0,col="blue",lty=2,lwd=2)

mylm <- lm(as.data.frame(rmg))

abline(mylm,lty=2,col="red",lwd=2)

mylm
## 
## Call:
## lm(formula = as.data.frame(rmg))
## 
## Coefficients:
## (Intercept)          t.y  
##     34.1758       0.4226
cor.test(rmg[,1],rmg[,2])
## 
##  Pearson's product-moment correlation
## 
## data:  rmg[, 1] and rmg[, 2]
## t = 14.325, df = 944, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3687669 0.4735569
## sample estimates:
##       cor 
## 0.4225731
tr <- nrow(subset(mg,t.x>0 & t.y>0))
br <- nrow(subset(mg,t.x>0 & t.y<0))
tl <- nrow(subset(mg,t.x<0 & t.y>0))
bl <- nrow(subset(mg,t.x<0 & t.y<0))

tr
## [1] 324
br
## [1] 185
tl
## [1] 154
bl
## [1] 283
concordant = tr + bl
message(paste("concordant proteins:",concordant))
## concordant proteins: 607
discordant = br + tl
message(paste("discordant proteins:",discordant))
## discordant proteins: 339

Session information

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] stringr_1.5.1    eulerr_7.0.2     openxlsx_4.2.5.2 gplots_3.1.3.1  
##  [5] beeswarm_0.4.0   vioplot_0.4.0    zoo_1.8-12       sm_2.2-6.0      
##  [9] kableExtra_1.4.0 limma_3.60.0    
## 
## loaded via a namespace (and not attached):
##  [1] jsonlite_1.8.8     highr_0.11         compiler_4.4.1     gtools_3.9.5      
##  [5] zip_2.3.1          Rcpp_1.0.12        xml2_1.3.6         bitops_1.0-7      
##  [9] jquerylib_0.1.4    systemfonts_1.1.0  scales_1.3.0       yaml_2.3.8        
## [13] fastmap_1.2.0      statmod_1.5.0      lattice_0.22-6     R6_2.5.1          
## [17] knitr_1.47         polyclip_1.10-6    munsell_0.5.1      svglite_2.1.3     
## [21] polylabelr_0.2.0   bslib_0.7.0        rlang_1.1.4        cachem_1.1.0      
## [25] stringi_1.8.4      xfun_0.45          caTools_1.18.2     sass_0.4.9        
## [29] viridisLite_0.4.2  cli_3.6.3          magrittr_2.0.3     digest_0.6.36     
## [33] grid_4.4.1         rstudioapi_0.16.0  lifecycle_1.0.4    vctrs_0.6.5       
## [37] KernSmooth_2.23-24 evaluate_0.24.0    glue_1.7.0         colorspace_2.1-0  
## [41] rmarkdown_2.27     tools_4.4.1        htmltools_0.5.8.1