Tags entries (E3):
The desired format is:
library("kableExtra")
library("DT")
ss <- read.table("samplesheet.tsv",header=TRUE,sep="\t",row.names=1)
ss |> kbl(caption="Description of wells") |> kable_paper("hover", full_width = F)
Drug | Description | |
---|---|---|
C3 | DMSO 0.01% | Control |
C4 | Piperaquine 270nM | Food vacuole and free radical damage |
C5 | Amodiaquine 156nM | Food vacuole and free radical damage |
C6 | DDSM-1 65nM | Inhibit DNA synthesis |
C7 | Atovaquone 2.7nM | Inhibit DNA synthesis |
C8 | Cipargamin 10nM | Cause cell swelling due to Na+ increase |
C9 | MMV665878 653nM | Cause cell swelling due to Na+ increase |
C10 | W-518 220nM | Vesicle trafficking |
C11 | OGHL250 49uM | Vesicle trafficking |
D4 | Artemisinin 50nM | Free radical damage |
D5 | DHA 50nM | Free radical damage |
D6 | NPPB 626uM | Block nutrient uptake |
D7 | Furozemide 1.34mM | Block nutrient uptake |
D8 | Bepsi 40.7nM | Inhibit protein folding |
D9 | Bepristat 440nM | Inhibit protein folding |
D10 | M5717 5.6nM | Block protein synthesis |
D11 | Cyclohexamide 1.5uM | Block protein synthesis |
x <- read.csv("20250701_COMBINED_Drug_pair_2_measurements.tsv",header=TRUE,sep="\t")
myt <- as.data.frame(table(x$Tags) )
myt |> kbl(caption="Frequency of features across all wells") |> kable_paper("hover", full_width = F)
Var1 | Freq |
---|---|
iRBC filter (child only), Mito Child, Nucleus Child, PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC | 3303 |
Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC inside, Mitochondria | 3295 |
Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC intersecting, Mitochondria | 97 |
Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC inside | 3032 |
Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC intersecting | 73 |
PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC | 14281 |
PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC inside | 3122 |
PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC intersecting | 318 |
WELL="C3"
y <- subset(x,Position..Well==WELL)
myt <- table(y$Tags)
myt |> kbl(caption="Frequency of features in well C3") |> kable_paper("hover", full_width = F)
Var1 | Freq |
---|---|
iRBC filter (child only), Mito Child, Nucleus Child, PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC | 239 |
Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC inside, Mitochondria | 245 |
Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC intersecting, Mitochondria | 6 |
Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC inside | 240 |
Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC intersecting | 7 |
PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC | 1028 |
PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC inside | 227 |
PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC intersecting | 24 |
# here is the important code for one well (C3)
rbc <- y[which(y$Parent.Names==""),]
RBC_count <- nrow(rbc)
RBC_area <- mean(rbc$Area..2D.Oriented.Bounds...µm...)
irbc <- subset(y,Tags=="iRBC filter (child only), Mito Child, Nucleus Child, PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC")
iRBC_count <- nrow(irbc)
iRBC_area <- mean(irbc$Area..2D.Oriented.Bounds...µm...)
pv1 <- subset(y, Tags == "PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC" | Tags =="PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC inside" | Tags == "PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC intersecting" )
PV1_area <- mean(pv1$Area..2D.Oriented.Bounds...µm...)
PV1_intensity <- mean(pv1$Mean..Intensities..2)
PV1_count <- nrow(pv1)
mito <- subset(y, Tags == "Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC inside, Mitochondria" | Tags == "Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC intersecting, Mitochondria")
Mito_area <- mean(mito$Area..2D.Oriented.Bounds...µm...)
Mito_intensity <- mean(mito$Mean..Intensities..1)
Mito_count <- nrow(mito)
nucleus <- subset(y, Tags == "Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC inside" | Tags == "Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC intersecting" )
Nucleus_area <- mean(nucleus$Area..2D.Oriented.Bounds...µm...)
Nucleus_intensity <- mean(nucleus$Mean..Intensities..3)
Nucleus_count <- nrow(nucleus)
res <- c("RBC count"=RBC_count,
"RBC area"=RBC_area,
"iRBC count"=iRBC_count,
"iRBC area"=iRBC_area,
"PV1_area"=PV1_area,
"PV1_intensity"=PV1_intensity,
"PV1_count"=PV1_count,
"Mito_area"=Mito_area,
"Mito_intensity"=Mito_intensity,
"Mito_count"=Mito_count,
"Nucleus_area"=Nucleus_area,
"Nucleus_intensity"=Nucleus_intensity,
"Nucleus_count"=Nucleus_count)
res
## RBC count RBC area iRBC count iRBC area
## 1267.000000 59.952071 239.000000 59.762435
## PV1_area PV1_intensity PV1_count Mito_area
## 50.565121 3479.049900 1279.000000 8.837521
## Mito_intensity Mito_count Nucleus_area Nucleus_intensity
## 4644.203291 251.000000 3.721479 5905.622685
## Nucleus_count
## 247.000000
WELLS <- unique(x$Position..Well)
Now put it in a function so we can iterate over all wells.
# define the function
smry <- function(y) {
rbc <- y[which(y$Parent.Names==""),]
RBC_count <- nrow(rbc)
RBC_area <- mean(rbc$Area..2D.Oriented.Bounds...µm...)
irbc <- subset(y,Tags=="iRBC filter (child only), Mito Child, Nucleus Child, PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC")
iRBC_count <- nrow(irbc)
iRBC_area <- mean(irbc$Area..2D.Oriented.Bounds...µm...)
pv1 <- subset(y, Tags == "PV1 child, RBC, RBC area Filter, RBC segmenter, Touching Edge Filter RBC" | Tags =="PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC inside" | Tags == "PV1, PV1 child, PV1 Segmenter, PV1 size filter, PV1 within RBC, PV1 within RBC intersecting" )
PV1_area <- mean(pv1$Area..2D.Oriented.Bounds...µm...)
PV1_intensity <- mean(pv1$Mean..Intensities..2)
PV1_count <- nrow(pv1)
mito <- subset(y, Tags == "Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC inside, Mitochondria" | Tags == "Mito Child, Mito Segmenter, Mito size filter, Mito within RBC, Mito within RBC intersecting, Mitochondria")
Mito_area <- mean(mito$Area..2D.Oriented.Bounds...µm...)
Mito_intensity <- mean(mito$Mean..Intensities..1)
Mito_count <- nrow(mito)
nucleus <- subset(y, Tags == "Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC inside" | Tags == "Nuclei, Nuclei Segmenter, Nucleus Child, Nucleus size filter, Nucleus within RBC, Nucleus within RBC intersecting" )
Nucleus_area <- mean(nucleus$Area..2D.Oriented.Bounds...µm...)
Nucleus_intensity <- mean(nucleus$Mean..Intensities..3)
Nucleus_count <- nrow(nucleus)
res <- c("RBC count"=RBC_count,
"RBC area"=RBC_area,
"iRBC count"=iRBC_count,
"iRBC area"=iRBC_area,
"PV1_area"=PV1_area,
"PV1_intensity"=PV1_intensity,
"PV1_count"=PV1_count,
"Mito_area"=Mito_area,
"Mito_intensity"=Mito_intensity,
"Mito_count"=Mito_count,
"Nucleus_area"=Nucleus_area,
"Nucleus_intensity"=Nucleus_intensity,
"Nucleus_count"=Nucleus_count)
res
}
# get a vector of all wells
WELLS <- unique(x$Position..Well)
# now run the function for all wells
z <- lapply(WELLS, function(w) {
y <- subset(x,Position..Well==w)
smry(y)
} )
# z is a list of vectors, which we can convert to a matrix
z <- do.call(rbind,z)
rownames(z) <- WELLS
# peek at the ddata format
head(z,2)
## RBC count RBC area iRBC count iRBC area PV1_area PV1_intensity PV1_count
## C3 1267 59.95207 239 59.76244 50.56512 3479.050 1279
## C4 1108 59.68130 227 58.51197 49.47871 4268.908 1122
## Mito_area Mito_intensity Mito_count Nucleus_area Nucleus_intensity
## C3 8.837521 4644.203 251 3.721479 5905.623
## C4 8.326510 5050.113 234 3.486644 5498.474
## Nucleus_count
## C3 247
## C4 218
Show the data as a kableExtra table.
zz <- merge(ss,z,by=0)
zz |> kbl(caption="Summarised data by well") |> kable_paper("hover", full_width = F)
Row.names | Drug | Description | RBC count | RBC area | iRBC count | iRBC area | PV1_area | PV1_intensity | PV1_count | Mito_area | Mito_intensity | Mito_count | Nucleus_area | Nucleus_intensity | Nucleus_count |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
C3 | DMSO 0.01% | Control | 1267 | 59.95207 | 239 | 59.76244 | 50.56512 | 3479.050 | 1279 | 8.837521 | 4644.2033 | 251 | 3.721479 | 5905.623 | 247 |
C4 | Piperaquine 270nM | Food vacuole and free radical damage | 1108 | 59.68130 | 227 | 58.51197 | 49.47871 | 4268.908 | 1122 | 8.326510 | 5050.1132 | 234 | 3.486644 | 5498.474 | 218 |
C5 | Amodiaquine 156nM | Food vacuole and free radical damage | 1386 | 60.43123 | 277 | 57.54707 | 50.59340 | 3940.025 | 1395 | 6.574243 | 1977.6610 | 287 | 2.532401 | 5102.204 | 244 |
C6 | DDSM-1 65nM | Inhibit DNA synthesis | 1412 | 58.77855 | 283 | 57.96876 | 49.23086 | 4178.926 | 1418 | 8.203615 | 4783.9154 | 283 | 3.307747 | 5504.826 | 271 |
C7 | Atovaquone 2.7nM | Inhibit DNA synthesis | 1347 | 58.98824 | 268 | 58.30964 | 49.13302 | 4251.045 | 1364 | 8.150090 | 5079.2931 | 275 | 3.475012 | 6388.834 | 266 |
C8 | Cipargamin 10nM | Cause cell swelling due to Na+ increase | 1430 | 58.59986 | 251 | 56.36729 | 49.71329 | 4172.217 | 1443 | 6.318472 | 2950.6643 | 259 | 2.769500 | 5064.717 | 193 |
C9 | MMV665878 653nM | Cause cell swelling due to Na+ increase | 1320 | 58.34666 | 221 | 55.12919 | 50.03600 | 3600.101 | 1332 | 5.033644 | 873.6914 | 228 | 2.808488 | 4582.392 | 166 |
D10 | M5717 5.6nM | Block protein synthesis | 1511 | 57.00724 | 286 | 56.61045 | 47.94023 | 4072.360 | 1520 | 7.729455 | 4574.3273 | 300 | 3.084335 | 6886.685 | 286 |
D11 | Cyclohexamide 1.5uM | Block protein synthesis | 1429 | 55.36446 | 253 | 55.11494 | 47.15147 | 3945.445 | 1437 | 7.407381 | 4186.0737 | 264 | 3.201629 | 6610.812 | 240 |
D4 | Artemisinin 50nM | Free radical damage | 1206 | 57.57922 | 234 | 55.63792 | 48.76311 | 4066.048 | 1216 | 8.799574 | 4150.5616 | 242 | 3.794716 | 8460.831 | 236 |
D5 | DHA 50nM | Free radical damage | 1261 | 58.67011 | 223 | 56.80388 | 49.86090 | 3864.880 | 1271 | 6.213268 | 2209.1254 | 226 | 3.016739 | 7140.583 | 211 |
D8 | Bepsi 40.7nM | Inhibit protein folding | 1454 | 57.19384 | 272 | 55.55021 | 49.00180 | 4076.862 | 1457 | 8.780328 | 4545.8542 | 272 | 3.541456 | 8486.803 | 273 |
D9 | Bepristat 440nM | Inhibit protein folding | 1453 | 57.00203 | 269 | 55.10699 | 48.24839 | 4101.749 | 1467 | 7.540342 | 4385.4794 | 271 | 3.188267 | 7282.694 | 254 |
Show as data.table. This function allows you to download as an Excel file.
DT::datatable(
{ head(zz,1000) },
extensions = 'Buttons',
filter = list(position = 'top', clear = FALSE),
options = list(
paging = TRUE,
searching = TRUE,
fixedColumns = TRUE,
autoWidth = TRUE,
ordering = TRUE,
dom = 'tB',
buttons = c('copy', 'csv', 'excel'),
pageLength = 50,
search = list(regex = TRUE, caseInsensitive = TRUE)),
rownames= TRUE)
First quantify CVs, then do PCA.
cv <- function(x) { sd(x)/mean(x) }
par(mar=c(5.1,9.1,4.1,2.1))
barplot(apply(z,2,cv),horiz=T,las=1,xlab="CV")
par(mar=c(5.1,4.1,4.1,2.1))
pca <- prcomp(z, scale=TRUE)
scree <- pca$sdev
names(scree) <- 1:length(scree)
barplot(scree, main="Screeplot")
biplot(pca, main="PCA")
More variations on biplot clusstering here: https://www.geeksforgeeks.org/how-to-create-a-biplot-in-r/
For reproucibility
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.33 kableExtra_1.4.0
##
## loaded via a namespace (and not attached):
## [1] vctrs_0.6.5 svglite_2.2.1 cli_3.6.5 knitr_1.50
## [5] rlang_1.1.6 xfun_0.52 stringi_1.8.7 textshaping_1.0.1
## [9] jsonlite_2.0.0 glue_1.8.0 htmltools_0.5.8.1 sass_0.4.10
## [13] scales_1.4.0 rmarkdown_2.29 crosstalk_1.2.1 evaluate_1.0.3
## [17] jquerylib_0.1.4 fastmap_1.2.0 yaml_2.3.10 lifecycle_1.0.4
## [21] stringr_1.5.1 compiler_4.5.1 RColorBrewer_1.1-3 htmlwidgets_1.6.4
## [25] rstudioapi_0.17.1 systemfonts_1.2.3 farver_2.1.2 digest_0.6.37
## [29] viridisLite_0.4.2 R6_2.6.1 dichromat_2.0-0.1 pillar_1.10.2
## [33] magrittr_2.0.3 bslib_0.9.0 tools_4.5.1 xml2_1.3.8
## [37] cachem_1.1.0