Here I will be calculating the Euclidean distance separating organisms of different classes.
suppressPackageStartupMessages({
library("vioplot")
library("beeswarm")
library("kableExtra")
library("seqinr")
library("gplots")
})
Load data.
# Read the organism table (a name, a class label and sixteen two-letter
# numeric columns such as TG, CA, ...) and preview its first six rows.
dat <- read.csv(file = "new_all.csv")
head(dat, n = 6L)
## name class TG CA AG CT GA TC
## 1 Acinonyx_jubatus Mammals 1.27 1.27 1.22 1.20 1.08 1.09
## 2 Ailuropoda_melanoleuca Mammals 1.26 1.27 1.23 1.21 1.08 1.09
## 3 Artibeus_jamaicensis Mammals 1.29 1.29 1.23 1.22 1.07 1.08
## 4 Arvicanthis_niloticus Mammals 1.31 1.32 1.26 1.24 1.08 1.09
## 5 Arvicola_amphibius Mammals 1.31 1.31 1.25 1.24 1.08 1.09
## 6 Balaenoptera_acutorostrata_scammoni Mammals 1.28 1.28 1.23 1.21 1.09 1.09
## AC GT CC GG AA TT AT TA CG GC
## 1 0.85 0.84 0.84 0.84 0.79 0.79 0.85 0.56 0.44 0.96
## 2 0.85 0.84 0.84 0.84 0.79 0.79 0.85 0.55 0.44 0.97
## 3 0.86 0.85 0.83 0.83 0.79 0.79 0.83 0.54 0.45 0.97
## 4 0.86 0.85 0.84 0.83 0.77 0.77 0.85 0.56 0.37 0.98
## 5 0.86 0.85 0.84 0.83 0.77 0.77 0.84 0.55 0.39 0.98
## 6 0.85 0.84 0.84 0.84 0.79 0.78 0.85 0.55 0.42 0.97
Separate into groups.
[1] "Mammals" "Birds" "Reptiles" "Amphibians"
[5] "Fishes" "Plants" "Chlorophyta" "Invertebrates"
[9] "Fungi" "Protozoa"
# List the distinct class labels present in the table.
unique(dat[["class"]])
## [1] "Mammals" "Birds" "Reptiles" "Amphibians"
## [5] "Fishes" "Plants" "Chlorophyta" "Invertebrates"
## [9] "Fungi" "Protozoa"
# Return the rows of `dat` whose class equals `label`; rows where the
# comparison is NA are dropped, and all columns are kept.
rows_of_class <- function(label) {
  dat[which(dat$class == label), , drop = FALSE]
}

# One data frame per class.
mammals <- rows_of_class("Mammals")
fishes <- rows_of_class("Fishes")
birds <- rows_of_class("Birds")
fungi <- rows_of_class("Fungi")
plants <- rows_of_class("Plants")
protozoa <- rows_of_class("Protozoa")
reptiles <- rows_of_class("Reptiles")
chlorophyta <- rows_of_class("Chlorophyta")
amphibians <- rows_of_class("Amphibians")
invertebrates <- rows_of_class("Invertebrates")

# Named list of the per-class tables, used for the distance calculations.
l <- list(
  mammals = mammals,
  fishes = fishes,
  birds = birds,
  fungi = fungi,
  plants = plants,
  protozoa = protozoa,
  reptiles = reptiles,
  chlorophyta = chlorophyta,
  amphibians = amphibians,
  invertebrates = invertebrates
)
Now examine the diversity within each group using the mean Euclidean distance.
# For every class: average the rows that share a name, then collect all
# pairwise Euclidean distances between the resulting per-name profiles as a
# plain numeric vector.
res <- lapply(l, function(grp) {
  grp[["class"]] <- NULL
  per_name <- aggregate(. ~ name, data = grp, FUN = mean)
  value_cols <- setdiff(names(per_name), "name")
  profiles <- per_name[, value_cols, drop = FALSE]
  rownames(profiles) <- per_name[["name"]]
  as.vector(dist(profiles))
})
# Mean of the pairwise distances within each class.
message("mean")
## mean
lapply(X = res, FUN = mean)
## $mammals
## [1] 0.07720381
##
## $fishes
## [1] 0.1526821
##
## $birds
## [1] 0.08144294
##
## $fungi
## [1] 0.3368846
##
## $plants
## [1] 0.2131562
##
## $protozoa
## [1] 0.6800384
##
## $reptiles
## [1] 0.09594969
##
## $chlorophyta
## [1] 0.8337333
##
## $amphibians
## [1] 0.1021095
##
## $invertebrates
## [1] 0.4526068
# Median of the pairwise distances within each class.
message("median")
## median
lapply(X = res, FUN = median)
## $mammals
## [1] 0.07071068
##
## $fishes
## [1] 0.1371131
##
## $birds
## [1] 0.08
##
## $fungi
## [1] 0.3021589
##
## $plants
## [1] 0.1906567
##
## $protozoa
## [1] 0.6494324
##
## $reptiles
## [1] 0.09486833
##
## $chlorophyta
## [1] 0.8894104
##
## $amphibians
## [1] 0.1146633
##
## $invertebrates
## [1] 0.4261455
# Order the classes by their median pairwise distance so every plot lists
# them in the same, increasing order.
res2 <- res[order(vapply(res, median, numeric(1)))]

# Enlarge the left margin so the horizontal class labels fit.
par(mar = c(5, 7, 3, 1))

vioplot(res2, horizontal = TRUE, las = 1,
  main = "distribution of Euclidean distance")
grid()

barplot(vapply(res2, mean, numeric(1)), horiz = TRUE, las = 1,
  main = "mean Euclidean distance")
grid()

# This panel is titled "median", so the bar heights must be the medians
# (not the means drawn in the previous panel).
barplot(vapply(res2, median, numeric(1)), horiz = TRUE, las = 1,
  main = "median Euclidean distance")
grid()
# Shape one class's table for heatmap.2(): drop the `class` column, optionally
# average rows sharing a `name`, then move `name` into the row names so that
# only the numeric columns remain.
#   df      - data frame with `name`, `class` and numeric columns
#   average - if TRUE, collapse rows with the same `name` to their column means
# Returns the reshaped data frame.
prep_heatmap_data <- function(df, average = FALSE) {
  df$class <- NULL
  if (average) {
    df <- aggregate(. ~ name, data = df, FUN = mean)
  }
  rownames(df) <- df$name
  df$name <- NULL
  df
}

# Draw an unscaled heatmap with a row dendrogram and the columns kept in
# their original order.
#   df      - data frame as returned by prep_heatmap_data()
#   title   - plot title
#   margins - forwarded to the `margins` argument of heatmap.2()
# Returns the value of heatmap.2().
draw_class_heatmap <- function(df, title, margins = c(3, 12)) {
  heatmap.2(as.matrix(df), trace = "none", main = title, scale = "none",
    margins = margins, cexRow = 0.8, dendrogram = "row", Colv = FALSE)
}

chlorophyta <- prep_heatmap_data(chlorophyta)
hm_chlorophyta <- draw_class_heatmap(chlorophyta, "chlorophyta",
  margins = c(12, 12))

protozoa <- prep_heatmap_data(protozoa)
hm_protozoa <- draw_class_heatmap(protozoa, "protozoa")

invertebrates <- prep_heatmap_data(invertebrates)
hm_invertebrates <- draw_class_heatmap(invertebrates, "invertebrates")

mammals <- prep_heatmap_data(mammals)
hm_mammals <- draw_class_heatmap(mammals, "mammals")

# Fishes is the only class averaged by name before plotting, as in the
# original analysis (presumably it contains repeated names, which cannot be
# used as row names -- confirm against the data).
fishes <- prep_heatmap_data(fishes, average = TRUE)
hm_fishes <- draw_class_heatmap(fishes, "fishes")

birds <- prep_heatmap_data(birds)
hm_birds <- draw_class_heatmap(birds, "birds")

fungi <- prep_heatmap_data(fungi)
hm_fungi <- draw_class_heatmap(fungi, "fungi")

plants <- prep_heatmap_data(plants)
hm_plants <- draw_class_heatmap(plants, "plants")

reptiles <- prep_heatmap_data(reptiles)
hm_reptiles <- draw_class_heatmap(reptiles, "reptiles")

amphibians <- prep_heatmap_data(amphibians)
hm_amphibians <- draw_class_heatmap(amphibians, "amphibians",
  margins = c(12, 12))
Session info
# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gplots_3.1.3 seqinr_4.2-16 kableExtra_1.3.4 beeswarm_0.4.0
## [5] vioplot_0.3.7 zoo_1.8-10 sm_2.2-5.7.1
##
## loaded via a namespace (and not attached):
## [1] highr_0.9 bslib_0.4.0 jquerylib_0.1.4 compiler_4.2.0
## [5] bitops_1.0-7 tools_4.2.0 digest_0.6.29 jsonlite_1.8.0
## [9] evaluate_0.15 lifecycle_1.0.1 lattice_0.20-45 viridisLite_0.4.0
## [13] rlang_1.0.4 cli_3.3.0 rstudioapi_0.13 yaml_2.3.5
## [17] xfun_0.31 fastmap_1.1.0 httr_1.4.3 stringr_1.4.0
## [21] xml2_1.3.3 knitr_1.39 sass_0.4.2 systemfonts_1.0.4
## [25] gtools_3.9.3 caTools_1.18.2 ade4_1.7-19 grid_4.2.0
## [29] webshot_0.5.3 svglite_2.1.0 glue_1.6.2 R6_2.5.1
## [33] rmarkdown_2.14 magrittr_2.0.3 scales_1.2.0 htmltools_0.5.3
## [37] MASS_7.3-58 rvest_1.0.2 colorspace_2.0-3 KernSmooth_2.23-20
## [41] stringi_1.7.8 munsell_0.5.0 cachem_1.0.6