Introduction

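Here I will be calculating the Euclidean distances separating organisms within each of several taxonomic groups (mammals, birds, fishes and so on), using the two-letter (dinucleotide) columns of the data table.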

suppressPackageStartupMessages({
  library("vioplot")
  library("beeswarm")
  library("kableExtra")
  library("seqinr")
  library("gplots")
})

Load data.

# Read the per-organism table: one row per organism with its `name`, its
# `class` label and one numeric column per two-letter combination.
dat <- read.csv(file = "new_all.csv")

# Preview the first six rows to check that the import looks sensible.
head(dat, n = 6L)
##                                  name   class   TG   CA   AG   CT   GA   TC
## 1                    Acinonyx_jubatus Mammals 1.27 1.27 1.22 1.20 1.08 1.09
## 2              Ailuropoda_melanoleuca Mammals 1.26 1.27 1.23 1.21 1.08 1.09
## 3                Artibeus_jamaicensis Mammals 1.29 1.29 1.23 1.22 1.07 1.08
## 4               Arvicanthis_niloticus Mammals 1.31 1.32 1.26 1.24 1.08 1.09
## 5                  Arvicola_amphibius Mammals 1.31 1.31 1.25 1.24 1.08 1.09
## 6 Balaenoptera_acutorostrata_scammoni Mammals 1.28 1.28 1.23 1.21 1.09 1.09
##     AC   GT   CC   GG   AA   TT   AT   TA   CG   GC
## 1 0.85 0.84 0.84 0.84 0.79 0.79 0.85 0.56 0.44 0.96
## 2 0.85 0.84 0.84 0.84 0.79 0.79 0.85 0.55 0.44 0.97
## 3 0.86 0.85 0.83 0.83 0.79 0.79 0.83 0.54 0.45 0.97
## 4 0.86 0.85 0.84 0.83 0.77 0.77 0.85 0.56 0.37 0.98
## 5 0.86 0.85 0.84 0.83 0.77 0.77 0.84 0.55 0.39 0.98
## 6 0.85 0.84 0.84 0.84 0.79 0.78 0.85 0.55 0.42 0.97

Separate into groups.

The `class` column contains ten groups: "Mammals", "Birds", "Reptiles", "Amphibians", "Fishes", "Plants", "Chlorophyta", "Invertebrates", "Fungi" and "Protozoa".

unique(dat$class)
##  [1] "Mammals"       "Birds"         "Reptiles"      "Amphibians"   
##  [5] "Fishes"        "Plants"        "Chlorophyta"   "Invertebrates"
##  [9] "Fungi"         "Protozoa"
# One data frame per taxonomic group. `which()` keeps only rows where the
# comparison is TRUE, so rows with a missing `class` are left out.
mammals <- dat[which(dat$class == "Mammals"), , drop = FALSE]
fishes <- dat[which(dat$class == "Fishes"), , drop = FALSE]
birds <- dat[which(dat$class == "Birds"), , drop = FALSE]
fungi <- dat[which(dat$class == "Fungi"), , drop = FALSE]
plants <- dat[which(dat$class == "Plants"), , drop = FALSE]
protozoa <- dat[which(dat$class == "Protozoa"), , drop = FALSE]
reptiles <- dat[which(dat$class == "Reptiles"), , drop = FALSE]
chlorophyta <- dat[which(dat$class == "Chlorophyta"), , drop = FALSE]
amphibians <- dat[which(dat$class == "Amphibians"), , drop = FALSE]
invertebrates <- dat[which(dat$class == "Invertebrates"), , drop = FALSE]

# Named list of the groups so the same summary can be applied to each one.
l <- list(
  mammals = mammals,
  fishes = fishes,
  birds = birds,
  fungi = fungi,
  plants = plants,
  protozoa = protozoa,
  reptiles = reptiles,
  chlorophyta = chlorophyta,
  amphibians = amphibians,
  invertebrates = invertebrates
)

Now examine the diversity within each group using the pairwise Euclidean distances between its organisms, summarised by their mean and median.

# For every group, compute all pairwise Euclidean distances between its
# organisms and return them as a plain numeric vector.
res <- lapply(l, function(group) {
  # Keep `name` plus the numeric columns; the class label is not needed here.
  values <- group[, colnames(group) != "class", drop = FALSE]

  # Average rows that share a name so each organism contributes one profile.
  per_name <- aggregate(. ~ name, data = values, FUN = mean)

  # Move the names into the row names so only numeric columns reach dist().
  profile <- per_name[, colnames(per_name) != "name", drop = FALSE]
  rownames(profile) <- per_name$name

  pairwise <- dist(profile)
  as.vector(pairwise)
})

message("mean")
## mean
lapply(res,mean)
## $mammals
## [1] 0.07720381
## 
## $fishes
## [1] 0.1526821
## 
## $birds
## [1] 0.08144294
## 
## $fungi
## [1] 0.3368846
## 
## $plants
## [1] 0.2131562
## 
## $protozoa
## [1] 0.6800384
## 
## $reptiles
## [1] 0.09594969
## 
## $chlorophyta
## [1] 0.8337333
## 
## $amphibians
## [1] 0.1021095
## 
## $invertebrates
## [1] 0.4526068
message("median")
## median
lapply(res,median)
## $mammals
## [1] 0.07071068
## 
## $fishes
## [1] 0.1371131
## 
## $birds
## [1] 0.08
## 
## $fungi
## [1] 0.3021589
## 
## $plants
## [1] 0.1906567
## 
## $protozoa
## [1] 0.6494324
## 
## $reptiles
## [1] 0.09486833
## 
## $chlorophyta
## [1] 0.8894104
## 
## $amphibians
## [1] 0.1146633
## 
## $invertebrates
## [1] 0.4261455
# Order the groups by their median pairwise distance so every plot below
# lists them from least to most diverse.
res2 <- res[order(vapply(res, median, numeric(1)))]

# Widen the left margin so the horizontal group labels are not clipped.
par(mar = c(5, 7, 3, 1))

# Full distribution of pairwise distances per group.
vioplot(res2, horizontal = TRUE, las = 1,
  main = "distribution of Euclidean distance")
grid()

# Mean pairwise distance per group.
barplot(vapply(res2, mean, numeric(1)), horiz = TRUE, las = 1,
  main = "mean Euclidean distance")
grid()

# Median pairwise distance per group. This previously plotted the mean
# again under a "median" title; it now plots the median as labelled.
barplot(vapply(res2, median, numeric(1)), horiz = TRUE, las = 1,
  main = "median Euclidean distance")
grid()

Heatmaps

# Drop the label columns from one group's table, leaving only the numeric
# columns with the organism names as row names.
#   df                   data frame with `name`, `class` and numeric columns
#   collapse_duplicates  if TRUE, rows sharing a `name` are averaged first
#                        (row names must be unique)
# Returns the reshaped data frame.
prepare_group <- function(df, collapse_duplicates = FALSE) {
  df$class <- NULL
  if (collapse_duplicates) {
    df <- aggregate(. ~ name, df, mean)
  }
  rownames(df) <- df$name
  df$name <- NULL
  df
}

# Draw the heatmap for one prepared group: unscaled values, rows clustered,
# columns kept in their original order.
#   df       prepared data frame (numeric columns, names as row names)
#   title    plot title
#   margins  bottom and right margins passed to heatmap.2()
# Returns the value of heatmap.2().
draw_group_heatmap <- function(df, title, margins = c(3, 12)) {
  heatmap.2(as.matrix(df), trace = "none", main = title, scale = "none",
    margins = margins, cexRow = 0.8, dendrogram = "row", Colv = FALSE)
}

chlorophyta <- prepare_group(chlorophyta)
hm_chlorophyta <- draw_group_heatmap(chlorophyta, "chlorophyta",
  margins = c(12, 12))

protozoa <- prepare_group(protozoa)
hm_protozoa <- draw_group_heatmap(protozoa, "protozoa")

invertebrates <- prepare_group(invertebrates)
hm_invertebrates <- draw_group_heatmap(invertebrates, "invertebrates")

mammals <- prepare_group(mammals)
hm_mammals <- draw_group_heatmap(mammals, "mammals")

# Fishes is the only group whose rows are averaged by name before plotting.
fishes <- prepare_group(fishes, collapse_duplicates = TRUE)
hm_fishes <- draw_group_heatmap(fishes, "fishes")

birds <- prepare_group(birds)
hm_birds <- draw_group_heatmap(birds, "birds")

fungi <- prepare_group(fungi)
hm_fungi <- draw_group_heatmap(fungi, "fungi")

plants <- prepare_group(plants)
hm_plants <- draw_group_heatmap(plants, "plants")

reptiles <- prepare_group(reptiles)
hm_reptiles <- draw_group_heatmap(reptiles, "reptiles")

amphibians <- prepare_group(amphibians)
hm_amphibians <- draw_group_heatmap(amphibians, "amphibians",
  margins = c(12, 12))

Session

Session info

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gplots_3.1.3     seqinr_4.2-16    kableExtra_1.3.4 beeswarm_0.4.0  
## [5] vioplot_0.3.7    zoo_1.8-10       sm_2.2-5.7.1    
## 
## loaded via a namespace (and not attached):
##  [1] highr_0.9          bslib_0.4.0        jquerylib_0.1.4    compiler_4.2.0    
##  [5] bitops_1.0-7       tools_4.2.0        digest_0.6.29      jsonlite_1.8.0    
##  [9] evaluate_0.15      lifecycle_1.0.1    lattice_0.20-45    viridisLite_0.4.0 
## [13] rlang_1.0.4        cli_3.3.0          rstudioapi_0.13    yaml_2.3.5        
## [17] xfun_0.31          fastmap_1.1.0      httr_1.4.3         stringr_1.4.0     
## [21] xml2_1.3.3         knitr_1.39         sass_0.4.2         systemfonts_1.0.4 
## [25] gtools_3.9.3       caTools_1.18.2     ade4_1.7-19        grid_4.2.0        
## [29] webshot_0.5.3      svglite_2.1.0      glue_1.6.2         R6_2.5.1          
## [33] rmarkdown_2.14     magrittr_2.0.3     scales_1.2.0       htmltools_0.5.3   
## [37] MASS_7.3-58        rvest_1.0.2        colorspace_2.0-3   KernSmooth_2.23-20
## [41] stringi_1.7.8      munsell_0.5.0      cachem_1.0.6