Source: https://github.com/markziemann/asd_meth

Introduction

Here we are conducting a PCA analysis of the entire sample set including guthrie neonatal blood spots and peripheral blood at assessment.

library("kableExtra")
library("limma")

MDS for all

Now load the data.

bldat <- readRDS("bl_mvals.rds")
dim(bldat)
## [1] 802647     18
gudat <- readRDS("gu_mvals.rds")
dim(gudat)
## [1] 790658     22
mgdat <- merge(gudat,bldat,by=0)
rownames(mgdat) <- mgdat$Row.names
mgdat$Row.names = NULL

dim(mgdat)
## [1] 790347     40
ss <- read.table("ASD_EPIC_DATA/ASD_guthrie_blood_combined_summarysheet.csv",sep=",",header=TRUE)

Start with a scree plot to show the number of important dimensions.

myscree <- function(mx,n=10,main="") {
  pc <- princomp(mx)$sdev
  pcp <- pc/sum(pc)*100
  pcp <- pcp[1:10]
  barplot(pcp,cex.names = 1,las=2,ylim=c(0,70),
      ylab="percent (%) variance explained", main=main)
  text((0.5:length(pcp)*1.2),pcp,label=signif(pcp,3),pos=3,cex=0.8)
}

myscree(mgdat)

Now plot an MDS plot for all samples.

#mds <- plotMDS(mgdat)

mds <- cmdscale(dist(t(mgdat)))

XMAX=max(mds[,1])*1.1
XMIN=min(mds[,1])*1.1
mycolours <- as.character(as.numeric(colnames(mgdat) %in% colnames(bldat)) )
mycolours <- gsub("0","pink",mycolours)
mycolours <- gsub("1","lightblue",mycolours)

plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  pch=19,cex=3, xlim=c(XMIN,XMAX),bty="n",
  col=mycolours )

text(mds, labels=colnames(mgdat) )

legend("topright", legend=c("Neonatal","Assessment"), pch=19,cex=1.5,col=c("pink","lightblue"))

pdf("MDS.pdf")
plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  pch=19,cex=3, xlim=c(XMIN,XMAX),bty="n",
  col=mycolours )
text(mds, labels=colnames(mgdat) )
legend("topright", legend=c("Neonatal","Assessment"), pch=19,cex=1.5,col=c("pink","lightblue"))
dev.off()
## png 
##   2

Now let’s look at the guthrie card methylation profiles with scree and MDS plots.

guss <- ss[ss$SampleID %in% colnames(gudat),]

head(gudat)
##                   S3        S4        S31        S32        S11        S12
## cg26928153  3.759545  3.721204  3.7270592  4.0857809  3.4520160  3.5150960
## cg16269199  1.803129  1.881965  1.0675504  1.2920113  0.5550543  0.3959599
## cg13869341  2.107273  2.245791  3.5888431  3.3526949  2.0560847  2.0993315
## cg24669183  3.205113  3.306509  2.7232436  2.9550714  3.1164939  2.7930813
## cg26679879 -2.478877 -1.933014 -0.7146168 -0.7570735 -2.5125718 -2.5902257
## cg22519184 -1.979652 -1.250329 -0.4326203 -0.4317646 -1.8353273 -1.8038311
##                   S13        S14       S17       S18        S21       S22
## cg26928153  3.9797330  3.7399188  3.373260  3.681675  3.5833987  3.737275
## cg16269199  1.4399578  1.2827080  1.719002  1.421265  0.6872838  1.277938
## cg13869341  3.5624787  3.3782116  2.160539  2.439450  2.2497197  2.601821
## cg24669183  2.3686960  2.7489684  3.005928  3.215957  2.2929303  2.615283
## cg26679879 -1.1429094 -1.1934195 -2.952353 -2.099645 -3.2160186 -3.156050
## cg22519184 -0.8923552 -0.7819915 -2.000228 -1.553435 -2.2359800 -1.446442
##                   S27        S28         S7         S8        S23        S24
## cg26928153  3.9894984  3.8552451  4.2020305  3.9901243  3.8632950  3.7935934
## cg16269199  1.1683911  1.0093828  1.0623985  1.3561963  1.3049529  1.2787751
## cg13869341  3.2617628  2.8865653  2.2558865  2.3350162  1.9994270  1.9921303
## cg24669183  4.0425150  3.0563037  2.4461705  2.9013366  2.7521934  2.0792863
## cg26679879 -1.4130782 -0.7338048 -0.5356182 -0.7228236 -2.0539047 -1.6550626
## cg22519184 -0.9906381 -0.3305025 -0.4527813 -0.2757886 -0.9770986 -0.9951513
##                  S39       S40       S35       S36
## cg26928153  3.779488  4.254237  3.886292  3.544201
## cg16269199  1.042919  1.247214  1.689710  1.407690
## cg13869341  2.603591  2.552261  2.132584  2.158385
## cg24669183  2.758006  3.017206  2.489969  2.662112
## cg26679879 -2.201876 -1.750788 -2.602553 -2.446855
## cg22519184 -1.930166 -1.299777 -2.065091 -2.362380
myscree(gudat)

mds <- cmdscale(dist(t(gudat)))

colfunc <- colorRampPalette(c("lightblue", "pink"))

# gestational age
mycolours <- colfunc(nrow(guss))[order(scale(guss$gestational_age))]
plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  main="neonatal blood, coloured by gestational age",
  pch=19,cex=3, xlim=c(XMIN,XMAX),bty="n",col=mycolours )
text(mds, labels=colnames(mgdat) )
mtext("lightblue=young,pink=older")

# sex: 1=male 2=female
mycolours <- gsub("2","pink",gsub("1","lightblue",as.character(guss$Sex)))
plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  main="neonatal blood, coloured by sex",
  pch=19,cex=3, xlim=c(XMIN,XMAX),bty="n",col=mycolours )
text(mds, labels=colnames(mgdat) )
mtext("lightblue=male,pink=female")

# birth complications
mycolours <- gsub("3","darkred",gsub("2","red",gsub("1","pink",gsub("0","lightgray",as.character(guss$Birth_complications)))))

plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  main="neonatal blood, coloured by birth complications",
  pch=19,cex=3, xlim=c(XMIN,XMAX),bty="n",col=mycolours )
text(mds, labels=colnames(mgdat) )
mtext("grey=none, darkred=severe")

# help required to start breathing
mycolours <- gsub("3","darkred",gsub("2","red",gsub("1","pink",gsub("0","lightgray",as.character(guss$Help_breathing_start)))))

plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  main="help to start breathing",
  pch=19,cex=3, xlim=c(XMIN,XMAX),bty="n",col=mycolours )
text(mds, labels=colnames(mgdat) )
mtext("grey=none, red=help")

Session information

For reproducibility.

sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] limma_3.54.0     kableExtra_1.3.4
## 
## loaded via a namespace (and not attached):
##  [1] rstudioapi_0.14   knitr_1.41        xml2_1.3.3        magrittr_2.0.3   
##  [5] munsell_0.5.0     rvest_1.0.3       viridisLite_0.4.1 colorspace_2.0-3 
##  [9] R6_2.5.1          rlang_1.0.6       fastmap_1.1.0     highr_0.9        
## [13] stringr_1.5.0     httr_1.4.4        tools_4.2.2       webshot_0.5.4    
## [17] xfun_0.35         cli_3.5.0         jquerylib_0.1.4   systemfonts_1.0.4
## [21] htmltools_0.5.4   yaml_2.3.6        digest_0.6.31     lifecycle_1.0.3  
## [25] sass_0.4.4        vctrs_0.5.1       glue_1.6.2        cachem_1.0.6     
## [29] evaluate_0.19     rmarkdown_2.19    stringi_1.7.8     compiler_4.2.2   
## [33] bslib_0.4.2       scales_1.2.1      svglite_2.1.0     jsonlite_1.8.4