Introduction

Here we are splitting the dataset and testing concordance between LA and AL approaches.

suppressPackageStartupMessages({
  library("limma")
  library("eulerr")
  library("IlluminaHumanMethylation450kmanifest")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library("tictoc")
  library("mitch")
  library("kableExtra")
  library("beeswarm")
})

CORES=8

Load data

  • annotations

  • probe sets

  • gene sets

  • design matrix

  • mval matrix

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])

gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))

summary(unlist(lapply(sets,length)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   24.00   49.68   55.00 6778.00
genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")

if (!file.exists("GSE158422_design.rds")) {
  download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")

if (!file.exists("GSE158422_design.rds")) {
 download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")

boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
  main="mean probe methylation mval")

Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.

gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")

gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")

GMEA FUNCTIONS

There are three approaches. First, limma can be done first, followed by aggregation by gene and then enrichment test (called LA). Secondly, probe values can be aggregated by gene, followed by limma, followed by enrichment test (AL). Third, probe values can be aggregated to gene level, and then aggregated to gene set level followed by limma test (AA).

LA

This process runs limma first and then aggregates the results before doing an enrichment test.

# limma
runlimma <- function(mval,design,myann) {
  fit.reduced <- lmFit(mval,design)
  fit.reduced <- eBayes(fit.reduced)
  summary(decideTests(fit.reduced))
  dm <- topTable(fit.reduced,coef=4, number = Inf)
  dm <- merge(myann,dm,by=0)
  dm <- dm[order(dm$P.Value),]
  rownames(dm) <- dm$Row.names
  dm$Row.names=NULL
  return(dm)
}

# aggregate
agg <- function(dm,cores=1) {
  gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  dm$UCSC_RefGene_Name <- gnl
  l <- mclapply(1:nrow(dm), function(i) {
    a <- dm[i,]
    len <- length(a[[1]][[1]])
    tvals <- as.numeric(rep(a["t"],len))
    genes <- a[[1]][[1]]
    data.frame(genes,tvals)
  },mc.cores=cores)
  df <- do.call(rbind,l)
  keep <- names(which(table(df$genes)>1))
  df <- df[df$genes %in% keep,]
  gn <- unique(df$genes)
  gme_res <- lapply( 1:length(gn), function(i) {
    g <- gn[i]
    tstats <- df[which(df$genes==g),"tvals"]
    myn <- length(tstats)
    mymean <- mean(tstats)
    mymedian <- mean(tstats)
    if ( length(tstats) > 2 ) {
      ttest <- t.test(tstats)
      pval <- ttest$p.value
    } else {
      pval = 1
    }
    res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
      "median"=mymedian, pval=pval)
  } )
  gme_res_df <- do.call(rbind, gme_res)
  rownames(gme_res_df) <- gme_res_df[,1]
  gme_res_df <- gme_res_df[,-1]
  tmp <- apply(gme_res_df,2,as.numeric)
  rownames(tmp) <- rownames(gme_res_df)
  gme_res_df <- as.data.frame(tmp)
  gme_res_df$sig <- -log10(gme_res_df[,4])
  gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
  gme_res_df$fdr <- p.adjust(gme_res_df$pval)
  out <- list("df"=df,"gme_res_df"=gme_res_df)
  return(out)
}

# enrich
ttenrich <- function(m,genesets,cores=1) {
  res <- mclapply( 1:length(genesets), function(i) {
    gs <- genesets[i]
    name <- names(gs)
    n_members <- length(which(rownames(m) %in% gs[[1]]))
    if ( n_members > 4 ) {
      tstats <- m[which(rownames(m) %in% gs[[1]]),]
      myn <- length(tstats)
      mymean <- mean(tstats)
      mymedian <- mean(tstats)
      wt <- t.test(tstats)
      res <- c(name,myn,mymean,mymedian,wt$p.value)
    }
  } , mc.cores = cores)
  res_df <- do.call(rbind, res)
  rownames(res_df) <- res_df[,1]
  res_df <- res_df[,-1]
  colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
  tmp <- apply(res_df,2,as.numeric)
  rownames(tmp) <- rownames(res_df)
  res_df <- tmp
  res_df <- as.data.frame(res_df)
  res_df <- res_df[order(res_df$pval),]
  res_df$logp <- -log10(res_df$pval )
  res_df$fdr <- p.adjust(res_df$pval,method="fdr")
  res_df[order(abs(res_df$pval)),]
  return(res_df)
}

AL: Aggregate limma functions

Functions for aggregate-limma-enrich approach.

# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
  gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  myann$gnl <- gnl
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  mymed <- function(g) {
    probes <- rownames(myann[grep(g,myann$gnl),])
    rows <- which(rownames(mx) %in% probes)
    if ( length(rows) > 1 ) {
      b <- mx[rows,]
      med <- apply(b,2,mean)
      med <- matrix(med,nrow=1)
      colnames(med) <- colnames(b)
      rownames(med) <- g
      return(med)
    }
  }
  med <- mclapply(gn,mymed,mc.cores=cores)
  med <- med[lapply(med,length)>0]
  medf <- do.call(rbind,med)
  return(medf)
}

chragg <- function(mval,myann,cores=1){
  annodf <- as.data.frame(anno)
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  chrs <- unique(anno$chr)
  myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
  chrs <- chrs[order(-myorder)]
  leadercores <- floor(sqrt(cores))
  workercores <- ceiling(sqrt(cores))
  chrmedf <- mclapply(chrs,function(chr) {
    chrfrag <- annodf[annodf$chr==chr,]
    chrprobes <-rownames(chrfrag)
    chrmx <- mx[rownames(mx) %in% chrprobes,]
    chranno <- myann[rownames(myann) %in% chrprobes,]
    chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
    return(chrmedf)
  },mc.cores=leadercores)
  medf <- do.call(rbind, chrmedf)
  return(medf)
}

agglimma <- function(medf,design) {
  fit.reduced <- lmFit(medf,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  nondup <- !duplicated(dmagg$ID)
  dmagg <- dmagg[nondup,]
  rownames(dmagg) <- dmagg$ID
  dmagg$ID = NULL
  return(dmagg)
}

AA Aggregate-aggregate-limma functions

gsagg <- function(x,genesets,cores=1) {
  meds <- mclapply(1:length(genesets), function(i) {
    gs = genesets[[i]]
    xx <- x[rownames(x) %in% gs,]
    med <- apply(xx,2,mean)
  },mc.cores=cores)
  mymed <- do.call(rbind,meds)
  rownames(mymed) <- names(genesets)
  as.data.frame(mymed)
}

aalimma <- function(agag,design) {
  fit.reduced <- lmFit(agag,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  return(dmagg)
}

aal <- function(mval,myann,genesets,design,cores=1) {
  medf <- chragg(mval,myann,cores=CORES)
  agag <- gsagg(x=medf,genesets=gs_symbols,cores=CORES)
  aalres <- aalimma(agag=agag,design=design)
  return(aalres)
}

#aalres <- aal(mval=mval,myann=myann,genesets=gs_symbols,design=design,cores=CORES)

Prep

# prep
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
rownames(design) <- colnames(mval)

LA workflow split and analyse

split 74 samples 37 patients split into 18 + 18 Conduct LA workflow analysis. Then make overlap.

splitanalyze_la <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  # la1
  dm1 <- runlimma(mval_a,design_a,myann)
  dmagg1 <- agg(dm1,cores=floor(CORES/3))
  m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
  lares1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)

  # la2
  dm2 <- runlimma(mval_b,design_b,myann)
  dmagg2 <- agg(dm2,cores=floor(CORES/3))
  m2 <- dmagg2$gme_res_df[,"mean",drop=FALSE]
  lares2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)

  list("lares1"=lares1,"lares2"=lares2)

}

lares <- mclapply( seq(100,5000,100) ,splitanalyze_la,mc.cores=CORES)

AL workflow split and analyse

splitanalyze_al <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  #al
  medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
  magg1 <- agglimma(medf1,design_a)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)

  #al
  medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
  magg2 <- agglimma(medf2,design_b)
  m2 <- as.data.frame(magg2$t)
  rownames(m2) <- rownames(magg2)
  colnames(m2) <- "t"
  alres2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)

  list("alres1"=alres1,"alres2"=alres2)
}

alres <- mclapply( seq(100,5000,100) ,splitanalyze_al,mc.cores=CORES)

AA workflow split and analyse

splitanalyze_aa <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  #aa1
  aares1 <- aal(mval=mval_a,myann=myann,genesets=gs_symbols,
    design=design_a,cores=floor(CORES/3))

  #aa2
  aares2 <- aal(mval=mval_b,myann=myann,genesets=gs_symbols,
    design=design_b,cores=floor(CORES/3))

  list("aares1"=aares1,"aares2"=aares2)
}

aares <- mclapply( seq(100,5000,100) ,splitanalyze_aa,mc.cores=CORES)

Compare

lacompare <- function(lares){
  lares1 <- lares[[1]]
  lares2 <- lares[[2]]
  lasig1 <- subset(lares1,fdr<0.05)
  lasig2 <- subset(lares2,fdr<0.05)
  lasig1_up <- rownames(subset(lasig1,t_median>0))
  lasig1_dn <- rownames(subset(lasig1,t_median<0))
  lasig2_up <- rownames(subset(lasig2,t_median>0))
  lasig2_dn <- rownames(subset(lasig2,t_median<0))

  total <- length(unique(c(lasig1_up,lasig2_up,lasig1_dn,lasig2_dn)))
  common <- length(intersect(lasig1_up, lasig2_up)) + length(intersect(lasig1_dn, lasig2_dn))
  discordant <- length(intersect(lasig1_up, lasig2_dn)) + length(intersect(lasig1_dn, lasig2_up))
  uncommon = total - common - discordant

  laout <- data.frame(total,common,discordant,uncommon)
  laout$p_comm <-laout$common / laout$total
  laout$p_disc <-laout$discordant / laout$total
  laout$p_uncom <-laout$uncommon / laout$total
  laout
}

alcompare <-function(alres){
  alres1 <- alres[[1]]
  alres2 <- alres[[2]]
  alsig1 <- subset(alres1,fdr<0.05)
  alsig2 <- subset(alres2,fdr<0.05)

  alsig1_up <- rownames(subset(alsig1,t_median>0))
  alsig1_dn <- rownames(subset(alsig1,t_median<0))
  alsig2_up <- rownames(subset(alsig2,t_median>0))
  alsig2_dn <- rownames(subset(alsig2,t_median<0))

  total <- length(unique(c(alsig1_up,alsig2_up,alsig1_dn,alsig2_dn)))
  common <- length(intersect(alsig1_up, alsig2_up)) + length(intersect(alsig1_dn, alsig2_dn))
  discordant <- length(intersect(alsig1_up, alsig2_dn)) + length(intersect(alsig1_dn, alsig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  alout <- data.frame(total,common,discordant,uncommon)
  alout$p_comm <-alout$common / alout$total
  alout$p_disc <-alout$discordant / alout$total
  alout$p_uncom <-alout$uncommon / alout$total
  alout
}

aacompare <-function(aa){
  aa1 <- aa[[1]]
  aa2 <- aa[[2]]
  aasig1 <- subset(aa1,adj.P.Val<0.05)
  aasig2 <- subset(aa2,adj.P.Val<0.05)

  aasig1_up <- rownames(subset(aasig1,logFC>0))
  aasig1_dn <- rownames(subset(aasig1,logFC<0))
  aasig2_up <- rownames(subset(aasig2,logFC>0))
  aasig2_dn <- rownames(subset(aasig2,logFC<0))

  total <- length(unique(c(aasig1_up,aasig2_up,aasig1_dn,aasig2_dn)))
  common <- length(intersect(aasig1_up, aasig2_up)) + length(intersect(aasig1_dn, aasig2_dn))
  discordant <- length(intersect(aasig1_up, aasig2_dn)) + length(intersect(aasig1_dn, aasig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  aaout <- data.frame(total,common,discordant,uncommon)
  aaout$p_comm <- aaout$common / aaout$total
  aaout$p_disc <- aaout$discordant / aaout$total
  aaout$p_uncom <-aaout$uncommon / aaout$total
  aaout
}
laout <- do.call(rbind,lapply(lares,lacompare))

alout <- do.call(rbind,lapply(alres,alcompare))

aaout <- do.call(rbind,lapply(aares,aacompare))

laout
##    total common discordant uncommon       p_comm      p_disc    p_uncom
## 1   1421    866          0      555 0.6094299789 0.000000000 0.39057002
## 2   1620   1242          1      377 0.7666666667 0.000617284 0.23271605
## 3    931    475          9      447 0.5102040816 0.009667025 0.48012889
## 4    935    483         12      440 0.5165775401 0.012834225 0.47058824
## 5   1271    483          0      788 0.3800157356 0.000000000 0.61998426
## 6   1277    554          0      723 0.4338292874 0.000000000 0.56617071
## 7   1592      0       1320      272 0.0000000000 0.829145729 0.17085427
## 8   1417      1        791      625 0.0007057163 0.558221595 0.44107269
## 9    904    419         11      474 0.4634955752 0.012168142 0.52433628
## 10  1611   1492          0      119 0.9261328367 0.000000000 0.07386716
## 11  1550      1        603      946 0.0006451613 0.389032258 0.61032258
## 12  1603      2        918      683 0.0012476606 0.572676232 0.42607611
## 13  1604     91         11     1502 0.0567331671 0.006857855 0.93640898
## 14  1519      2        726      791 0.0013166557 0.477946017 0.52073733
## 15  1562   1221          0      341 0.7816901408 0.000000000 0.21830986
## 16  1308      1        736      571 0.0007645260 0.562691131 0.43654434
## 17  1527    181          0     1346 0.1185330714 0.000000000 0.88146693
## 18  1603     12        838      753 0.0074859638 0.522769807 0.46974423
## 19  1260    389          0      871 0.3087301587 0.000000000 0.69126984
## 20  1588      0       1312      276 0.0000000000 0.826196474 0.17380353
## 21  1605     11        805      789 0.0068535826 0.501557632 0.49158879
## 22  1620   1521          0       99 0.9388888889 0.000000000 0.06111111
## 23   634      8         60      566 0.0126182965 0.094637224 0.89274448
## 24   923    441         12      470 0.4777898158 0.013001083 0.50920910
## 25  1597      0       1327      270 0.0000000000 0.830932999 0.16906700
## 26  1561    628          1      932 0.4023062140 0.000640615 0.59705317
## 27  1260    415          0      845 0.3293650794 0.000000000 0.67063492
## 28  1585      0       1304      281 0.0000000000 0.822712934 0.17728707
## 29  1568      1       1238      329 0.0006377551 0.789540816 0.20982143
## 30  1548     36        491     1021 0.0232558140 0.317183463 0.65956072
## 31  1557      1       1102      454 0.0006422608 0.707771355 0.29158638
## 32  1594   1435          0      159 0.9002509410 0.000000000 0.09974906
## 33  1525    195          0     1330 0.1278688525 0.000000000 0.87213115
## 34   903      1        100      802 0.0011074197 0.110741971 0.88815061
## 35   977      3         60      914 0.0030706244 0.061412487 0.93551689
## 36  1049      6        520      523 0.0057197331 0.495710200 0.49857007
## 37  1507    557          0      950 0.3696084937 0.000000000 0.63039151
## 38  1613     35        524     1054 0.0216986981 0.324860508 0.65344079
## 39  1506      3        764      739 0.0019920319 0.507304117 0.49070385
## 40  1562      1       1142      419 0.0006402049 0.731113956 0.26824584
## 41  1555     28        577      950 0.0180064309 0.371061093 0.61093248
## 42  1598      0       1460      138 0.0000000000 0.913642053 0.08635795
## 43  1549    230        106     1213 0.1484828922 0.068431246 0.78308586
## 44  1633      0       1524      109 0.0000000000 0.933251684 0.06674832
## 45  1426      1        928      497 0.0007012623 0.650771388 0.34852735
## 46  1057     14        434      609 0.0132450331 0.410596026 0.57615894
## 47   982      2        410      570 0.0020366599 0.417515275 0.58044807
## 48  1567   1269          0      298 0.8098276962 0.000000000 0.19017230
## 49  1513      5        740      768 0.0033046927 0.489094514 0.50760079
## 50   909      4        163      742 0.0044004400 0.179317932 0.81628163
alout
##    total common discordant uncommon    p_comm       p_disc   p_uncom
## 1   1380    819          0      561 0.5934783 0.0000000000 0.4065217
## 2   1196   1059          0      137 0.8854515 0.0000000000 0.1145485
## 3   1226   1088          0      138 0.8874388 0.0000000000 0.1125612
## 4   1447    761          0      686 0.5259157 0.0000000000 0.4740843
## 5   1223    951          0      272 0.7775961 0.0000000000 0.2224039
## 6   1219    997          0      222 0.8178835 0.0000000000 0.1821165
## 7   1263    851          0      412 0.6737926 0.0000000000 0.3262074
## 8   1361    745          0      616 0.5473916 0.0000000000 0.4526084
## 9   1306    992          0      314 0.7595712 0.0000000000 0.2404288
## 10  1377    785          0      592 0.5700799 0.0000000000 0.4299201
## 11  1223    912          0      311 0.7457073 0.0000000000 0.2542927
## 12  1290    960          0      330 0.7441860 0.0000000000 0.2558140
## 13  1299    997          0      302 0.7675135 0.0000000000 0.2324865
## 14  1504    362          4     1138 0.2406915 0.0026595745 0.7566489
## 15  1255   1069          0      186 0.8517928 0.0000000000 0.1482072
## 16  1221   1055          0      166 0.8640459 0.0000000000 0.1359541
## 17  1230   1093          0      137 0.8886179 0.0000000000 0.1113821
## 18  1322    989          0      333 0.7481089 0.0000000000 0.2518911
## 19  1326   1015          0      311 0.7654600 0.0000000000 0.2345400
## 20  1401    622          0      779 0.4439686 0.0000000000 0.5560314
## 21  1238   1039          0      199 0.8392569 0.0000000000 0.1607431
## 22  1265   1057          0      208 0.8355731 0.0000000000 0.1644269
## 23  1211   1067          0      144 0.8810900 0.0000000000 0.1189100
## 24  1415    684          0      731 0.4833922 0.0000000000 0.5166078
## 25  1242   1049          0      193 0.8446055 0.0000000000 0.1553945
## 26  1361    757          0      604 0.5562087 0.0000000000 0.4437913
## 27  1351   1026          0      325 0.7594375 0.0000000000 0.2405625
## 28  1259    906          0      353 0.7196187 0.0000000000 0.2803813
## 29  1338    858          0      480 0.6412556 0.0000000000 0.3587444
## 30  1343    967          0      376 0.7200298 0.0000000000 0.2799702
## 31  1201   1000          0      201 0.8326395 0.0000000000 0.1673605
## 32  1273   1037          0      236 0.8146112 0.0000000000 0.1853888
## 33  1357   1032          0      325 0.7605011 0.0000000000 0.2394989
## 34  1273    858          0      415 0.6739984 0.0000000000 0.3260016
## 35  1209   1058          0      151 0.8751034 0.0000000000 0.1248966
## 36  1354    950          0      404 0.7016248 0.0000000000 0.2983752
## 37  1196    989          0      207 0.8269231 0.0000000000 0.1730769
## 38  1309    644          0      665 0.4919786 0.0000000000 0.5080214
## 39  1351    852          0      499 0.6306440 0.0000000000 0.3693560
## 40  1250    691          0      559 0.5528000 0.0000000000 0.4472000
## 41  1329    789          0      540 0.5936795 0.0000000000 0.4063205
## 42  1284   1105          0      179 0.8605919 0.0000000000 0.1394081
## 43  1295   1061          0      234 0.8193050 0.0000000000 0.1806950
## 44  1299    955          0      344 0.7351809 0.0000000000 0.2648191
## 45  1209   1043          0      166 0.8626964 0.0000000000 0.1373036
## 46  1296   1032          0      264 0.7962963 0.0000000000 0.2037037
## 47  1472    571          1      900 0.3879076 0.0006793478 0.6114130
## 48  1374    980          0      394 0.7132460 0.0000000000 0.2867540
## 49  1330    921          0      409 0.6924812 0.0000000000 0.3075188
## 50  1342    888          0      454 0.6616990 0.0000000000 0.3383010
aaout
##    total common discordant uncommon      p_comm       p_disc   p_uncom
## 1   1324    370          0      954 0.279456193 0.0000000000 0.7205438
## 2    939    549          0      390 0.584664537 0.0000000000 0.4153355
## 3    809    590          0      219 0.729295426 0.0000000000 0.2707046
## 4   1504    120          1     1383 0.079787234 0.0006648936 0.9195479
## 5    684    443          0      241 0.647660819 0.0000000000 0.3523392
## 6    873    121          0      752 0.138602520 0.0000000000 0.8613975
## 7   1211     17          0     1194 0.014037985 0.0000000000 0.9859620
## 8   1355     77          0     1278 0.056826568 0.0000000000 0.9431734
## 9   1426    271          0     1155 0.190042076 0.0000000000 0.8099579
## 10  1251    307          0      944 0.245403677 0.0000000000 0.7545963
## 11  1126    186          0      940 0.165186501 0.0000000000 0.8348135
## 12  1071    159          0      912 0.148459384 0.0000000000 0.8515406
## 13  1326    137          0     1189 0.103318250 0.0000000000 0.8966817
## 14  1553    160          0     1393 0.103026401 0.0000000000 0.8969736
## 15  1090    297          0      793 0.272477064 0.0000000000 0.7275229
## 16   779    382          0      397 0.490372272 0.0000000000 0.5096277
## 17  1151    289          0      862 0.251086012 0.0000000000 0.7489140
## 18  1077    222          0      855 0.206128134 0.0000000000 0.7938719
## 19  1424    101          0     1323 0.070926966 0.0000000000 0.9290730
## 20  1468     83          0     1385 0.056539510 0.0000000000 0.9434605
## 21   898    547          0      351 0.609131403 0.0000000000 0.3908686
## 22  1222    311          0      911 0.254500818 0.0000000000 0.7454992
## 23   952    459          0      493 0.482142857 0.0000000000 0.5178571
## 24  1458    146          0     1312 0.100137174 0.0000000000 0.8998628
## 25  1084    663          0      421 0.611623616 0.0000000000 0.3883764
## 26  1313    166          0     1147 0.126428027 0.0000000000 0.8735720
## 27  1355    624          0      731 0.460516605 0.0000000000 0.5394834
## 28  1194    148          0     1046 0.123953099 0.0000000000 0.8760469
## 29  1262    116          0     1146 0.091917591 0.0000000000 0.9080824
## 30  1187    343          0      844 0.288963774 0.0000000000 0.7110362
## 31  1032    130          0      902 0.125968992 0.0000000000 0.8740310
## 32  1025    783          0      242 0.763902439 0.0000000000 0.2360976
## 33  1476    351          0     1125 0.237804878 0.0000000000 0.7621951
## 34  1483     89          0     1394 0.060013486 0.0000000000 0.9399865
## 35   962    537          0      425 0.558212058 0.0000000000 0.4417879
## 36   961    518          0      443 0.539021852 0.0000000000 0.4609781
## 37  1195    171          0     1024 0.143096234 0.0000000000 0.8569038
## 38  1287    128          0     1159 0.099456099 0.0000000000 0.9005439
## 39  1317    153          0     1164 0.116173121 0.0000000000 0.8838269
## 40  1224      0          0     1224 0.000000000 0.0000000000 1.0000000
## 41  1304      7          0     1297 0.005368098 0.0000000000 0.9946319
## 42  1040    632          0      408 0.607692308 0.0000000000 0.3923077
## 43  1149    559          0      590 0.486510009 0.0000000000 0.5134900
## 44  1311    160          0     1151 0.122044241 0.0000000000 0.8779558
## 45  1061    283          0      778 0.266729500 0.0000000000 0.7332705
## 46   999    689          0      310 0.689689690 0.0000000000 0.3103103
## 47  1555    103          0     1452 0.066237942 0.0000000000 0.9337621
## 48  1351    338          0     1013 0.250185048 0.0000000000 0.7498150
## 49  1351     96          0     1255 0.071058475 0.0000000000 0.9289415
## 50  1306    206          0     1100 0.157733538 0.0000000000 0.8422665
res <- do.call(rbind,lapply(list(laout,alout,aaout),colMeans))

rownames(res) <- c("la","al","aa")

res <- as.data.frame(res)

res %>%
  kbl(caption="overall summary") %>%
  kable_paper("hover", full_width = F)
overall summary
total common discordant uncommon p_comm p_disc p_uncom
la 1391.72 295.32 461.60 634.80 0.2101705 0.3104725 0.4793570
al 1301.90 919.76 0.10 382.04 0.7132613 0.0000668 0.2866719
aa 1195.10 286.74 0.02 908.34 0.2669902 0.0000133 0.7329965
apply(laout,2,median)
##        total       common   discordant     uncommon       p_comm       p_disc 
## 1.537500e+03 1.300000e+01 2.865000e+02 5.900000e+02 1.293166e-02 2.482507e-01 
##      p_uncom 
## 4.950794e-01
apply(alout,2,median)
##        total       common   discordant     uncommon       p_comm       p_disc 
## 1297.5000000  973.5000000    0.0000000  327.5000000    0.7469081    0.0000000 
##      p_uncom 
##    0.2530919
apply(aaout,2,median)
##        total       common   discordant     uncommon       p_comm       p_disc 
## 1216.5000000  214.0000000    0.0000000  949.0000000    0.1776143    0.0000000 
##      p_uncom 
##    0.8223857

Plot some data.

Raw counts and proportions.

par(mfrow=c(1,3))

barplot(res$common,main="no. common",names.arg=c("LA","AL","AA"))
barplot(res$uncommon,main="no. uncommon",names.arg=c("LA","AL","AA"))
barplot(res$discordant,main="no. discordant",names.arg=c("LA","AL","AA"))

boxplot(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common),
  main="no. common",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon),
  main="no. uncommon",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant),
  main="no. discordant",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant), add=TRUE,cex=1.2,pch=19)

barplot(res$p_comm,main="proportion common",names.arg=c("LA","AL","AA"))
barplot(res$p_uncom,main="proportion uncommon",names.arg=c("LA","AL","AA"))
barplot(res$p_disc,main="proportion discordant",names.arg=c("LA","AL","AA"))

boxplot(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm),
  main="proportion common",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom),
  main="proportion uncommon",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc),
  main="proportion discordant",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc), add=TRUE,cex=1.2,pch=19)

Session information

save.image("GSE158422_split.Rdata")

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] beeswarm_0.4.0                                     
##  [2] kableExtra_1.3.4                                   
##  [3] mitch_1.12.0                                       
##  [4] tictoc_1.2                                         
##  [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
##  [6] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [7] minfi_1.46.0                                       
##  [8] bumphunter_1.42.0                                  
##  [9] locfit_1.5-9.8                                     
## [10] iterators_1.0.14                                   
## [11] foreach_1.5.2                                      
## [12] Biostrings_2.68.1                                  
## [13] XVector_0.40.0                                     
## [14] SummarizedExperiment_1.30.2                        
## [15] Biobase_2.60.0                                     
## [16] MatrixGenerics_1.12.3                              
## [17] matrixStats_1.0.0                                  
## [18] GenomicRanges_1.52.0                               
## [19] GenomeInfoDb_1.36.3                                
## [20] IRanges_2.34.1                                     
## [21] S4Vectors_0.38.2                                   
## [22] BiocGenerics_0.46.0                                
## [23] eulerr_7.0.0                                       
## [24] limma_3.56.2                                       
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1             later_1.3.1              
##   [3] BiocIO_1.10.0             bitops_1.0-7             
##   [5] filelock_1.0.2            tibble_3.2.1             
##   [7] preprocessCore_1.62.1     XML_3.99-0.14            
##   [9] lifecycle_1.0.3           lattice_0.21-9           
##  [11] MASS_7.3-60               base64_2.0.1             
##  [13] scrime_1.3.5              magrittr_2.0.3           
##  [15] sass_0.4.7                rmarkdown_2.25           
##  [17] jquerylib_0.1.4           yaml_2.3.7               
##  [19] httpuv_1.6.11             doRNG_1.8.6              
##  [21] askpass_1.2.0             DBI_1.1.3                
##  [23] RColorBrewer_1.1-3        abind_1.4-5              
##  [25] zlibbioc_1.46.0           rvest_1.0.3              
##  [27] quadprog_1.5-8            purrr_1.0.2              
##  [29] RCurl_1.98-1.12           rappdirs_0.3.3           
##  [31] GenomeInfoDbData_1.2.10   genefilter_1.82.1        
##  [33] annotate_1.78.0           svglite_2.1.1            
##  [35] DelayedMatrixStats_1.22.6 codetools_0.2-19         
##  [37] DelayedArray_0.26.7       xml2_1.3.5               
##  [39] tidyselect_1.2.0          beanplot_1.3.1           
##  [41] BiocFileCache_2.8.0       webshot_0.5.5            
##  [43] illuminaio_0.42.0         GenomicAlignments_1.36.0 
##  [45] jsonlite_1.8.7            multtest_2.56.0          
##  [47] ellipsis_0.3.2            survival_3.5-7           
##  [49] systemfonts_1.0.4         tools_4.3.1              
##  [51] progress_1.2.2            Rcpp_1.0.11              
##  [53] glue_1.6.2                gridExtra_2.3            
##  [55] xfun_0.40                 dplyr_1.1.3              
##  [57] HDF5Array_1.28.1          fastmap_1.1.1            
##  [59] GGally_2.1.2              rhdf5filters_1.12.1      
##  [61] fansi_1.0.4               openssl_2.1.1            
##  [63] caTools_1.18.2            digest_0.6.33            
##  [65] R6_2.5.1                  mime_0.12                
##  [67] colorspace_2.1-0          gtools_3.9.4             
##  [69] biomaRt_2.56.1            RSQLite_2.3.1            
##  [71] utf8_1.2.3                tidyr_1.3.0              
##  [73] generics_0.1.3            data.table_1.14.8        
##  [75] rtracklayer_1.60.1        prettyunits_1.2.0        
##  [77] httr_1.4.7                htmlwidgets_1.6.2        
##  [79] S4Arrays_1.0.6            pkgconfig_2.0.3          
##  [81] gtable_0.3.4              blob_1.2.4               
##  [83] siggenes_1.74.0           htmltools_0.5.6          
##  [85] echarts4r_0.4.5           scales_1.2.1             
##  [87] png_0.1-8                 knitr_1.44               
##  [89] rstudioapi_0.15.0         reshape2_1.4.4           
##  [91] tzdb_0.4.0                rjson_0.2.21             
##  [93] nlme_3.1-163              curl_5.0.2               
##  [95] cachem_1.0.8              rhdf5_2.44.0             
##  [97] stringr_1.5.0             KernSmooth_2.23-22       
##  [99] AnnotationDbi_1.62.2      restfulr_0.0.15          
## [101] GEOquery_2.68.0           pillar_1.9.0             
## [103] grid_4.3.1                reshape_0.8.9            
## [105] vctrs_0.6.3               gplots_3.1.3             
## [107] promises_1.2.1            dbplyr_2.3.4             
## [109] xtable_1.8-4              evaluate_0.22            
## [111] readr_2.1.4               GenomicFeatures_1.52.2   
## [113] cli_3.6.1                 compiler_4.3.1           
## [115] Rsamtools_2.16.0          rlang_1.1.1              
## [117] crayon_1.5.2              rngtools_1.5.2           
## [119] nor1mix_1.3-0             mclust_6.0.0             
## [121] plyr_1.8.8                stringi_1.7.12           
## [123] viridisLite_0.4.2         BiocParallel_1.34.2      
## [125] munsell_0.5.0             Matrix_1.6-1.1           
## [127] hms_1.1.3                 sparseMatrixStats_1.12.2 
## [129] bit64_4.0.5               ggplot2_3.4.3            
## [131] Rhdf5lib_1.22.1           KEGGREST_1.40.1          
## [133] shiny_1.7.5               highr_0.10               
## [135] memoise_2.0.1             bslib_0.5.1              
## [137] bit_4.0.5