Introduction

Here we are splitting the dataset and testing concordance between LA and AL approaches.

suppressPackageStartupMessages({
  library("limma")
  library("eulerr")
  library("IlluminaHumanMethylation450kmanifest")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library("tictoc")
  library("mitch")
  library("kableExtra")
  library("beeswarm")
})

# optimised for 128 GB sever with 32 threads
CORES=6

Load data

annotations
probe sets
gene sets
design matrix
mval matrix

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])

gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))

summary(unlist(lapply(sets,length)))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   24.00   49.68   55.00 6778.00

#genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")

if (!file.exists("GSE158422_design.rds")) {
  download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")

if (!file.exists("GSE158422_design.rds")) {
 download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")

boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
  main="mean probe methylation mval")

Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.

gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")

gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")

GMEA FUNCTIONS

There are three approaches. First, limma can be done first, followed by aggregation by gene and then enrichment test (called LA). Secondly, probe values can be aggregated by gene, followed by limma, followed by enrichment test (AL). Third, probe values can be aggregated to gene level, and then aggregated to gene set level followed by limma test (AA).

LA

This process runs limma first and then aggregates the results before doing an enrichment test.

# limma
runlimma <- function(mval,design,myann) {
  fit.reduced <- lmFit(mval,design)
  fit.reduced <- eBayes(fit.reduced)
  summary(decideTests(fit.reduced))
  dm <- topTable(fit.reduced,coef=4, number = Inf)
  dm <- merge(myann,dm,by=0)
  dm <- dm[order(dm$P.Value),]
  rownames(dm) <- dm$Row.names
  dm$Row.names=NULL
  return(dm)
}

# aggregate
agg <- function(dm,cores=1) {
  gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  dm$UCSC_RefGene_Name <- gnl
  l <- mclapply(1:nrow(dm), function(i) {
    a <- dm[i,]
    len <- length(a[[1]][[1]])
    tvals <- as.numeric(rep(a["t"],len))
    genes <- a[[1]][[1]]
    data.frame(genes,tvals)
  },mc.cores=cores)
  df <- do.call(rbind,l)
  keep <- names(which(table(df$genes)>1))
  df <- df[df$genes %in% keep,]
  gn <- unique(df$genes)
  gme_res <- lapply( 1:length(gn), function(i) {
    g <- gn[i]
    tstats <- df[which(df$genes==g),"tvals"]
    myn <- length(tstats)
    mymean <- mean(tstats)
    mymedian <- median(tstats)
    if ( length(tstats) > 2 ) {
      ttest <- t.test(tstats)
      pval <- ttest$p.value
    } else {
      pval = 1
    }
    res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
      "median"=mymedian, pval=pval)
  } )
  gme_res_df <- do.call(rbind, gme_res)
  rownames(gme_res_df) <- gme_res_df[,1]
  gme_res_df <- gme_res_df[,-1]
  tmp <- apply(gme_res_df,2,as.numeric)
  rownames(tmp) <- rownames(gme_res_df)
  gme_res_df <- as.data.frame(tmp)
  gme_res_df$sig <- -log10(gme_res_df[,4])
  gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
  gme_res_df$fdr <- p.adjust(gme_res_df$pval)
  out <- list("df"=df,"gme_res_df"=gme_res_df)
  return(out)
}

# enrich
ttenrich <- function(m,genesets,cores=1) {
  res <- mclapply( 1:length(genesets), function(i) {
    gs <- genesets[i]
    name <- names(gs)
    n_members <- length(which(rownames(m) %in% gs[[1]]))
    if ( n_members > 4 ) {
      tstats <- m[which(rownames(m) %in% gs[[1]]),]
      myn <- length(tstats)
      mymean <- mean(tstats)
      mymedian <- median(tstats)
      wt <- t.test(tstats)
      res <- c(name,myn,mymean,mymedian,wt$p.value)
    }
  } , mc.cores = cores)
  res_df <- do.call(rbind, res)
  rownames(res_df) <- res_df[,1]
  res_df <- res_df[,-1]
  colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
  tmp <- apply(res_df,2,as.numeric)
  rownames(tmp) <- rownames(res_df)
  res_df <- tmp
  res_df <- as.data.frame(res_df)
  res_df <- res_df[order(res_df$pval),]
  res_df$logp <- -log10(res_df$pval )
  res_df$fdr <- p.adjust(res_df$pval,method="fdr")
  res_df[order(abs(res_df$pval)),]
  return(res_df)
}

AL: Aggregate limma functions

Functions for aggregate-limma-enrich approach.

# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
  gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  myann$gnl <- gnl
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  mymed <- function(g) {
    probes <- rownames(myann[grep(g,myann$gnl),])
    rows <- which(rownames(mx) %in% probes)
    if ( length(rows) > 1 ) {
      b <- mx[rows,]
      med <- apply(b,2,mean)
      med <- matrix(med,nrow=1)
      colnames(med) <- colnames(b)
      rownames(med) <- g
      return(med)
    }
  }
  med <- mclapply(gn,mymed,mc.cores=cores)
  med <- med[lapply(med,length)>0]
  medf <- do.call(rbind,med)
  return(medf)
}

chragg <- function(mval,myann,cores=1){
  annodf <- as.data.frame(anno)
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  chrs <- unique(anno$chr)
  myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
  chrs <- chrs[order(-myorder)]
  leadercores <- floor(sqrt(cores))
  workercores <- ceiling(sqrt(cores))
  chrmedf <- mclapply(chrs,function(chr) {
    chrfrag <- annodf[annodf$chr==chr,]
    chrprobes <-rownames(chrfrag)
    chrmx <- mx[rownames(mx) %in% chrprobes,]
    chranno <- myann[rownames(myann) %in% chrprobes,]
    chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
    return(chrmedf)
  },mc.cores=leadercores)
  medf <- do.call(rbind, chrmedf)
  return(medf)
}

agglimma <- function(medf,design) {
  fit.reduced <- lmFit(medf,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  nondup <- !duplicated(dmagg$ID)
  dmagg <- dmagg[nondup,]
  rownames(dmagg) <- dmagg$ID
  dmagg$ID = NULL
  return(dmagg)
}

AA Aggregate-aggregate-limma functions

Median value works well here.

gsagg <- function(x,genesets,cores=1) {
  meds <- mclapply(1:length(genesets), function(i) {
    gs = genesets[[i]]
    xx <- x[rownames(x) %in% gs,]
    med <- apply(xx,2,median)
  },mc.cores=cores)
  mymed <- do.call(rbind,meds)
  rownames(mymed) <- names(genesets)
  as.data.frame(mymed)
}

aalimma <- function(agag,design) {
  fit.reduced <- lmFit(agag,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  return(dmagg)
}

aal <- function(mval,myann,genesets,design,cores=1) {
  medf <- chragg(mval,myann,cores=cores)
  agag <- gsagg(x=medf,genesets=genesets,cores=cores)
  aalres <- aalimma(agag=agag,design=design)
  return(aalres)
}

#aalres <- aal(mval=mval,myann=myann,genesets=gs_symbols,design=design,cores=CORES)

Agg-limma-mitch function

This approach uses the aggregated mvals, limma and instead of a 1-sample t-test it uses mitch which is a competitive test and could give more interpretable results.

runmitch <- function(m,genesets,cores=1) {
  mres <- mitch_calc(m,genesets,minsetsize=5,cores=cores)
  mres <- mres$enrichment_result
  rownames(mres) <- mres$set
  mres$set=NULL
  return(mres)
}

Prep

# prep
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
rownames(design) <- colnames(mval)

LA workflow split and analyse

split 74 samples 37 patients split into 18 + 18 Conduct LA workflow analysis. Then make overlap.

splitanalyze_la <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  dm1 <- runlimma(mval_a,design_a,myann)
  dmagg1 <- agg(dm1,cores=floor(CORES/3))
  m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
  lares1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES/3)

  dm2 <- runlimma(mval_b,design_b,myann)
  dmagg2 <- agg(dm2,cores=floor(CORES/3))
  m2 <- dmagg2$gme_res_df[,"mean",drop=FALSE]
  lares2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES/3)

  list("lares1"=lares1,"lares2"=lares2)

}

lares <- mclapply( seq(100,5000,100) ,splitanalyze_la,mc.cores=CORES)

AL workflow split and analyse

splitanalyze_al <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
  magg1 <- agglimma(medf1,design_a)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES/3)

  medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
  magg2 <- agglimma(medf2,design_b)
  m2 <- as.data.frame(magg2$t)
  rownames(m2) <- rownames(magg2)
  colnames(m2) <- "t"
  alres2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES/3)

  list("alres1"=alres1,"alres2"=alres2)
}

alres <- mclapply( seq(100,5000,100) ,splitanalyze_al,mc.cores=CORES)

AA workflow split and analyse

splitanalyze_aa <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  aares1 <- aal(mval=mval_a,myann=myann,genesets=gs_symbols,
    design=design_a,cores=floor(CORES/3))

  aares2 <- aal(mval=mval_b,myann=myann,genesets=gs_symbols,
    design=design_b,cores=floor(CORES/3))

  list("aares1"=aares1,"aares2"=aares2)
}

aares <- mclapply( seq(100,5000,100) ,splitanalyze_aa,mc.cores=CORES)

ALM competitive test

splitanalyze_alm <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
  magg1 <- agglimma(medf1,design_a)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  almres1 <- runmitch(m=m1,genesets=gs_symbols,cores=CORES)

  medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
  magg2 <- agglimma(medf2,design_b)
  m2 <- as.data.frame(magg2$t)
  rownames(m2) <- rownames(magg2)
  colnames(m2) <- "t"
  almres2 <- runmitch(m=m2,genesets=gs_symbols,cores=CORES)

  list("almres1"=almres1,"almres2"=almres2)

}

almres <- mclapply( seq(100,5000,100) ,splitanalyze_alm,mc.cores=CORES)

Compare

lacompare <- function(lares){
  lares1 <- lares[[1]]
  lares2 <- lares[[2]]
  lasig1 <- subset(lares1,fdr<0.05)
  lasig2 <- subset(lares2,fdr<0.05)
  lasig1_up <- rownames(subset(lasig1,t_median>0))
  lasig1_dn <- rownames(subset(lasig1,t_median<0))
  lasig2_up <- rownames(subset(lasig2,t_median>0))
  lasig2_dn <- rownames(subset(lasig2,t_median<0))

  total <- length(unique(c(lasig1_up,lasig2_up,lasig1_dn,lasig2_dn)))
  common <- length(intersect(lasig1_up, lasig2_up)) + length(intersect(lasig1_dn, lasig2_dn))
  discordant <- length(intersect(lasig1_up, lasig2_dn)) + length(intersect(lasig1_dn, lasig2_up))
  uncommon = total - common - discordant

  laout <- data.frame(total,common,discordant,uncommon)
  laout$p_comm <-laout$common / laout$total
  laout$p_disc <-laout$discordant / laout$total
  laout$p_uncom <-laout$uncommon / laout$total
  laout
}

alcompare <-function(alres){
  alres1 <- alres[[1]]
  alres2 <- alres[[2]]
  alsig1 <- subset(alres1,fdr<0.05)
  alsig2 <- subset(alres2,fdr<0.05)

  alsig1_up <- rownames(subset(alsig1,t_median>0))
  alsig1_dn <- rownames(subset(alsig1,t_median<0))
  alsig2_up <- rownames(subset(alsig2,t_median>0))
  alsig2_dn <- rownames(subset(alsig2,t_median<0))

  total <- length(unique(c(alsig1_up,alsig2_up,alsig1_dn,alsig2_dn)))
  common <- length(intersect(alsig1_up, alsig2_up)) + length(intersect(alsig1_dn, alsig2_dn))
  discordant <- length(intersect(alsig1_up, alsig2_dn)) + length(intersect(alsig1_dn, alsig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  alout <- data.frame(total,common,discordant,uncommon)
  alout$p_comm <-alout$common / alout$total
  alout$p_disc <-alout$discordant / alout$total
  alout$p_uncom <-alout$uncommon / alout$total
  alout
}

aacompare <-function(aa){
  aa1 <- aa[[1]]
  aa2 <- aa[[2]]
  aasig1 <- subset(aa1,adj.P.Val<0.05)
  aasig2 <- subset(aa2,adj.P.Val<0.05)

  aasig1_up <- rownames(subset(aasig1,logFC>0))
  aasig1_dn <- rownames(subset(aasig1,logFC<0))
  aasig2_up <- rownames(subset(aasig2,logFC>0))
  aasig2_dn <- rownames(subset(aasig2,logFC<0))

  total <- length(unique(c(aasig1_up,aasig2_up,aasig1_dn,aasig2_dn)))
  common <- length(intersect(aasig1_up, aasig2_up)) + length(intersect(aasig1_dn, aasig2_dn))
  discordant <- length(intersect(aasig1_up, aasig2_dn)) + length(intersect(aasig1_dn, aasig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  aaout <- data.frame(total,common,discordant,uncommon)
  aaout$p_comm <- aaout$common / aaout$total
  aaout$p_disc <- aaout$discordant / aaout$total
  aaout$p_uncom <-aaout$uncommon / aaout$total
  aaout
}

almcompare <-function(almres){
  almres1 <- almres[[1]]
  almres2 <- almres[[2]]
  almsig1 <- subset(almres1,p.adjustANOVA<0.05)
  almsig2 <- subset(almres2,p.adjustANOVA<0.05)

  almsig1_up <- rownames(subset(almsig1,s.dist>0))
  almsig1_dn <- rownames(subset(almsig1,s.dist<0))
  almsig2_up <- rownames(subset(almsig2,s.dist>0))
  almsig2_dn <- rownames(subset(almsig2,s.dist<0))

  total <- length(unique(c(almsig1_up,almsig2_up,almsig1_dn,almsig2_dn)))
  common <- length(intersect(almsig1_up, almsig2_up)) + length(intersect(almsig1_dn, almsig2_dn))
  discordant <- length(intersect(almsig1_up, almsig2_dn)) + length(intersect(almsig1_dn, almsig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  almout <- data.frame(total,common,discordant,uncommon)
  almout$p_comm <-almout$common / almout$total
  almout$p_disc <-almout$discordant / almout$total
  almout$p_uncom <-almout$uncommon / almout$total
  almout
}

laout <- do.call(rbind,lapply(lares,lacompare))

alout <- do.call(rbind,lapply(alres,alcompare))

aaout <- do.call(rbind,lapply(aares,aacompare))

almout <- do.call(rbind,lapply(almres,almcompare))

laout

##    total common discordant uncommon       p_comm      p_disc    p_uncom
## 1   1421    866          0      555 0.6094299789 0.000000000 0.39057002
## 2   1620   1242          1      377 0.7666666667 0.000617284 0.23271605
## 3    931    475          9      447 0.5102040816 0.009667025 0.48012889
## 4    935    483         12      440 0.5165775401 0.012834225 0.47058824
## 5   1271    483          0      788 0.3800157356 0.000000000 0.61998426
## 6   1277    554          0      723 0.4338292874 0.000000000 0.56617071
## 7   1592      0       1320      272 0.0000000000 0.829145729 0.17085427
## 8   1417      1        791      625 0.0007057163 0.558221595 0.44107269
## 9    904    419         11      474 0.4634955752 0.012168142 0.52433628
## 10  1611   1492          0      119 0.9261328367 0.000000000 0.07386716
## 11  1550      1        603      946 0.0006451613 0.389032258 0.61032258
## 12  1603      2        918      683 0.0012476606 0.572676232 0.42607611
## 13  1604     90         12     1502 0.0561097257 0.007481297 0.93640898
## 14  1519      2        726      791 0.0013166557 0.477946017 0.52073733
## 15  1562   1221          0      341 0.7816901408 0.000000000 0.21830986
## 16  1308      1        736      571 0.0007645260 0.562691131 0.43654434
## 17  1527    181          0     1346 0.1185330714 0.000000000 0.88146693
## 18  1603     12        838      753 0.0074859638 0.522769807 0.46974423
## 19  1260    389          0      871 0.3087301587 0.000000000 0.69126984
## 20  1588      0       1312      276 0.0000000000 0.826196474 0.17380353
## 21  1605     11        805      789 0.0068535826 0.501557632 0.49158879
## 22  1620   1521          0       99 0.9388888889 0.000000000 0.06111111
## 23   634      9         59      566 0.0141955836 0.093059937 0.89274448
## 24   923    441         12      470 0.4777898158 0.013001083 0.50920910
## 25  1597      0       1327      270 0.0000000000 0.830932999 0.16906700
## 26  1561    628          1      932 0.4023062140 0.000640615 0.59705317
## 27  1260    415          0      845 0.3293650794 0.000000000 0.67063492
## 28  1585      0       1304      281 0.0000000000 0.822712934 0.17728707
## 29  1568      1       1238      329 0.0006377551 0.789540816 0.20982143
## 30  1548     36        491     1021 0.0232558140 0.317183463 0.65956072
## 31  1557      1       1102      454 0.0006422608 0.707771355 0.29158638
## 32  1594   1435          0      159 0.9002509410 0.000000000 0.09974906
## 33  1525    195          0     1330 0.1278688525 0.000000000 0.87213115
## 34   903      1        100      802 0.0011074197 0.110741971 0.88815061
## 35   977      3         60      914 0.0030706244 0.061412487 0.93551689
## 36  1049      6        520      523 0.0057197331 0.495710200 0.49857007
## 37  1507    557          0      950 0.3696084937 0.000000000 0.63039151
## 38  1613     33        526     1054 0.0204587725 0.326100434 0.65344079
## 39  1506      3        764      739 0.0019920319 0.507304117 0.49070385
## 40  1562      1       1142      419 0.0006402049 0.731113956 0.26824584
## 41  1555     28        577      950 0.0180064309 0.371061093 0.61093248
## 42  1598      0       1460      138 0.0000000000 0.913642053 0.08635795
## 43  1549    230        106     1213 0.1484828922 0.068431246 0.78308586
## 44  1633      0       1524      109 0.0000000000 0.933251684 0.06674832
## 45  1426      1        928      497 0.0007012623 0.650771388 0.34852735
## 46  1057     14        434      609 0.0132450331 0.410596026 0.57615894
## 47   982      3        409      570 0.0030549898 0.416496945 0.58044807
## 48  1567   1269          0      298 0.8098276962 0.000000000 0.19017230
## 49  1513      5        740      768 0.0033046927 0.489094514 0.50760079
## 50   909      4        163      742 0.0044004400 0.179317932 0.81628163

alout

##    total common discordant uncommon    p_comm       p_disc   p_uncom
## 1   1380    819          0      561 0.5934783 0.0000000000 0.4065217
## 2   1196   1059          0      137 0.8854515 0.0000000000 0.1145485
## 3   1226   1088          0      138 0.8874388 0.0000000000 0.1125612
## 4   1447    761          0      686 0.5259157 0.0000000000 0.4740843
## 5   1223    951          0      272 0.7775961 0.0000000000 0.2224039
## 6   1219    997          0      222 0.8178835 0.0000000000 0.1821165
## 7   1263    851          0      412 0.6737926 0.0000000000 0.3262074
## 8   1361    745          0      616 0.5473916 0.0000000000 0.4526084
## 9   1306    992          0      314 0.7595712 0.0000000000 0.2404288
## 10  1377    785          0      592 0.5700799 0.0000000000 0.4299201
## 11  1223    912          0      311 0.7457073 0.0000000000 0.2542927
## 12  1290    960          0      330 0.7441860 0.0000000000 0.2558140
## 13  1299    997          0      302 0.7675135 0.0000000000 0.2324865
## 14  1504    362          4     1138 0.2406915 0.0026595745 0.7566489
## 15  1255   1069          0      186 0.8517928 0.0000000000 0.1482072
## 16  1221   1055          0      166 0.8640459 0.0000000000 0.1359541
## 17  1230   1093          0      137 0.8886179 0.0000000000 0.1113821
## 18  1322    989          0      333 0.7481089 0.0000000000 0.2518911
## 19  1326   1015          0      311 0.7654600 0.0000000000 0.2345400
## 20  1401    622          0      779 0.4439686 0.0000000000 0.5560314
## 21  1238   1039          0      199 0.8392569 0.0000000000 0.1607431
## 22  1265   1057          0      208 0.8355731 0.0000000000 0.1644269
## 23  1211   1067          0      144 0.8810900 0.0000000000 0.1189100
## 24  1415    684          0      731 0.4833922 0.0000000000 0.5166078
## 25  1242   1049          0      193 0.8446055 0.0000000000 0.1553945
## 26  1361    757          0      604 0.5562087 0.0000000000 0.4437913
## 27  1351   1026          0      325 0.7594375 0.0000000000 0.2405625
## 28  1259    906          0      353 0.7196187 0.0000000000 0.2803813
## 29  1338    858          0      480 0.6412556 0.0000000000 0.3587444
## 30  1343    967          0      376 0.7200298 0.0000000000 0.2799702
## 31  1201   1000          0      201 0.8326395 0.0000000000 0.1673605
## 32  1273   1037          0      236 0.8146112 0.0000000000 0.1853888
## 33  1357   1032          0      325 0.7605011 0.0000000000 0.2394989
## 34  1273    858          0      415 0.6739984 0.0000000000 0.3260016
## 35  1209   1058          0      151 0.8751034 0.0000000000 0.1248966
## 36  1354    950          0      404 0.7016248 0.0000000000 0.2983752
## 37  1196    989          0      207 0.8269231 0.0000000000 0.1730769
## 38  1309    644          0      665 0.4919786 0.0000000000 0.5080214
## 39  1351    852          0      499 0.6306440 0.0000000000 0.3693560
## 40  1250    691          0      559 0.5528000 0.0000000000 0.4472000
## 41  1329    789          0      540 0.5936795 0.0000000000 0.4063205
## 42  1284   1105          0      179 0.8605919 0.0000000000 0.1394081
## 43  1295   1061          0      234 0.8193050 0.0000000000 0.1806950
## 44  1299    955          0      344 0.7351809 0.0000000000 0.2648191
## 45  1209   1043          0      166 0.8626964 0.0000000000 0.1373036
## 46  1296   1032          0      264 0.7962963 0.0000000000 0.2037037
## 47  1472    571          1      900 0.3879076 0.0006793478 0.6114130
## 48  1374    980          0      394 0.7132460 0.0000000000 0.2867540
## 49  1330    921          0      409 0.6924812 0.0000000000 0.3075188
## 50  1342    888          0      454 0.6616990 0.0000000000 0.3383010

aaout

##    total common discordant uncommon    p_comm p_disc   p_uncom
## 1   1078    465          0      613 0.4313544      0 0.5686456
## 2    843    545          0      298 0.6465006      0 0.3534994
## 3    812    554          0      258 0.6822660      0 0.3177340
## 4   1307    257          0     1050 0.1966335      0 0.8033665
## 5    760    484          0      276 0.6368421      0 0.3631579
## 6    801    343          0      458 0.4282147      0 0.5717853
## 7   1010    360          0      650 0.3564356      0 0.6435644
## 8   1151    263          0      888 0.2284970      0 0.7715030
## 9   1169    424          0      745 0.3627032      0 0.6372968
## 10  1049    460          0      589 0.4385129      0 0.5614871
## 11   930    385          0      545 0.4139785      0 0.5860215
## 12   909    417          0      492 0.4587459      0 0.5412541
## 13  1093    332          0      761 0.3037511      0 0.6962489
## 14  1402    347          0     1055 0.2475036      0 0.7524964
## 15   916    432          0      484 0.4716157      0 0.5283843
## 16   764    504          0      260 0.6596859      0 0.3403141
## 17   937    476          0      461 0.5080043      0 0.4919957
## 18   928    392          0      536 0.4224138      0 0.5775862
## 19  1183    311          0      872 0.2628910      0 0.7371090
## 20  1258    187          0     1071 0.1486486      0 0.8513514
## 21   824    549          0      275 0.6662621      0 0.3337379
## 22   993    419          0      574 0.4219537      0 0.5780463
## 23   867    518          0      349 0.5974625      0 0.4025375
## 24  1222    315          0      907 0.2577741      0 0.7422259
## 25   906    581          0      325 0.6412804      0 0.3587196
## 26  1060    378          0      682 0.3566038      0 0.6433962
## 27  1104    617          0      487 0.5588768      0 0.4411232
## 28   959    331          0      628 0.3451512      0 0.6548488
## 29  1059    295          0      764 0.2785647      0 0.7214353
## 30   956    475          0      481 0.4968619      0 0.5031381
## 31   887    315          0      572 0.3551297      0 0.6448703
## 32   881    612          0      269 0.6946652      0 0.3053348
## 33  1202    468          0      734 0.3893511      0 0.6106489
## 34  1223    252          0      971 0.2060507      0 0.7939493
## 35   876    537          0      339 0.6130137      0 0.3869863
## 36   865    513          0      352 0.5930636      0 0.4069364
## 37   929    356          0      573 0.3832078      0 0.6167922
## 38  1053    341          0      712 0.3238367      0 0.6761633
## 39  1086    297          0      789 0.2734807      0 0.7265193
## 40  1038    139          0      899 0.1339114      0 0.8660886
## 41  1085    245          0      840 0.2258065      0 0.7741935
## 42   900    586          0      314 0.6511111      0 0.3488889
## 43   949    549          0      400 0.5785037      0 0.4214963
## 44  1057    373          0      684 0.3528855      0 0.6471145
## 45   930    409          0      521 0.4397849      0 0.5602151
## 46   909    587          0      322 0.6457646      0 0.3542354
## 47  1309    265          0     1044 0.2024446      0 0.7975554
## 48  1105    496          0      609 0.4488688      0 0.5511312
## 49  1140    197          0      943 0.1728070      0 0.8271930
## 50  1074    391          0      683 0.3640596      0 0.6359404

almout

##    total common discordant uncommon    p_comm p_disc   p_uncom
## 1    169     51          0      118 0.3017751      0 0.6982249
## 2    154     83          0       71 0.5389610      0 0.4610390
## 3    166     72          0       94 0.4337349      0 0.5662651
## 4    168     72          0       96 0.4285714      0 0.5714286
## 5    175     59          0      116 0.3371429      0 0.6628571
## 6    218     53          0      165 0.2431193      0 0.7568807
## 7    166     80          0       86 0.4819277      0 0.5180723
## 8    161     74          0       87 0.4596273      0 0.5403727
## 9    147     64          0       83 0.4353741      0 0.5646259
## 10   190     91          0       99 0.4789474      0 0.5210526
## 11   244     57          0      187 0.2336066      0 0.7663934
## 12   177     69          0      108 0.3898305      0 0.6101695
## 13   148     66          0       82 0.4459459      0 0.5540541
## 14   165     41          0      124 0.2484848      0 0.7515152
## 15   172     61          0      111 0.3546512      0 0.6453488
## 16   173     71          0      102 0.4104046      0 0.5895954
## 17   139     69          0       70 0.4964029      0 0.5035971
## 18   175     45          0      130 0.2571429      0 0.7428571
## 19   149     93          0       56 0.6241611      0 0.3758389
## 20   141     72          0       69 0.5106383      0 0.4893617
## 21   128     80          0       48 0.6250000      0 0.3750000
## 22   149     61          0       88 0.4093960      0 0.5906040
## 23   143     65          0       78 0.4545455      0 0.5454545
## 24   156     61          0       95 0.3910256      0 0.6089744
## 25   137     63          0       74 0.4598540      0 0.5401460
## 26   156     68          0       88 0.4358974      0 0.5641026
## 27   131     62          0       69 0.4732824      0 0.5267176
## 28   141     66          0       75 0.4680851      0 0.5319149
## 29   179     54          0      125 0.3016760      0 0.6983240
## 30   144     70          0       74 0.4861111      0 0.5138889
## 31   128     62          0       66 0.4843750      0 0.5156250
## 32   184     72          0      112 0.3913043      0 0.6086957
## 33   180     49          0      131 0.2722222      0 0.7277778
## 34   177     54          0      123 0.3050847      0 0.6949153
## 35   231     46          0      185 0.1991342      0 0.8008658
## 36   156     46          0      110 0.2948718      0 0.7051282
## 37   173     74          0       99 0.4277457      0 0.5722543
## 38   159     81          0       78 0.5094340      0 0.4905660
## 39   163     63          0      100 0.3865031      0 0.6134969
## 40   143     75          0       68 0.5244755      0 0.4755245
## 41   132     61          0       71 0.4621212      0 0.5378788
## 42   148     73          0       75 0.4932432      0 0.5067568
## 43   183     68          0      115 0.3715847      0 0.6284153
## 44   151     53          0       98 0.3509934      0 0.6490066
## 45   140     81          0       59 0.5785714      0 0.4214286
## 46   154     72          0       82 0.4675325      0 0.5324675
## 47   150     52          0       98 0.3466667      0 0.6533333
## 48   171     70          0      101 0.4093567      0 0.5906433
## 49   171     71          0      100 0.4152047      0 0.5847953
## 50   151     67          0       84 0.4437086      0 0.5562914

res <- do.call(rbind,lapply(list(laout,alout,aaout,almout),colMeans))

rownames(res) <- c("la","al","aa","alm")

res <- as.data.frame(res)

res %>%
  kbl(caption="overall summary") %>%
  kable_paper("hover", full_width = F)

overall summary
	total	common	discordant	uncommon	p_comm	p_disc	p_uncom
la	1391.72	295.30	461.62	634.80	0.2101851	0.3104579	0.4793570
al	1301.90	919.76	0.10	382.04	0.7132613	0.0000668	0.2866719
aa	1014.96	406.88	0.00	608.08	0.4194747	0.0000000	0.5805253
alm	162.12	65.66	0.00	96.46	0.4149891	0.0000000	0.5850109

apply(laout,2,mean)

##        total       common   discordant     uncommon       p_comm       p_disc 
## 1391.7200000  295.3000000  461.6200000  634.8000000    0.2101851    0.3104579 
##      p_uncom 
##    0.4793570

apply(alout,2,mean)

##        total       common   discordant     uncommon       p_comm       p_disc 
## 1.301900e+03 9.197600e+02 1.000000e-01 3.820400e+02 7.132613e-01 6.677845e-05 
##      p_uncom 
## 2.866719e-01

apply(aaout,2,mean)

##        total       common   discordant     uncommon       p_comm       p_disc 
## 1014.9600000  406.8800000    0.0000000  608.0800000    0.4194747    0.0000000 
##      p_uncom 
##    0.5805253

apply(almout,2,mean)

##       total      common  discordant    uncommon      p_comm      p_disc 
## 162.1200000  65.6600000   0.0000000  96.4600000   0.4149891   0.0000000 
##     p_uncom 
##   0.5850109

Plot some data.

Raw counts and proportions.

par(mfrow=c(1,3))

barplot(res$common,main="no. common",names.arg=c("LA","AL","AA","ALM"))
barplot(res$uncommon,main="no. uncommon",names.arg=c("LA","AL","AA","ALM"))
barplot(res$discordant,main="no. discordant",names.arg=c("LA","AL","AA","ALM"))

boxplot(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common,"ALM"=almout$common),
  main="no. common",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common,"ALM"=almout$common), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon,"ALM"=almout$uncommon),
  main="no. uncommon",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon,"ALM"=almout$uncommon), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant,"ALM"=almout$discordant),
  main="no. discordant",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant,"ALM"=almout$discordant), add=TRUE,cex=1.2,pch=19)

barplot(res$p_comm,main="proportion common",names.arg=c("LA","AL","AA","ALM"))
barplot(res$p_uncom,main="proportion uncommon",names.arg=c("LA","AL","AA","ALM"))
barplot(res$p_disc,main="proportion discordant",names.arg=c("LA","AL","AA","ALM"))

boxplot(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm,"ALM"=almout$p_comm),
  main="proportion common",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm,"ALM"=almout$p_comm), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom,"ALM"=almout$p_uncom),
  main="proportion uncommon",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom,"ALM"=almout$p_uncom), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc,"ALM"=almout$p_disc),
  main="proportion discordant",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc,"ALM"=almout$p_disc), add=TRUE,cex=1.2,pch=19)

Session information

save.image("GSE158422_split.Rdata")

sessionInfo()

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] beeswarm_0.4.0                                     
##  [2] kableExtra_1.3.4                                   
##  [3] mitch_1.12.0                                       
##  [4] tictoc_1.2                                         
##  [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
##  [6] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [7] minfi_1.46.0                                       
##  [8] bumphunter_1.42.0                                  
##  [9] locfit_1.5-9.8                                     
## [10] iterators_1.0.14                                   
## [11] foreach_1.5.2                                      
## [12] Biostrings_2.68.1                                  
## [13] XVector_0.40.0                                     
## [14] SummarizedExperiment_1.30.2                        
## [15] Biobase_2.60.0                                     
## [16] MatrixGenerics_1.12.3                              
## [17] matrixStats_1.0.0                                  
## [18] GenomicRanges_1.52.0                               
## [19] GenomeInfoDb_1.36.3                                
## [20] IRanges_2.34.1                                     
## [21] S4Vectors_0.38.2                                   
## [22] BiocGenerics_0.46.0                                
## [23] eulerr_7.0.0                                       
## [24] limma_3.56.2                                       
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1             later_1.3.1              
##   [3] BiocIO_1.10.0             bitops_1.0-7             
##   [5] filelock_1.0.2            tibble_3.2.1             
##   [7] preprocessCore_1.62.1     XML_3.99-0.14            
##   [9] lifecycle_1.0.3           lattice_0.21-9           
##  [11] MASS_7.3-60               base64_2.0.1             
##  [13] scrime_1.3.5              magrittr_2.0.3           
##  [15] sass_0.4.7                rmarkdown_2.25           
##  [17] jquerylib_0.1.4           yaml_2.3.7               
##  [19] httpuv_1.6.11             doRNG_1.8.6              
##  [21] askpass_1.2.0             DBI_1.1.3                
##  [23] RColorBrewer_1.1-3        abind_1.4-5              
##  [25] zlibbioc_1.46.0           rvest_1.0.3              
##  [27] quadprog_1.5-8            purrr_1.0.2              
##  [29] RCurl_1.98-1.12           rappdirs_0.3.3           
##  [31] GenomeInfoDbData_1.2.10   genefilter_1.82.1        
##  [33] annotate_1.78.0           svglite_2.1.1            
##  [35] DelayedMatrixStats_1.22.6 codetools_0.2-19         
##  [37] DelayedArray_0.26.7       xml2_1.3.5               
##  [39] tidyselect_1.2.0          beanplot_1.3.1           
##  [41] BiocFileCache_2.8.0       webshot_0.5.5            
##  [43] illuminaio_0.42.0         GenomicAlignments_1.36.0 
##  [45] jsonlite_1.8.7            multtest_2.56.0          
##  [47] ellipsis_0.3.2            survival_3.5-7           
##  [49] systemfonts_1.0.4         tools_4.3.1              
##  [51] progress_1.2.2            Rcpp_1.0.11              
##  [53] glue_1.6.2                gridExtra_2.3            
##  [55] xfun_0.40                 dplyr_1.1.3              
##  [57] HDF5Array_1.28.1          fastmap_1.1.1            
##  [59] GGally_2.1.2              rhdf5filters_1.12.1      
##  [61] fansi_1.0.4               openssl_2.1.1            
##  [63] caTools_1.18.2            digest_0.6.33            
##  [65] R6_2.5.1                  mime_0.12                
##  [67] colorspace_2.1-0          gtools_3.9.4             
##  [69] biomaRt_2.56.1            RSQLite_2.3.1            
##  [71] utf8_1.2.3                tidyr_1.3.0              
##  [73] generics_0.1.3            data.table_1.14.8        
##  [75] rtracklayer_1.60.1        prettyunits_1.2.0        
##  [77] httr_1.4.7                htmlwidgets_1.6.2        
##  [79] S4Arrays_1.0.6            pkgconfig_2.0.3          
##  [81] gtable_0.3.4              blob_1.2.4               
##  [83] siggenes_1.74.0           htmltools_0.5.6          
##  [85] echarts4r_0.4.5           scales_1.2.1             
##  [87] png_0.1-8                 knitr_1.44               
##  [89] rstudioapi_0.15.0         reshape2_1.4.4           
##  [91] tzdb_0.4.0                rjson_0.2.21             
##  [93] nlme_3.1-163              curl_5.0.2               
##  [95] cachem_1.0.8              rhdf5_2.44.0             
##  [97] stringr_1.5.0             KernSmooth_2.23-22       
##  [99] AnnotationDbi_1.62.2      restfulr_0.0.15          
## [101] GEOquery_2.68.0           pillar_1.9.0             
## [103] grid_4.3.1                reshape_0.8.9            
## [105] vctrs_0.6.3               gplots_3.1.3             
## [107] promises_1.2.1            dbplyr_2.3.4             
## [109] xtable_1.8-4              evaluate_0.22            
## [111] readr_2.1.4               GenomicFeatures_1.52.2   
## [113] cli_3.6.1                 compiler_4.3.1           
## [115] Rsamtools_2.16.0          rlang_1.1.1              
## [117] crayon_1.5.2              rngtools_1.5.2           
## [119] nor1mix_1.3-0             mclust_6.0.0             
## [121] plyr_1.8.8                stringi_1.7.12           
## [123] viridisLite_0.4.2         BiocParallel_1.34.2      
## [125] munsell_0.5.0             Matrix_1.6-1.1           
## [127] hms_1.1.3                 sparseMatrixStats_1.12.2 
## [129] bit64_4.0.5               ggplot2_3.4.3            
## [131] Rhdf5lib_1.22.1           KEGGREST_1.40.1          
## [133] shiny_1.7.5               highr_0.10               
## [135] memoise_2.0.1             bslib_0.5.1              
## [137] bit_4.0.5

GSE158422: DNA methylation in Lung squamous cell carcinoma - gene methylation enrichment analysis

The GMEA team

2023-10-12