Introduction

Here we are splitting the dataset and testing concordance between LA and AL approaches.

suppressPackageStartupMessages({
  library("limma")
  library("eulerr")
  library("IlluminaHumanMethylation450kmanifest")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library("tictoc")
  library("mitch")
  library("kableExtra")
  library("beeswarm")
})

CORES=8

Load data

annotations
probe sets
gene sets
design matrix
mval matrix

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])

gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))

summary(unlist(lapply(sets,length)))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   24.00   49.68   55.00 6778.00

genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")

if (!file.exists("GSE158422_design.rds")) {
  download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")

if (!file.exists("GSE158422_design.rds")) {
 download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")

boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
  main="mean probe methylation mval")

Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.

gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")

gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")

GMEA FUNCTIONS

There are three approaches. First, limma can be done first, followed by aggregation by gene and then enrichment test (called LA). Secondly, probe values can be aggregated by gene, followed by limma, followed by enrichment test (AL). Third, probe values can be aggregated to gene level, and then aggregated to gene set level followed by limma test (AA).

LA

This process runs limma first and then aggregates the results before doing an enrichment test.

# limma
runlimma <- function(mval,design,myann) {
  fit.reduced <- lmFit(mval,design)
  fit.reduced <- eBayes(fit.reduced)
  summary(decideTests(fit.reduced))
  dm <- topTable(fit.reduced,coef=4, number = Inf)
  dm <- merge(myann,dm,by=0)
  dm <- dm[order(dm$P.Value),]
  rownames(dm) <- dm$Row.names
  dm$Row.names=NULL
  return(dm)
}

# aggregate
agg <- function(dm,cores=1) {
  gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  dm$UCSC_RefGene_Name <- gnl
  l <- mclapply(1:nrow(dm), function(i) {
    a <- dm[i,]
    len <- length(a[[1]][[1]])
    tvals <- as.numeric(rep(a["t"],len))
    genes <- a[[1]][[1]]
    data.frame(genes,tvals)
  },mc.cores=cores)
  df <- do.call(rbind,l)
  keep <- names(which(table(df$genes)>1))
  df <- df[df$genes %in% keep,]
  gn <- unique(df$genes)
  gme_res <- lapply( 1:length(gn), function(i) {
    g <- gn[i]
    tstats <- df[which(df$genes==g),"tvals"]
    myn <- length(tstats)
    mymean <- mean(tstats)
    mymedian <- median(tstats)
    if ( length(tstats) > 2 ) {
      ttest <- t.test(tstats)
      pval <- ttest$p.value
    } else {
      pval = 1
    }
    res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
      "median"=mymedian, pval=pval)
  } )
  gme_res_df <- do.call(rbind, gme_res)
  rownames(gme_res_df) <- gme_res_df[,1]
  gme_res_df <- gme_res_df[,-1]
  tmp <- apply(gme_res_df,2,as.numeric)
  rownames(tmp) <- rownames(gme_res_df)
  gme_res_df <- as.data.frame(tmp)
  gme_res_df$sig <- -log10(gme_res_df[,4])
  gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
  gme_res_df$fdr <- p.adjust(gme_res_df$pval)
  out <- list("df"=df,"gme_res_df"=gme_res_df)
  return(out)
}

# enrich
ttenrich <- function(m,genesets,cores=1) {
  res <- mclapply( 1:length(genesets), function(i) {
    gs <- genesets[i]
    name <- names(gs)
    n_members <- length(which(rownames(m) %in% gs[[1]]))
    if ( n_members > 4 ) {
      tstats <- m[which(rownames(m) %in% gs[[1]]),]
      myn <- length(tstats)
      mymean <- mean(tstats)
      mymedian <- median(tstats)
      wt <- t.test(tstats)
      res <- c(name,myn,mymean,mymedian,wt$p.value)
    }
  } , mc.cores = cores)
  res_df <- do.call(rbind, res)
  rownames(res_df) <- res_df[,1]
  res_df <- res_df[,-1]
  colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
  tmp <- apply(res_df,2,as.numeric)
  rownames(tmp) <- rownames(res_df)
  res_df <- tmp
  res_df <- as.data.frame(res_df)
  res_df <- res_df[order(res_df$pval),]
  res_df$logp <- -log10(res_df$pval )
  res_df$fdr <- p.adjust(res_df$pval,method="fdr")
  res_df[order(abs(res_df$pval)),]
  return(res_df)
}

AL: Aggregate limma functions

Functions for aggregate-limma-enrich approach.

# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
  gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  myann$gnl <- gnl
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  mymed <- function(g) {
    probes <- rownames(myann[grep(g,myann$gnl),])
    rows <- which(rownames(mx) %in% probes)
    if ( length(rows) > 1 ) {
      b <- mx[rows,]
      med <- apply(b,2,median)
      med <- matrix(med,nrow=1)
      colnames(med) <- colnames(b)
      rownames(med) <- g
      return(med)
    }
  }
  med <- mclapply(gn,mymed,mc.cores=cores)
  med <- med[lapply(med,length)>0]
  medf <- do.call(rbind,med)
  return(medf)
}

chragg <- function(mval,myann,cores=1){
  annodf <- as.data.frame(anno)
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  chrs <- unique(anno$chr)
  myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
  chrs <- chrs[order(-myorder)]
  leadercores <- floor(sqrt(cores))
  workercores <- ceiling(sqrt(cores))
  chrmedf <- mclapply(chrs,function(chr) {
    chrfrag <- annodf[annodf$chr==chr,]
    chrprobes <-rownames(chrfrag)
    chrmx <- mx[rownames(mx) %in% chrprobes,]
    chranno <- myann[rownames(myann) %in% chrprobes,]
    chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
    return(chrmedf)
  },mc.cores=leadercores)
  medf <- do.call(rbind, chrmedf)
  return(medf)
}

agglimma <- function(medf,design) {
  fit.reduced <- lmFit(medf,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  nondup <- !duplicated(dmagg$ID)
  dmagg <- dmagg[nondup,]
  rownames(dmagg) <- dmagg$ID
  dmagg$ID = NULL
  return(dmagg)
}

AA Aggregate-aggregate-limma functions

gsagg <- function(x,genesets,cores=1) {
  meds <- mclapply(1:length(genesets), function(i) {
    gs = genesets[[i]]
    xx <- x[rownames(x) %in% gs,]
    med <- apply(xx,2,median)
  },mc.cores=cores)
  mymed <- do.call(rbind,meds)
  rownames(mymed) <- names(genesets)
  as.data.frame(mymed)
}

aalimma <- function(agag,design) {
  fit.reduced <- lmFit(agag,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  return(dmagg)
}

aal <- function(mval,myann,genesets,design,cores=1) {
  medf <- chragg(mval,myann,cores=CORES)
  agag <- gsagg(x=medf,genesets=gs_symbols,cores=CORES)
  aalres <- aalimma(agag=agag,design=design)
  return(aalres)
}

#aalres <- aal(mval=mval,myann=myann,genesets=gs_symbols,design=design,cores=CORES)

Prep

# prep
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
rownames(design) <- colnames(mval)

LA workflow split and analyse

split 74 samples 37 patients split into 18 + 18 Conduct LA workflow analysis. Then make overlap.

splitanalyze_la <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  # la1
  dm1 <- runlimma(mval_a,design_a,myann)
  dmagg1 <- agg(dm1,cores=floor(CORES/3))
  m1 <- dmagg1$gme_res_df[,"median",drop=FALSE]
  lares1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)

  # la2
  dm2 <- runlimma(mval_b,design_b,myann)
  dmagg2 <- agg(dm2,cores=floor(CORES/3))
  m2 <- dmagg2$gme_res_df[,"median",drop=FALSE]
  lares2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)

  list("lares1"=lares1,"lares2"=lares2)

}

lares <- mclapply( seq(100,5000,100) ,splitanalyze_la,mc.cores=CORES)

AL workflow split and analyse

splitanalyze_al <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  #al
  medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
  magg1 <- agglimma(medf1,design_a)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)

  #al
  medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
  magg2 <- agglimma(medf2,design_b)
  m2 <- as.data.frame(magg2$t)
  rownames(m2) <- rownames(magg2)
  colnames(m2) <- "t"
  alres2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)

  list("alres1"=alres1,"alres2"=alres2)
}

alres <- mclapply( seq(100,5000,100) ,splitanalyze_al,mc.cores=CORES)

AA workflow split and analyse

splitanalyze_aa <- function(seed){
  set.seed(seed)
  a <- sample(1:37,18)
  a <- a[order(a)]
  nona <- setdiff(1:37,a)
  set.seed(seed)
  b <- sample(nona,18)
  b <- b[order(b)]
  a <- c( (a*2)-1 ,a*2 )
  a <- a[order(a)]
  b <- c( (b*2)-1 ,b*2 )
  b <- b[order(b)]

  design_a <- design[a,]
  design_a <- design_a[,colSums(design_a)>0]
  mval_a <- mval[,a]
  design_b <- design[b,]
  design_b <- design_b[,colSums(design_b)>0]
  mval_b <- mval[,b]

  #aa1
  aares1 <- aal(mval=mval_a,myann=myann,genesets=gs_symbols,
    design=design_a,cores=floor(CORES/3))

  #aa2
  aares2 <- aal(mval=mval_b,myann=myann,genesets=gs_symbols,
    design=design_b,cores=floor(CORES/3))

  list("aares1"=aares1,"aares2"=aares2)
}

aares <- mclapply( seq(100,5000,100) ,splitanalyze_aa,mc.cores=CORES)

Compare

lacompare <- function(lares){
  lares1 <- lares[[1]]
  lares2 <- lares[[2]]
  lasig1 <- subset(lares1,fdr<0.05)
  lasig2 <- subset(lares2,fdr<0.05)
  lasig1_up <- rownames(subset(lasig1,t_median>0))
  lasig1_dn <- rownames(subset(lasig1,t_median<0))
  lasig2_up <- rownames(subset(lasig2,t_median>0))
  lasig2_dn <- rownames(subset(lasig2,t_median<0))

  total <- length(unique(c(lasig1_up,lasig2_up,lasig1_dn,lasig2_dn)))
  common <- length(intersect(lasig1_up, lasig2_up)) + length(intersect(lasig1_dn, lasig2_dn))
  discordant <- length(intersect(lasig1_up, lasig2_dn)) + length(intersect(lasig1_dn, lasig2_up))
  uncommon = total - common - discordant

  laout <- data.frame(total,common,discordant,uncommon)
  laout$p_comm <-laout$common / laout$total
  laout$p_disc <-laout$discordant / laout$total
  laout$p_uncom <-laout$uncommon / laout$total
  laout
}

alcompare <-function(alres){
  alres1 <- alres[[1]]
  alres2 <- alres[[2]]
  alsig1 <- subset(alres1,fdr<0.05)
  alsig2 <- subset(alres2,fdr<0.05)

  alsig1_up <- rownames(subset(alsig1,t_median>0))
  alsig1_dn <- rownames(subset(alsig1,t_median<0))
  alsig2_up <- rownames(subset(alsig2,t_median>0))
  alsig2_dn <- rownames(subset(alsig2,t_median<0))

  total <- length(unique(c(alsig1_up,alsig2_up,alsig1_dn,alsig2_dn)))
  common <- length(intersect(alsig1_up, alsig2_up)) + length(intersect(alsig1_dn, alsig2_dn))
  discordant <- length(intersect(alsig1_up, alsig2_dn)) + length(intersect(alsig1_dn, alsig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  alout <- data.frame(total,common,discordant,uncommon)
  alout$p_comm <-alout$common / alout$total
  alout$p_disc <-alout$discordant / alout$total
  alout$p_uncom <-alout$uncommon / alout$total
  alout
}

aacompare <-function(aa){
  aa1 <- aa[[1]]
  aa2 <- aa[[2]]
  aasig1 <- subset(aa1,adj.P.Val<0.05)
  aasig2 <- subset(aa2,adj.P.Val<0.05)

  aasig1_up <- rownames(subset(aasig1,logFC>0))
  aasig1_dn <- rownames(subset(aasig1,logFC<0))
  aasig2_up <- rownames(subset(aasig2,logFC>0))
  aasig2_dn <- rownames(subset(aasig2,logFC<0))

  total <- length(unique(c(aasig1_up,aasig2_up,aasig1_dn,aasig2_dn)))
  common <- length(intersect(aasig1_up, aasig2_up)) + length(intersect(aasig1_dn, aasig2_dn))
  discordant <- length(intersect(aasig1_up, aasig2_dn)) + length(intersect(aasig1_dn, aasig2_up))
  uncommon = total - common - discordant
  result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)

  aaout <- data.frame(total,common,discordant,uncommon)
  aaout$p_comm <- aaout$common / aaout$total
  aaout$p_disc <- aaout$discordant / aaout$total
  aaout$p_uncom <-aaout$uncommon / aaout$total
  aaout
}

laout <- do.call(rbind,lapply(lares,lacompare))

alout <- do.call(rbind,lapply(alres,alcompare))

aaout <- do.call(rbind,lapply(aares,aacompare))

laout

##    total common discordant uncommon       p_comm       p_disc    p_uncom
## 1   1420    908          2      510 0.6394366197 0.0014084507 0.35915493
## 2   1613   1423          0      190 0.8822070676 0.0000000000 0.11779293
## 3    920    461          8      451 0.5010869565 0.0086956522 0.49021739
## 4    921    475          7      439 0.5157437568 0.0076004343 0.47665581
## 5   1434    370          0     1064 0.2580195258 0.0000000000 0.74198047
## 6   1452    577          0      875 0.3973829201 0.0000000000 0.60261708
## 7   1582      0       1306      276 0.0000000000 0.8255372946 0.17446271
## 8   1393      1        720      672 0.0007178751 0.5168700646 0.48241206
## 9    917    460          8      449 0.5016357688 0.0087241003 0.48964013
## 10  1594   1462          0      132 0.9171894605 0.0000000000 0.08281054
## 11  1558      1        517     1040 0.0006418485 0.3318356868 0.66752246
## 12  1591      1        809      781 0.0006285355 0.5084852294 0.49088624
## 13  1614     81         73     1460 0.0501858736 0.0452292441 0.90458488
## 14  1558      1        699      858 0.0006418485 0.4486521181 0.55070603
## 15  1571   1220          0      351 0.7765754297 0.0000000000 0.22342457
## 16  1311      1        659      651 0.0007627765 0.5026697178 0.49656751
## 17  1555    137          1     1417 0.0881028939 0.0006430868 0.91125402
## 18  1585      7        748      830 0.0044164038 0.4719242902 0.52365931
## 19  1436    371          0     1065 0.2583565460 0.0000000000 0.74164345
## 20  1581      0       1294      287 0.0000000000 0.8184693232 0.18153068
## 21  1586      9        743      834 0.0056746532 0.4684741488 0.52585120
## 22  1616   1507          0      109 0.9325495050 0.0000000000 0.06745050
## 23   702     12         53      637 0.0170940171 0.0754985755 0.90740741
## 24   914    453          8      453 0.4956236324 0.0087527352 0.49562363
## 25  1586      0       1289      297 0.0000000000 0.8127364439 0.18726356
## 26  1556    592          0      964 0.3804627249 0.0000000000 0.61953728
## 27  1431    371          0     1060 0.2592592593 0.0000000000 0.74074074
## 28  1573      0       1256      317 0.0000000000 0.7984742530 0.20152575
## 29  1554      0       1217      337 0.0000000000 0.7831402831 0.21685972
## 30  1550     28        610      912 0.0180645161 0.3935483871 0.58838710
## 31  1560      0       1212      348 0.0000000000 0.7769230769 0.22307692
## 32  1593   1443          0      150 0.9058380414 0.0000000000 0.09416196
## 33  1548    135          1     1412 0.0872093023 0.0006459948 0.91214470
## 34   842     28         78      736 0.0332541568 0.0926365796 0.87410926
## 35  1028      3        100      925 0.0029182879 0.0972762646 0.89980545
## 36  1090      6        566      518 0.0055045872 0.5192660550 0.47522936
## 37  1507    562          0      945 0.3729263437 0.0000000000 0.62707366
## 38  1617     27        613      977 0.0166975881 0.3790970934 0.60420532
## 39  1507      2        685      820 0.0013271400 0.4545454545 0.54412741
## 40  1560      0       1213      347 0.0000000000 0.7775641026 0.22243590
## 41  1553     26        611      916 0.0167417901 0.3934320670 0.58982614
## 42  1583      0       1444      139 0.0000000000 0.9121920404 0.08780796
## 43  1546     98        139     1309 0.0633893920 0.0899094437 0.84670116
## 44  1623      0       1490      133 0.0000000000 0.9180529883 0.08194701
## 45  1399      0        961      438 0.0000000000 0.6869192280 0.31308077
## 46  1090     10        543      537 0.0091743119 0.4981651376 0.49266055
## 47   905      1        372      532 0.0011049724 0.4110497238 0.58784530
## 48  1576   1151          1      424 0.7303299492 0.0006345178 0.26903553
## 49  1502      2        686      814 0.0013315579 0.4567243675 0.54194407
## 50   805      3         84      718 0.0037267081 0.1043478261 0.89192547

alout

##    total common discordant uncommon    p_comm       p_disc   p_uncom
## 1   1049    367          0      682 0.3498570 0.0000000000 0.6501430
## 2    811    625          0      186 0.7706535 0.0000000000 0.2293465
## 3    865    598          1      266 0.6913295 0.0011560694 0.3075145
## 4   1147    295          0      852 0.2571927 0.0000000000 0.7428073
## 5    859    487          0      372 0.5669383 0.0000000000 0.4330617
## 6    853    570          0      283 0.6682298 0.0000000000 0.3317702
## 7    885    454          0      431 0.5129944 0.0000000000 0.4870056
## 8   1054    189          1      864 0.1793169 0.0009487666 0.8197343
## 9    904    589          0      315 0.6515487 0.0000000000 0.3484513
## 10  1059    353          0      706 0.3333333 0.0000000000 0.6666667
## 11   859    469          0      390 0.5459837 0.0000000000 0.4540163
## 12   986    493          0      493 0.5000000 0.0000000000 0.5000000
## 13   891    653          0      238 0.7328844 0.0000000000 0.2671156
## 14  1271    135          2     1134 0.1062156 0.0015735641 0.8922109
## 15   839    677          0      162 0.8069130 0.0000000000 0.1930870
## 16   865    539          0      326 0.6231214 0.0000000000 0.3768786
## 17   839    659          0      180 0.7854589 0.0000000000 0.2145411
## 18   984    557          0      427 0.5660569 0.0000000000 0.4339431
## 19  1023    514          0      509 0.5024438 0.0000000000 0.4975562
## 20  1064    219          0      845 0.2058271 0.0000000000 0.7941729
## 21   916    581          0      335 0.6342795 0.0000000000 0.3657205
## 22   896    639          0      257 0.7131696 0.0000000000 0.2868304
## 23   875    592          0      283 0.6765714 0.0000000000 0.3234286
## 24  1101    250          0      851 0.2270663 0.0000000000 0.7729337
## 25   865    623          0      242 0.7202312 0.0000000000 0.2797688
## 26  1007    295          0      712 0.2929494 0.0000000000 0.7070506
## 27  1025    597          4      424 0.5824390 0.0039024390 0.4136585
## 28   903    427          0      476 0.4728682 0.0000000000 0.5271318
## 29   964    337          1      626 0.3495851 0.0010373444 0.6493776
## 30   953    502          0      451 0.5267576 0.0000000000 0.4732424
## 31   796    573          0      223 0.7198492 0.0000000000 0.2801508
## 32   956    569          1      386 0.5951883 0.0010460251 0.4037657
## 33   980    642          0      338 0.6551020 0.0000000000 0.3448980
## 34   887    465          0      422 0.5242390 0.0000000000 0.4757610
## 35   844    630          0      214 0.7464455 0.0000000000 0.2535545
## 36   959    533          0      426 0.5557873 0.0000000000 0.4442127
## 37   842    462          0      380 0.5486936 0.0000000000 0.4513064
## 38   914    282          0      632 0.3085339 0.0000000000 0.6914661
## 39   966    407          0      559 0.4213251 0.0000000000 0.5786749
## 40   896    290          0      606 0.3236607 0.0000000000 0.6763393
## 41   937    410          0      527 0.4375667 0.0000000000 0.5624333
## 42   920    703          0      217 0.7641304 0.0000000000 0.2358696
## 43   887    671          0      216 0.7564825 0.0000000000 0.2435175
## 44   935    445          0      490 0.4759358 0.0000000000 0.5240642
## 45   825    653          1      171 0.7915152 0.0012121212 0.2072727
## 46   987    518          1      468 0.5248227 0.0010131712 0.4741641
## 47  1223    161          1     1061 0.1316435 0.0008176615 0.8675388
## 48  1056    532          0      524 0.5037879 0.0000000000 0.4962121
## 49   931    536          0      395 0.5757250 0.0000000000 0.4242750
## 50   962    468          0      494 0.4864865 0.0000000000 0.5135135

aaout

##    total common discordant uncommon    p_comm      p_disc   p_uncom
## 1    996    527          0      469 0.5291165 0.000000000 0.4708835
## 2    888    602          0      286 0.6779279 0.000000000 0.3220721
## 3    897    593          0      304 0.6610925 0.000000000 0.3389075
## 4   1151    365          0      786 0.3171156 0.000000000 0.6828844
## 5    818    490          0      328 0.5990220 0.000000000 0.4009780
## 6    904    426          0      478 0.4712389 0.000000000 0.5287611
## 7    960    446          0      514 0.4645833 0.000000000 0.5354167
## 8   1123    332          0      791 0.2956367 0.000000000 0.7043633
## 9    957    573          1      383 0.5987461 0.001044932 0.4002090
## 10   986    564          0      422 0.5720081 0.000000000 0.4279919
## 11  1003    427          0      576 0.4257228 0.000000000 0.5742772
## 12  1001    454          0      547 0.4535465 0.000000000 0.5464535
## 13   923    454          0      469 0.4918743 0.000000000 0.5081257
## 14  1208    453          3      752 0.3750000 0.002483444 0.6225166
## 15   840    551          0      289 0.6559524 0.000000000 0.3440476
## 16   853    574          0      279 0.6729191 0.000000000 0.3270809
## 17   921    600          0      321 0.6514658 0.000000000 0.3485342
## 18   915    476          0      439 0.5202186 0.000000000 0.4797814
## 19  1042    426          0      616 0.4088292 0.000000000 0.5911708
## 20  1101    253          0      848 0.2297911 0.000000000 0.7702089
## 21   969    556          0      413 0.5737874 0.000000000 0.4262126
## 22   912    539          0      373 0.5910088 0.000000000 0.4089912
## 23   877    598          0      279 0.6818700 0.000000000 0.3181300
## 24  1121    452          0      669 0.4032114 0.000000000 0.5967886
## 25   951    610          0      341 0.6414301 0.000000000 0.3585699
## 26   985    455          0      530 0.4619289 0.000000000 0.5380711
## 27   934    637          0      297 0.6820128 0.000000000 0.3179872
## 28   898    429          0      469 0.4777283 0.000000000 0.5222717
## 29  1069    304          0      765 0.2843779 0.000000000 0.7156221
## 30   973    492          0      481 0.5056526 0.000000000 0.4943474
## 31   908    380          0      528 0.4185022 0.000000000 0.5814978
## 32   974    636          0      338 0.6529774 0.000000000 0.3470226
## 33   963    599          0      364 0.6220145 0.000000000 0.3779855
## 34   989    382          0      607 0.3862487 0.000000000 0.6137513
## 35   972    556          0      416 0.5720165 0.000000000 0.4279835
## 36   881    487          0      394 0.5527809 0.000000000 0.4472191
## 37   908    422          0      486 0.4647577 0.000000000 0.5352423
## 38   965    406          0      559 0.4207254 0.000000000 0.5792746
## 39   985    400          0      585 0.4060914 0.000000000 0.5939086
## 40  1036    179          0      857 0.1727799 0.000000000 0.8272201
## 41   982    333          0      649 0.3391039 0.000000000 0.6608961
## 42   938    623          0      315 0.6641791 0.000000000 0.3358209
## 43   950    588          0      362 0.6189474 0.000000000 0.3810526
## 44   949    450          0      499 0.4741834 0.000000000 0.5258166
## 45   873    497          0      376 0.5693013 0.000000000 0.4306987
## 46  1049    553          0      496 0.5271687 0.000000000 0.4728313
## 47  1097    389          0      708 0.3546035 0.000000000 0.6453965
## 48  1004    609          0      395 0.6065737 0.000000000 0.3934263
## 49  1013    318          0      695 0.3139191 0.000000000 0.6860809
## 50   950    554          0      396 0.5831579 0.000000000 0.4168421

res <- do.call(rbind,lapply(list(laout,alout,aaout),colMeans))

rownames(res) <- c("la","al","aa")

res <- as.data.frame(res)

res %>%
  kbl(caption="overall summary") %>%
  kable_paper("hover", full_width = F)

overall summary
	total	common	discordant	uncommon	p_comm	p_disc	p_uncom
la	1402.16	288.52	456.52	657.12	0.2030787	0.3081350	0.4887863
al	946.30	484.70	0.26	461.34	0.5279827	0.0002541	0.4717631
aa	971.24	480.38	0.08	490.78	0.5018970	0.0000706	0.4980325

apply(laout,2,median)

##        total       common   discordant     uncommon       p_comm       p_disc 
## 1.551500e+03 1.900000e+01 2.555000e+02 6.440000e+02 1.671969e-02 2.180918e-01 
##      p_uncom 
## 4.960956e-01

apply(alout,2,median)

##       total      common  discordant    uncommon      p_comm      p_disc 
## 925.5000000 516.0000000   0.0000000 425.0000000   0.5473386   0.0000000 
##     p_uncom 
##   0.4526614

apply(aaout,2,median)

##       total      common  discordant    uncommon      p_comm      p_disc 
## 964.0000000 481.5000000   0.0000000 469.0000000   0.5129356   0.0000000 
##     p_uncom 
##   0.4870644

Plot some data.

Raw counts and proportions.

par(mfrow=c(1,3))

barplot(res$common,main="no. common",names.arg=c("LA","AL","AA"))
barplot(res$uncommon,main="no. uncommon",names.arg=c("LA","AL","AA"))
barplot(res$discordant,main="no. discordant",names.arg=c("LA","AL","AA"))

boxplot(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common),
  main="no. common",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon),
  main="no. uncommon",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant),
  main="no. discordant",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant), add=TRUE,cex=1.2,pch=19)

barplot(res$p_comm,main="proportion common",names.arg=c("LA","AL","AA"))
barplot(res$p_uncom,main="proportion uncommon",names.arg=c("LA","AL","AA"))
barplot(res$p_disc,main="proportion discordant",names.arg=c("LA","AL","AA"))

boxplot(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm),
  main="proportion common",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom),
  main="proportion uncommon",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom), add=TRUE,cex=1.2,pch=19)

boxplot(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc),
  main="proportion discordant",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc), add=TRUE,cex=1.2,pch=19)

Session information

save.image("GSE158422_split.Rdata")

sessionInfo()

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] beeswarm_0.4.0                                     
##  [2] kableExtra_1.3.4                                   
##  [3] mitch_1.12.0                                       
##  [4] tictoc_1.2                                         
##  [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
##  [6] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [7] minfi_1.46.0                                       
##  [8] bumphunter_1.42.0                                  
##  [9] locfit_1.5-9.8                                     
## [10] iterators_1.0.14                                   
## [11] foreach_1.5.2                                      
## [12] Biostrings_2.68.1                                  
## [13] XVector_0.40.0                                     
## [14] SummarizedExperiment_1.30.2                        
## [15] Biobase_2.60.0                                     
## [16] MatrixGenerics_1.12.3                              
## [17] matrixStats_1.0.0                                  
## [18] GenomicRanges_1.52.0                               
## [19] GenomeInfoDb_1.36.3                                
## [20] IRanges_2.34.1                                     
## [21] S4Vectors_0.38.2                                   
## [22] BiocGenerics_0.46.0                                
## [23] eulerr_7.0.0                                       
## [24] limma_3.56.2                                       
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1             later_1.3.1              
##   [3] BiocIO_1.10.0             bitops_1.0-7             
##   [5] filelock_1.0.2            tibble_3.2.1             
##   [7] preprocessCore_1.62.1     XML_3.99-0.14            
##   [9] lifecycle_1.0.3           lattice_0.21-9           
##  [11] MASS_7.3-60               base64_2.0.1             
##  [13] scrime_1.3.5              magrittr_2.0.3           
##  [15] sass_0.4.7                rmarkdown_2.25           
##  [17] jquerylib_0.1.4           yaml_2.3.7               
##  [19] httpuv_1.6.11             doRNG_1.8.6              
##  [21] askpass_1.2.0             DBI_1.1.3                
##  [23] RColorBrewer_1.1-3        abind_1.4-5              
##  [25] zlibbioc_1.46.0           rvest_1.0.3              
##  [27] quadprog_1.5-8            purrr_1.0.2              
##  [29] RCurl_1.98-1.12           rappdirs_0.3.3           
##  [31] GenomeInfoDbData_1.2.10   genefilter_1.82.1        
##  [33] annotate_1.78.0           svglite_2.1.1            
##  [35] DelayedMatrixStats_1.22.6 codetools_0.2-19         
##  [37] DelayedArray_0.26.7       xml2_1.3.5               
##  [39] tidyselect_1.2.0          beanplot_1.3.1           
##  [41] BiocFileCache_2.8.0       webshot_0.5.5            
##  [43] illuminaio_0.42.0         GenomicAlignments_1.36.0 
##  [45] jsonlite_1.8.7            multtest_2.56.0          
##  [47] ellipsis_0.3.2            survival_3.5-7           
##  [49] systemfonts_1.0.4         tools_4.3.1              
##  [51] progress_1.2.2            Rcpp_1.0.11              
##  [53] glue_1.6.2                gridExtra_2.3            
##  [55] xfun_0.40                 dplyr_1.1.3              
##  [57] HDF5Array_1.28.1          fastmap_1.1.1            
##  [59] GGally_2.1.2              rhdf5filters_1.12.1      
##  [61] fansi_1.0.4               openssl_2.1.1            
##  [63] caTools_1.18.2            digest_0.6.33            
##  [65] R6_2.5.1                  mime_0.12                
##  [67] colorspace_2.1-0          gtools_3.9.4             
##  [69] biomaRt_2.56.1            RSQLite_2.3.1            
##  [71] utf8_1.2.3                tidyr_1.3.0              
##  [73] generics_0.1.3            data.table_1.14.8        
##  [75] rtracklayer_1.60.1        prettyunits_1.2.0        
##  [77] httr_1.4.7                htmlwidgets_1.6.2        
##  [79] S4Arrays_1.0.6            pkgconfig_2.0.3          
##  [81] gtable_0.3.4              blob_1.2.4               
##  [83] siggenes_1.74.0           htmltools_0.5.6          
##  [85] echarts4r_0.4.5           scales_1.2.1             
##  [87] png_0.1-8                 knitr_1.44               
##  [89] rstudioapi_0.15.0         reshape2_1.4.4           
##  [91] tzdb_0.4.0                rjson_0.2.21             
##  [93] nlme_3.1-163              curl_5.0.2               
##  [95] cachem_1.0.8              rhdf5_2.44.0             
##  [97] stringr_1.5.0             KernSmooth_2.23-22       
##  [99] AnnotationDbi_1.62.2      restfulr_0.0.15          
## [101] GEOquery_2.68.0           pillar_1.9.0             
## [103] grid_4.3.1                reshape_0.8.9            
## [105] vctrs_0.6.3               gplots_3.1.3             
## [107] promises_1.2.1            dbplyr_2.3.4             
## [109] xtable_1.8-4              evaluate_0.22            
## [111] readr_2.1.4               GenomicFeatures_1.52.2   
## [113] cli_3.6.1                 compiler_4.3.1           
## [115] Rsamtools_2.16.0          rlang_1.1.1              
## [117] crayon_1.5.2              rngtools_1.5.2           
## [119] nor1mix_1.3-0             mclust_6.0.0             
## [121] plyr_1.8.8                stringi_1.7.12           
## [123] viridisLite_0.4.2         BiocParallel_1.34.2      
## [125] munsell_0.5.0             Matrix_1.6-1.1           
## [127] hms_1.1.3                 sparseMatrixStats_1.12.2 
## [129] bit64_4.0.5               ggplot2_3.4.3            
## [131] Rhdf5lib_1.22.1           KEGGREST_1.40.1          
## [133] shiny_1.7.5               highr_0.10               
## [135] memoise_2.0.1             bslib_0.5.1              
## [137] bit_4.0.5

GSE158422: DNA methylation in Lung squamous cell carcinoma - gene methylation enrichment analysis

The GMEA team

2023-10-03