Introduction

In this report we establish a new method for generating simulated data with known ground truth. This will be used to test different gene methylation enrichment approaches systematically.

The general steps are:

  1. Import GSE158422 data corresponding to control (non-tumour tissue).

  2. From the 37 samples, create two groups of 18 samples. One of these will be considered “control” and the other “case”.

  3. Create random gene sets that have similar sized to Reactome pathways.

  4. Some gene sets will be selected to be differentially methylated. Half of these will be hypermethylated and the others will be hypomethylated.

  5. The changes will be incorporated into the “case” samples.

  6. The enrichment analysis will be conducted.

  7. The accuracy will be calculated.

suppressPackageStartupMessages({
  library("stringi")
  library("limma")
  library("missMethyl")
  library("IlluminaHumanMethylation450kmanifest")
  library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  library('org.Hs.eg.db')
  library("psych")
  library("mitch")
  library("kableExtra")
})

# optimised for 128 GB sever with 32 threads
CORES=6

Load data

  • annotations

  • probe sets

  • gene sets

  • design matrix

  • mval matrix

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])

gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))

summary(unlist(lapply(sets,length)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   24.00   49.68   55.00 6778.00
#genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")

if (!file.exists("GSE158422_design.rds")) {
  download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")

if (!file.exists("GSE158422_design.rds")) {
 download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")

boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
  main="mean probe methylation mval")

Gene set database

We could use Reactome pathways, however these have a lot of overlapping sets, which could cause inflated false positives. A better solution could be to select random gene sets with size range between 10 and 100 genes.

gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))

lengths <- rep(1:10,100)*10

randomGeneSets <- function(gene_catalog, lengths, seed){
  num_gsets <- length(lengths)
  set.seed(seed) ; seeds <- sample(1:1e6, num_gsets)
  gsets <- lapply(1:num_gsets,function(i) {
    set.seed(seeds[i]) ; gs <- sample(gene_catalog,lengths[i])
    return(gs)
  } )
  names(gsets)<-stri_rand_strings(length(gsets), 15, pattern = "[A-Za-z]")
  return(gsets)
}

gsets <- randomGeneSets(gene_catalog,lengths,seed=100)

Select nonoverlapping gene sets.

Not in use now. Might delete later.

gset_selection <- function(genesetdatabase,seed,gene_catalog) {
  set.seed(seed)
  gsets <- genesetdatabase[sample(1:length(genesetdatabase))]
  genelist=NULL
  genesets=NULL
  for ( i in 1:length(gsets) ) {
    gs <- gsets[i]
    inx <- length(intersect(unlist(gs),genelist))
    num <- length(intersect(unlist(gs),gene_catalog))
    if ( inx == 0 & num >= 10 ) {
      genesets <- c(genesets,gs)
      genelist <- c(unname(unlist(gs)),genelist)
    }
  }
  return(genesets)
}

gset_mod <- gset_selection(genesetdatabase=gsets,seed=100,gene_catalog=gene_catalog)

Incorporate changes

TODO: to incorporate changes to case samples.

Need to figure out what magnitude to change. Will refer to the cancer/normal comparison.

Select genes and probes to alter.

seed=100
frac_genes=0.5
frac_probes=0.5
delta=1
nsamples=10
normal_mval <- mval[,(1:(ncol(mval)/2)*2)]

incorp_dm <- function(genesets,myann,mval,seed,
  frac_genes,frac_probes,groupsize,delta=1) {

  # divide gene sets between hyper and hypomethylated
  nset <- floor(length(genesets)/2)
  set.seed(seed) ; gtup <-sample(genesets,nset)
  set.seed(seed) ; gtdn <- sample(setdiff(genesets,gtup),nset)
  gup <- unname(unlist(gtup))
  gdn <- unname(unlist(gtdn))
  # make probe-gene vector
  probe2gene <- strsplit(myann$UCSC_RefGene_Name,";")
  names(probe2gene) <- rownames(myann)
  probe2gene <- unlist(probe2gene)
  # select probes hypermethylated
  set.seed(seed) ; gup2 <- sample(gup,floor(length(gup)*frac_genes))
  pup <- names(probe2gene[which(probe2gene %in% gup2)])
  set.seed(seed) ; pup2 <- sample(pup,floor(length(pup)*frac_probes))
  # select probes hypomethylated
  set.seed(seed) ; gdn2 <- sample(gdn,floor(length(gdn)*frac_genes))
  pdn <- names(probe2gene[which(probe2gene %in% gdn2)])
  set.seed(seed) ; pdn2 <- sample(pdn,floor(length(pdn)*frac_probes))
  # divide samples between ctrl and case
  ncols <- ncol(mval)
  maxgroupsize=floor(ncols/2)
  if ( groupsize > maxgroupsize ) { stop("groupsize cannot be larger than half the ncols of mval") }
  set.seed(seed) ; ctrl <- sample(1:ncols,groupsize)
  set.seed(seed) ; case <- sample(setdiff(1:ncols,ctrl),groupsize)
  mval_ctrl <- mval[,ctrl]
  mval_case <- mval[,case]
  # incorporate altered signals - change by +1 or -1
  mval_case[rownames(mval_case) %in% pup2,] <-  mval_case[rownames(mval_case) %in% pup2,] + delta
  mval_case[rownames(mval_case) %in% pdn2,] <-  mval_case[rownames(mval_case) %in% pdn2,] - delta
  mval2 <- cbind(mval_ctrl,mval_case)
  result <- list("mval"=mval2,"probes up"=pup2,"probes down"=pdn2,
    "genes up"=gup2,"genes down"=gdn2,
    "genesets up"=gtup,"genesets down"=gtdn)
  return(result)
}

GSAMETH function

# limma
runlimma <- function(mval,design,myann) {
  fit.reduced <- lmFit(mval,design)
  fit.reduced <- eBayes(fit.reduced)
  dm <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  dm <- merge(myann,dm,by=0)
  dm <- dm[order(dm$P.Value),]
  rownames(dm) <- dm$Row.names
  dm$Row.names=NULL
  return(dm)
}

This is how to use the function

This could be complicated as it requires translation of symbols to entrez IDs, but could be simplified if the entrez translation is done at the gsameth step.

simgsa <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  dm3 <- runlimma(mval=mval2,design=d,myann=myann)
  pup3 <- rownames(subset(dm3,adj.P.Val<0.05 & logFC>0))
  pdn3 <- rownames(subset(dm3,adj.P.Val<0.05 & logFC<0))
  if ( length(pup3) < 250 ) { pup3 <- head(rownames(subset(dm3, logFC > 0)), 250) }
  if ( length(pdn3) < 250 ) { pdn3 <- head(rownames(subset(dm3, logFC < 0)), 250) }
  # convert gene sets to entrez
  suppressWarnings(suppressMessages({ gene2entrez <- mapIds(org.Hs.eg.db, gene_catalog, 'ENTREZID', 'SYMBOL') }))
  gsets_entrez <- lapply(gsets,function(gs) {
    gs2 <- unique(gene2entrez[names(gene2entrez) %in% gs])
    gs2 <- gs2[!is.na(gs2)]
    return(gs2)
  })
  suppressWarnings(suppressMessages({
    gsaup3 <- gsameth(sig.cpg=pup3, all.cpg=rownames(dm3), collection=gsets_entrez)
    gsadn3 <- gsameth(sig.cpg=pdn3, all.cpg=rownames(dm3), collection=gsets_entrez)
  }))
  gsig_up3 <- rownames(subset(gsaup3,FDR<0.05))
  gsig_dn3 <- rownames(subset(gsadn3,FDR<0.05))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(gsadn3)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

LA function

This process runs limma first and then aggregates the results before doing an enrichment test.

# aggregate
agg <- function(dm,cores=1) {
  gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  dm$UCSC_RefGene_Name <- gnl
  l <- mclapply(1:nrow(dm), function(i) {
    a <- dm[i,]
    len <- length(a[[1]][[1]])
    tvals <- as.numeric(rep(a["t"],len))
    genes <- a[[1]][[1]]
    data.frame(genes,tvals)
  },mc.cores=cores)
  df <- do.call(rbind,l)
  keep <- names(which(table(df$genes)>1))
  df <- df[df$genes %in% keep,]
  gn <- unique(df$genes)
  gme_res <- lapply( 1:length(gn), function(i) {
    g <- gn[i]
    tstats <- df[which(df$genes==g),"tvals"]
    myn <- length(tstats)
    mymean <- mean(tstats)
    mymedian <- median(tstats)
    if ( length(tstats) > 2 ) {
      ttest <- t.test(tstats)
      pval <- ttest$p.value
    } else {
      pval = 1
    }
    res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
      "median"=mymedian, pval=pval)
  } )
  gme_res_df <- do.call(rbind, gme_res)
  rownames(gme_res_df) <- gme_res_df[,1]
  gme_res_df <- gme_res_df[,-1]
  tmp <- apply(gme_res_df,2,as.numeric)
  rownames(tmp) <- rownames(gme_res_df)
  gme_res_df <- as.data.frame(tmp)
  gme_res_df$sig <- -log10(gme_res_df[,4])
  gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
  gme_res_df$fdr <- p.adjust(gme_res_df$pval)
  out <- list("df"=df,"gme_res_df"=gme_res_df)
  return(out)
}

# enrich parametric
ttenrich <- function(m,genesets,cores=1,testtype="selfcontained") {
  res <- mclapply( 1:length(genesets), function(i) {
    scores <- m[,1]
    gs <- genesets[i]
    name <- names(gs)
    n_members <- length(which(rownames(m) %in% gs[[1]]))
    if ( n_members > 4 ) {
      tstats <- m[which(rownames(m) %in% gs[[1]]),]
      myn <- length(tstats)
      mymean <- mean(tstats)
      mymedian <- median(tstats)
      if ( testtype == "selfcontained" ) { wt <- t.test(tstats) }
      if ( testtype == "competitive" ) { wt <- t.test(tstats,scores) }
      res <- c(name,myn,mymean,mymedian,wt$p.value)
    }
  } , mc.cores = cores)
  res_df <- do.call(rbind, res)
  rownames(res_df) <- res_df[,1]
  res_df <- res_df[,-1]
  colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
  tmp <- apply(res_df,2,as.numeric)
  rownames(tmp) <- rownames(res_df)
  res_df <- tmp
  res_df <- as.data.frame(res_df)
  res_df <- res_df[order(res_df$pval),]
  res_df$logp <- -log10(res_df$pval )
  res_df$fdr <- p.adjust(res_df$pval,method="fdr")
  res_df[order(abs(res_df$pval)),]
  return(res_df)
}

# enrich non-parametric
wtenrich <- function(m,genesets,cores=1,testtype="selfcontained") {
  res <- mclapply( 1:length(genesets), function(i) {
    scores <- m[,1]
    gs <- genesets[i]
    name <- names(gs)
    n_members <- length(which(rownames(m) %in% gs[[1]]))
    if ( n_members > 4 ) {
      tstats <- m[which(rownames(m) %in% gs[[1]]),]
      myn <- length(tstats)
      mymean <- mean(tstats)
      mymedian <- median(tstats)
      if ( testtype == "selfcontained" ) { wt <- wilcox.test(tstats) }
      if ( testtype == "competitive" ) { wt <- wilcox.test(tstats,scores) }
      res <- c(name,myn,mymean,mymedian,wt$p.value)
    }
  } , mc.cores = cores)
  res_df <- do.call(rbind, res)
  rownames(res_df) <- res_df[,1]
  res_df <- res_df[,-1]
  colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
  tmp <- apply(res_df,2,as.numeric)
  rownames(tmp) <- rownames(res_df)
  res_df <- tmp
  res_df <- as.data.frame(res_df)
  res_df <- res_df[order(res_df$pval),]
  res_df$logp <- -log10(res_df$pval )
  res_df$fdr <- p.adjust(res_df$pval,method="fdr")
  res_df[order(abs(res_df$pval)),]
  return(res_df)
}

# parametric self-contained
simla <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  dm3 <- runlimma(mval=mval2,design=d,myann=myann)
  dmagg1 <- agg(dm3,cores=4)
  m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
  lares1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="selfcontained")
  gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

# parametric competitive
simlac <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  dm3 <- runlimma(mval=mval2,design=d,myann=myann)
  dmagg1 <- agg(dm3,cores=4)
  m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
  lares1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
  gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

# nonparametric self-contained
simnla <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  dm3 <- runlimma(mval=mval2,design=d,myann=myann)
  dmagg1 <- agg(dm3,cores=4)
  m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
  lares1 <- wtenrich(m=m1,genesets=gsets,cores=4,testtype="selfcontained")
  gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

# nonparametric competitive
simnlac <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  dm3 <- runlimma(mval=mval2,design=d,myann=myann)
  dmagg1 <- agg(dm3,cores=4)
  m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
  lares1 <- wtenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
  gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

AL: Aggregate limma functions

Functions for aggregate-limma-enrich approach.

# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
  gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
  gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
  gnl <- mclapply(gnl,unique,mc.cores=cores)
  myann$gnl <- gnl
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  mymed <- function(g) {
    probes <- rownames(myann[grep(g,myann$gnl),])
    rows <- which(rownames(mx) %in% probes)
    if ( length(rows) > 1 ) {
      b <- mx[rows,]
      med <- apply(b,2,mean)
      med <- matrix(med,nrow=1)
      colnames(med) <- colnames(b)
      rownames(med) <- g
      return(med)
    }
  }
  med <- mclapply(gn,mymed,mc.cores=cores)
  med <- med[lapply(med,length)>0]
  medf <- do.call(rbind,med)
  return(medf)
}

chragg <- function(mval,myann,cores=1){
  annodf <- as.data.frame(anno)
  keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
  mx <- mval[rownames(mval) %in% keep,]
  chrs <- unique(anno$chr)
  myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
  chrs <- chrs[order(-myorder)]
  leadercores <- floor(sqrt(cores))
  workercores <- ceiling(sqrt(cores))
  chrmedf <- mclapply(chrs,function(chr) {
    chrfrag <- annodf[annodf$chr==chr,]
    chrprobes <-rownames(chrfrag)
    chrmx <- mx[rownames(mx) %in% chrprobes,]
    chranno <- myann[rownames(myann) %in% chrprobes,]
    chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
    return(chrmedf)
  },mc.cores=leadercores)
  medf <- do.call(rbind, chrmedf)
  return(medf)
}

agglimma <- function(medf,design) {
  fit.reduced <- lmFit(medf,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  nondup <- !duplicated(dmagg$ID)
  dmagg <- dmagg[nondup,]
  rownames(dmagg) <- dmagg$ID
  dmagg$ID = NULL
  return(dmagg)
}

# AL approach parametric self-contained
simal <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  # al pipeline
  medf1 <- chragg(mval=mval2,myann=myann,cores=4)
  magg1 <- agglimma(medf1,d)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="selfcontained")
  # summarise results
  gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

# AL approach parametric competitive
simalc <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  # al pipeline
  medf1 <- chragg(mval=mval2,myann=myann,cores=4)
  magg1 <- agglimma(medf1,d)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
  # summarise results
  gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

# AL approach nonparametric self-contained
simnal <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  # al pipeline
  medf1 <- chragg(mval=mval2,myann=myann,cores=4)
  magg1 <- agglimma(medf1,d)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- wtenrich(m=m1,genesets=gsets,cores=4,testtype="selfcontained")
  # summarise results
  gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

# AL approach nonparametric competitive
simnalc <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  # al pipeline
  medf1 <- chragg(mval=mval2,myann=myann,cores=4)
  magg1 <- agglimma(medf1,d)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  alres1 <- wtenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
  # summarise results
  gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
  gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

AA Aggregate-aggregate-limma functions

Use mean value works well here.

gsagg <- function(x,genesets,cores=1) {
  meds <- mclapply(1:length(genesets), function(i) {
    gs = genesets[[i]]
    xx <- x[rownames(x) %in% gs,]
    med <- apply(xx,2,mean)
  },mc.cores=cores)
  mymed <- do.call(rbind,meds)
  rownames(mymed) <- names(genesets)
  as.data.frame(mymed)
}

aalimma <- function(agag,design) {
  fit.reduced <- lmFit(agag,design)
  fit.reduced <- eBayes(fit.reduced)
  dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
  return(dmagg)
}

aal <- function(mval,myann,genesets,design,cores=1) {
  medf <- chragg(mval,myann,cores=cores)
  agag <- gsagg(x=medf,genesets=genesets,cores=cores)
  aalres <- aalimma(agag=agag,design=design)
  return(aalres)
}

simaa <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  # aa pipeline
  aares1 <- aal(mval=mval2, myann=myann, genesets=gsets, design=d, cores=4)
  # summarise results
  gsig_up3 <- rownames(subset(aares1, adj.P.Val < 0.05 & logFC > 0))
  gsig_dn3 <- rownames(subset(aares1, adj.P.Val < 0.05 & logFC < 0))
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(aares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}

Agg-limma-mitch function

This approach uses the aggregated mvals, limma and instead of a 1-sample t-test it uses mitch which is a competitive test and could give more interpretable results.

runmitch <- function(m,genesets,cores=1) {
  suppressMessages({ mres <- mitch_calc(m,genesets,minsetsize=5,cores=cores) })
  mres <- mres$enrichment_result
  rownames(mres) <- mres$set
  mres$set=NULL
  return(mres)
}

simalm <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
  # generate gene sets
  gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
  lengths <- unname(unlist(lapply(genesetdatabase,length)))
  gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
  # select gene sets to alter
  set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
  # incorporate select changes
  sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
    frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
  # set up limma
  mval2 <- sim$mval
  ncols <- ncol(mval2)
  groupsize <- ncols/2
  ss <- data.frame(colnames(mval2))
  colnames(ss) <- "sample"
  ss$case <- c(rep(0,groupsize),rep(1,groupsize))
  d <- model.matrix(~ ss$case )
  # alm pipeline
  medf1 <- chragg(mval2,myann,cores=4)
  magg1 <- agglimma(medf1,d)
  m1 <- as.data.frame(magg1$t)
  rownames(m1) <- rownames(magg1)
  colnames(m1) <- "t"
  almres1 <- runmitch(m=m1,genesets=gsets,cores=4)
  # summarise results
  gsig_up3 <- rownames( subset( almres1, p.adjustANOVA < 0.05 & s.dist > 0 ) )
  gsig_dn3 <- rownames( subset( almres1, p.adjustANOVA < 0.05 & s.dist < 0 ) )
  gtup <- names(sim[[6]])
  gtdn <- names(sim[[7]])
  UPTP=length(intersect(gsig_up3 ,gtup))
  UPFP=length(setdiff(gsig_up3 ,gtup))
  UPFN=length(setdiff(gtup,gsig_up3))
  DNTP=length(intersect(gsig_dn3 ,gtdn))
  DNFP=length(setdiff(gsig_dn3 ,gtdn))
  DNFN=length(setdiff(gtdn,gsig_dn3))
  TP=UPTP+DNTP
  FP=UPFP+DNFP
  FN=UPFN+DNFN
  TN=nrow(almres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
  PREC=TP/(TP+FP)
  REC=TP/(TP+FN)
  F1=TP/(TP+(0.5*(FP+FN)))
  result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
  return(result)
}


F1 <- function(x,y) {
  ( 2 * x * y ) / ( x + y )
}

Run analyses

Set assumptions.

num_dm_sets=50
sims=10

groupsizes=c(3,6,12)
deltas=c(0.1,0.2,0.3,0.5)
params <- expand.grid("groupsizes"=groupsizes,"deltas"=deltas)
params
##    groupsizes deltas
## 1           3    0.1
## 2           6    0.1
## 3          12    0.1
## 4           3    0.2
## 5           6    0.2
## 6          12    0.2
## 7           3    0.3
## 8           6    0.3
## 9          12    0.3
## 10          3    0.5
## 11          6    0.5
## 12         12    0.5

GSA meth sim

Cannot be run in multicore due to fragility of AnnotationDbi SQLite objects.

gres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- lapply(1:sims,function(i) {
    simgsa(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5,
      frac_probes=0.5, groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  })
  res <- do.call(rbind,res)
  return(res)
})

gres
## [[1]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  1 50 949    0   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[2]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[3]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[4]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  1 50 949    0   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[5]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  1  0 49 950    1 0.02
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[6]]
##       TP FP FN  TN      PREC  REC
##  [1,] 11  2 39 948 0.8461538 0.22
##  [2,]  4  0 46 950 1.0000000 0.08
##  [3,] 10  0 40 950 1.0000000 0.20
##  [4,]  3  0 47 950 1.0000000 0.06
##  [5,]  6  0 44 950 1.0000000 0.12
##  [6,]  6  0 44 950 1.0000000 0.12
##  [7,] 11  0 39 950 1.0000000 0.22
##  [8,]  9  0 41 950 1.0000000 0.18
##  [9,]  3  0 47 950 1.0000000 0.06
## [10,]  9  0 41 950 1.0000000 0.18
## 
## [[7]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  1 50 949    0   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[8]]
##       TP FP FN  TN PREC  REC
##  [1,]  7  0 43 950    1 0.14
##  [2,]  1  0 49 950    1 0.02
##  [3,]  6  0 44 950    1 0.12
##  [4,]  2  0 48 950    1 0.04
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  1  0 49 950    1 0.02
##  [7,]  6  0 44 950    1 0.12
##  [8,]  2  0 48 950    1 0.04
##  [9,]  3  0 47 950    1 0.06
## [10,]  6  0 44 950    1 0.12
## 
## [[9]]
##       TP FP FN  TN      PREC  REC
##  [1,] 31  1 19 949 0.9687500 0.62
##  [2,] 18  0 32 950 1.0000000 0.36
##  [3,] 33  1 17 949 0.9705882 0.66
##  [4,] 28  1 22 949 0.9655172 0.56
##  [5,] 27  0 23 950 1.0000000 0.54
##  [6,] 21  0 29 950 1.0000000 0.42
##  [7,] 29  2 21 948 0.9354839 0.58
##  [8,] 24  0 26 950 1.0000000 0.48
##  [9,] 27  1 23 949 0.9642857 0.54
## [10,] 24  0 26 950 1.0000000 0.48
## 
## [[10]]
##       TP FP FN  TN      PREC  REC
##  [1,]  3  0 47 950 1.0000000 0.06
##  [2,]  0  0 50 950       NaN 0.00
##  [3,]  0  0 50 950       NaN 0.00
##  [4,]  0  0 50 950       NaN 0.00
##  [5,]  0  0 50 950       NaN 0.00
##  [6,]  1  0 49 950 1.0000000 0.02
##  [7,]  0  0 50 950       NaN 0.00
##  [8,]  2  0 48 950 1.0000000 0.04
##  [9,]  2  1 48 949 0.6666667 0.04
## [10,]  1  0 49 950 1.0000000 0.02
## 
## [[11]]
##       TP FP FN  TN      PREC  REC
##  [1,] 32  1 18 949 0.9696970 0.64
##  [2,] 26  0 24 950 1.0000000 0.52
##  [3,] 28  0 22 950 1.0000000 0.56
##  [4,] 26  0 24 950 1.0000000 0.52
##  [5,] 21  0 29 950 1.0000000 0.42
##  [6,] 24  0 26 950 1.0000000 0.48
##  [7,] 24  0 26 950 1.0000000 0.48
##  [8,] 25  1 25 949 0.9615385 0.50
##  [9,] 25  1 25 949 0.9615385 0.50
## [10,] 25  0 25 950 1.0000000 0.50
## 
## [[12]]
##       TP FP FN  TN      PREC  REC
##  [1,] 46  2  4 948 0.9583333 0.92
##  [2,] 43  0  7 950 1.0000000 0.86
##  [3,] 44  1  6 949 0.9777778 0.88
##  [4,] 46  1  4 949 0.9787234 0.92
##  [5,] 40  1 10 949 0.9756098 0.80
##  [6,] 45  1  5 949 0.9782609 0.90
##  [7,] 43  2  7 948 0.9555556 0.86
##  [8,] 46  1  4 949 0.9787234 0.92
##  [9,] 45  2  5 948 0.9574468 0.90
## [10,] 41  0  9 950 1.0000000 0.82
gres2 <- do.call(rbind,lapply(gres,colMeans))

gres2
##         TP  FP   FN    TN      PREC   REC
##  [1,]  0.0 0.1 50.0 949.9       NaN 0.000
##  [2,]  0.0 0.0 50.0 950.0       NaN 0.000
##  [3,]  0.0 0.0 50.0 950.0       NaN 0.000
##  [4,]  0.0 0.1 50.0 949.9       NaN 0.000
##  [5,]  0.1 0.0 49.9 950.0       NaN 0.002
##  [6,]  7.2 0.2 42.8 949.8 0.9846154 0.144
##  [7,]  0.0 0.1 50.0 949.9       NaN 0.000
##  [8,]  3.4 0.0 46.6 950.0       NaN 0.068
##  [9,] 26.2 0.6 23.8 949.4 0.9804625 0.524
## [10,]  0.9 0.1 49.1 949.9       NaN 0.018
## [11,] 25.6 0.3 24.4 949.7 0.9892774 0.512
## [12,] 43.9 1.1  6.1 948.9 0.9760431 0.878
gres3p <- do.call(rbind,lapply(groupsizes, function (g) { gres2[params$groupsizes==g,"PREC"] }))
colnames(gres3p) <- deltas
rownames(gres3p) <- groupsizes
gres3p
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN       NaN
## 6  NaN       NaN       NaN 0.9892774
## 12 NaN 0.9846154 0.9804625 0.9760431
gres3r <- do.call(rbind,lapply(groupsizes, function (g) { gres2[params$groupsizes==g,"REC"] }))
colnames(gres3r) <- deltas
rownames(gres3r) <- groupsizes
gres3r
##    0.1   0.2   0.3   0.5
## 3    0 0.000 0.000 0.018
## 6    0 0.002 0.068 0.512
## 12   0 0.144 0.524 0.878
F1(gres3p,gres3r)
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN       NaN
## 6  NaN       NaN       NaN 0.6747721
## 12 NaN 0.2512541 0.6829846 0.9244293

LA sim

parametric self-contained

lares <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simla(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

lares2 <- do.call(rbind,lapply(lares,colMeans))

lares2
##         TP    FP   FN    TN       PREC   REC
##  [1,] 16.7 574.9 33.3 375.1        NaN 0.334
##  [2,] 20.8 666.3 29.2 283.7 0.06976369 0.416
##  [3,] 20.6 624.5 29.4 325.5        NaN 0.412
##  [4,] 17.9 562.3 32.1 387.7        NaN 0.358
##  [5,] 21.9 655.1 28.1 294.9 0.07823014 0.438
##  [6,] 22.2 599.5 27.8 350.5 0.10627255 0.444
##  [7,] 19.0 551.1 31.0 398.9 0.22755219 0.380
##  [8,] 23.6 641.2 26.4 308.8 0.08769080 0.472
##  [9,] 26.3 573.8 23.7 376.2 0.11407826 0.526
## [10,] 22.8 528.7 27.2 421.3 0.22581790 0.456
## [11,] 27.5 603.0 22.5 347.0 0.11024111 0.550
## [12,] 33.2 505.3 16.8 444.7 0.13789056 0.664
lares3p <- do.call(rbind,lapply(groupsizes, function (g) { lares2[params$groupsizes==g,"PREC"] }))
colnames(lares3p) <- deltas
rownames(lares3p) <- groupsizes
lares3p
##           0.1        0.2       0.3       0.5
## 3         NaN        NaN 0.2275522 0.2258179
## 6  0.06976369 0.07823014 0.0876908 0.1102411
## 12        NaN 0.10627255 0.1140783 0.1378906
lares3r <- do.call(rbind,lapply(groupsizes, function (g) { lares2[params$groupsizes==g,"REC"] }))
colnames(lares3r) <- deltas
rownames(lares3r) <- groupsizes
lares3r
##      0.1   0.2   0.3   0.5
## 3  0.334 0.358 0.380 0.456
## 6  0.416 0.438 0.472 0.550
## 12 0.412 0.444 0.526 0.664
F1(lares3p,lares3r)
##          0.1       0.2       0.3       0.5
## 3        NaN       NaN 0.2846499 0.3020541
## 6  0.1194889 0.1327501 0.1479033 0.1836681
## 12       NaN 0.1714969 0.1874932 0.2283587

parametric competitive

lacres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simlac(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

lacres
## [[1]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[2]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  1  0 49 950    1 0.02
##  [3,]  0  1 50 949    0 0.00
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  1 50 949    0 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[3]]
##       TP FP FN  TN PREC  REC
##  [1,]  1  0 49 950    1 0.02
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  0  0 50 950  NaN 0.00
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  1  0 49 950    1 0.02
##  [7,]  1  0 49 950    1 0.02
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[4]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  0  0 50 950  NaN 0.00
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  3  1 47 949 0.75 0.06
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  1  4 49 946 0.20 0.02
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[5]]
##       TP FP FN  TN      PREC  REC
##  [1,]  0  0 50 950       NaN 0.00
##  [2,]  4  0 46 950 1.0000000 0.08
##  [3,] 10  2 40 948 0.8333333 0.20
##  [4,]  5  0 45 950 1.0000000 0.10
##  [5,]  1  0 49 950 1.0000000 0.02
##  [6,]  2  0 48 950 1.0000000 0.04
##  [7,]  0  0 50 950       NaN 0.00
##  [8,]  1  1 49 949 0.5000000 0.02
##  [9,]  1  2 49 948 0.3333333 0.02
## [10,]  3  0 47 950 1.0000000 0.06
## 
## [[6]]
##       TP FP FN  TN      PREC  REC
##  [1,] 14  1 36 949 0.9333333 0.28
##  [2,]  4  0 46 950 1.0000000 0.08
##  [3,] 16  3 34 947 0.8421053 0.32
##  [4,]  1  0 49 950 1.0000000 0.02
##  [5,] 12  1 38 949 0.9230769 0.24
##  [6,]  7  0 43 950 1.0000000 0.14
##  [7,] 13  1 37 949 0.9285714 0.26
##  [8,]  2  0 48 950 1.0000000 0.04
##  [9,]  7  5 43 945 0.5833333 0.14
## [10,]  7  0 43 950 1.0000000 0.14
## 
## [[7]]
##       TP FP FN  TN      PREC  REC
##  [1,]  1  0 49 950 1.0000000 0.02
##  [2,]  2  0 48 950 1.0000000 0.04
##  [3,]  1  0 49 950 1.0000000 0.02
##  [4,]  4  0 46 950 1.0000000 0.08
##  [5,]  2  0 48 950 1.0000000 0.04
##  [6,]  6  1 44 949 0.8571429 0.12
##  [7,]  0  0 50 950       NaN 0.00
##  [8,]  8  4 42 946 0.6666667 0.16
##  [9,]  0  1 50 949 0.0000000 0.00
## [10,]  0  0 50 950       NaN 0.00
## 
## [[8]]
##       TP FP FN  TN      PREC  REC
##  [1,]  7  3 43 947 0.7000000 0.14
##  [2,] 13  0 37 950 1.0000000 0.26
##  [3,] 21  1 29 949 0.9545455 0.42
##  [4,] 14  2 36 948 0.8750000 0.28
##  [5,] 11  0 39 950 1.0000000 0.22
##  [6,]  9  0 41 950 1.0000000 0.18
##  [7,]  5  1 45 949 0.8333333 0.10
##  [8,]  2  7 48 943 0.2222222 0.04
##  [9,]  5  8 45 942 0.3846154 0.10
## [10,]  7  0 43 950 1.0000000 0.14
## 
## [[9]]
##       TP FP FN  TN      PREC  REC
##  [1,] 26  0 24 950 1.0000000 0.52
##  [2,] 16  1 34 949 0.9411765 0.32
##  [3,] 23  2 27 948 0.9200000 0.46
##  [4,] 13  0 37 950 1.0000000 0.26
##  [5,] 20  0 30 950 1.0000000 0.40
##  [6,] 16  0 34 950 1.0000000 0.32
##  [7,] 23  0 27 950 1.0000000 0.46
##  [8,] 18  0 32 950 1.0000000 0.36
##  [9,] 23  5 27 945 0.8214286 0.46
## [10,] 20  0 30 950 1.0000000 0.40
## 
## [[10]]
##       TP FP FN  TN      PREC  REC
##  [1,] 13  0 37 950 1.0000000 0.26
##  [2,] 10  0 40 950 1.0000000 0.20
##  [3,] 12  0 38 950 1.0000000 0.24
##  [4,] 19  0 31 950 1.0000000 0.38
##  [5,] 12  0 38 950 1.0000000 0.24
##  [6,]  9  1 41 949 0.9000000 0.18
##  [7,] 20  1 30 949 0.9523810 0.40
##  [8,] 21  1 29 949 0.9545455 0.42
##  [9,]  8  7 42 943 0.5333333 0.16
## [10,]  9  1 41 949 0.9000000 0.18
## 
## [[11]]
##       TP FP FN  TN      PREC  REC
##  [1,] 19  3 31 947 0.8636364 0.38
##  [2,] 21  0 29 950 1.0000000 0.42
##  [3,] 33  1 17 949 0.9705882 0.66
##  [4,] 28  0 22 950 1.0000000 0.56
##  [5,] 21  1 29 949 0.9545455 0.42
##  [6,] 21  0 29 950 1.0000000 0.42
##  [7,] 21  0 29 950 1.0000000 0.42
##  [8,]  9 11 41 939 0.4500000 0.18
##  [9,] 27  2 23 948 0.9310345 0.54
## [10,] 21  2 29 948 0.9130435 0.42
## 
## [[12]]
##       TP FP FN  TN      PREC  REC
##  [1,] 33  0 17 950 1.0000000 0.66
##  [2,] 27  0 23 950 1.0000000 0.54
##  [3,] 29  1 21 949 0.9666667 0.58
##  [4,] 25  0 25 950 1.0000000 0.50
##  [5,] 23  0 27 950 1.0000000 0.46
##  [6,] 26  0 24 950 1.0000000 0.52
##  [7,] 29  0 21 950 1.0000000 0.58
##  [8,] 27  0 23 950 1.0000000 0.54
##  [9,] 31  1 19 949 0.9687500 0.62
## [10,] 24  0 26 950 1.0000000 0.48
lacres2 <- do.call(rbind,lapply(lacres,colMeans))

lacres2
##         TP  FP   FN    TN      PREC   REC
##  [1,]  0.0 0.0 50.0 950.0       NaN 0.000
##  [2,]  0.1 0.2 49.9 949.8       NaN 0.002
##  [3,]  0.3 0.0 49.7 950.0       NaN 0.006
##  [4,]  0.4 0.5 49.6 949.5       NaN 0.008
##  [5,]  2.7 0.5 47.3 949.5       NaN 0.054
##  [6,]  8.3 1.1 41.7 948.9 0.9210420 0.166
##  [7,]  2.4 0.6 47.6 949.4       NaN 0.048
##  [8,]  9.4 2.2 40.6 947.8 0.7969716 0.188
##  [9,] 19.8 0.8 30.2 949.2 0.9682605 0.396
## [10,] 13.3 1.1 36.7 948.9 0.9240260 0.266
## [11,] 22.1 2.0 27.9 948.0 0.9082848 0.442
## [12,] 27.4 0.2 22.6 949.8 0.9935417 0.548
lacres3p <- do.call(rbind,lapply(groupsizes, function (g) { lacres2[params$groupsizes==g,"PREC"] }))
colnames(lacres3p) <- deltas
rownames(lacres3p) <- groupsizes
lacres3p
##    0.1      0.2       0.3       0.5
## 3  NaN      NaN       NaN 0.9240260
## 6  NaN      NaN 0.7969716 0.9082848
## 12 NaN 0.921042 0.9682605 0.9935417
lacres3r <- do.call(rbind,lapply(groupsizes, function (g) { lacres2[params$groupsizes==g,"REC"] }))
colnames(lacres3r) <- deltas
rownames(lacres3r) <- groupsizes
lacres3r
##      0.1   0.2   0.3   0.5
## 3  0.000 0.008 0.048 0.266
## 6  0.002 0.054 0.188 0.442
## 12 0.006 0.166 0.396 0.548
F1(lacres3p,lacres3r)
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.4130849
## 6  NaN       NaN 0.3042335 0.5946329
## 12 NaN 0.2813009 0.5621084 0.7063848

nonparametric self-contained

nlares <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simnla(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

nlares
## [[1]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 21 662 29 288 0.03074671 0.42
##  [2,] 25 851 25  99 0.02853881 0.50
##  [3,] 18 717 32 233 0.02448980 0.36
##  [4,] 16 476 34 474 0.03252033 0.32
##  [5,] 18 818 32 132 0.02153110 0.36
##  [6,]  0   0 50 950        NaN 0.00
##  [7,] 21 691 29 259 0.02949438 0.42
##  [8,] 24 877 26  73 0.02663707 0.48
##  [9,] 24 958 26  -8 0.02443992 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[2]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 950 25   0 0.02564103 0.50
##  [2,] 22 206 28 744 0.09649123 0.44
##  [3,] 25 774 25 176 0.03128911 0.50
##  [4,] 22 865 28  85 0.02480271 0.44
##  [5,] 24 782 26 168 0.02977667 0.48
##  [6,]  3   1 47 949 0.75000000 0.06
##  [7,] 23 855 27  95 0.02619590 0.46
##  [8,] 25 948 25   2 0.02569373 0.50
##  [9,] 24 898 26  52 0.02603037 0.48
## [10,] 18 604 32 346 0.02893891 0.36
## 
## [[3]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 859 26  91 0.02718007 0.48
##  [2,] 25 803 25 147 0.03019324 0.50
##  [3,] 24 876 26  74 0.02666667 0.48
##  [4,]  0   0 50 950        NaN 0.00
##  [5,] 20 393 30 557 0.04842615 0.40
##  [6,] 23 616 27 334 0.03599374 0.46
##  [7,] 23 776 27 174 0.02878598 0.46
##  [8,] 24 827 26 123 0.02820212 0.48
##  [9,] 24 885 26  65 0.02640264 0.48
## [10,] 21 496 29 454 0.04061896 0.42
## 
## [[4]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 21 637 29 313 0.03191489 0.42
##  [2,] 25 847 25 103 0.02866972 0.50
##  [3,] 21 708 29 242 0.02880658 0.42
##  [4,] 20 471 30 479 0.04073320 0.40
##  [5,] 20 809 30 141 0.02412545 0.40
##  [6,]  1   0 49 950 1.00000000 0.02
##  [7,] 22 668 28 282 0.03188406 0.44
##  [8,] 24 863 26  87 0.02705750 0.48
##  [9,] 24 958 26  -8 0.02443992 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[5]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 946 25   4 0.02574665 0.50
##  [2,] 23 196 27 754 0.10502283 0.46
##  [3,] 25 762 25 188 0.03176620 0.50
##  [4,] 22 854 28  96 0.02511416 0.44
##  [5,] 25 766 25 184 0.03160556 0.50
##  [6,]  6   7 44 943 0.46153846 0.12
##  [7,] 24 845 26 105 0.02761795 0.48
##  [8,] 25 945 25   5 0.02577320 0.50
##  [9,] 24 888 26  62 0.02631579 0.48
## [10,] 20 578 30 372 0.03344482 0.40
## 
## [[6]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 835 26 115 0.02793946 0.48
##  [2,] 25 787 25 163 0.03078818 0.50
##  [3,] 24 858 26  92 0.02721088 0.48
##  [4,]  6   2 44 948 0.75000000 0.12
##  [5,] 21 367 29 583 0.05412371 0.42
##  [6,] 24 597 26 353 0.03864734 0.48
##  [7,] 23 759 27 191 0.02941176 0.46
##  [8,] 25 803 25 147 0.03019324 0.50
##  [9,] 24 864 26  86 0.02702703 0.48
## [10,] 22 488 28 462 0.04313725 0.44
## 
## [[7]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 21 616 29 334 0.03296703 0.42
##  [2,] 25 840 25 110 0.02890173 0.50
##  [3,] 22 694 28 256 0.03072626 0.44
##  [4,] 22 459 28 491 0.04573805 0.44
##  [5,] 20 801 30 149 0.02436054 0.40
##  [6,]  3   0 47 950 1.00000000 0.06
##  [7,] 23 661 27 289 0.03362573 0.46
##  [8,] 24 847 26 103 0.02755454 0.48
##  [9,] 24 954 26  -4 0.02453988 0.48
## [10,]  3   0 47 950 1.00000000 0.06
## 
## [[8]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 937 25  13 0.02598753 0.50
##  [2,] 24 187 26 763 0.11374408 0.48
##  [3,] 27 755 23 195 0.03452685 0.54
##  [4,] 23 838 27 112 0.02671312 0.46
##  [5,] 25 759 25 191 0.03188776 0.50
##  [6,] 13   9 37 941 0.59090909 0.26
##  [7,] 24 826 26 124 0.02823529 0.48
##  [8,] 25 942 25   8 0.02585315 0.50
##  [9,] 24 878 26  72 0.02660754 0.48
## [10,] 22 563 28 387 0.03760684 0.44
## 
## [[9]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 831 26 119 0.02807018 0.48
##  [2,] 25 774 25 176 0.03128911 0.50
##  [3,] 24 848 26 102 0.02752294 0.48
##  [4,] 14   4 36 946 0.77777778 0.28
##  [5,] 27 356 23 594 0.07049608 0.54
##  [6,] 28 592 22 358 0.04516129 0.56
##  [7,] 25 749 25 201 0.03229974 0.50
##  [8,] 25 794 25 156 0.03052503 0.50
##  [9,] 24 854 26  96 0.02733485 0.48
## [10,] 23 477 27 473 0.04600000 0.46
## 
## [[10]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 23 608 27 342 0.03645008 0.46
##  [2,] 25 830 25 120 0.02923977 0.50
##  [3,] 23 684 27 266 0.03253182 0.46
##  [4,] 27 449 23 501 0.05672269 0.54
##  [5,] 22 791 28 159 0.02706027 0.44
##  [6,] 10   0 40 950 1.00000000 0.20
##  [7,] 25 642 25 308 0.03748126 0.50
##  [8,] 26 833 24 117 0.03026775 0.52
##  [9,] 24 949 26   1 0.02466598 0.48
## [10,]  9   1 41 949 0.90000000 0.18
## 
## [[11]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 919 25  31 0.02648305 0.50
##  [2,] 28 189 22 761 0.12903226 0.56
##  [3,] 28 745 22 205 0.03622251 0.56
##  [4,] 25 832 25 118 0.02917153 0.50
##  [5,] 28 747 22 203 0.03612903 0.56
##  [6,] 17   9 33 941 0.65384615 0.34
##  [7,] 24 815 26 135 0.02860548 0.48
##  [8,] 25 926 25  24 0.02628812 0.50
##  [9,] 24 870 26  80 0.02684564 0.48
## [10,] 24 547 26 403 0.04203152 0.48
## 
## [[12]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 27 829 23 121 0.03154206 0.54
##  [2,] 28 770 22 180 0.03508772 0.56
##  [3,] 27 841 23 109 0.03110599 0.54
##  [4,] 19   7 31 943 0.73076923 0.38
##  [5,] 29 334 21 616 0.07988981 0.58
##  [6,] 32 585 18 365 0.05186386 0.64
##  [7,] 28 747 22 203 0.03612903 0.56
##  [8,] 28 791 22 159 0.03418803 0.56
##  [9,] 25 850 25 100 0.02857143 0.50
## [10,] 27 471 23 479 0.05421687 0.54
nlares2 <- do.call(rbind,lapply(nlares,colMeans))

nlares2
##         TP    FP   FN    TN       PREC   REC
##  [1,] 16.7 605.0 33.3 345.0        NaN 0.334
##  [2,] 21.1 688.3 28.9 261.7 0.10648597 0.422
##  [3,] 20.8 653.1 29.2 296.9        NaN 0.416
##  [4,] 17.8 596.1 32.2 353.9        NaN 0.356
##  [5,] 21.9 678.7 28.1 271.3 0.07939456 0.438
##  [6,] 21.8 636.0 28.2 314.0 0.10584789 0.436
##  [7,] 18.7 587.2 31.3 362.8 0.22484137 0.374
##  [8,] 23.2 669.4 26.8 280.6 0.09420713 0.464
##  [9,] 23.9 627.9 26.1 322.1 0.11164770 0.478
## [10,] 21.4 578.7 28.6 371.3 0.21744196 0.428
## [11,] 24.8 659.9 25.2 290.1 0.10346553 0.496
## [12,] 27.0 622.5 23.0 327.5 0.11133640 0.540
nlares3p <- do.call(rbind,lapply(groupsizes, function (g) { nlares2[params$groupsizes==g,"PREC"] }))
colnames(nlares3p) <- deltas
rownames(nlares3p) <- groupsizes
nlares3p
##         0.1        0.2        0.3       0.5
## 3       NaN        NaN 0.22484137 0.2174420
## 6  0.106486 0.07939456 0.09420713 0.1034655
## 12      NaN 0.10584789 0.11164770 0.1113364
nlares3r <- do.call(rbind,lapply(groupsizes, function (g) { nlares2[params$groupsizes==g,"REC"] }))
colnames(nlares3r) <- deltas
rownames(nlares3r) <- groupsizes
nlares3r
##      0.1   0.2   0.3   0.5
## 3  0.334 0.356 0.374 0.428
## 6  0.422 0.438 0.464 0.496
## 12 0.416 0.436 0.478 0.540
F1(nlares3p,nlares3r)
##          0.1       0.2       0.3       0.5
## 3        NaN       NaN 0.2808446 0.2883765
## 6  0.1700597 0.1344228 0.1566161 0.1712155
## 12       NaN 0.1703418 0.1810152 0.1846102

nonparametric competitive

nlacres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simnlac(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

nlacres
## [[1]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[2]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  1  1 49 949  0.5 0.02
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  1 50 949  0.0 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[3]]
##       TP FP FN  TN PREC  REC
##  [1,]  1  1 49 949 0.50 0.02
##  [2,]  1  0 49 950 1.00 0.02
##  [3,]  0  0 50 950  NaN 0.00
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  2  0 48 950 1.00 0.04
##  [6,]  3  1 47 949 0.75 0.06
##  [7,]  3  1 47 949 0.75 0.06
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[4]]
##       TP FP FN  TN      PREC  REC
##  [1,]  2  0 48 950 1.0000000 0.04
##  [2,]  2  0 48 950 1.0000000 0.04
##  [3,]  2  1 48 949 0.6666667 0.04
##  [4,]  2  0 48 950 1.0000000 0.04
##  [5,]  2  1 48 949 0.6666667 0.04
##  [6,]  3  1 47 949 0.7500000 0.06
##  [7,]  0  0 50 950       NaN 0.00
##  [8,]  2  7 48 943 0.2222222 0.04
##  [9,]  0  0 50 950       NaN 0.00
## [10,]  0  0 50 950       NaN 0.00
## 
## [[5]]
##       TP FP FN  TN      PREC  REC
##  [1,]  5  1 45 949 0.8333333 0.10
##  [2,]  7  0 43 950 1.0000000 0.14
##  [3,] 17  3 33 947 0.8500000 0.34
##  [4,]  6  2 44 948 0.7500000 0.12
##  [5,]  1  0 49 950 1.0000000 0.02
##  [6,] 10  0 40 950 1.0000000 0.20
##  [7,]  4  1 46 949 0.8000000 0.08
##  [8,]  1  2 49 948 0.3333333 0.02
##  [9,]  1  3 49 947 0.2500000 0.02
## [10,]  5  0 45 950 1.0000000 0.10
## 
## [[6]]
##       TP FP FN  TN      PREC  REC
##  [1,] 19  2 31 948 0.9047619 0.38
##  [2,]  8  0 42 950 1.0000000 0.16
##  [3,] 17  3 33 947 0.8500000 0.34
##  [4,] 12  0 38 950 1.0000000 0.24
##  [5,] 14  1 36 949 0.9333333 0.28
##  [6,] 15  1 35 949 0.9375000 0.30
##  [7,] 17  1 33 949 0.9444444 0.34
##  [8,]  6  2 44 948 0.7500000 0.12
##  [9,] 11 10 39 940 0.5238095 0.22
## [10,] 12  0 38 950 1.0000000 0.24
## 
## [[7]]
##       TP FP FN  TN      PREC  REC
##  [1,]  7  0 43 950 1.0000000 0.14
##  [2,]  3  0 47 950 1.0000000 0.06
##  [3,]  6  0 44 950 1.0000000 0.12
##  [4,] 11  0 39 950 1.0000000 0.22
##  [5,]  8  1 42 949 0.8888889 0.16
##  [6,]  7  1 43 949 0.8750000 0.14
##  [7,] 12  0 38 950 1.0000000 0.24
##  [8,]  9  4 41 946 0.6923077 0.18
##  [9,]  2  2 48 948 0.5000000 0.04
## [10,]  0  0 50 950       NaN 0.00
## 
## [[8]]
##       TP FP FN  TN      PREC  REC
##  [1,]  8  4 42 946 0.6666667 0.16
##  [2,] 19  0 31 950 1.0000000 0.38
##  [3,] 28  1 22 949 0.9655172 0.56
##  [4,] 15  1 35 949 0.9375000 0.30
##  [5,] 13  1 37 949 0.9285714 0.26
##  [6,] 12  0 38 950 1.0000000 0.24
##  [7,]  7  2 43 948 0.7777778 0.14
##  [8,]  2  7 48 943 0.2222222 0.04
##  [9,] 13 11 37 939 0.5416667 0.26
## [10,] 16  2 34 948 0.8888889 0.32
## 
## [[9]]
##       TP FP FN  TN      PREC  REC
##  [1,] 24  0 26 950 1.0000000 0.48
##  [2,] 28  0 22 950 1.0000000 0.56
##  [3,] 25  2 25 948 0.9259259 0.50
##  [4,] 18  0 32 950 1.0000000 0.36
##  [5,] 21  1 29 949 0.9545455 0.42
##  [6,] 23  2 27 948 0.9200000 0.46
##  [7,] 24  0 26 950 1.0000000 0.48
##  [8,] 18  0 32 950 1.0000000 0.36
##  [9,] 25  6 25 944 0.8064516 0.50
## [10,] 23  0 27 950 1.0000000 0.46
## 
## [[10]]
##       TP FP FN  TN      PREC  REC
##  [1,] 19  1 31 949 0.9500000 0.38
##  [2,] 15  1 35 949 0.9375000 0.30
##  [3,] 18  0 32 950 1.0000000 0.36
##  [4,] 23  0 27 950 1.0000000 0.46
##  [5,] 13  0 37 950 1.0000000 0.26
##  [6,] 15  1 35 949 0.9375000 0.30
##  [7,] 20  0 30 950 1.0000000 0.40
##  [8,] 21  3 29 947 0.8750000 0.42
##  [9,] 10 11 40 939 0.4761905 0.20
## [10,] 17  2 33 948 0.8947368 0.34
## 
## [[11]]
##       TP FP FN  TN      PREC  REC
##  [1,] 23  9 27 941 0.7187500 0.46
##  [2,] 25  1 25 949 0.9615385 0.50
##  [3,] 33  1 17 949 0.9705882 0.66
##  [4,] 29  0 21 950 1.0000000 0.58
##  [5,] 23  1 27 949 0.9583333 0.46
##  [6,] 24  0 26 950 1.0000000 0.48
##  [7,] 20  0 30 950 1.0000000 0.40
##  [8,] 10 12 40 938 0.4545455 0.20
##  [9,] 29  3 21 947 0.9062500 0.58
## [10,] 20  2 30 948 0.9090909 0.40
## 
## [[12]]
##       TP FP FN  TN      PREC  REC
##  [1,] 33  0 17 950 1.0000000 0.66
##  [2,] 33  0 17 950 1.0000000 0.66
##  [3,] 31  1 19 949 0.9687500 0.62
##  [4,] 24  1 26 949 0.9600000 0.48
##  [5,] 26  1 24 949 0.9629630 0.52
##  [6,] 29  3 21 947 0.9062500 0.58
##  [7,] 27  1 23 949 0.9642857 0.54
##  [8,] 27  0 23 950 1.0000000 0.54
##  [9,] 35  2 15 948 0.9459459 0.70
## [10,] 28  0 22 950 1.0000000 0.56
nlacres2 <- do.call(rbind,lapply(nlacres,colMeans))

nlacres2
##         TP  FP   FN    TN      PREC   REC
##  [1,]  0.0 0.0 50.0 950.0       NaN 0.000
##  [2,]  0.1 0.2 49.9 949.8       NaN 0.002
##  [3,]  1.0 0.3 49.0 949.7       NaN 0.020
##  [4,]  1.5 1.0 48.5 949.0       NaN 0.030
##  [5,]  5.7 1.2 44.3 948.8 0.7816667 0.114
##  [6,] 13.1 2.0 36.9 948.0 0.8843849 0.262
##  [7,]  6.5 0.8 43.5 949.2       NaN 0.130
##  [8,] 13.3 2.9 36.7 947.1 0.7928811 0.266
##  [9,] 22.9 1.1 27.1 948.9 0.9606923 0.458
## [10,] 17.1 1.9 32.9 948.1 0.9070927 0.342
## [11,] 23.6 2.9 26.4 947.1 0.8879096 0.472
## [12,] 29.3 0.9 20.7 949.1 0.9708195 0.586
nlacres3p <- do.call(rbind,lapply(groupsizes, function (g) { nlacres2[params$groupsizes==g,"PREC"] }))
colnames(nlacres3p) <- deltas
rownames(nlacres3p) <- groupsizes
nlacres3p
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.9070927
## 6  NaN 0.7816667 0.7928811 0.8879096
## 12 NaN 0.8843849 0.9606923 0.9708195
nlacres3r <- do.call(rbind,lapply(groupsizes, function (g) { nlacres2[params$groupsizes==g,"REC"] }))
colnames(nlacres3r) <- deltas
rownames(nlacres3r) <- groupsizes
nlacres3r
##      0.1   0.2   0.3   0.5
## 3  0.000 0.030 0.130 0.342
## 6  0.002 0.114 0.266 0.472
## 12 0.020 0.262 0.458 0.586
F1(nlacres3p,nlacres3r)
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.4967217
## 6  NaN 0.1989803 0.3983570 0.6163547
## 12 NaN 0.4042427 0.6202854 0.7308493

AL sim

parametric self-contained

alres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simal(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

alres
## [[1]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 20 612 30 338 0.03164557 0.40
##  [2,] 25 841 25 109 0.02886836 0.50
##  [3,] 20 690 30 260 0.02816901 0.40
##  [4,] 13 394 37 556 0.03194103 0.26
##  [5,] 19 765 31 185 0.02423469 0.38
##  [6,]  0   0 50 950        NaN 0.00
##  [7,] 20 645 30 305 0.03007519 0.40
##  [8,] 24 862 26  88 0.02708804 0.48
##  [9,] 24 961 26 -11 0.02436548 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[2]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 951 25  -1 0.02561475 0.50
##  [2,] 14  84 36 866 0.14285714 0.28
##  [3,] 25 779 25 171 0.03109453 0.50
##  [4,] 22 848 28 102 0.02528736 0.44
##  [5,] 25 807 25 143 0.03004808 0.50
##  [6,]  4   4 46 946 0.50000000 0.08
##  [7,] 24 843 26 107 0.02768166 0.48
##  [8,] 25 953 25  -3 0.02556237 0.50
##  [9,] 24 901 26  49 0.02594595 0.48
## [10,] 18 626 32 324 0.02795031 0.36
## 
## [[3]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 855 25  95 0.02840909 0.50
##  [2,] 25 778 25 172 0.03113325 0.50
##  [3,] 25 886 25  64 0.02744237 0.50
##  [4,]  0   0 50 950        NaN 0.00
##  [5,] 19 365 31 585 0.04947917 0.38
##  [6,] 22 532 28 418 0.03971119 0.44
##  [7,] 22 752 28 198 0.02842377 0.44
##  [8,] 25 860 25  90 0.02824859 0.50
##  [9,] 24 890 26  60 0.02625821 0.48
## [10,] 21 379 29 571 0.05250000 0.42
## 
## [[4]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 21 588 29 362 0.03448276 0.42
##  [2,] 25 836 25 114 0.02903600 0.50
##  [3,] 21 674 29 276 0.03021583 0.42
##  [4,] 18 375 32 575 0.04580153 0.36
##  [5,] 21 755 29 195 0.02706186 0.42
##  [6,]  0   0 50 950        NaN 0.00
##  [7,] 23 615 27 335 0.03605016 0.46
##  [8,] 24 851 26  99 0.02742857 0.48
##  [9,] 24 961 26 -11 0.02436548 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[5]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 947 25   3 0.02572016 0.50
##  [2,] 21  90 29 860 0.18918919 0.42
##  [3,] 25 759 25 191 0.03188776 0.50
##  [4,] 22 833 28 117 0.02573099 0.44
##  [5,] 25 783 25 167 0.03094059 0.50
##  [6,]  8   9 42 941 0.47058824 0.16
##  [7,] 24 832 26 118 0.02803738 0.48
##  [8,] 25 951 25  -1 0.02561475 0.50
##  [9,] 24 891 26  59 0.02622951 0.48
## [10,] 21 593 29 357 0.03420195 0.42
## 
## [[6]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 823 25 127 0.02948113 0.50
##  [2,] 25 752 25 198 0.03217503 0.50
##  [3,] 25 865 25  85 0.02808989 0.50
##  [4,] 13  24 37 926 0.35135135 0.26
##  [5,] 25 339 25 611 0.06868132 0.50
##  [6,] 24 503 26 447 0.04554080 0.48
##  [7,] 25 726 25 224 0.03328895 0.50
##  [8,] 25 832 25 118 0.02917153 0.50
##  [9,] 24 870 26  80 0.02684564 0.48
## [10,] 21 370 29 580 0.05370844 0.42
## 
## [[7]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 22 577 28 373 0.03672788 0.44
##  [2,] 25 822 25 128 0.02951594 0.50
##  [3,] 22 667 28 283 0.03193033 0.44
##  [4,] 22 365 28 585 0.05684755 0.44
##  [5,] 21 740 29 210 0.02759527 0.42
##  [6,]  2   0 48 950 1.00000000 0.04
##  [7,] 23 598 27 352 0.03703704 0.46
##  [8,] 24 836 26 114 0.02790698 0.48
##  [9,] 24 955 26  -5 0.02451481 0.48
## [10,]  3   0 47 950 1.00000000 0.06
## 
## [[8]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 932 25  18 0.02612330 0.50
##  [2,] 25  91 25 859 0.21551724 0.50
##  [3,] 27 734 23 216 0.03547963 0.54
##  [4,] 24 818 26 132 0.02850356 0.48
##  [5,] 25 770 25 180 0.03144654 0.50
##  [6,] 13  11 37 939 0.54166667 0.26
##  [7,] 24 810 26 140 0.02877698 0.48
##  [8,] 25 949 25   1 0.02566735 0.50
##  [9,] 24 879 26  71 0.02657807 0.48
## [10,] 24 560 26 390 0.04109589 0.48
## 
## [[9]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 806 25 144 0.03008424 0.50
##  [2,] 26 736 24 214 0.03412073 0.52
##  [3,] 27 844 23 106 0.03099885 0.54
##  [4,] 20  18 30 932 0.52631579 0.40
##  [5,] 27 279 23 671 0.08823529 0.54
##  [6,] 32 459 18 491 0.06517312 0.64
##  [7,] 28 692 22 258 0.03888889 0.56
##  [8,] 25 816 25 134 0.02972652 0.50
##  [9,] 24 856 26  94 0.02727273 0.48
## [10,] 24 334 26 616 0.06703911 0.48
## 
## [[10]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 26 547 24 403 0.04537522 0.52
##  [2,] 25 811 25 139 0.02990431 0.50
##  [3,] 23 642 27 308 0.03458647 0.46
##  [4,] 27 332 23 618 0.07520891 0.54
##  [5,] 24 716 26 234 0.03243243 0.48
##  [6,] 11   0 39 950 1.00000000 0.22
##  [7,] 26 549 24 401 0.04521739 0.52
##  [8,] 27 810 23 140 0.03225806 0.54
##  [9,] 24 953 26  -3 0.02456499 0.48
## [10,] 14   0 36 950 1.00000000 0.28
## 
## [[11]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 906 26  44 0.02580645 0.48
##  [2,] 29  71 21 879 0.29000000 0.58
##  [3,] 36 673 14 277 0.05077574 0.72
##  [4,] 28 775 22 175 0.03486924 0.56
##  [5,] 31 744 19 206 0.04000000 0.62
##  [6,] 22  10 28 940 0.68750000 0.44
##  [7,] 26 784 24 166 0.03209877 0.52
##  [8,] 25 929 25  21 0.02620545 0.50
##  [9,] 24 860 26  90 0.02714932 0.48
## [10,] 30 480 20 470 0.05882353 0.60
## 
## [[12]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 37 734 13 216 0.04798962 0.74
##  [2,] 31 672 19 278 0.04409673 0.62
##  [3,] 34 799 16 151 0.04081633 0.68
##  [4,] 31  15 19 935 0.67391304 0.62
##  [5,] 30 197 20 753 0.13215859 0.60
##  [6,] 38 365 12 585 0.09429280 0.76
##  [7,] 37 586 13 364 0.05939005 0.74
##  [8,] 31 775 19 175 0.03846154 0.62
##  [9,] 32 801 18 149 0.03841537 0.64
## [10,] 28 256 22 694 0.09859155 0.56
alres2 <- do.call(rbind,lapply(alres,colMeans))

alres2
##         TP    FP   FN    TN       PREC   REC
##  [1,] 16.5 577.0 33.5 373.0        NaN 0.330
##  [2,] 20.6 679.6 29.4 270.4 0.08620421 0.412
##  [3,] 20.8 629.7 29.2 320.3        NaN 0.416
##  [4,] 17.7 565.5 32.3 384.5        NaN 0.354
##  [5,] 22.0 668.8 28.0 281.2 0.08881405 0.440
##  [6,] 23.2 610.4 26.8 339.6 0.06983341 0.464
##  [7,] 18.8 556.0 31.2 394.0 0.22720758 0.376
##  [8,] 23.6 655.4 26.4 294.6 0.10008552 0.472
##  [9,] 25.8 584.0 24.2 366.0 0.09378553 0.516
## [10,] 22.7 536.0 27.3 414.0 0.23195478 0.454
## [11,] 27.5 623.2 22.5 326.8 0.12732285 0.550
## [12,] 32.9 520.0 17.1 430.0 0.12681256 0.658
alres3p <- do.call(rbind,lapply(groupsizes, function (g) { alres2[params$groupsizes==g,"PREC"] }))
colnames(alres3p) <- deltas
rownames(alres3p) <- groupsizes
alres3p
##           0.1        0.2        0.3       0.5
## 3         NaN        NaN 0.22720758 0.2319548
## 6  0.08620421 0.08881405 0.10008552 0.1273228
## 12        NaN 0.06983341 0.09378553 0.1268126
alres3r <- do.call(rbind,lapply(groupsizes, function (g) { alres2[params$groupsizes==g,"REC"] }))
colnames(alres3r) <- deltas
rownames(alres3r) <- groupsizes
alres3r
##      0.1   0.2   0.3   0.5
## 3  0.330 0.354 0.376 0.454
## 6  0.412 0.440 0.472 0.550
## 12 0.416 0.464 0.516 0.658
F1(alres3p,alres3r)
##          0.1       0.2       0.3       0.5
## 3        NaN       NaN 0.2832526 0.3070391
## 6  0.1425766 0.1477956 0.1651514 0.2067775
## 12       NaN 0.1213963 0.1587225 0.2126436

parametric competitive

alcres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simalc(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

alcres
## [[1]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[2]]
##       TP FP FN  TN      PREC  REC
##  [1,]  0  0 50 950       NaN 0.00
##  [2,]  0  0 50 950       NaN 0.00
##  [3,]  2  1 48 949 0.6666667 0.04
##  [4,]  0  0 50 950       NaN 0.00
##  [5,]  0  0 50 950       NaN 0.00
##  [6,]  0  0 50 950       NaN 0.00
##  [7,]  0  0 50 950       NaN 0.00
##  [8,]  0  0 50 950       NaN 0.00
##  [9,]  0  0 50 950       NaN 0.00
## [10,]  0  0 50 950       NaN 0.00
## 
## [[3]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  0  1 50 949 0.00 0.00
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  1  0 49 950 1.00 0.02
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  3  1 47 949 0.75 0.06
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[4]]
##       TP FP FN  TN      PREC  REC
##  [1,]  0  0 50 950       NaN 0.00
##  [2,]  0  0 50 950       NaN 0.00
##  [3,]  2  0 48 950 1.0000000 0.04
##  [4,]  0  0 50 950       NaN 0.00
##  [5,]  0  0 50 950       NaN 0.00
##  [6,]  0  0 50 950       NaN 0.00
##  [7,]  0  0 50 950       NaN 0.00
##  [8,]  1  5 49 945 0.1666667 0.02
##  [9,]  0  0 50 950       NaN 0.00
## [10,]  0  0 50 950       NaN 0.00
## 
## [[5]]
##       TP FP FN  TN      PREC  REC
##  [1,]  0  0 50 950       NaN 0.00
##  [2,]  1  0 49 950 1.0000000 0.02
##  [3,]  8  1 42 949 0.8888889 0.16
##  [4,]  1  3 49 947 0.2500000 0.02
##  [5,]  2  1 48 949 0.6666667 0.04
##  [6,]  4  0 46 950 1.0000000 0.08
##  [7,]  2  1 48 949 0.6666667 0.04
##  [8,]  1  1 49 949 0.5000000 0.02
##  [9,]  0  1 50 949 0.0000000 0.00
## [10,]  2  0 48 950 1.0000000 0.04
## 
## [[6]]
##       TP FP FN  TN      PREC  REC
##  [1,] 12  3 38 947 0.8000000 0.24
##  [2,]  7  1 43 949 0.8750000 0.14
##  [3,] 10  5 40 945 0.6666667 0.20
##  [4,]  5  0 45 950 1.0000000 0.10
##  [5,] 11  1 39 949 0.9166667 0.22
##  [6,] 10  0 40 950 1.0000000 0.20
##  [7,] 14  1 36 949 0.9333333 0.28
##  [8,]  1  2 49 948 0.3333333 0.02
##  [9,]  9  6 41 944 0.6000000 0.18
## [10,]  0  0 50 950       NaN 0.00
## 
## [[7]]
##       TP FP FN  TN      PREC  REC
##  [1,]  0  0 50 950       NaN 0.00
##  [2,]  1  0 49 950 1.0000000 0.02
##  [3,]  5  0 45 950 1.0000000 0.10
##  [4,]  7  0 43 950 1.0000000 0.14
##  [5,]  5  0 45 950 1.0000000 0.10
##  [6,]  3  0 47 950 1.0000000 0.06
##  [7,]  4  0 46 950 1.0000000 0.08
##  [8,]  8  3 42 947 0.7272727 0.16
##  [9,]  0  0 50 950       NaN 0.00
## [10,]  0  0 50 950       NaN 0.00
## 
## [[8]]
##       TP FP FN  TN      PREC  REC
##  [1,]  4  1 46 949 0.8000000 0.08
##  [2,] 15  1 35 949 0.9375000 0.30
##  [3,] 22  0 28 950 1.0000000 0.44
##  [4,] 12  1 38 949 0.9230769 0.24
##  [5,] 12  0 38 950 1.0000000 0.24
##  [6,]  8  0 42 950 1.0000000 0.16
##  [7,]  8  1 42 949 0.8888889 0.16
##  [8,]  2  7 48 943 0.2222222 0.04
##  [9,]  3  7 47 943 0.3000000 0.06
## [10,]  8  0 42 950 1.0000000 0.16
## 
## [[9]]
##       TP FP FN  TN      PREC  REC
##  [1,] 23  1 27 949 0.9583333 0.46
##  [2,] 16  1 34 949 0.9411765 0.32
##  [3,] 22  2 28 948 0.9166667 0.44
##  [4,] 11  0 39 950 1.0000000 0.22
##  [5,] 17  1 33 949 0.9444444 0.34
##  [6,] 22  0 28 950 1.0000000 0.44
##  [7,] 23  1 27 949 0.9583333 0.46
##  [8,] 16  0 34 950 1.0000000 0.32
##  [9,] 20  7 30 943 0.7407407 0.40
## [10,] 16  0 34 950 1.0000000 0.32
## 
## [[10]]
##       TP FP FN  TN      PREC  REC
##  [1,] 13  0 37 950 1.0000000 0.26
##  [2,] 10  1 40 949 0.9090909 0.20
##  [3,] 16  0 34 950 1.0000000 0.32
##  [4,] 18  1 32 949 0.9473684 0.36
##  [5,] 10  1 40 949 0.9090909 0.20
##  [6,] 10  0 40 950 1.0000000 0.20
##  [7,] 19  2 31 948 0.9047619 0.38
##  [8,] 19  1 31 949 0.9500000 0.38
##  [9,]  6  4 44 946 0.6000000 0.12
## [10,] 10  1 40 949 0.9090909 0.20
## 
## [[11]]
##       TP FP FN  TN      PREC  REC
##  [1,] 20  2 30 948 0.9090909 0.40
##  [2,] 19  1 31 949 0.9500000 0.38
##  [3,] 29  0 21 950 1.0000000 0.58
##  [4,] 25  1 25 949 0.9615385 0.50
##  [5,] 18  0 32 950 1.0000000 0.36
##  [6,] 20  0 30 950 1.0000000 0.40
##  [7,] 23  0 27 950 1.0000000 0.46
##  [8,]  5  7 45 943 0.4166667 0.10
##  [9,] 26  6 24 944 0.8125000 0.52
## [10,] 23  1 27 949 0.9583333 0.46
## 
## [[12]]
##       TP FP FN  TN      PREC  REC
##  [1,] 28  0 22 950 1.0000000 0.56
##  [2,] 21  0 29 950 1.0000000 0.42
##  [3,] 31  1 19 949 0.9687500 0.62
##  [4,] 25  0 25 950 1.0000000 0.50
##  [5,] 24  0 26 950 1.0000000 0.48
##  [6,] 24  0 26 950 1.0000000 0.48
##  [7,] 29  0 21 950 1.0000000 0.58
##  [8,] 27  2 23 948 0.9310345 0.54
##  [9,] 32  3 18 947 0.9142857 0.64
## [10,] 23  0 27 950 1.0000000 0.46
alcres2 <- do.call(rbind,lapply(alcres,colMeans))

alcres2
##         TP  FP   FN    TN      PREC   REC
##  [1,]  0.0 0.0 50.0 950.0       NaN 0.000
##  [2,]  0.2 0.1 49.8 949.9       NaN 0.004
##  [3,]  0.4 0.2 49.6 949.8       NaN 0.008
##  [4,]  0.3 0.5 49.7 949.5       NaN 0.006
##  [5,]  2.1 0.8 47.9 949.2       NaN 0.042
##  [6,]  7.9 1.9 42.1 948.1       NaN 0.158
##  [7,]  3.3 0.3 46.7 949.7       NaN 0.066
##  [8,]  9.4 1.8 40.6 948.2 0.8071688 0.188
##  [9,] 18.6 1.3 31.4 948.7 0.9459695 0.372
## [10,] 13.1 1.1 36.9 948.9 0.9129403 0.262
## [11,] 20.8 1.8 29.2 948.2 0.9008129 0.416
## [12,] 26.4 0.6 23.6 949.4 0.9814070 0.528
alcres3p <- do.call(rbind,lapply(groupsizes, function (g) { alcres2[params$groupsizes==g,"PREC"] }))
colnames(alcres3p) <- deltas
rownames(alcres3p) <- groupsizes
alcres3p
##    0.1 0.2       0.3       0.5
## 3  NaN NaN       NaN 0.9129403
## 6  NaN NaN 0.8071688 0.9008129
## 12 NaN NaN 0.9459695 0.9814070
alcres3r <- do.call(rbind,lapply(groupsizes, function (g) { alcres2[params$groupsizes==g,"REC"] }))
colnames(alcres3r) <- deltas
rownames(alcres3r) <- groupsizes
alcres3r
##      0.1   0.2   0.3   0.5
## 3  0.000 0.006 0.066 0.262
## 6  0.004 0.042 0.188 0.416
## 12 0.008 0.158 0.372 0.528
F1(alcres3p,alcres3r)
##    0.1 0.2       0.3       0.5
## 3  NaN NaN       NaN 0.4071532
## 6  NaN NaN 0.3049688 0.5691593
## 12 NaN NaN 0.5340042 0.6866046

nonparametric self-contained

nalres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simnal(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

nalres
## [[1]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 21 652 29 298 0.03120357 0.42
##  [2,] 25 850 25 100 0.02857143 0.50
##  [3,] 19 704 31 246 0.02627939 0.38
##  [4,] 15 448 35 502 0.03239741 0.30
##  [5,] 20 818 30 132 0.02386635 0.40
##  [6,]  0   0 50 950        NaN 0.00
##  [7,] 20 647 30 303 0.02998501 0.40
##  [8,] 24 870 26  80 0.02684564 0.48
##  [9,] 24 959 26  -9 0.02441506 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[2]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 942 25   8 0.02585315 0.50
##  [2,] 16  88 34 862 0.15384615 0.32
##  [3,] 24 768 26 182 0.03030303 0.48
##  [4,] 22 849 28 101 0.02525832 0.44
##  [5,] 23 814 27 136 0.02747909 0.46
##  [6,]  4   7 46 943 0.36363636 0.08
##  [7,] 23 836 27 114 0.02677532 0.46
##  [8,] 25 944 25   6 0.02579979 0.50
##  [9,] 24 892 26  58 0.02620087 0.48
## [10,] 17 631 33 319 0.02623457 0.34
## 
## [[3]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 857 26  93 0.02724177 0.48
##  [2,] 25 780 25 170 0.03105590 0.50
##  [3,] 25 890 25  60 0.02732240 0.50
##  [4,]  0   0 50 950        NaN 0.00
##  [5,] 19 375 31 575 0.04822335 0.38
##  [6,] 22 562 28 388 0.03767123 0.44
##  [7,] 22 760 28 190 0.02813299 0.44
##  [8,] 24 844 26 106 0.02764977 0.48
##  [9,] 24 886 26  64 0.02637363 0.48
## [10,] 20 389 30 561 0.04889976 0.40
## 
## [[4]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 21 638 29 312 0.03186646 0.42
##  [2,] 25 846 25 104 0.02870264 0.50
##  [3,] 19 692 31 258 0.02672293 0.38
##  [4,] 19 434 31 516 0.04194260 0.38
##  [5,] 21 808 29 142 0.02533172 0.42
##  [6,]  1   0 49 950 1.00000000 0.02
##  [7,] 22 626 28 324 0.03395062 0.44
##  [8,] 24 857 26  93 0.02724177 0.48
##  [9,] 24 958 26  -8 0.02443992 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[5]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 940 25  10 0.02590674 0.50
##  [2,] 19  93 31 857 0.16964286 0.38
##  [3,] 24 741 26 209 0.03137255 0.48
##  [4,] 22 834 28 116 0.02570093 0.44
##  [5,] 25 797 25 153 0.03041363 0.50
##  [6,]  6   7 44 943 0.46153846 0.12
##  [7,] 23 825 27 125 0.02712264 0.46
##  [8,] 25 941 25   9 0.02587992 0.50
##  [9,] 24 889 26  61 0.02628697 0.48
## [10,] 19 612 31 338 0.03011094 0.38
## 
## [[6]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 832 26 118 0.02803738 0.48
##  [2,] 25 753 25 197 0.03213368 0.50
##  [3,] 25 873 25  77 0.02783964 0.50
##  [4,]  8   4 42 946 0.66666667 0.16
##  [5,] 21 355 29 595 0.05585106 0.42
##  [6,] 25 554 25 396 0.04317789 0.50
##  [7,] 23 737 27 213 0.03026316 0.46
##  [8,] 25 825 25 125 0.02941176 0.50
##  [9,] 24 869 26  81 0.02687570 0.48
## [10,] 21 381 29 569 0.05223881 0.42
## 
## [[7]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 23 629 27 321 0.03527607 0.46
##  [2,] 25 835 25 115 0.02906977 0.50
##  [3,] 20 688 30 262 0.02824859 0.40
##  [4,] 22 424 28 526 0.04932735 0.44
##  [5,] 21 798 29 152 0.02564103 0.42
##  [6,]  4   1 46 949 0.80000000 0.08
##  [7,] 23 613 27 337 0.03616352 0.46
##  [8,] 25 841 25 109 0.02886836 0.50
##  [9,] 24 954 26  -4 0.02453988 0.48
## [10,]  0   0 50 950        NaN 0.00
## 
## [[8]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 929 25  21 0.02620545 0.50
##  [2,] 22  91 28 859 0.19469027 0.44
##  [3,] 26 732 24 218 0.03430079 0.52
##  [4,] 22 819 28 131 0.02615933 0.44
##  [5,] 25 781 25 169 0.03101737 0.50
##  [6,] 10   8 40 942 0.55555556 0.20
##  [7,] 23 813 27 137 0.02751196 0.46
##  [8,] 25 940 25  10 0.02590674 0.50
##  [9,] 24 878 26  72 0.02660754 0.48
## [10,] 22 600 28 350 0.03536977 0.44
## 
## [[9]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 24 828 26 122 0.02816901 0.48
##  [2,] 25 751 25 199 0.03221649 0.50
##  [3,] 25 859 25  91 0.02828054 0.50
##  [4,] 14   6 36 944 0.70000000 0.28
##  [5,] 25 336 25 614 0.06925208 0.50
##  [6,] 28 547 22 403 0.04869565 0.56
##  [7,] 25 728 25 222 0.03320053 0.50
##  [8,] 25 808 25 142 0.03001200 0.50
##  [9,] 24 856 26  94 0.02727273 0.48
## [10,] 21 368 29 582 0.05398458 0.42
## 
## [[10]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 23 610 27 340 0.03633491 0.46
##  [2,] 25 828 25 122 0.02930832 0.50
##  [3,] 22 680 28 270 0.03133903 0.44
##  [4,] 27 408 23 542 0.06206897 0.54
##  [5,] 23 785 27 165 0.02846535 0.46
##  [6,] 10   1 40 949 0.90909091 0.20
##  [7,] 25 594 25 356 0.04038772 0.50
##  [8,] 25 828 25 122 0.02930832 0.50
##  [9,] 24 948 26   2 0.02469136 0.48
## [10,] 10   2 40 948 0.83333333 0.20
## 
## [[11]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 913 25  37 0.02665245 0.50
##  [2,] 27 104 23 846 0.20610687 0.54
##  [3,] 28 723 22 227 0.03728362 0.56
##  [4,] 25 815 25 135 0.02976190 0.50
##  [5,] 27 770 23 180 0.03387704 0.54
##  [6,] 16  12 34 938 0.57142857 0.32
##  [7,] 23 805 27 145 0.02777778 0.46
##  [8,] 25 924 25  26 0.02634352 0.50
##  [9,] 24 867 26  83 0.02693603 0.48
## [10,] 24 591 26 359 0.03902439 0.48
## 
## [[12]]
##       TP  FP FN  TN       PREC  REC
##  [1,] 25 824 25 126 0.02944641 0.50
##  [2,] 27 747 23 203 0.03488372 0.54
##  [3,] 27 853 23  97 0.03068182 0.54
##  [4,] 19   9 31 941 0.67857143 0.38
##  [5,] 27 325 23 625 0.07670455 0.54
##  [6,] 32 536 18 414 0.05633803 0.64
##  [7,] 29 720 21 230 0.03871829 0.58
##  [8,] 26 797 24 153 0.03159174 0.52
##  [9,] 26 854 24  96 0.02954545 0.52
## [10,] 26 361 24 589 0.06718346 0.52
nalres2 <- do.call(rbind,lapply(nalres,colMeans))

nalres2
##         TP    FP   FN    TN       PREC   REC
##  [1,] 16.8 594.8 33.2 355.2        NaN 0.336
##  [2,] 20.3 677.1 29.7 272.9 0.07313867 0.406
##  [3,] 20.5 634.3 29.5 315.7        NaN 0.410
##  [4,] 17.6 585.9 32.4 364.1        NaN 0.352
##  [5,] 21.2 667.9 28.8 282.1 0.08539756 0.424
##  [6,] 22.1 618.3 27.9 331.7 0.09924958 0.442
##  [7,] 18.7 578.3 31.3 371.7        NaN 0.374
##  [8,] 22.4 659.1 27.6 290.9 0.09833248 0.448
##  [9,] 23.6 608.7 26.4 341.3 0.10510836 0.472
## [10,] 21.4 568.4 28.6 381.6 0.20243282 0.428
## [11,] 24.4 652.4 25.6 297.6 0.10251922 0.488
## [12,] 26.4 602.6 23.6 347.4 0.10736649 0.528
nalres3p <- do.call(rbind,lapply(groupsizes, function (g) { nalres2[params$groupsizes==g,"PREC"] }))
colnames(nalres3p) <- deltas
rownames(nalres3p) <- groupsizes
nalres3p
##           0.1        0.2        0.3       0.5
## 3         NaN        NaN        NaN 0.2024328
## 6  0.07313867 0.08539756 0.09833248 0.1025192
## 12        NaN 0.09924958 0.10510836 0.1073665
nalres3r <- do.call(rbind,lapply(groupsizes, function (g) { nalres2[params$groupsizes==g,"REC"] }))
colnames(nalres3r) <- deltas
rownames(nalres3r) <- groupsizes
nalres3r
##      0.1   0.2   0.3   0.5
## 3  0.336 0.352 0.374 0.428
## 6  0.406 0.424 0.448 0.488
## 12 0.410 0.442 0.472 0.528
F1(nalres3p,nalres3r)
##          0.1       0.2       0.3       0.5
## 3        NaN       NaN       NaN 0.2748627
## 6  0.1239487 0.1421623 0.1612679 0.1694420
## 12       NaN 0.1621001 0.1719301 0.1784466

nonparametric competitive

nalcres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simnalc(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

nalcres
## [[1]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  1 50 949    0   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[2]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  2  0 48 950    1 0.04
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  1  0 49 950    1 0.02
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[3]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  2  0 48 950 1.00 0.04
##  [3,]  0  2 50 948 0.00 0.00
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  1  0 49 950 1.00 0.02
##  [6,]  1  0 49 950 1.00 0.02
##  [7,]  3  1 47 949 0.75 0.06
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[4]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  4  0 46 950  1.0 0.08
##  [4,]  2  0 48 950  1.0 0.04
##  [5,]  2  0 48 950  1.0 0.04
##  [6,]  1  1 49 949  0.5 0.02
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  3  7 47 943  0.3 0.06
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[5]]
##       TP FP FN  TN      PREC  REC
##  [1,]  1  0 49 950 1.0000000 0.02
##  [2,]  7  0 43 950 1.0000000 0.14
##  [3,]  9  3 41 947 0.7500000 0.18
##  [4,]  4  4 46 946 0.5000000 0.08
##  [5,]  3  1 47 949 0.7500000 0.06
##  [6,]  7  0 43 950 1.0000000 0.14
##  [7,]  2  1 48 949 0.6666667 0.04
##  [8,]  1  4 49 946 0.2000000 0.02
##  [9,]  0  1 50 949 0.0000000 0.00
## [10,]  4  0 46 950 1.0000000 0.08
## 
## [[6]]
##       TP FP FN  TN      PREC  REC
##  [1,] 16  3 34 947 0.8421053 0.32
##  [2,]  7  0 43 950 1.0000000 0.14
##  [3,] 14  8 36 942 0.6363636 0.28
##  [4,] 10  0 40 950 1.0000000 0.20
##  [5,] 15  0 35 950 1.0000000 0.30
##  [6,] 13  2 37 948 0.8666667 0.26
##  [7,] 18  1 32 949 0.9473684 0.36
##  [8,]  2  3 48 947 0.4000000 0.04
##  [9,]  9  7 41 943 0.5625000 0.18
## [10,]  9  0 41 950 1.0000000 0.18
## 
## [[7]]
##       TP FP FN  TN      PREC  REC
##  [1,]  6  0 44 950 1.0000000 0.12
##  [2,]  2  0 48 950 1.0000000 0.04
##  [3,]  7  0 43 950 1.0000000 0.14
##  [4,]  9  0 41 950 1.0000000 0.18
##  [5,]  7  1 43 949 0.8750000 0.14
##  [6,]  3  1 47 949 0.7500000 0.06
##  [7,] 11  0 39 950 1.0000000 0.22
##  [8,]  9  4 41 946 0.6923077 0.18
##  [9,]  0  0 50 950       NaN 0.00
## [10,]  1  0 49 950 1.0000000 0.02
## 
## [[8]]
##       TP FP FN  TN      PREC  REC
##  [1,]  8  2 42 948 0.8000000 0.16
##  [2,] 16  0 34 950 1.0000000 0.32
##  [3,] 26  0 24 950 1.0000000 0.52
##  [4,] 14  1 36 949 0.9333333 0.28
##  [5,] 14  1 36 949 0.9333333 0.28
##  [6,] 11  0 39 950 1.0000000 0.22
##  [7,]  8  1 42 949 0.8888889 0.16
##  [8,]  2  7 48 943 0.2222222 0.04
##  [9,]  8  7 42 943 0.5333333 0.16
## [10,] 10  0 40 950 1.0000000 0.20
## 
## [[9]]
##       TP FP FN  TN      PREC  REC
##  [1,] 26  0 24 950 1.0000000 0.52
##  [2,] 17  1 33 949 0.9444444 0.34
##  [3,] 25  3 25 947 0.8928571 0.50
##  [4,] 16  0 34 950 1.0000000 0.32
##  [5,] 20  1 30 949 0.9523810 0.40
##  [6,] 22  2 28 948 0.9166667 0.44
##  [7,] 25  1 25 949 0.9615385 0.50
##  [8,] 15  0 35 950 1.0000000 0.30
##  [9,] 21  5 29 945 0.8076923 0.42
## [10,] 19  1 31 949 0.9500000 0.38
## 
## [[10]]
##       TP FP FN  TN      PREC  REC
##  [1,] 21  0 29 950 1.0000000 0.42
##  [2,] 14  2 36 948 0.8750000 0.28
##  [3,] 16  0 34 950 1.0000000 0.32
##  [4,] 18  0 32 950 1.0000000 0.36
##  [5,] 12  0 38 950 1.0000000 0.24
##  [6,] 15  2 35 948 0.8823529 0.30
##  [7,] 21  0 29 950 1.0000000 0.42
##  [8,] 21  3 29 947 0.8750000 0.42
##  [9,]  6  8 44 942 0.4285714 0.12
## [10,] 17  1 33 949 0.9444444 0.34
## 
## [[11]]
##       TP FP FN  TN      PREC  REC
##  [1,] 22  4 28 946 0.8461538 0.44
##  [2,] 23  1 27 949 0.9583333 0.46
##  [3,] 33  1 17 949 0.9705882 0.66
##  [4,] 26  0 24 950 1.0000000 0.52
##  [5,] 20  1 30 949 0.9523810 0.40
##  [6,] 23  0 27 950 1.0000000 0.46
##  [7,] 19  0 31 950 1.0000000 0.38
##  [8,]  9 11 41 939 0.4500000 0.18
##  [9,] 28  7 22 943 0.8000000 0.56
## [10,] 21  1 29 949 0.9545455 0.42
## 
## [[12]]
##       TP FP FN  TN      PREC  REC
##  [1,] 29  0 21 950 1.0000000 0.58
##  [2,] 26  1 24 949 0.9629630 0.52
##  [3,] 29  2 21 948 0.9354839 0.58
##  [4,] 24  0 26 950 1.0000000 0.48
##  [5,] 23  1 27 949 0.9583333 0.46
##  [6,] 28  3 22 947 0.9032258 0.56
##  [7,] 28  1 22 949 0.9655172 0.56
##  [8,] 28  2 22 948 0.9333333 0.56
##  [9,] 34  2 16 948 0.9444444 0.68
## [10,] 28  1 22 949 0.9655172 0.56
nalcres2 <- do.call(rbind,lapply(nalcres,colMeans))

nalcres2
##         TP  FP   FN    TN      PREC   REC
##  [1,]  0.0 0.1 50.0 949.9       NaN 0.000
##  [2,]  0.3 0.0 49.7 950.0       NaN 0.006
##  [3,]  0.7 0.3 49.3 949.7       NaN 0.014
##  [4,]  1.2 0.8 48.8 949.2       NaN 0.024
##  [5,]  3.8 1.4 46.2 948.6 0.6866667 0.076
##  [6,] 11.3 2.4 38.7 947.6 0.8255004 0.226
##  [7,]  5.5 0.6 44.5 949.4       NaN 0.110
##  [8,] 11.7 1.9 38.3 948.1 0.8311111 0.234
##  [9,] 20.6 1.4 29.4 948.6 0.9425580 0.412
## [10,] 16.1 1.6 33.9 948.4 0.9005369 0.322
## [11,] 22.4 2.6 27.6 947.4 0.8932002 0.448
## [12,] 27.7 1.3 22.3 948.7 0.9568818 0.554
nalcres3p <- do.call(rbind,lapply(groupsizes, function (g) { nalcres2[params$groupsizes==g,"PREC"] }))
colnames(nalcres3p) <- deltas
rownames(nalcres3p) <- groupsizes
nalcres3p
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.9005369
## 6  NaN 0.6866667 0.8311111 0.8932002
## 12 NaN 0.8255004 0.9425580 0.9568818
nalcres3r <- do.call(rbind,lapply(groupsizes, function (g) { nalcres2[params$groupsizes==g,"REC"] }))
colnames(nalcres3r) <- deltas
rownames(nalcres3r) <- groupsizes
nalcres3r
##      0.1   0.2   0.3   0.5
## 3  0.000 0.024 0.110 0.322
## 6  0.006 0.076 0.234 0.448
## 12 0.014 0.226 0.412 0.554
F1(nalcres3p,nalcres3r)
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.4743789
## 6  NaN 0.1368531 0.3651826 0.5967099
## 12 NaN 0.3548512 0.5733736 0.7017260

AA sim

aares <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simaa(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

aares
## [[1]]
##       TP  FP FN  TN       PREC REC
##  [1,]  0   0 50 950        NaN 0.0
##  [2,]  0   0 50 950        NaN 0.0
##  [3,]  0   0 50 950        NaN 0.0
##  [4,]  0   0 50 950        NaN 0.0
##  [5,]  0   0 50 950        NaN 0.0
##  [6,]  0   0 50 950        NaN 0.0
##  [7,]  0   0 50 950        NaN 0.0
##  [8,]  0   0 50 950        NaN 0.0
##  [9,] 25 929 25  21 0.02620545 0.5
## [10,]  0   0 50 950        NaN 0.0
## 
## [[2]]
##       TP  FP FN  TN       PREC REC
##  [1,]  0   0 50 950        NaN 0.0
##  [2,]  0   0 50 950        NaN 0.0
##  [3,]  0   1 50 949 0.00000000 0.0
##  [4,]  0   1 50 949 0.00000000 0.0
##  [5,]  0   0 50 950        NaN 0.0
##  [6,]  0   0 50 950        NaN 0.0
##  [7,]  0   0 50 950        NaN 0.0
##  [8,] 25 896 25  54 0.02714441 0.5
##  [9,]  0   0 50 950        NaN 0.0
## [10,]  0   0 50 950        NaN 0.0
## 
## [[3]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  1 50 949    0   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[4]]
##       TP  FP FN  TN       PREC REC
##  [1,]  0   0 50 950        NaN 0.0
##  [2,]  0   0 50 950        NaN 0.0
##  [3,]  0   0 50 950        NaN 0.0
##  [4,]  0   0 50 950        NaN 0.0
##  [5,]  0   0 50 950        NaN 0.0
##  [6,]  0   0 50 950        NaN 0.0
##  [7,]  0   0 50 950        NaN 0.0
##  [8,]  0   0 50 950        NaN 0.0
##  [9,] 25 928 25  22 0.02623295 0.5
## [10,]  0   0 50 950        NaN 0.0
## 
## [[5]]
##       TP  FP FN  TN      PREC REC
##  [1,]  0   0 50 950       NaN 0.0
##  [2,]  0   0 50 950       NaN 0.0
##  [3,]  0   1 50 949 0.0000000 0.0
##  [4,]  0   1 50 949 0.0000000 0.0
##  [5,]  0   0 50 950       NaN 0.0
##  [6,]  0   0 50 950       NaN 0.0
##  [7,]  0   0 50 950       NaN 0.0
##  [8,] 25 882 25  68 0.0275634 0.5
##  [9,]  0   0 50 950       NaN 0.0
## [10,]  0   0 50 950       NaN 0.0
## 
## [[6]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  1 50 949    0   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[7]]
##       TP  FP FN  TN       PREC REC
##  [1,]  0   0 50 950        NaN 0.0
##  [2,]  0   0 50 950        NaN 0.0
##  [3,]  0   0 50 950        NaN 0.0
##  [4,]  0   0 50 950        NaN 0.0
##  [5,]  0   0 50 950        NaN 0.0
##  [6,]  0   0 50 950        NaN 0.0
##  [7,]  0   0 50 950        NaN 0.0
##  [8,]  0   0 50 950        NaN 0.0
##  [9,] 25 916 25  34 0.02656748 0.5
## [10,]  0   0 50 950        NaN 0.0
## 
## [[8]]
##       TP  FP FN  TN       PREC REC
##  [1,]  0   0 50 950        NaN 0.0
##  [2,]  0   0 50 950        NaN 0.0
##  [3,]  0   1 50 949 0.00000000 0.0
##  [4,]  0   1 50 949 0.00000000 0.0
##  [5,]  0   0 50 950        NaN 0.0
##  [6,]  0   0 50 950        NaN 0.0
##  [7,]  0   0 50 950        NaN 0.0
##  [8,] 25 873 25  77 0.02783964 0.5
##  [9,]  0   0 50 950        NaN 0.0
## [10,]  0   0 50 950        NaN 0.0
## 
## [[9]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  1 50 949    0   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  0 50 950  NaN   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[10]]
##       TP  FP FN  TN       PREC REC
##  [1,]  0   0 50 950        NaN 0.0
##  [2,]  0   0 50 950        NaN 0.0
##  [3,]  0   0 50 950        NaN 0.0
##  [4,]  0   0 50 950        NaN 0.0
##  [5,]  0   0 50 950        NaN 0.0
##  [6,]  0   0 50 950        NaN 0.0
##  [7,]  0   0 50 950        NaN 0.0
##  [8,]  0   0 50 950        NaN 0.0
##  [9,] 25 904 25  46 0.02691066 0.5
## [10,]  0   0 50 950        NaN 0.0
## 
## [[11]]
##       TP  FP FN  TN       PREC  REC
##  [1,]  1   0 49 950 1.00000000 0.02
##  [2,]  0   0 50 950        NaN 0.00
##  [3,]  0   1 50 949 0.00000000 0.00
##  [4,]  0   1 50 949 0.00000000 0.00
##  [5,]  0   0 50 950        NaN 0.00
##  [6,]  0   0 50 950        NaN 0.00
##  [7,]  0   0 50 950        NaN 0.00
##  [8,] 25 866 25  84 0.02805836 0.50
##  [9,]  1   0 49 950 1.00000000 0.02
## [10,]  0   0 50 950        NaN 0.00
## 
## [[12]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  2  0 48 950  1.0 0.04
##  [3,]  1  1 49 949  0.5 0.02
##  [4,]  1  0 49 950  1.0 0.02
##  [5,]  2  0 48 950  1.0 0.04
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  1  0 49 950  1.0 0.02
##  [8,]  4  1 46 949  0.8 0.08
##  [9,]  1  0 49 950  1.0 0.02
## [10,]  2  0 48 950  1.0 0.04
aares2 <- do.call(rbind,lapply(aares,colMeans))

aares2
##        TP   FP   FN    TN PREC   REC
##  [1,] 2.5 92.9 47.5 857.1  NaN 0.050
##  [2,] 2.5 89.8 47.5 860.2  NaN 0.050
##  [3,] 0.0  0.1 50.0 949.9  NaN 0.000
##  [4,] 2.5 92.8 47.5 857.2  NaN 0.050
##  [5,] 2.5 88.4 47.5 861.6  NaN 0.050
##  [6,] 0.0  0.1 50.0 949.9  NaN 0.000
##  [7,] 2.5 91.6 47.5 858.4  NaN 0.050
##  [8,] 2.5 87.5 47.5 862.5  NaN 0.050
##  [9,] 0.0  0.1 50.0 949.9  NaN 0.000
## [10,] 2.5 90.4 47.5 859.6  NaN 0.050
## [11,] 2.7 86.8 47.3 863.2  NaN 0.054
## [12,] 1.4  0.2 48.6 949.8  NaN 0.028
aares3p <- do.call(rbind,lapply(groupsizes, function (g) { aares2[params$groupsizes==g,"PREC"] }))
colnames(aares3p) <- deltas
rownames(aares3p) <- groupsizes
aares3p
##    0.1 0.2 0.3 0.5
## 3  NaN NaN NaN NaN
## 6  NaN NaN NaN NaN
## 12 NaN NaN NaN NaN
aares3r <- do.call(rbind,lapply(groupsizes, function (g) { aares2[params$groupsizes==g,"REC"] }))
colnames(aares3r) <- deltas
rownames(aares3r) <- groupsizes
aares3r
##     0.1  0.2  0.3   0.5
## 3  0.05 0.05 0.05 0.050
## 6  0.05 0.05 0.05 0.054
## 12 0.00 0.00 0.00 0.028
F1(aares3p,aares3r)
##    0.1 0.2 0.3 0.5
## 3  NaN NaN NaN NaN
## 6  NaN NaN NaN NaN
## 12 NaN NaN NaN NaN

ALM competitive test

almres <- lapply(1:nrow(params) , function(j) {
  groupsize = params[j,1]
  delta = params[j,2]
  res <- mclapply(1:sims,function(i) {
    simalm(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
      groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
  },mc.cores=4)
  res <- do.call(rbind,res)
  return(res)
})

almres
## [[1]]
##       TP FP FN  TN PREC REC
##  [1,]  0  0 50 950  NaN   0
##  [2,]  0  0 50 950  NaN   0
##  [3,]  0  0 50 950  NaN   0
##  [4,]  0  0 50 950  NaN   0
##  [5,]  0  0 50 950  NaN   0
##  [6,]  0  1 50 949    0   0
##  [7,]  0  0 50 950  NaN   0
##  [8,]  0  0 50 950  NaN   0
##  [9,]  0  0 50 950  NaN   0
## [10,]  0  0 50 950  NaN   0
## 
## [[2]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  3  0 47 950    1 0.06
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  0  0 50 950  NaN 0.00
##  [6,]  0  0 50 950  NaN 0.00
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  1  0 49 950    1 0.02
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[3]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  2  0 48 950 1.00 0.04
##  [3,]  1  1 49 949 0.50 0.02
##  [4,]  0  0 50 950  NaN 0.00
##  [5,]  1  0 49 950 1.00 0.02
##  [6,]  3  1 47 949 0.75 0.06
##  [7,]  4  0 46 950 1.00 0.08
##  [8,]  0  0 50 950  NaN 0.00
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[4]]
##       TP FP FN  TN PREC  REC
##  [1,]  0  0 50 950  NaN 0.00
##  [2,]  0  0 50 950  NaN 0.00
##  [3,]  4  0 46 950  1.0 0.08
##  [4,]  2  0 48 950  1.0 0.04
##  [5,]  2  0 48 950  1.0 0.04
##  [6,]  1  1 49 949  0.5 0.02
##  [7,]  0  0 50 950  NaN 0.00
##  [8,]  9  1 41 949  0.9 0.18
##  [9,]  0  0 50 950  NaN 0.00
## [10,]  0  0 50 950  NaN 0.00
## 
## [[5]]
##       TP FP FN  TN PREC  REC
##  [1,]  1  0 49 950    1 0.02
##  [2,]  7  0 43 950    1 0.14
##  [3,] 12  0 38 950    1 0.24
##  [4,]  8  0 42 950    1 0.16
##  [5,]  4  0 46 950    1 0.08
##  [6,]  7  0 43 950    1 0.14
##  [7,]  3  0 47 950    1 0.06
##  [8,]  5  0 45 950    1 0.10
##  [9,]  1  0 49 950    1 0.02
## [10,]  4  0 46 950    1 0.08
## 
## [[6]]
##       TP FP FN  TN      PREC  REC
##  [1,] 19  0 31 950 1.0000000 0.38
##  [2,]  7  0 43 950 1.0000000 0.14
##  [3,] 20  2 30 948 0.9090909 0.40
##  [4,] 10  0 40 950 1.0000000 0.20
##  [5,] 15  0 35 950 1.0000000 0.30
##  [6,] 13  2 37 948 0.8666667 0.26
##  [7,] 18  1 32 949 0.9473684 0.36
##  [8,]  5  0 45 950 1.0000000 0.10
##  [9,] 15  2 35 948 0.8823529 0.30
## [10,]  9  0 41 950 1.0000000 0.18
## 
## [[7]]
##       TP FP FN  TN      PREC  REC
##  [1,]  6  0 44 950 1.0000000 0.12
##  [2,]  2  0 48 950 1.0000000 0.04
##  [3,]  7  0 43 950 1.0000000 0.14
##  [4,]  9  0 41 950 1.0000000 0.18
##  [5,]  8  0 42 950 1.0000000 0.16
##  [6,]  3  2 47 948 0.6000000 0.06
##  [7,] 11  0 39 950 1.0000000 0.22
##  [8,] 12  1 38 949 0.9230769 0.24
##  [9,]  0  0 50 950       NaN 0.00
## [10,]  1  0 49 950 1.0000000 0.02
## 
## [[8]]
##       TP FP FN  TN      PREC  REC
##  [1,] 10  0 40 950 1.0000000 0.20
##  [2,] 16  0 34 950 1.0000000 0.32
##  [3,] 26  0 24 950 1.0000000 0.52
##  [4,] 15  0 35 950 1.0000000 0.30
##  [5,] 14  1 36 949 0.9333333 0.28
##  [6,] 11  0 39 950 1.0000000 0.22
##  [7,]  9  0 41 950 1.0000000 0.18
##  [8,] 10  0 40 950 1.0000000 0.20
##  [9,] 15  1 35 949 0.9375000 0.30
## [10,] 11  0 39 950 1.0000000 0.22
## 
## [[9]]
##       TP FP FN  TN      PREC  REC
##  [1,] 26  0 24 950 1.0000000 0.52
##  [2,] 17  1 33 949 0.9444444 0.34
##  [3,] 26  2 24 948 0.9285714 0.52
##  [4,] 16  0 34 950 1.0000000 0.32
##  [5,] 20  1 30 949 0.9523810 0.40
##  [6,] 22  2 28 948 0.9166667 0.44
##  [7,] 25  1 25 949 0.9615385 0.50
##  [8,] 15  0 35 950 1.0000000 0.30
##  [9,] 24  2 26 948 0.9230769 0.48
## [10,] 19  1 31 949 0.9500000 0.38
## 
## [[10]]
##       TP FP FN  TN      PREC  REC
##  [1,] 21  0 29 950 1.0000000 0.42
##  [2,] 15  1 35 949 0.9375000 0.30
##  [3,] 17  0 33 950 1.0000000 0.34
##  [4,] 18  0 32 950 1.0000000 0.36
##  [5,] 12  0 38 950 1.0000000 0.24
##  [6,] 15  2 35 948 0.8823529 0.30
##  [7,] 21  0 29 950 1.0000000 0.42
##  [8,] 21  3 29 947 0.8750000 0.42
##  [9,] 13  1 37 949 0.9285714 0.26
## [10,] 17  1 33 949 0.9444444 0.34
## 
## [[11]]
##       TP FP FN  TN      PREC  REC
##  [1,] 26  0 24 950 1.0000000 0.52
##  [2,] 23  1 27 949 0.9583333 0.46
##  [3,] 33  1 17 949 0.9705882 0.66
##  [4,] 26  0 24 950 1.0000000 0.52
##  [5,] 20  1 30 949 0.9523810 0.40
##  [6,] 23  0 27 950 1.0000000 0.46
##  [7,] 19  0 31 950 1.0000000 0.38
##  [8,] 20  0 30 950 1.0000000 0.40
##  [9,] 34  1 16 949 0.9714286 0.68
## [10,] 21  1 29 949 0.9545455 0.42
## 
## [[12]]
##       TP FP FN  TN      PREC  REC
##  [1,] 29  0 21 950 1.0000000 0.58
##  [2,] 27  1 23 949 0.9642857 0.54
##  [3,] 32  2 18 948 0.9411765 0.64
##  [4,] 24  0 26 950 1.0000000 0.48
##  [5,] 23  1 27 949 0.9583333 0.46
##  [6,] 28  3 22 947 0.9032258 0.56
##  [7,] 28  1 22 949 0.9655172 0.56
##  [8,] 28  2 22 948 0.9333333 0.56
##  [9,] 35  2 15 948 0.9459459 0.70
## [10,] 28  1 22 949 0.9655172 0.56
almres2 <- do.call(rbind,lapply(almres,colMeans))

almres2
##         TP  FP   FN    TN      PREC   REC
##  [1,]  0.0 0.1 50.0 949.9       NaN 0.000
##  [2,]  0.4 0.0 49.6 950.0       NaN 0.008
##  [3,]  1.1 0.2 48.9 949.8       NaN 0.022
##  [4,]  1.8 0.2 48.2 949.8       NaN 0.036
##  [5,]  5.2 0.0 44.8 950.0 1.0000000 0.104
##  [6,] 13.1 0.7 36.9 949.3 0.9605479 0.262
##  [7,]  5.9 0.3 44.1 949.7       NaN 0.118
##  [8,] 13.7 0.2 36.3 949.8 0.9870833 0.274
##  [9,] 21.0 1.0 29.0 949.0 0.9576679 0.420
## [10,] 17.0 0.8 33.0 949.2 0.9567869 0.340
## [11,] 24.5 0.5 25.5 949.5 0.9807277 0.490
## [12,] 28.2 1.3 21.8 948.7 0.9577335 0.564
almres3p <- do.call(rbind,lapply(groupsizes, function (g) { almres2[params$groupsizes==g,"PREC"] }))
colnames(almres3p) <- deltas
rownames(almres3p) <- groupsizes
almres3p
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.9567869
## 6  NaN 1.0000000 0.9870833 0.9807277
## 12 NaN 0.9605479 0.9576679 0.9577335
almres3r <- do.call(rbind,lapply(groupsizes, function (g) { almres2[params$groupsizes==g,"REC"] }))
colnames(almres3r) <- deltas
rownames(almres3r) <- groupsizes
almres3r
##      0.1   0.2   0.3   0.5
## 3  0.000 0.036 0.118 0.340
## 6  0.008 0.104 0.274 0.490
## 12 0.022 0.262 0.420 0.564
F1(almres3p,almres3r)
##    0.1       0.2       0.3       0.5
## 3  NaN       NaN       NaN 0.5017132
## 6  NaN 0.1884058 0.4289341 0.6534950
## 12 NaN 0.4117034 0.5839151 0.7099294

Session information

save.image("GSE158422_simulate.Rdata")

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4                                   
##  [2] mitch_1.12.0                                       
##  [3] psych_2.3.9                                        
##  [4] org.Hs.eg.db_3.17.0                                
##  [5] AnnotationDbi_1.62.2                               
##  [6] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [7] missMethyl_1.34.0                                  
##  [8] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
##  [9] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 
## [10] minfi_1.46.0                                       
## [11] bumphunter_1.42.0                                  
## [12] locfit_1.5-9.8                                     
## [13] iterators_1.0.14                                   
## [14] foreach_1.5.2                                      
## [15] Biostrings_2.68.1                                  
## [16] XVector_0.40.0                                     
## [17] SummarizedExperiment_1.30.2                        
## [18] Biobase_2.60.0                                     
## [19] MatrixGenerics_1.12.3                              
## [20] matrixStats_1.0.0                                  
## [21] GenomicRanges_1.52.0                               
## [22] GenomeInfoDb_1.36.3                                
## [23] IRanges_2.34.1                                     
## [24] S4Vectors_0.38.2                                   
## [25] BiocGenerics_0.46.0                                
## [26] limma_3.56.2                                       
## [27] stringi_1.7.12                                     
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1             later_1.3.1              
##   [3] BiocIO_1.10.0             bitops_1.0-7             
##   [5] filelock_1.0.2            BiasedUrn_2.0.11         
##   [7] tibble_3.2.1              preprocessCore_1.62.1    
##   [9] XML_3.99-0.14             lifecycle_1.0.3          
##  [11] lattice_0.21-9            MASS_7.3-60              
##  [13] base64_2.0.1              scrime_1.3.5             
##  [15] magrittr_2.0.3            sass_0.4.7               
##  [17] rmarkdown_2.25            jquerylib_0.1.4          
##  [19] yaml_2.3.7                httpuv_1.6.11            
##  [21] doRNG_1.8.6               askpass_1.2.0            
##  [23] DBI_1.1.3                 RColorBrewer_1.1-3       
##  [25] abind_1.4-5               zlibbioc_1.46.0          
##  [27] rvest_1.0.3               quadprog_1.5-8           
##  [29] purrr_1.0.2               RCurl_1.98-1.12          
##  [31] rappdirs_0.3.3            GenomeInfoDbData_1.2.10  
##  [33] genefilter_1.82.1         annotate_1.78.0          
##  [35] svglite_2.1.1             DelayedMatrixStats_1.22.6
##  [37] codetools_0.2-19          DelayedArray_0.26.7      
##  [39] xml2_1.3.5                tidyselect_1.2.0         
##  [41] beanplot_1.3.1            BiocFileCache_2.8.0      
##  [43] webshot_0.5.5             illuminaio_0.42.0        
##  [45] GenomicAlignments_1.36.0  jsonlite_1.8.7           
##  [47] multtest_2.56.0           ellipsis_0.3.2           
##  [49] survival_3.5-7            systemfonts_1.0.4        
##  [51] tools_4.3.1               progress_1.2.2           
##  [53] Rcpp_1.0.11               glue_1.6.2               
##  [55] mnormt_2.1.1              gridExtra_2.3            
##  [57] xfun_0.40                 dplyr_1.1.3              
##  [59] HDF5Array_1.28.1          fastmap_1.1.1            
##  [61] GGally_2.1.2              rhdf5filters_1.12.1      
##  [63] fansi_1.0.4               openssl_2.1.1            
##  [65] caTools_1.18.2            digest_0.6.33            
##  [67] R6_2.5.1                  mime_0.12                
##  [69] colorspace_2.1-0          gtools_3.9.4             
##  [71] biomaRt_2.56.1            RSQLite_2.3.1            
##  [73] utf8_1.2.3                tidyr_1.3.0              
##  [75] generics_0.1.3            data.table_1.14.8        
##  [77] rtracklayer_1.60.1        prettyunits_1.2.0        
##  [79] httr_1.4.7                htmlwidgets_1.6.2        
##  [81] S4Arrays_1.0.6            pkgconfig_2.0.3          
##  [83] gtable_0.3.4              blob_1.2.4               
##  [85] siggenes_1.74.0           htmltools_0.5.6          
##  [87] echarts4r_0.4.5           scales_1.2.1             
##  [89] png_0.1-8                 knitr_1.44               
##  [91] rstudioapi_0.15.0         reshape2_1.4.4           
##  [93] tzdb_0.4.0                rjson_0.2.21             
##  [95] nlme_3.1-163              curl_5.0.2               
##  [97] cachem_1.0.8              rhdf5_2.44.0             
##  [99] stringr_1.5.0             KernSmooth_2.23-22       
## [101] restfulr_0.0.15           GEOquery_2.68.0          
## [103] pillar_1.9.0              grid_4.3.1               
## [105] reshape_0.8.9             vctrs_0.6.3              
## [107] gplots_3.1.3              promises_1.2.1           
## [109] dbplyr_2.3.4              xtable_1.8-4             
## [111] beeswarm_0.4.0            evaluate_0.22            
## [113] readr_2.1.4               GenomicFeatures_1.52.2   
## [115] cli_3.6.1                 compiler_4.3.1           
## [117] Rsamtools_2.16.0          rlang_1.1.1              
## [119] crayon_1.5.2              rngtools_1.5.2           
## [121] nor1mix_1.3-0             mclust_6.0.0             
## [123] plyr_1.8.8                viridisLite_0.4.2        
## [125] BiocParallel_1.34.2       munsell_0.5.0            
## [127] Matrix_1.6-1.1            hms_1.1.3                
## [129] sparseMatrixStats_1.12.2  bit64_4.0.5              
## [131] ggplot2_3.4.3             Rhdf5lib_1.22.1          
## [133] KEGGREST_1.40.1           statmod_1.5.0            
## [135] shiny_1.7.5               memoise_2.0.1            
## [137] bslib_0.5.1               bit_4.0.5

Notes

LA

  • simla: parametric self-contained

  • simlac: parametric competitive

  • simnla: nonparametric self-contained

  • simnlac: nonparametric competitive

AL

  • simal: parametric self-contained

  • simalc: parametric competitive

  • simnal: nonparametric self-contained

  • simnalc: nonparametric competitive