Incorporate changes
TODO: to incorporate changes to case samples.
Need to figure out what magnitude to change. Will refer to the
cancer/normal comparison.
Select genes and probes to alter.
seed=100
frac_genes=0.5
frac_probes=0.5
delta=1
nsamples=10
normal_mval <- mval[,(1:(ncol(mval)/2)*2)]
incorp_dm <- function(genesets,myann,mval,seed, frac_genes,frac_probes,
groupsize,delta=1,gene_catalog) {
# divide gene sets between hyper and hypomethylated
nset <- floor(length(genesets)/2)
set.seed(seed) ; gtup <-sample(genesets,nset)
set.seed(seed) ; gtdn <- sample(setdiff(genesets,gtup),nset)
gup <- unname(unlist(gtup))
gdn <- unname(unlist(gtdn))
# add extra 10% DM genes
gnon <- setdiff(gene_catalog,c(gup,gdn))
gextra <- round(length(gnon)*0.1)
set.seed(seed) ; gup <- c(gup,sample(gnon,gextra))
gnon <- setdiff(gene_catalog,c(gup,gdn))
set.seed(seed) ; gdn <- c(gdn,sample(gnon,gextra))
# make probe-gene vector
probe2gene <- strsplit(myann$UCSC_RefGene_Name,";")
names(probe2gene) <- rownames(myann)
probe2gene <- unlist(probe2gene)
# select probes hypermethylated
set.seed(seed) ; gup2 <- sample(gup,floor(length(gup)*frac_genes))
pup <- names(probe2gene[which(probe2gene %in% gup2)])
set.seed(seed) ; pup2 <- sample(pup,floor(length(pup)*frac_probes))
# select probes hypomethylated
set.seed(seed) ; gdn2 <- sample(gdn,floor(length(gdn)*frac_genes))
pdn <- names(probe2gene[which(probe2gene %in% gdn2)])
set.seed(seed) ; pdn2 <- sample(pdn,floor(length(pdn)*frac_probes))
# add 10% DM probes as well
probes <- rownames(myann)
pnon <- setdiff(probes,c(pup,pdn))
pextra <- round(length(pnon)*0.1)
set.seed(seed) ; pup <- c(pup,sample(pnon,pextra))
pnon <- setdiff(probes,c(pup,pdn))
set.seed(seed) ; pdn <- c(pdn,sample(pnon,pextra))
# divide samples between ctrl and case
ncols <- ncol(mval)
maxgroupsize=floor(ncols/2)
if ( groupsize > maxgroupsize ) { stop("groupsize cannot be larger than half the ncols of mval") }
set.seed(seed) ; ctrl <- sample(1:ncols,groupsize)
set.seed(seed) ; case <- sample(setdiff(1:ncols,ctrl),groupsize)
mval_ctrl <- mval[,ctrl]
mval_case <- mval[,case]
# incorporate altered signals - change by +1 or -1
mval_case[rownames(mval_case) %in% pup2,] <- mval_case[rownames(mval_case) %in% pup2,] + delta
mval_case[rownames(mval_case) %in% pdn2,] <- mval_case[rownames(mval_case) %in% pdn2,] - delta
mval2 <- cbind(mval_ctrl,mval_case)
result <- list("mval"=mval2,"probes up"=pup2,"probes down"=pdn2,
"genes up"=gup2,"genes down"=gdn2,
"genesets up"=gtup,"genesets down"=gtdn)
return(result)
}
GSAMETH function
# limma
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
dm <- topTable(fit.reduced,coef=ncol(design), number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
This is how to use the function
This could be complicated as it requires translation of symbols to
entrez IDs, but could be simplified if the entrez translation is done at
the gsameth step.
simgsa <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
pup3 <- rownames(subset(dm3,adj.P.Val<0.05 & logFC>0))
pdn3 <- rownames(subset(dm3,adj.P.Val<0.05 & logFC<0))
if ( length(pup3) < 250 ) { pup3 <- head(rownames(subset(dm3, logFC > 0)), 250) }
if ( length(pdn3) < 250 ) { pdn3 <- head(rownames(subset(dm3, logFC < 0)), 250) }
# convert gene sets to entrez
suppressWarnings(suppressMessages({ gene2entrez <- mapIds(org.Hs.eg.db, gene_catalog, 'ENTREZID', 'SYMBOL') }))
gsets_entrez <- lapply(gsets,function(gs) {
gs2 <- unique(gene2entrez[names(gene2entrez) %in% gs])
gs2 <- gs2[!is.na(gs2)]
return(gs2)
})
suppressWarnings(suppressMessages({
gsaup3 <- gsameth(sig.cpg=pup3, all.cpg=rownames(dm3), collection=gsets_entrez, array.type="EPIC")
gsadn3 <- gsameth(sig.cpg=pdn3, all.cpg=rownames(dm3), collection=gsets_entrez, array.type="EPIC")
}))
gsig_up3 <- rownames(subset(gsaup3,FDR<0.05))
gsig_dn3 <- rownames(subset(gsadn3,FDR<0.05))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(gsadn3)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
LA function
This process runs limma first and then aggregates the results before
doing an enrichment test.
# enrich parametric
ttenrich <- function(m,genesets,cores=1,testtype="selfcontained") {
res <- mclapply( 1:length(genesets), function(i) {
scores <- m[,1]
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( testtype == "selfcontained" ) { wt <- t.test(tstats) }
if ( testtype == "competitive" ) { wt <- t.test(tstats,scores) }
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
# enrich non-parametric
wtenrich <- function(m,genesets,cores=1,testtype="selfcontained") {
res <- mclapply( 1:length(genesets), function(i) {
scores <- m[,1]
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( testtype == "selfcontained" ) { wt <- wilcox.test(tstats) }
if ( testtype == "competitive" ) { wt <- wilcox.test(tstats,scores) }
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
# LA parametric competitive
simlac <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dd <- merge(dm3,gt,by.x=0,by.y="probe")
m1 <- aggregate(t ~ gene,dd,mean)
rownames(m1) <- m1$gene
m1$gene=NULL
lares1 <- ttenrich(m=m1,genesets=gsets,cores=2,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# LA parametric competitive top
simlactop <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dd <- merge(dm3,gt,by.x=0,by.y="probe")
m1 <- aggregate(t ~ gene,dd, function(x) {
if (abs(max(x)) > abs(min(x))) { max(x) } else { min(x) }
})
rownames(m1) <- m1$gene
m1$gene=NULL
lares1 <- ttenrich(m=m1,genesets=gsets,cores=2,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# LA nonparametric competitive
simnlac <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dd <- merge(dm3,gt,by.x=0,by.y="probe")
m1 <- aggregate(t ~ gene,dd,mean)
rownames(m1) <- m1$gene
m1$gene=NULL
lares1 <- wtenrich(m=m1,genesets=gsets,cores=2,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# LA competitive mitch
simlacm <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dd <- merge(dm3,gt,by.x=0,by.y="probe")
m1 <- aggregate(t ~ gene,dd,mean)
rownames(m1) <- m1$gene
m1$gene=NULL
lamres1 <- runmitch(m=m1,genesets=gsets,cores=2)
gsig_up3 <- rownames( subset( lamres1, p.adjustANOVA < 0.05 & s.dist > 0 ) )
gsig_dn3 <- rownames( subset( lamres1, p.adjustANOVA < 0.05 & s.dist < 0 ) )
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lamres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# LA rank competitive mitch
simlrm <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
# rank probes first
dm3$rank <- rank(dm3$t) - nrow(subset(dm3,t<0))
mm <- merge(dm3,gt,by.x=0,by.y="probe")
head(mm)
mma <-aggregate(mm$rank ~ gene,mm,mean)
rownames(mma) <- mma$gene
mma$gene = NULL
colnames(mma) <- "meanrank"
lrmres <- runmitch(m=mma,genesets=gsets,cores=2)
gsig_up3 <- rownames( subset( lrmres, p.adjustANOVA < 0.05 & s.dist > 0 ) )
gsig_dn3 <- rownames( subset( lrmres, p.adjustANOVA < 0.05 & s.dist < 0 ) )
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lrmres)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
AL: Aggregate limma
Functions for aggregate-limma-enrich approach.
# AL approach parametric competitive
simalc <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
# al pipeline
mm <- merge(mval2,gt,by.x=0,by.y="probe")
mm$Row.names = NULL
a <- aggregate(. ~ gene,mm,mean)
rownames(a) <- a$gene
a$gene=NULL
fit.reduced <- lmFit(a,d)
fit.reduced <- eBayes(fit.reduced)
al <- topTable(fit.reduced,coef=ncol(d), number = Inf)
m1 <- as.data.frame(al$t)
rownames(m1) <- rownames(al)
colnames(m1) <- "t"
alres1 <- ttenrich(m=m1,genesets=gsets,cores=2,testtype="competitive")
# summarise results
gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# AL approach nonparametric competitive
simnalc <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
# al pipeline
mm <- merge(mval2,gt,by.x=0,by.y="probe")
mm$Row.names = NULL
a <- aggregate(. ~ gene,mm,mean)
rownames(a) <- a$gene
a$gene=NULL
fit.reduced <- lmFit(a,d)
fit.reduced <- eBayes(fit.reduced)
al <- topTable(fit.reduced,coef=ncol(d), number = Inf)
m1 <- as.data.frame(al$t)
rownames(m1) <- rownames(al)
colnames(m1) <- "t"
alres1 <- wtenrich(m=m1,genesets=gsets,cores=2,testtype="competitive")
# summarise results
gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
Agg-limma-mitch function
This approach uses the aggregated mvals, limma and instead of a
1-sample t-test it uses mitch which is a competitive test and could give
more interpretable results.
runmitch <- function(m,genesets,cores=1) {
suppressMessages({ mres <- mitch_calc(m,genesets,minsetsize=5,cores=cores) })
mres <- mres$enrichment_result
rownames(mres) <- mres$set
mres$set=NULL
return(mres)
}
simalm <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
# alm pipeline
mm <- merge(mval2,gt,by.x=0,by.y="probe")
mm$Row.names = NULL
a <- aggregate(. ~ gene,mm,mean)
rownames(a) <- a$gene
a$gene=NULL
fit.reduced <- lmFit(a,d)
fit.reduced <- eBayes(fit.reduced)
al <- topTable(fit.reduced,coef=ncol(d), number = Inf)
m1 <- as.data.frame(al$t)
rownames(m1) <- rownames(al)
colnames(m1) <- "t"
almres1 <- runmitch(m=m1,genesets=gsets,cores=2)
# summarise results
gsig_up3 <- rownames( subset( almres1, p.adjustANOVA < 0.05 & s.dist > 0 ) )
gsig_dn3 <- rownames( subset( almres1, p.adjustANOVA < 0.05 & s.dist < 0 ) )
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(almres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
AA Aggregate-aggregate-limma functions
Use mean value works well here.
gsagg <- function(x,genesets,cores=1) {
meds <- mclapply(1:length(genesets), function(i) {
gs = genesets[[i]]
xx <- x[rownames(x) %in% gs,]
med <- apply(xx,2,mean)
},mc.cores=cores)
mymed <- do.call(rbind,meds)
rownames(mymed) <- names(genesets)
as.data.frame(mymed)
}
aalimma <- function(agag,design) {
fit.reduced <- lmFit(agag,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
return(dmagg)
}
aal <- function(mval,myann,genesets,design,cores=1) {
medf <- chragg(mval,myann,cores=cores)
agag <- gsagg(x=medf,genesets=genesets,cores=cores)
aalres <- aalimma(agag=agag,design=design)
return(aalres)
}
simaa <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(gt$gene)
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta,
gene_catalog=gene_catalog)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
mm <- merge(mval2,gt,by.x=0,by.y="probe")
mm$Row.names = NULL
a <- aggregate(. ~ gene,mm,mean)
rownames(a) <- a$gene
a$gene=NULL
mystack <- stack(gsets)
mmm <- merge(a,mystack,by.x=0,by.y="values")
mmm$Row.names=NULL
aa <- aggregate(. ~ ind,mmm,mean)
rownames(aa) <- aa$ind
aa$ind=NULL
fit.reduced <- lmFit(aa,d)
fit.reduced <- eBayes(fit.reduced)
aares1 <- topTable(fit.reduced,coef=ncol(d), number = Inf)
# summarise results
gsig_up3 <- rownames(subset(aares1, adj.P.Val < 0.05 & logFC > 0))
gsig_dn3 <- rownames(subset(aares1, adj.P.Val < 0.05 & logFC < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(aares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
F1 <- function(x,y) {
( 2 * x * y ) / ( x + y )
}
Run analyses
Set assumptions.
num_dm_sets=50
sims=20
groupsizes=c(3,5,8,12)
deltas=c(0.1,0.2,0.3,0.4,0.5)
#groupsizes=5
#deltas=0.4
params <- expand.grid("groupsizes"=groupsizes,"deltas"=deltas)
params %>% kbl(caption="Parameters to test") %>% kable_paper("hover", full_width = F)
Parameters to test
groupsizes
|
deltas
|
3
|
0.1
|
5
|
0.1
|
8
|
0.1
|
12
|
0.1
|
3
|
0.2
|
5
|
0.2
|
8
|
0.2
|
12
|
0.2
|
3
|
0.3
|
5
|
0.3
|
8
|
0.3
|
12
|
0.3
|
3
|
0.4
|
5
|
0.4
|
8
|
0.4
|
12
|
0.4
|
3
|
0.5
|
5
|
0.5
|
8
|
0.5
|
12
|
0.5
|
GSA meth sim
Cannot be run in multicore due to fragility of AnnotationDbi SQLite
objects.
gres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- lapply(1:sims,function(i) {
simgsa(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5,
frac_probes=0.5, groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
})
res <- do.call(rbind,res)
return(res)
})
gres2 <- do.call(rbind,lapply(gres,colMeans))
gres2[,"PREC"] <- gres2[,"TP"] / ( gres2[,"TP"] + gres2[,"FP"] )
gres2 %>% kbl(caption="GSAmeth results") %>% kable_paper("hover", full_width = F)
GSAmeth results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.50
|
0.00
|
49.50
|
950.00
|
1.0000000
|
0.010
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.15
|
0.05
|
49.85
|
949.95
|
0.7500000
|
0.003
|
0.45
|
0.00
|
49.55
|
950.00
|
1.0000000
|
0.009
|
1.30
|
0.10
|
48.70
|
949.90
|
0.9285714
|
0.026
|
0.10
|
0.00
|
49.90
|
950.00
|
1.0000000
|
0.002
|
0.35
|
0.05
|
49.65
|
949.95
|
0.8750000
|
0.007
|
2.55
|
0.00
|
47.45
|
950.00
|
1.0000000
|
0.051
|
19.00
|
0.45
|
31.00
|
949.55
|
0.9768638
|
0.380
|
0.20
|
0.05
|
49.80
|
949.95
|
0.8000000
|
0.004
|
0.60
|
0.05
|
49.40
|
949.95
|
0.9230769
|
0.012
|
15.70
|
0.50
|
34.30
|
949.50
|
0.9691358
|
0.314
|
25.15
|
0.70
|
24.85
|
949.30
|
0.9729207
|
0.503
|
gres3p <- do.call(rbind,lapply(groupsizes, function (g) { gres2[params$groupsizes==g,"PREC"] }))
colnames(gres3p) <- deltas
rownames(gres3p) <- groupsizes
gres3p %>% kbl(caption="GSAmeth precision") %>% kable_paper("hover", full_width = F)
GSAmeth precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
NaN
|
1.0000000
|
1.0000000
|
0.8000000
|
5
|
NaN
|
NaN
|
0.7500000
|
0.8750000
|
0.9230769
|
8
|
NaN
|
NaN
|
1.0000000
|
1.0000000
|
0.9691358
|
12
|
NaN
|
1
|
0.9285714
|
0.9768638
|
0.9729207
|
gres3r <- do.call(rbind,lapply(groupsizes, function (g) { gres2[params$groupsizes==g,"REC"] }))
colnames(gres3r) <- deltas
rownames(gres3r) <- groupsizes
gres3r %>% kbl(caption="GSAmeth recall") %>% kable_paper("hover", full_width = F)
GSAmeth recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0.00
|
0.001
|
0.002
|
0.004
|
5
|
0
|
0.00
|
0.003
|
0.007
|
0.012
|
8
|
0
|
0.00
|
0.009
|
0.051
|
0.314
|
12
|
0
|
0.01
|
0.026
|
0.380
|
0.503
|
F1(gres3p,gres3r) %>% kbl(caption="GSAmeth F1") %>% kable_paper("hover", full_width = F)
GSAmeth F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
NaN
|
0.0019980
|
0.0039920
|
0.0079602
|
5
|
NaN
|
NaN
|
0.0059761
|
0.0138889
|
0.0236920
|
8
|
NaN
|
NaN
|
0.0178394
|
0.0970504
|
0.4743202
|
12
|
NaN
|
0.019802
|
0.0505837
|
0.5471562
|
0.6631510
|
LA sim
parametric competitive
lacres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simlac(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
lacres2 <- do.call(rbind,lapply(lacres,colMeans))
lacres2[,"PREC"] <- lacres2[,"TP"] / ( lacres2[,"TP"] + lacres2[,"FP"] )
lacres2 %>% kbl(caption="LA parametric results") %>% kable_paper("hover", full_width = F)
LA parametric results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.05
|
0.05
|
49.95
|
949.95
|
0.5
|
0.001
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.15
|
0.00
|
49.85
|
950.00
|
1.0
|
0.003
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.10
|
0.00
|
49.90
|
950.00
|
1.0
|
0.002
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.10
|
0.00
|
49.90
|
950.00
|
1.0
|
0.002
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0
|
0.001
|
lacres3p <- do.call(rbind,lapply(groupsizes, function (g) { lacres2[params$groupsizes==g,"PREC"] }))
colnames(lacres3p) <- deltas
rownames(lacres3p) <- groupsizes
lacres3p %>% kbl(caption="LA parametric precision") %>% kable_paper("hover", full_width = F)
LA parametric precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
NaN
|
NaN
|
1
|
1
|
5
|
NaN
|
1
|
1
|
1
|
1
|
8
|
NaN
|
NaN
|
1
|
1
|
1
|
12
|
0.5
|
1
|
1
|
1
|
1
|
lacres3r <- do.call(rbind,lapply(groupsizes, function (g) { lacres2[params$groupsizes==g,"REC"] }))
colnames(lacres3r) <- deltas
rownames(lacres3r) <- groupsizes
lacres3r %>% kbl(caption="LA parametric recall") %>% kable_paper("hover", full_width = F)
LA parametric recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.000
|
0.000
|
0.000
|
0.001
|
0.001
|
5
|
0.000
|
0.001
|
0.003
|
0.002
|
0.002
|
8
|
0.000
|
0.000
|
0.001
|
0.001
|
0.001
|
12
|
0.001
|
0.001
|
0.001
|
0.001
|
0.001
|
F1(lacres3p,lacres3r) %>% kbl(caption="LA parametric F1") %>% kable_paper("hover", full_width = F)
LA parametric F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
NaN
|
NaN
|
0.001998
|
0.001998
|
5
|
NaN
|
0.001998
|
0.0059821
|
0.003992
|
0.003992
|
8
|
NaN
|
NaN
|
0.0019980
|
0.001998
|
0.001998
|
12
|
0.001996
|
0.001998
|
0.0019980
|
0.001998
|
0.001998
|
parametric competitive with “top” aggregation
Too many false positives.
lactopres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simlactop(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
lactopres2 <- do.call(rbind,lapply(lactopres,colMeans))
lactopres2[,"PREC"] <- lactopres2[,"TP"] / ( lactopres2[,"TP"] + lactopres2[,"FP"] )
lactopres2 %>% kbl(caption="LA parametric results (top agg)") %>% kable_paper("hover", full_width = F)
LA parametric results (top agg)
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.10
|
0.85
|
49.90
|
949.15
|
0.1052632
|
0.002
|
0.20
|
2.40
|
49.80
|
947.60
|
0.0769231
|
0.004
|
0.25
|
3.20
|
49.75
|
946.80
|
0.0724638
|
0.005
|
0.25
|
1.80
|
49.75
|
948.20
|
0.1219512
|
0.005
|
0.15
|
1.10
|
49.85
|
948.90
|
0.1200000
|
0.003
|
0.45
|
2.25
|
49.55
|
947.75
|
0.1666667
|
0.009
|
0.70
|
3.00
|
49.30
|
947.00
|
0.1891892
|
0.014
|
0.75
|
1.80
|
49.25
|
948.20
|
0.2941176
|
0.015
|
0.35
|
1.10
|
49.65
|
948.90
|
0.2413793
|
0.007
|
0.50
|
1.90
|
49.50
|
948.10
|
0.2083333
|
0.010
|
1.20
|
2.90
|
48.80
|
947.10
|
0.2926829
|
0.024
|
1.45
|
1.65
|
48.55
|
948.35
|
0.4677419
|
0.029
|
0.65
|
1.05
|
49.35
|
948.95
|
0.3823529
|
0.013
|
0.75
|
1.75
|
49.25
|
948.25
|
0.3000000
|
0.015
|
1.65
|
2.65
|
48.35
|
947.35
|
0.3837209
|
0.033
|
1.40
|
1.30
|
48.60
|
948.70
|
0.5185185
|
0.028
|
0.80
|
0.80
|
49.20
|
949.20
|
0.5000000
|
0.016
|
0.70
|
1.10
|
49.30
|
948.90
|
0.3888889
|
0.014
|
1.60
|
2.30
|
48.40
|
947.70
|
0.4102564
|
0.032
|
1.45
|
1.00
|
48.55
|
949.00
|
0.5918367
|
0.029
|
lactopres3p <- do.call(rbind,lapply(groupsizes, function (g) { lactopres2[params$groupsizes==g,"PREC"] }))
colnames(lactopres3p) <- deltas
rownames(lactopres3p) <- groupsizes
lactopres3p %>% kbl(caption="LA parametric precision (top agg)") %>% kable_paper("hover", full_width = F)
LA parametric precision (top agg)
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.1052632
|
0.1200000
|
0.2413793
|
0.3823529
|
0.5000000
|
5
|
0.0769231
|
0.1666667
|
0.2083333
|
0.3000000
|
0.3888889
|
8
|
0.0724638
|
0.1891892
|
0.2926829
|
0.3837209
|
0.4102564
|
12
|
0.1219512
|
0.2941176
|
0.4677419
|
0.5185185
|
0.5918367
|
lactopres3r <- do.call(rbind,lapply(groupsizes, function (g) { lactopres2[params$groupsizes==g,"REC"] }))
colnames(lactopres3r) <- deltas
rownames(lactopres3r) <- groupsizes
lactopres3r %>% kbl(caption="LA parametric recall (top agg)") %>% kable_paper("hover", full_width = F)
LA parametric recall (top agg)
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.002
|
0.003
|
0.007
|
0.013
|
0.016
|
5
|
0.004
|
0.009
|
0.010
|
0.015
|
0.014
|
8
|
0.005
|
0.014
|
0.024
|
0.033
|
0.032
|
12
|
0.005
|
0.015
|
0.029
|
0.028
|
0.029
|
F1(lactopres3p,lactopres3r) %>% kbl(caption="LA parametric F1 (top agg)") %>% kable_paper("hover", full_width = F)
LA parametric F1 (top agg)
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.0039254
|
0.0058537
|
0.0136054
|
0.0251451
|
0.0310078
|
5
|
0.0076046
|
0.0170778
|
0.0190840
|
0.0285714
|
0.0270270
|
8
|
0.0093545
|
0.0260708
|
0.0443623
|
0.0607735
|
0.0593692
|
12
|
0.0096061
|
0.0285442
|
0.0546139
|
0.0531309
|
0.0552908
|
nonparametric competitive
nlacres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simnlac(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
nlacres2 <- do.call(rbind,lapply(nlacres,colMeans))
nlacres2[,"PREC"] <- nlacres2[,"TP"] / ( nlacres2[,"TP"] + nlacres2[,"FP"] )
nlacres2 %>% kbl(caption="LA nonparametric results") %>% kable_paper("hover", full_width = F)
LA nonparametric results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.25
|
0.00
|
49.75
|
950.00
|
1.0000000
|
0.005
|
0.35
|
0.05
|
49.65
|
949.95
|
0.8750000
|
0.007
|
0.60
|
0.05
|
49.40
|
949.95
|
0.9230769
|
0.012
|
0.15
|
0.00
|
49.85
|
950.00
|
1.0000000
|
0.003
|
0.50
|
0.00
|
49.50
|
950.00
|
1.0000000
|
0.010
|
1.15
|
0.05
|
48.85
|
949.95
|
0.9583333
|
0.023
|
1.70
|
0.05
|
48.30
|
949.95
|
0.9714286
|
0.034
|
0.30
|
0.00
|
49.70
|
950.00
|
1.0000000
|
0.006
|
0.65
|
0.00
|
49.35
|
950.00
|
1.0000000
|
0.013
|
2.05
|
0.10
|
47.95
|
949.90
|
0.9534884
|
0.041
|
2.60
|
0.10
|
47.40
|
949.90
|
0.9629630
|
0.052
|
0.90
|
0.00
|
49.10
|
950.00
|
1.0000000
|
0.018
|
1.30
|
0.00
|
48.70
|
950.00
|
1.0000000
|
0.026
|
3.05
|
0.15
|
46.95
|
949.85
|
0.9531250
|
0.061
|
3.45
|
0.15
|
46.55
|
949.85
|
0.9583333
|
0.069
|
nlacres3p <- do.call(rbind,lapply(groupsizes, function (g) { nlacres2[params$groupsizes==g,"PREC"] }))
colnames(nlacres3p) <- deltas
rownames(nlacres3p) <- groupsizes
nlacres3p %>% kbl(caption="LA nonparametric precision") %>% kable_paper("hover", full_width = F)
LA nonparametric precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
1.0000000
|
1.0000000
|
1.0000000
|
1.0000000
|
5
|
NaN
|
1.0000000
|
1.0000000
|
1.0000000
|
1.0000000
|
8
|
0
|
0.8750000
|
0.9583333
|
0.9534884
|
0.9531250
|
12
|
0
|
0.9230769
|
0.9714286
|
0.9629630
|
0.9583333
|
nlacres3r <- do.call(rbind,lapply(groupsizes, function (g) { nlacres2[params$groupsizes==g,"REC"] }))
colnames(nlacres3r) <- deltas
rownames(nlacres3r) <- groupsizes
nlacres3r %>% kbl(caption="LA nonparametric recall") %>% kable_paper("hover", full_width = F)
LA nonparametric recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0.001
|
0.003
|
0.006
|
0.018
|
5
|
0
|
0.005
|
0.010
|
0.013
|
0.026
|
8
|
0
|
0.007
|
0.023
|
0.041
|
0.061
|
12
|
0
|
0.012
|
0.034
|
0.052
|
0.069
|
F1(nlacres3p,nlacres3r) %>% kbl(caption="LA nonparametric F1") %>% kable_paper("hover", full_width = F)
LA nonparametric F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
0.0019980
|
0.0059821
|
0.0119284
|
0.0353635
|
5
|
NaN
|
0.0099502
|
0.0198020
|
0.0256663
|
0.0506823
|
8
|
NaN
|
0.0138889
|
0.0449219
|
0.0786194
|
0.1146617
|
12
|
NaN
|
0.0236920
|
0.0657005
|
0.0986717
|
0.1287313
|
LA competitive mitch
lacmres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simlacm(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
lacmres2 <- do.call(rbind,lapply(lacmres,colMeans))
lacmres2[,"PREC"] <- lacmres2[,"TP"] / ( lacmres2[,"TP"] + lacmres2[,"FP"] )
lacmres2 %>% kbl(caption="LA mitch results") %>% kable_paper("hover", full_width = F)
LA mitch results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.25
|
0.00
|
49.75
|
950.00
|
1.0000000
|
0.005
|
0.35
|
0.05
|
49.65
|
949.95
|
0.8750000
|
0.007
|
0.60
|
0.05
|
49.40
|
949.95
|
0.9230769
|
0.012
|
0.20
|
0.00
|
49.80
|
950.00
|
1.0000000
|
0.004
|
0.50
|
0.00
|
49.50
|
950.00
|
1.0000000
|
0.010
|
1.15
|
0.05
|
48.85
|
949.95
|
0.9583333
|
0.023
|
1.70
|
0.05
|
48.30
|
949.95
|
0.9714286
|
0.034
|
0.30
|
0.00
|
49.70
|
950.00
|
1.0000000
|
0.006
|
0.75
|
0.00
|
49.25
|
950.00
|
1.0000000
|
0.015
|
2.05
|
0.10
|
47.95
|
949.90
|
0.9534884
|
0.041
|
2.60
|
0.10
|
47.40
|
949.90
|
0.9629630
|
0.052
|
0.90
|
0.00
|
49.10
|
950.00
|
1.0000000
|
0.018
|
1.30
|
0.00
|
48.70
|
950.00
|
1.0000000
|
0.026
|
3.05
|
0.15
|
46.95
|
949.85
|
0.9531250
|
0.061
|
3.50
|
0.15
|
46.50
|
949.85
|
0.9589041
|
0.070
|
lacmres3p <- do.call(rbind,lapply(groupsizes, function (g) { lacmres2[params$groupsizes==g,"PREC"] }))
colnames(lacmres3p) <- deltas
rownames(lacmres3p) <- groupsizes
lacmres3p %>% kbl(caption="LA mitch precision") %>% kable_paper("hover", full_width = F)
LA mitch precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
1
|
1.0000000
|
1.0000000
|
1.0000000
|
1.0000000
|
5
|
NaN
|
1.0000000
|
1.0000000
|
1.0000000
|
1.0000000
|
8
|
0
|
0.8750000
|
0.9583333
|
0.9534884
|
0.9531250
|
12
|
0
|
0.9230769
|
0.9714286
|
0.9629630
|
0.9589041
|
lacmres3r <- do.call(rbind,lapply(groupsizes, function (g) { lacmres2[params$groupsizes==g,"REC"] }))
colnames(lacmres3r) <- deltas
rownames(lacmres3r) <- groupsizes
lacmres3r %>% kbl(caption="LA mitch recall") %>% kable_paper("hover", full_width = F)
LA mitch recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.001
|
0.001
|
0.004
|
0.006
|
0.018
|
5
|
0.000
|
0.005
|
0.010
|
0.015
|
0.026
|
8
|
0.000
|
0.007
|
0.023
|
0.041
|
0.061
|
12
|
0.000
|
0.012
|
0.034
|
0.052
|
0.070
|
F1(lacmres3p,lacmres3r) %>% kbl(caption="LA mitch F1") %>% kable_paper("hover", full_width = F)
LA mitch F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.001998
|
0.0019980
|
0.0079681
|
0.0119284
|
0.0353635
|
5
|
NaN
|
0.0099502
|
0.0198020
|
0.0295567
|
0.0506823
|
8
|
NaN
|
0.0138889
|
0.0449219
|
0.0786194
|
0.1146617
|
12
|
NaN
|
0.0236920
|
0.0657005
|
0.0986717
|
0.1304753
|
LA rank probes
lrmres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simlrm(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
lrmres2 <- do.call(rbind,lapply(lrmres,colMeans))
lrmres2[,"PREC"] <- lrmres2[,"TP"] / ( lrmres2[,"TP"] + lrmres2[,"FP"] )
lrmres2 %>% kbl(caption="LA rank mitch results") %>% kable_paper("hover", full_width = F)
LA rank mitch results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.20
|
0.00
|
49.80
|
950.00
|
1.0000000
|
0.004
|
0.20
|
0.05
|
49.80
|
949.95
|
0.8000000
|
0.004
|
0.40
|
0.05
|
49.60
|
949.95
|
0.8888889
|
0.008
|
0.20
|
0.00
|
49.80
|
950.00
|
1.0000000
|
0.004
|
0.40
|
0.00
|
49.60
|
950.00
|
1.0000000
|
0.008
|
0.45
|
0.05
|
49.55
|
949.95
|
0.9000000
|
0.009
|
0.55
|
0.05
|
49.45
|
949.95
|
0.9166667
|
0.011
|
0.20
|
0.00
|
49.80
|
950.00
|
1.0000000
|
0.004
|
0.40
|
0.00
|
49.60
|
950.00
|
1.0000000
|
0.008
|
0.45
|
0.05
|
49.55
|
949.95
|
0.9000000
|
0.009
|
0.60
|
0.05
|
49.40
|
949.95
|
0.9230769
|
0.012
|
0.40
|
0.00
|
49.60
|
950.00
|
1.0000000
|
0.008
|
0.50
|
0.00
|
49.50
|
950.00
|
1.0000000
|
0.010
|
0.45
|
0.05
|
49.55
|
949.95
|
0.9000000
|
0.009
|
0.70
|
0.05
|
49.30
|
949.95
|
0.9333333
|
0.014
|
lrmres3p <- do.call(rbind,lapply(groupsizes, function (g) { lrmres2[params$groupsizes==g,"PREC"] }))
colnames(lrmres3p) <- deltas
rownames(lrmres3p) <- groupsizes
lrmres3p %>% kbl(caption="LA rank mitch precision") %>% kable_paper("hover", full_width = F)
LA rank mitch precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
1
|
1.0000000
|
1.0000000
|
1.0000000
|
1.0000000
|
5
|
NaN
|
1.0000000
|
1.0000000
|
1.0000000
|
1.0000000
|
8
|
0
|
0.8000000
|
0.9000000
|
0.9000000
|
0.9000000
|
12
|
NaN
|
0.8888889
|
0.9166667
|
0.9230769
|
0.9333333
|
lrmres3r <- do.call(rbind,lapply(groupsizes, function (g) { lrmres2[params$groupsizes==g,"REC"] }))
colnames(lrmres3r) <- deltas
rownames(lrmres3r) <- groupsizes
lrmres3r %>% kbl(caption="LA rank mitch recall") %>% kable_paper("hover", full_width = F)
LA rank mitch recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.001
|
0.001
|
0.004
|
0.004
|
0.008
|
5
|
0.000
|
0.004
|
0.008
|
0.008
|
0.010
|
8
|
0.000
|
0.004
|
0.009
|
0.009
|
0.009
|
12
|
0.000
|
0.008
|
0.011
|
0.012
|
0.014
|
F1(lrmres3p,lacmres3r) %>% kbl(caption="LA rank mitch F1") %>% kable_paper("hover", full_width = F)
LA rank mitch F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.001998
|
0.0019980
|
0.0079681
|
0.0119284
|
0.0353635
|
5
|
NaN
|
0.0099502
|
0.0198020
|
0.0295567
|
0.0506823
|
8
|
NaN
|
0.0138786
|
0.0448537
|
0.0784272
|
0.1142560
|
12
|
NaN
|
0.0236803
|
0.0655680
|
0.0984538
|
0.1302326
|
AL sim
parametric competitive
alcres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simalc(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
alcres2 <- do.call(rbind,lapply(alcres,colMeans))
alcres2[,"PREC"] <- alcres2[,"TP"] / ( alcres2[,"TP"] + alcres2[,"FP"] )
alcres2 %>% kbl(caption="AL parametric results") %>% kable_paper("hover", full_width = F)
AL parametric results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.15
|
0.00
|
49.85
|
950.00
|
1.0000000
|
0.003
|
0.05
|
0.05
|
49.95
|
949.95
|
0.5000000
|
0.001
|
0.05
|
0.05
|
49.95
|
949.95
|
0.5000000
|
0.001
|
0.10
|
0.05
|
49.90
|
949.95
|
0.6666667
|
0.002
|
0.15
|
0.00
|
49.85
|
950.00
|
1.0000000
|
0.003
|
0.05
|
0.05
|
49.95
|
949.95
|
0.5000000
|
0.001
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.05
|
0.05
|
49.95
|
949.95
|
0.5000000
|
0.001
|
0.10
|
0.00
|
49.90
|
950.00
|
1.0000000
|
0.002
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
alcres3p <- do.call(rbind,lapply(groupsizes, function (g) { alcres2[params$groupsizes==g,"PREC"] }))
colnames(alcres3p) <- deltas
rownames(alcres3p) <- groupsizes
alcres3p %>% kbl(caption="AL parametric precision") %>% kable_paper("hover", full_width = F)
AL parametric precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0
|
0.0
|
0.6666667
|
0.5
|
5
|
NaN
|
NaN
|
1.0
|
1.0000000
|
1.0
|
8
|
0
|
0
|
0.5
|
0.5000000
|
NaN
|
12
|
0
|
0
|
0.5
|
NaN
|
NaN
|
alcres3r <- do.call(rbind,lapply(groupsizes, function (g) { alcres2[params$groupsizes==g,"REC"] }))
colnames(alcres3r) <- deltas
rownames(alcres3r) <- groupsizes
alcres3r %>% kbl(caption="AL parametric recall") %>% kable_paper("hover", full_width = F)
AL parametric recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0
|
0.000
|
0.002
|
0.001
|
5
|
0
|
0
|
0.003
|
0.003
|
0.002
|
8
|
0
|
0
|
0.001
|
0.001
|
0.000
|
12
|
0
|
0
|
0.001
|
0.000
|
0.000
|
F1(alcres3p,alcres3r) %>% kbl(caption="AL parametric F1") %>% kable_paper("hover", full_width = F)
AL parametric F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
NaN
|
NaN
|
0.0039880
|
0.001996
|
5
|
NaN
|
NaN
|
0.0059821
|
0.0059821
|
0.003992
|
8
|
NaN
|
NaN
|
0.0019960
|
0.0019960
|
NaN
|
12
|
NaN
|
NaN
|
0.0019960
|
NaN
|
NaN
|
nonparametric competitive
nalcres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simnalc(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
nalcres2 <- do.call(rbind,lapply(nalcres,colMeans))
nalcres2[,"PREC"] <- nalcres2[,"TP"] / ( nalcres2[,"TP"] + nalcres2[,"FP"] )
nalcres2 %>% kbl(caption="AL nonparametric results") %>% kable_paper("hover", full_width = F)
AL nonparametric results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.25
|
0.00
|
49.75
|
950.00
|
1.0000000
|
0.005
|
0.15
|
0.05
|
49.85
|
949.95
|
0.7500000
|
0.003
|
0.60
|
0.05
|
49.40
|
949.95
|
0.9230769
|
0.012
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.40
|
0.00
|
49.60
|
950.00
|
1.0000000
|
0.008
|
0.75
|
0.15
|
49.25
|
949.85
|
0.8333333
|
0.015
|
1.65
|
0.05
|
48.35
|
949.95
|
0.9705882
|
0.033
|
0.30
|
0.10
|
49.70
|
949.90
|
0.7500000
|
0.006
|
0.85
|
0.00
|
49.15
|
950.00
|
1.0000000
|
0.017
|
1.20
|
0.15
|
48.80
|
949.85
|
0.8888889
|
0.024
|
3.05
|
0.10
|
46.95
|
949.90
|
0.9682540
|
0.061
|
0.85
|
0.15
|
49.15
|
949.85
|
0.8500000
|
0.017
|
1.30
|
0.05
|
48.70
|
949.95
|
0.9629630
|
0.026
|
2.20
|
0.15
|
47.80
|
949.85
|
0.9361702
|
0.044
|
3.30
|
0.10
|
46.70
|
949.90
|
0.9705882
|
0.066
|
nalcres3p <- do.call(rbind,lapply(groupsizes, function (g) { nalcres2[params$groupsizes==g,"PREC"] }))
colnames(nalcres3p) <- deltas
rownames(nalcres3p) <- groupsizes
nalcres3p %>% kbl(caption="AL nonparametric precision") %>% kable_paper("hover", full_width = F)
AL nonparametric precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
1.0000000
|
1.0000000
|
0.7500000
|
0.8500000
|
5
|
NaN
|
1.0000000
|
1.0000000
|
1.0000000
|
0.9629630
|
8
|
0
|
0.7500000
|
0.8333333
|
0.8888889
|
0.9361702
|
12
|
0
|
0.9230769
|
0.9705882
|
0.9682540
|
0.9705882
|
nalcres3r <- do.call(rbind,lapply(groupsizes, function (g) { nalcres2[params$groupsizes==g,"REC"] }))
colnames(nalcres3r) <- deltas
rownames(nalcres3r) <- groupsizes
nalcres3r %>% kbl(caption="AL nonparametric recall") %>% kable_paper("hover", full_width = F)
AL nonparametric recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0.001
|
0.001
|
0.006
|
0.017
|
5
|
0
|
0.005
|
0.008
|
0.017
|
0.026
|
8
|
0
|
0.003
|
0.015
|
0.024
|
0.044
|
12
|
0
|
0.012
|
0.033
|
0.061
|
0.066
|
F1(nalcres3p,nalcres3r) %>% kbl(caption="AL nonparametric F1") %>% kable_paper("hover", full_width = F)
AL nonparametric F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
0.0019980
|
0.0019980
|
0.0119048
|
0.0333333
|
5
|
NaN
|
0.0099502
|
0.0158730
|
0.0334317
|
0.0506329
|
8
|
NaN
|
0.0059761
|
0.0294695
|
0.0467381
|
0.0840497
|
12
|
NaN
|
0.0236920
|
0.0638298
|
0.1147695
|
0.1235955
|
ALM competitive test
almres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simalm(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
almres2 <- do.call(rbind,lapply(almres,colMeans))
almres2[,"PREC"] <- almres2[,"TP"] / ( almres2[,"TP"] + almres2[,"FP"] )
almres2 %>% kbl(caption="ALM results") %>% kable_paper("hover", full_width = F)
ALM results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.00
|
50.00
|
950.00
|
NaN
|
0.000
|
0.00
|
0.05
|
50.00
|
949.95
|
0.0000000
|
0.000
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.30
|
0.00
|
49.70
|
950.00
|
1.0000000
|
0.006
|
0.20
|
0.10
|
49.80
|
949.90
|
0.6666667
|
0.004
|
0.60
|
0.05
|
49.40
|
949.95
|
0.9230769
|
0.012
|
0.05
|
0.00
|
49.95
|
950.00
|
1.0000000
|
0.001
|
0.40
|
0.00
|
49.60
|
950.00
|
1.0000000
|
0.008
|
0.75
|
0.15
|
49.25
|
949.85
|
0.8333333
|
0.015
|
1.65
|
0.05
|
48.35
|
949.95
|
0.9705882
|
0.033
|
0.35
|
0.05
|
49.65
|
949.95
|
0.8750000
|
0.007
|
0.85
|
0.00
|
49.15
|
950.00
|
1.0000000
|
0.017
|
1.40
|
0.15
|
48.60
|
949.85
|
0.9032258
|
0.028
|
3.05
|
0.10
|
46.95
|
949.90
|
0.9682540
|
0.061
|
0.90
|
0.10
|
49.10
|
949.90
|
0.9000000
|
0.018
|
1.30
|
0.05
|
48.70
|
949.95
|
0.9629630
|
0.026
|
2.20
|
0.15
|
47.80
|
949.85
|
0.9361702
|
0.044
|
3.30
|
0.10
|
46.70
|
949.90
|
0.9705882
|
0.066
|
almres3p <- do.call(rbind,lapply(groupsizes, function (g) { almres2[params$groupsizes==g,"PREC"] }))
colnames(almres3p) <- deltas
rownames(almres3p) <- groupsizes
almres3p %>% kbl(caption="ALM precision") %>% kable_paper("hover", full_width = F)
ALM precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
1.0000000
|
1.0000000
|
0.8750000
|
0.9000000
|
5
|
NaN
|
1.0000000
|
1.0000000
|
1.0000000
|
0.9629630
|
8
|
0
|
0.6666667
|
0.8333333
|
0.9032258
|
0.9361702
|
12
|
0
|
0.9230769
|
0.9705882
|
0.9682540
|
0.9705882
|
almres3r <- do.call(rbind,lapply(groupsizes, function (g) { almres2[params$groupsizes==g,"REC"] }))
colnames(almres3r) <- deltas
rownames(almres3r) <- groupsizes
almres3r %>% kbl(caption="ALM recall") %>% kable_paper("hover", full_width = F)
ALM recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0.001
|
0.001
|
0.007
|
0.018
|
5
|
0
|
0.006
|
0.008
|
0.017
|
0.026
|
8
|
0
|
0.004
|
0.015
|
0.028
|
0.044
|
12
|
0
|
0.012
|
0.033
|
0.061
|
0.066
|
F1(almres3p,almres3r) %>% kbl(caption="ALM F1") %>% kable_paper("hover", full_width = F)
ALM F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
NaN
|
0.0019980
|
0.0019980
|
0.0138889
|
0.0352941
|
5
|
NaN
|
0.0119284
|
0.0158730
|
0.0334317
|
0.0506329
|
8
|
NaN
|
0.0079523
|
0.0294695
|
0.0543162
|
0.0840497
|
12
|
NaN
|
0.0236920
|
0.0638298
|
0.1147695
|
0.1235955
|
AA sim
aares <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simaa(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=6)
res <- do.call(rbind,res)
return(res)
})
aares2 <- do.call(rbind,lapply(aares,colMeans))
aares2[,"PREC"] <- aares2[,"TP"] / ( aares2[,"TP"] + aares2[,"FP"] )
aares2 %>% kbl(caption="AA results") %>% kable_paper("hover", full_width = F)
AA results
TP
|
FP
|
FN
|
TN
|
PREC
|
REC
|
2.35
|
82.90
|
47.65
|
867.10
|
0.0275660
|
0.047
|
1.05
|
38.70
|
48.95
|
911.30
|
0.0264151
|
0.021
|
0.35
|
10.50
|
49.65
|
939.50
|
0.0322581
|
0.007
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
2.45
|
82.60
|
47.55
|
867.40
|
0.0288066
|
0.049
|
1.05
|
37.80
|
48.95
|
912.20
|
0.0270270
|
0.021
|
0.50
|
10.20
|
49.50
|
939.80
|
0.0467290
|
0.010
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
2.45
|
81.15
|
47.55
|
868.85
|
0.0293062
|
0.049
|
1.15
|
36.85
|
48.85
|
913.15
|
0.0302632
|
0.023
|
0.80
|
11.35
|
49.20
|
938.65
|
0.0658436
|
0.016
|
0.00
|
0.10
|
50.00
|
949.90
|
0.0000000
|
0.000
|
2.50
|
79.80
|
47.50
|
870.20
|
0.0303767
|
0.050
|
1.85
|
36.75
|
48.15
|
913.25
|
0.0479275
|
0.037
|
1.60
|
12.30
|
48.40
|
937.70
|
0.1151079
|
0.032
|
0.30
|
0.10
|
49.70
|
949.90
|
0.7500000
|
0.006
|
3.00
|
79.00
|
47.00
|
871.00
|
0.0365854
|
0.060
|
2.30
|
36.85
|
47.70
|
913.15
|
0.0587484
|
0.046
|
2.50
|
12.95
|
47.50
|
937.05
|
0.1618123
|
0.050
|
2.65
|
0.10
|
47.35
|
949.90
|
0.9636364
|
0.053
|
aares3p <- do.call(rbind,lapply(groupsizes, function (g) { aares2[params$groupsizes==g,"PREC"] }))
colnames(aares3p) <- deltas
rownames(aares3p) <- groupsizes
aares3p %>% kbl(caption="AA precision") %>% kable_paper("hover", full_width = F)
AA precision
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.0275660
|
0.0288066
|
0.0293062
|
0.0303767
|
0.0365854
|
5
|
0.0264151
|
0.0270270
|
0.0302632
|
0.0479275
|
0.0587484
|
8
|
0.0322581
|
0.0467290
|
0.0658436
|
0.1151079
|
0.1618123
|
12
|
0.0000000
|
0.0000000
|
0.0000000
|
0.7500000
|
0.9636364
|
aares3r <- do.call(rbind,lapply(groupsizes, function (g) { aares2[params$groupsizes==g,"REC"] }))
colnames(aares3r) <- deltas
rownames(aares3r) <- groupsizes
aares3r %>% kbl(caption="AA recall") %>% kable_paper("hover", full_width = F)
AA recall
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0.047
|
0.049
|
0.049
|
0.050
|
0.060
|
5
|
0.021
|
0.021
|
0.023
|
0.037
|
0.046
|
8
|
0.007
|
0.010
|
0.016
|
0.032
|
0.050
|
12
|
0.000
|
0.000
|
0.000
|
0.006
|
0.053
|
F1(aares3p,almres3r) %>% kbl(caption="AA F1") %>% kable_paper("hover", full_width = F)
AA F1
|
0.1
|
0.2
|
0.3
|
0.4
|
0.5
|
3
|
0
|
0.0019329
|
0.0019340
|
0.0113780
|
0.0241287
|
5
|
0
|
0.0098200
|
0.0126547
|
0.0250978
|
0.0360469
|
8
|
0
|
0.0073692
|
0.0244337
|
0.0450432
|
0.0691867
|
12
|
NaN
|
0.0000000
|
0.0000000
|
0.1128237
|
0.1235388
|
Notes
LA
simla: parametric self-contained
simlac: parametric competitive
simnla: nonparametric self-contained
simnlac: nonparametric competitive
AL
simal: parametric self-contained
simalc: parametric competitive
simnal: nonparametric self-contained
simnalc: nonparametric competitive