In this report we establish a new method for generating simulated data with known ground truth. This will be used to test different gene methylation enrichment approaches systematically.
The general steps are:
Import GSE158422 data corresponding to control (non-tumour tissue).
From the 37 samples, create two groups of 18 samples. One of these will be considered “control” and the other “case”.
Create random gene sets that have similar sized to Reactome pathways.
Some gene sets will be selected to be differentially methylated. Half of these will be hypermethylated and the others will be hypomethylated.
The changes will be incorporated into the “case” samples.
The enrichment analysis will be conducted.
The accuracy will be calculated.
suppressPackageStartupMessages({
library("stringi")
library("limma")
library("missMethyl")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library('org.Hs.eg.db')
library("psych")
library("mitch")
library("kableExtra")
})
## Warning: no function found corresponding to methods exports from 'HDF5Array'
## for: 'extract_sparse_array'
## Warning: multiple methods tables found for 'sparsity'
# optimised for 128 GB sever with 32 threads
CORES=6
annotations
probe sets
gene sets
design matrix
mval matrix
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
#genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")
if (!file.exists("GSE158422_design.rds")) {
options(timeout=10000000000000)
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")
boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
main="mean probe methylation mval")
We could use Reactome pathways, however these have a lot of overlapping sets, which could cause inflated false positives. A better solution could be to select random gene sets with size range between 10 and 100 genes.
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
#lengths <- rep(1:10,100)*10
lengths <- rep(20,1000)
randomGeneSets <- function(gene_catalog, lengths, seed){
num_gsets <- length(lengths)
set.seed(seed) ; seeds <- sample(1:1e6, num_gsets)
gsets <- lapply(1:num_gsets,function(i) {
set.seed(seeds[i]) ; gs <- sample(gene_catalog,lengths[i])
return(gs)
} )
names(gsets)<-stri_rand_strings(length(gsets), 15, pattern = "[A-Za-z]")
return(gsets)
}
gsets <- randomGeneSets(gene_catalog,lengths,seed=100)
Select nonoverlapping gene sets.
Not in use now. Might delete later.
gset_selection <- function(genesetdatabase,seed,gene_catalog) {
set.seed(seed)
gsets <- genesetdatabase[sample(1:length(genesetdatabase))]
genelist=NULL
genesets=NULL
for ( i in 1:length(gsets) ) {
gs <- gsets[i]
inx <- length(intersect(unlist(gs),genelist))
num <- length(intersect(unlist(gs),gene_catalog))
if ( inx == 0 & num >= 10 ) {
genesets <- c(genesets,gs)
genelist <- c(unname(unlist(gs)),genelist)
}
}
return(genesets)
}
gset_mod <- gset_selection(genesetdatabase=gsets,seed=100,gene_catalog=gene_catalog)
TODO: to incorporate changes to case samples.
Need to figure out what magnitude to change. Will refer to the cancer/normal comparison.
Select genes and probes to alter.
seed=100
frac_genes=0.5
frac_probes=0.5
delta=1
nsamples=10
normal_mval <- mval[,(1:(ncol(mval)/2)*2)]
incorp_dm <- function(genesets,myann,mval,seed,
frac_genes,frac_probes,groupsize,delta=1) {
# divide gene sets between hyper and hypomethylated
nset <- floor(length(genesets)/2)
set.seed(seed) ; gtup <-sample(genesets,nset)
set.seed(seed) ; gtdn <- sample(setdiff(genesets,gtup),nset)
gup <- unname(unlist(gtup))
gdn <- unname(unlist(gtdn))
# make probe-gene vector
probe2gene <- strsplit(myann$UCSC_RefGene_Name,";")
names(probe2gene) <- rownames(myann)
probe2gene <- unlist(probe2gene)
# select probes hypermethylated
set.seed(seed) ; gup2 <- sample(gup,floor(length(gup)*frac_genes))
pup <- names(probe2gene[which(probe2gene %in% gup2)])
set.seed(seed) ; pup2 <- sample(pup,floor(length(pup)*frac_probes))
# select probes hypomethylated
set.seed(seed) ; gdn2 <- sample(gdn,floor(length(gdn)*frac_genes))
pdn <- names(probe2gene[which(probe2gene %in% gdn2)])
set.seed(seed) ; pdn2 <- sample(pdn,floor(length(pdn)*frac_probes))
# divide samples between ctrl and case
ncols <- ncol(mval)
maxgroupsize=floor(ncols/2)
if ( groupsize > maxgroupsize ) { stop("groupsize cannot be larger than half the ncols of mval") }
set.seed(seed) ; ctrl <- sample(1:ncols,groupsize)
set.seed(seed) ; case <- sample(setdiff(1:ncols,ctrl),groupsize)
mval_ctrl <- mval[,ctrl]
mval_case <- mval[,case]
# incorporate altered signals - change by +1 or -1
mval_case[rownames(mval_case) %in% pup2,] <- mval_case[rownames(mval_case) %in% pup2,] + delta
mval_case[rownames(mval_case) %in% pdn2,] <- mval_case[rownames(mval_case) %in% pdn2,] - delta
mval2 <- cbind(mval_ctrl,mval_case)
result <- list("mval"=mval2,"probes up"=pup2,"probes down"=pdn2,
"genes up"=gup2,"genes down"=gdn2,
"genesets up"=gtup,"genesets down"=gtdn)
return(result)
}
# limma
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
dm <- topTable(fit.reduced,coef=ncol(design), number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
This is how to use the function
This could be complicated as it requires translation of symbols to entrez IDs, but could be simplified if the entrez translation is done at the gsameth step.
simgsa <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
pup3 <- rownames(subset(dm3,adj.P.Val<0.05 & logFC>0))
pdn3 <- rownames(subset(dm3,adj.P.Val<0.05 & logFC<0))
if ( length(pup3) < 250 ) { pup3 <- head(rownames(subset(dm3, logFC > 0)), 250) }
if ( length(pdn3) < 250 ) { pdn3 <- head(rownames(subset(dm3, logFC < 0)), 250) }
# convert gene sets to entrez
suppressWarnings(suppressMessages({ gene2entrez <- mapIds(org.Hs.eg.db, gene_catalog, 'ENTREZID', 'SYMBOL') }))
gsets_entrez <- lapply(gsets,function(gs) {
gs2 <- unique(gene2entrez[names(gene2entrez) %in% gs])
gs2 <- gs2[!is.na(gs2)]
return(gs2)
})
suppressWarnings(suppressMessages({
gsaup3 <- gsameth(sig.cpg=pup3, all.cpg=rownames(dm3), collection=gsets_entrez)
gsadn3 <- gsameth(sig.cpg=pdn3, all.cpg=rownames(dm3), collection=gsets_entrez)
}))
gsig_up3 <- rownames(subset(gsaup3,FDR<0.05))
gsig_dn3 <- rownames(subset(gsadn3,FDR<0.05))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(gsadn3)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
This process runs limma first and then aggregates the results before doing an enrichment test.
# aggregate
agg <- function(dm,cores=1) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a["t"],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=cores)
df <- do.call(rbind,l)
keep <- names(which(table(df$genes)>1))
df <- df[df$genes %in% keep,]
gn <- unique(df$genes)
gme_res <- lapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
top <- tstats[order(-abs(tstats))][1]
if ( length(tstats) > 2 ) {
ttest <- t.test(tstats)
pval <- ttest$p.value
} else {
pval = 1
}
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
"median"=mymedian, "top"=top, "pval"=pval)
} )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,"pval"])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$fdr <- p.adjust(gme_res_df$pval)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
# geometric mean aggregate
geoagg <- function(dm,cores=1) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a["t"],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=cores)
df <- do.call(rbind,l)
keep <- names(which(table(df$genes)>1))
df <- df[df$genes %in% keep,]
gn <- unique(df$genes)
gme_res <- lapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- jamGeomean(tstats)
mymedian <- median(tstats)
top <- tstats[order(-abs(tstats))][1]
if ( length(tstats) > 2 ) {
ttest <- t.test(tstats)
pval <- ttest$p.value
} else {
pval = 1
}
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
"median"=mymedian, "top"=top, "pval"=pval)
} )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,"pval"])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$fdr <- p.adjust(gme_res_df$pval)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
# enrich parametric
ttenrich <- function(m,genesets,cores=1,testtype="selfcontained") {
res <- mclapply( 1:length(genesets), function(i) {
scores <- m[,1]
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( testtype == "selfcontained" ) { wt <- t.test(tstats) }
if ( testtype == "competitive" ) { wt <- t.test(tstats,scores) }
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
# enrich non-parametric
wtenrich <- function(m,genesets,cores=1,testtype="selfcontained") {
res <- mclapply( 1:length(genesets), function(i) {
scores <- m[,1]
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( testtype == "selfcontained" ) { wt <- wilcox.test(tstats) }
if ( testtype == "competitive" ) { wt <- wilcox.test(tstats,scores) }
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
# parametric competitive
simlac <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dmagg1 <- agg(dm3,cores=4)
m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
lares1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# parametric competitive top
simlactop <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dmagg1 <- agg(dm3,cores=4)
m1 <- dmagg1$gme_res_df[,"top",drop=FALSE]
lares1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# parametric competitive geometric
simlacgeo <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dmagg1 <- geoagg(dm3,cores=4)
m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
lares1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# nonparametric competitive
simnlac <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
dm3 <- runlimma(mval=mval2,design=d,myann=myann)
dmagg1 <- agg(dm3,cores=4)
m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
lares1 <- wtenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
gsig_up3 <- rownames(subset(lares1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(lares1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(lares1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
Functions for aggregate-limma-enrich approach.
# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
myann$gnl <- gnl
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
mymed <- function(g) {
probes <- rownames(myann[grep(g,myann$gnl),])
rows <- which(rownames(mx) %in% probes)
if ( length(rows) > 1 ) {
b <- mx[rows,]
med <- apply(b,2,mean)
med <- matrix(med,nrow=1)
colnames(med) <- colnames(b)
rownames(med) <- g
return(med)
}
}
med <- mclapply(gn,mymed,mc.cores=cores)
med <- med[lapply(med,length)>0]
medf <- do.call(rbind,med)
return(medf)
}
chragg <- function(mval,myann,cores=1){
annodf <- as.data.frame(anno)
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
chrs <- unique(anno$chr)
myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
chrs <- chrs[order(-myorder)]
leadercores <- floor(sqrt(cores))
workercores <- ceiling(sqrt(cores))
chrmedf <- mclapply(chrs,function(chr) {
chrfrag <- annodf[annodf$chr==chr,]
chrprobes <-rownames(chrfrag)
chrmx <- mx[rownames(mx) %in% chrprobes,]
chranno <- myann[rownames(myann) %in% chrprobes,]
chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
return(chrmedf)
},mc.cores=leadercores)
medf <- do.call(rbind, chrmedf)
return(medf)
}
agglimma <- function(medf,design) {
fit.reduced <- lmFit(medf,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
nondup <- !duplicated(dmagg$ID)
dmagg <- dmagg[nondup,]
rownames(dmagg) <- dmagg$ID
dmagg$ID = NULL
return(dmagg)
}
# AL approach parametric competitive
simalc <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
# al pipeline
medf1 <- chragg(mval=mval2,myann=myann,cores=4)
magg1 <- agglimma(medf1,d)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
alres1 <- ttenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
# summarise results
gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
# AL approach nonparametric competitive
simnalc <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
# al pipeline
medf1 <- chragg(mval=mval2,myann=myann,cores=4)
magg1 <- agglimma(medf1,d)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
alres1 <- wtenrich(m=m1,genesets=gsets,cores=4,testtype="competitive")
# summarise results
gsig_up3 <- rownames(subset(alres1, fdr < 0.05 & t_mean > 0))
gsig_dn3 <- rownames(subset(alres1, fdr < 0.05 & t_mean < 0))
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(alres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
This approach uses the aggregated mvals, limma and instead of a 1-sample t-test it uses mitch which is a competitive test and could give more interpretable results.
runmitch <- function(m,genesets,cores=1) {
suppressMessages({ mres <- mitch_calc(m,genesets,minsetsize=5,cores=cores) })
mres <- mres$enrichment_result
rownames(mres) <- mres$set
mres$set=NULL
return(mres)
}
simalm <- function(genesetdatabase, myann, mval, seed, frac_genes, frac_probes, groupsize, delta=1, num_dm_sets=50) {
# generate gene sets
gene_catalog <- unique(unlist(strsplit(myann$UCSC_RefGene_Name,";")))
lengths <- unname(unlist(lapply(genesetdatabase,length)))
gsets <- randomGeneSets(gene_catalog,lengths,seed=seed)
# select gene sets to alter
set.seed(seed) ; gset_mod <- sample(gsets,num_dm_sets)
# incorporate select changes
sim <- incorp_dm(genesets=gset_mod, myann=myann, mval=mval, seed=seed,
frac_genes=0.5,frac_probes=0.5,groupsize=groupsize,delta=delta)
# set up limma
mval2 <- sim$mval
ncols <- ncol(mval2)
groupsize <- ncols/2
ss <- data.frame(colnames(mval2))
colnames(ss) <- "sample"
ss$case <- c(rep(0,groupsize),rep(1,groupsize))
d <- model.matrix(~ ss$case )
# alm pipeline
medf1 <- chragg(mval2,myann,cores=4)
magg1 <- agglimma(medf1,d)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
almres1 <- runmitch(m=m1,genesets=gsets,cores=4)
# summarise results
gsig_up3 <- rownames( subset( almres1, p.adjustANOVA < 0.05 & s.dist > 0 ) )
gsig_dn3 <- rownames( subset( almres1, p.adjustANOVA < 0.05 & s.dist < 0 ) )
gtup <- names(sim[[6]])
gtdn <- names(sim[[7]])
UPTP=length(intersect(gsig_up3 ,gtup))
UPFP=length(setdiff(gsig_up3 ,gtup))
UPFN=length(setdiff(gtup,gsig_up3))
DNTP=length(intersect(gsig_dn3 ,gtdn))
DNFP=length(setdiff(gsig_dn3 ,gtdn))
DNFN=length(setdiff(gtdn,gsig_dn3))
TP=UPTP+DNTP
FP=UPFP+DNFP
FN=UPFN+DNFN
TN=nrow(almres1)-DNTP-DNFP-DNFN-UPTP-UPFP-UPFN
PREC=TP/(TP+FP)
REC=TP/(TP+FN)
F1=TP/(TP+(0.5*(FP+FN)))
result <- c("TP"=TP,"FP"=FP,"FN"=FN,"TN"=TN,"PREC"=PREC,"REC"=REC)
return(result)
}
F1 <- function(x,y) {
( 2 * x * y ) / ( x + y )
}
Set assumptions.
num_dm_sets=50
sims=10
#groupsizes=c(3,5,8,12)
#deltas=c(0.1,0.2,0.3,0.4,0.5)
groupsizes=5
deltas=0.4
params <- expand.grid("groupsizes"=groupsizes,"deltas"=deltas)
params %>% kbl(caption="Parameters to test") %>% kable_paper("hover", full_width = F)
groupsizes | deltas |
---|---|
5 | 0.4 |
Cannot be run in multicore due to fragility of AnnotationDbi SQLite objects.
gres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- lapply(1:sims,function(i) {
simgsa(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5,
frac_probes=0.5, groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
})
res <- do.call(rbind,res)
return(res)
})
gres2 <- do.call(rbind,lapply(gres,colMeans))
gres2 %>% kbl(caption="GSAmeth results") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
3.6 | 0.2 | 46.4 | 949.8 | 0.9416667 | 0.072 |
gres3p <- do.call(rbind,lapply(groupsizes, function (g) { gres2[params$groupsizes==g,"PREC"] }))
colnames(gres3p) <- deltas
rownames(gres3p) <- groupsizes
gres3p %>% kbl(caption="GSAmeth precision") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.9416667 |
gres3r <- do.call(rbind,lapply(groupsizes, function (g) { gres2[params$groupsizes==g,"REC"] }))
colnames(gres3r) <- deltas
rownames(gres3r) <- groupsizes
gres3r %>% kbl(caption="GSAmeth recall") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.072 |
F1(gres3p,gres3r) %>% kbl(caption="GSAmeth F1") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.1337718 |
lacres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simlac(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=4)
res <- do.call(rbind,res)
return(res)
})
lacres2 <- do.call(rbind,lapply(lacres,colMeans))
lacres2 %>% kbl(caption="LA parametric results") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
0.1 | 0.2 | 49.9 | 949.8 | NaN | 0.002 |
lacres3p <- do.call(rbind,lapply(groupsizes, function (g) { lacres2[params$groupsizes==g,"PREC"] }))
colnames(lacres3p) <- deltas
rownames(lacres3p) <- groupsizes
lacres3p %>% kbl(caption="LA parametric precision") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
lacres3r <- do.call(rbind,lapply(groupsizes, function (g) { lacres2[params$groupsizes==g,"REC"] }))
colnames(lacres3r) <- deltas
rownames(lacres3r) <- groupsizes
lacres3r %>% kbl(caption="LA parametric recall") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.002 |
F1(lacres3p,lacres3r) %>% kbl(caption="LA parametric F1") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
Too many false positives.
lactopres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simlactop(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=4)
res <- do.call(rbind,res)
return(res)
})
lactopres2 <- do.call(rbind,lapply(lactopres,colMeans))
lactopres2 %>% kbl(caption="LA parametric results (top agg)") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
1.7 | 4.5 | 48.3 | 945.5 | NaN | 0.034 |
lactopres3p <- do.call(rbind,lapply(groupsizes, function (g) { lactopres2[params$groupsizes==g,"PREC"] }))
colnames(lactopres3p) <- deltas
rownames(lactopres3p) <- groupsizes
lactopres3p %>% kbl(caption="LA parametric precision (top agg)") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
lactopres3r <- do.call(rbind,lapply(groupsizes, function (g) { lactopres2[params$groupsizes==g,"REC"] }))
colnames(lactopres3r) <- deltas
rownames(lactopres3r) <- groupsizes
lactopres3r %>% kbl(caption="LA parametric recall (top agg)") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.034 |
F1(lactopres3p,lactopres3r) %>% kbl(caption="LA parametric F1 (top agg)") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
Precision is same but recall was very low.
Not included.
nlacres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simnlac(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=4)
res <- do.call(rbind,res)
return(res)
})
nlacres2 <- do.call(rbind,lapply(nlacres,colMeans))
nlacres2 %>% kbl(caption="LA nonparametric results") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
1.7 | 0.2 | 48.3 | 949.8 | NaN | 0.034 |
nlacres3p <- do.call(rbind,lapply(groupsizes, function (g) { nlacres2[params$groupsizes==g,"PREC"] }))
colnames(nlacres3p) <- deltas
rownames(nlacres3p) <- groupsizes
nlacres3p %>% kbl(caption="LA nonparametric precision") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
nlacres3r <- do.call(rbind,lapply(groupsizes, function (g) { nlacres2[params$groupsizes==g,"REC"] }))
colnames(nlacres3r) <- deltas
rownames(nlacres3r) <- groupsizes
nlacres3r %>% kbl(caption="LA nonparametric recall") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.034 |
F1(nlacres3p,nlacres3r) %>% kbl(caption="LA nonparametric F1") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
alcres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simalc(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=4)
res <- do.call(rbind,res)
return(res)
})
alcres2 <- do.call(rbind,lapply(alcres,colMeans))
alcres2 %>% kbl(caption="AL parametric results") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
0 | 0.1 | 50 | 949.9 | NaN | 0 |
alcres3p <- do.call(rbind,lapply(groupsizes, function (g) { alcres2[params$groupsizes==g,"PREC"] }))
colnames(alcres3p) <- deltas
rownames(alcres3p) <- groupsizes
alcres3p %>% kbl(caption="AL parametric precision") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
alcres3r <- do.call(rbind,lapply(groupsizes, function (g) { alcres2[params$groupsizes==g,"REC"] }))
colnames(alcres3r) <- deltas
rownames(alcres3r) <- groupsizes
alcres3r %>% kbl(caption="AL parametric recall") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0 |
F1(alcres3p,alcres3r) %>% kbl(caption="AL parametric F1") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
nalcres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simnalc(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=4)
res <- do.call(rbind,res)
return(res)
})
nalcres2 <- do.call(rbind,lapply(nalcres,colMeans))
nalcres2 %>% kbl(caption="AL nonparametric results") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
1.5 | 0.3 | 48.5 | 949.7 | NaN | 0.03 |
nalcres3p <- do.call(rbind,lapply(groupsizes, function (g) { nalcres2[params$groupsizes==g,"PREC"] }))
colnames(nalcres3p) <- deltas
rownames(nalcres3p) <- groupsizes
nalcres3p %>% kbl(caption="AL nonparametric precision") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
nalcres3r <- do.call(rbind,lapply(groupsizes, function (g) { nalcres2[params$groupsizes==g,"REC"] }))
colnames(nalcres3r) <- deltas
rownames(nalcres3r) <- groupsizes
nalcres3r %>% kbl(caption="AL nonparametric recall") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.03 |
F1(nalcres3p,nalcres3r) %>% kbl(caption="AL nonparametric F1") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
almres <- lapply(1:nrow(params) , function(j) {
groupsize = params[j,1]
delta = params[j,2]
res <- mclapply(1:sims,function(i) {
simalm(genesetdatabase=gsets, myann=myann, mval=normal_mval, seed=i*100, frac_genes=0.5, frac_probes=0.5,
groupsize=groupsize, delta=delta, num_dm_sets=num_dm_sets)
},mc.cores=4)
res <- do.call(rbind,res)
return(res)
})
almres2 <- do.call(rbind,lapply(almres,colMeans))
almres2 %>% kbl(caption="ALM results") %>% kable_paper("hover", full_width = F)
TP | FP | FN | TN | PREC | REC |
---|---|---|---|---|---|
1.6 | 0.2 | 48.4 | 949.8 | NaN | 0.032 |
almres3p <- do.call(rbind,lapply(groupsizes, function (g) { almres2[params$groupsizes==g,"PREC"] }))
colnames(almres3p) <- deltas
rownames(almres3p) <- groupsizes
almres3p %>% kbl(caption="ALM precision") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
almres3r <- do.call(rbind,lapply(groupsizes, function (g) { almres2[params$groupsizes==g,"REC"] }))
colnames(almres3r) <- deltas
rownames(almres3r) <- groupsizes
almres3r %>% kbl(caption="ALM recall") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | 0.032 |
F1(almres3p,almres3r) %>% kbl(caption="ALM F1") %>% kable_paper("hover", full_width = F)
0.4 | |
---|---|
5 | NaN |
save.image("GSE158422_simulate.Rdata")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] kableExtra_1.3.4
## [2] psych_2.3.9
## [3] org.Hs.eg.db_3.17.0
## [4] AnnotationDbi_1.64.0
## [5] IlluminaHumanMethylation450kmanifest_0.4.0
## [6] missMethyl_1.34.0
## [7] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [8] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [9] minfi_1.46.0
## [10] bumphunter_1.42.0
## [11] locfit_1.5-9.8
## [12] iterators_1.0.14
## [13] foreach_1.5.2
## [14] Biostrings_2.70.0
## [15] XVector_0.42.0
## [16] SummarizedExperiment_1.32.0
## [17] Biobase_2.62.0
## [18] MatrixGenerics_1.14.0
## [19] matrixStats_1.0.0
## [20] GenomicRanges_1.54.0
## [21] GenomeInfoDb_1.38.0
## [22] IRanges_2.36.0
## [23] S4Vectors_0.40.0
## [24] BiocGenerics_0.48.0
## [25] limma_3.58.0
## [26] stringi_1.7.12
## [27] mitch_1.12.0
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.1 later_1.3.1
## [3] BiocIO_1.12.0 bitops_1.0-7
## [5] filelock_1.0.2 BiasedUrn_2.0.11
## [7] tibble_3.2.1 preprocessCore_1.62.1
## [9] XML_3.99-0.14 lifecycle_1.0.3
## [11] lattice_0.22-5 MASS_7.3-60
## [13] base64_2.0.1 scrime_1.3.5
## [15] magrittr_2.0.3 sass_0.4.7
## [17] rmarkdown_2.25 jquerylib_0.1.4
## [19] yaml_2.3.7 httpuv_1.6.12
## [21] doRNG_1.8.6 askpass_1.2.0
## [23] DBI_1.1.3 RColorBrewer_1.1-3
## [25] abind_1.4-5 zlibbioc_1.48.0
## [27] quadprog_1.5-8 rvest_1.0.3
## [29] purrr_1.0.2 RCurl_1.98-1.12
## [31] rappdirs_0.3.3 GenomeInfoDbData_1.2.11
## [33] genefilter_1.82.1 annotate_1.78.0
## [35] svglite_2.1.1 DelayedMatrixStats_1.22.6
## [37] codetools_0.2-19 DelayedArray_0.28.0
## [39] xml2_1.3.5 tidyselect_1.2.0
## [41] beanplot_1.3.1 BiocFileCache_2.10.0
## [43] illuminaio_0.42.0 webshot_0.5.5
## [45] GenomicAlignments_1.38.0 jsonlite_1.8.7
## [47] multtest_2.56.0 ellipsis_0.3.2
## [49] survival_3.5-7 systemfonts_1.0.5
## [51] tools_4.3.1 progress_1.2.2
## [53] Rcpp_1.0.11 glue_1.6.2
## [55] mnormt_2.1.1 gridExtra_2.3
## [57] SparseArray_1.2.0 xfun_0.40
## [59] dplyr_1.1.3 HDF5Array_1.28.1
## [61] fastmap_1.1.1 GGally_2.1.2
## [63] rhdf5filters_1.12.1 fansi_1.0.5
## [65] openssl_2.1.1 caTools_1.18.2
## [67] digest_0.6.33 R6_2.5.1
## [69] mime_0.12 colorspace_2.1-0
## [71] gtools_3.9.4 biomaRt_2.58.0
## [73] RSQLite_2.3.1 tidyr_1.3.0
## [75] utf8_1.2.4 generics_0.1.3
## [77] data.table_1.14.8 rtracklayer_1.62.0
## [79] prettyunits_1.2.0 httr_1.4.7
## [81] htmlwidgets_1.6.2 S4Arrays_1.2.0
## [83] pkgconfig_2.0.3 gtable_0.3.4
## [85] blob_1.2.4 siggenes_1.74.0
## [87] htmltools_0.5.6.1 echarts4r_0.4.5
## [89] scales_1.2.1 png_0.1-8
## [91] knitr_1.44 rstudioapi_0.15.0
## [93] tzdb_0.4.0 reshape2_1.4.4
## [95] rjson_0.2.21 nlme_3.1-163
## [97] curl_5.1.0 cachem_1.0.8
## [99] rhdf5_2.44.0 stringr_1.5.0
## [101] KernSmooth_2.23-22 restfulr_0.0.15
## [103] GEOquery_2.68.0 pillar_1.9.0
## [105] grid_4.3.1 reshape_0.8.9
## [107] vctrs_0.6.4 gplots_3.1.3
## [109] promises_1.2.1 dbplyr_2.3.4
## [111] xtable_1.8-4 beeswarm_0.4.0
## [113] evaluate_0.22 readr_2.1.4
## [115] GenomicFeatures_1.54.0 cli_3.6.1
## [117] compiler_4.3.1 Rsamtools_2.18.0
## [119] rlang_1.1.1 crayon_1.5.2
## [121] rngtools_1.5.2 nor1mix_1.3-0
## [123] mclust_6.0.0 plyr_1.8.9
## [125] viridisLite_0.4.2 BiocParallel_1.36.0
## [127] munsell_0.5.0 Matrix_1.6-1.1
## [129] hms_1.1.3 sparseMatrixStats_1.12.2
## [131] bit64_4.0.5 ggplot2_3.4.4
## [133] Rhdf5lib_1.22.1 KEGGREST_1.42.0
## [135] statmod_1.5.0 shiny_1.7.5.1
## [137] highr_0.10 memoise_2.0.1.9000
## [139] bslib_0.5.1 bit_4.0.5
LA
simla: parametric self-contained
simlac: parametric competitive
simnla: nonparametric self-contained
simnlac: nonparametric competitive
AL
simal: parametric self-contained
simalc: parametric competitive
simnal: nonparametric self-contained
simnalc: nonparametric competitive