Here we are splitting the dataset and testing concordance between LA and AL approaches.
suppressPackageStartupMessages({
library("limma")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("tictoc")
library("mitch")
library("kableExtra")
library("beeswarm")
})
# optimised for 128 GB sever with 32 threads
CORES=6
annotations
probe sets
gene sets
design matrix
mval matrix
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
#genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")
boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
main="mean probe methylation mval")
Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.
gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")
gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")
There are three approaches. First, limma can be done first, followed by aggregation by gene and then enrichment test (called LA). Secondly, probe values can be aggregated by gene, followed by limma, followed by enrichment test (AL). Third, probe values can be aggregated to gene level, and then aggregated to gene set level followed by limma test (AA).
This process runs limma first and then aggregates the results before doing an enrichment test.
# limma
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
dm <- topTable(fit.reduced,coef=4, number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
# aggregate
agg <- function(dm,cores=1) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a["t"],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=cores)
df <- do.call(rbind,l)
keep <- names(which(table(df$genes)>1))
df <- df[df$genes %in% keep,]
gn <- unique(df$genes)
gme_res <- lapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( length(tstats) > 2 ) {
ttest <- t.test(tstats)
pval <- ttest$p.value
} else {
pval = 1
}
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
"median"=mymedian, pval=pval)
} )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,4])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$fdr <- p.adjust(gme_res_df$pval)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
# enrich
ttenrich <- function(m,genesets,cores=1) {
res <- mclapply( 1:length(genesets), function(i) {
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
wt <- t.test(tstats)
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
Functions for aggregate-limma-enrich approach.
# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
myann$gnl <- gnl
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
mymed <- function(g) {
probes <- rownames(myann[grep(g,myann$gnl),])
rows <- which(rownames(mx) %in% probes)
if ( length(rows) > 1 ) {
b <- mx[rows,]
med <- apply(b,2,mean)
med <- matrix(med,nrow=1)
colnames(med) <- colnames(b)
rownames(med) <- g
return(med)
}
}
med <- mclapply(gn,mymed,mc.cores=cores)
med <- med[lapply(med,length)>0]
medf <- do.call(rbind,med)
return(medf)
}
chragg <- function(mval,myann,cores=1){
annodf <- as.data.frame(anno)
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
chrs <- unique(anno$chr)
myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
chrs <- chrs[order(-myorder)]
leadercores <- floor(sqrt(cores))
workercores <- ceiling(sqrt(cores))
chrmedf <- mclapply(chrs,function(chr) {
chrfrag <- annodf[annodf$chr==chr,]
chrprobes <-rownames(chrfrag)
chrmx <- mx[rownames(mx) %in% chrprobes,]
chranno <- myann[rownames(myann) %in% chrprobes,]
chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
return(chrmedf)
},mc.cores=leadercores)
medf <- do.call(rbind, chrmedf)
return(medf)
}
agglimma <- function(medf,design) {
fit.reduced <- lmFit(medf,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
nondup <- !duplicated(dmagg$ID)
dmagg <- dmagg[nondup,]
rownames(dmagg) <- dmagg$ID
dmagg$ID = NULL
return(dmagg)
}
Median value works well here.
gsagg <- function(x,genesets,cores=1) {
meds <- mclapply(1:length(genesets), function(i) {
gs = genesets[[i]]
xx <- x[rownames(x) %in% gs,]
med <- apply(xx,2,median)
},mc.cores=cores)
mymed <- do.call(rbind,meds)
rownames(mymed) <- names(genesets)
as.data.frame(mymed)
}
aalimma <- function(agag,design) {
fit.reduced <- lmFit(agag,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
return(dmagg)
}
aal <- function(mval,myann,genesets,design,cores=1) {
medf <- chragg(mval,myann,cores=cores)
agag <- gsagg(x=medf,genesets=genesets,cores=cores)
aalres <- aalimma(agag=agag,design=design)
return(aalres)
}
#aalres <- aal(mval=mval,myann=myann,genesets=gs_symbols,design=design,cores=CORES)
This approach uses the aggregated mvals, limma and instead of a 1-sample t-test it uses mitch which is a competitive test and could give more interpretable results.
runmitch <- function(m,genesets,cores=1) {
mres <- mitch_calc(m,genesets,minsetsize=5,cores=cores)
mres <- mres$enrichment_result
rownames(mres) <- mres$set
mres$set=NULL
return(mres)
}
# prep
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
rownames(design) <- colnames(mval)
split 74 samples 37 patients split into 18 + 18 Conduct LA workflow analysis. Then make overlap.
splitanalyze_la <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
dm1 <- runlimma(mval_a,design_a,myann)
dmagg1 <- agg(dm1,cores=floor(CORES/3))
m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
lares1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES/3)
dm2 <- runlimma(mval_b,design_b,myann)
dmagg2 <- agg(dm2,cores=floor(CORES/3))
m2 <- dmagg2$gme_res_df[,"mean",drop=FALSE]
lares2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES/3)
list("lares1"=lares1,"lares2"=lares2)
}
lares <- mclapply( seq(100,5000,100) ,splitanalyze_la,mc.cores=CORES)
splitanalyze_al <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
magg1 <- agglimma(medf1,design_a)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
alres1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES/3)
medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
magg2 <- agglimma(medf2,design_b)
m2 <- as.data.frame(magg2$t)
rownames(m2) <- rownames(magg2)
colnames(m2) <- "t"
alres2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES/3)
list("alres1"=alres1,"alres2"=alres2)
}
alres <- mclapply( seq(100,5000,100) ,splitanalyze_al,mc.cores=CORES)
splitanalyze_aa <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
aares1 <- aal(mval=mval_a,myann=myann,genesets=gs_symbols,
design=design_a,cores=floor(CORES/3))
aares2 <- aal(mval=mval_b,myann=myann,genesets=gs_symbols,
design=design_b,cores=floor(CORES/3))
list("aares1"=aares1,"aares2"=aares2)
}
aares <- mclapply( seq(100,5000,100) ,splitanalyze_aa,mc.cores=CORES)
splitanalyze_alm <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
magg1 <- agglimma(medf1,design_a)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
almres1 <- runmitch(m=m1,genesets=gs_symbols,cores=CORES)
medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
magg2 <- agglimma(medf2,design_b)
m2 <- as.data.frame(magg2$t)
rownames(m2) <- rownames(magg2)
colnames(m2) <- "t"
almres2 <- runmitch(m=m2,genesets=gs_symbols,cores=CORES)
list("almres1"=almres1,"almres2"=almres2)
}
almres <- mclapply( seq(100,5000,100) ,splitanalyze_alm,mc.cores=CORES)
lacompare <- function(lares){
lares1 <- lares[[1]]
lares2 <- lares[[2]]
lasig1 <- subset(lares1,fdr<0.05)
lasig2 <- subset(lares2,fdr<0.05)
lasig1_up <- rownames(subset(lasig1,t_median>0))
lasig1_dn <- rownames(subset(lasig1,t_median<0))
lasig2_up <- rownames(subset(lasig2,t_median>0))
lasig2_dn <- rownames(subset(lasig2,t_median<0))
total <- length(unique(c(lasig1_up,lasig2_up,lasig1_dn,lasig2_dn)))
common <- length(intersect(lasig1_up, lasig2_up)) + length(intersect(lasig1_dn, lasig2_dn))
discordant <- length(intersect(lasig1_up, lasig2_dn)) + length(intersect(lasig1_dn, lasig2_up))
uncommon = total - common - discordant
laout <- data.frame(total,common,discordant,uncommon)
laout$p_comm <-laout$common / laout$total
laout$p_disc <-laout$discordant / laout$total
laout$p_uncom <-laout$uncommon / laout$total
laout
}
alcompare <-function(alres){
alres1 <- alres[[1]]
alres2 <- alres[[2]]
alsig1 <- subset(alres1,fdr<0.05)
alsig2 <- subset(alres2,fdr<0.05)
alsig1_up <- rownames(subset(alsig1,t_median>0))
alsig1_dn <- rownames(subset(alsig1,t_median<0))
alsig2_up <- rownames(subset(alsig2,t_median>0))
alsig2_dn <- rownames(subset(alsig2,t_median<0))
total <- length(unique(c(alsig1_up,alsig2_up,alsig1_dn,alsig2_dn)))
common <- length(intersect(alsig1_up, alsig2_up)) + length(intersect(alsig1_dn, alsig2_dn))
discordant <- length(intersect(alsig1_up, alsig2_dn)) + length(intersect(alsig1_dn, alsig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
alout <- data.frame(total,common,discordant,uncommon)
alout$p_comm <-alout$common / alout$total
alout$p_disc <-alout$discordant / alout$total
alout$p_uncom <-alout$uncommon / alout$total
alout
}
aacompare <-function(aa){
aa1 <- aa[[1]]
aa2 <- aa[[2]]
aasig1 <- subset(aa1,adj.P.Val<0.05)
aasig2 <- subset(aa2,adj.P.Val<0.05)
aasig1_up <- rownames(subset(aasig1,logFC>0))
aasig1_dn <- rownames(subset(aasig1,logFC<0))
aasig2_up <- rownames(subset(aasig2,logFC>0))
aasig2_dn <- rownames(subset(aasig2,logFC<0))
total <- length(unique(c(aasig1_up,aasig2_up,aasig1_dn,aasig2_dn)))
common <- length(intersect(aasig1_up, aasig2_up)) + length(intersect(aasig1_dn, aasig2_dn))
discordant <- length(intersect(aasig1_up, aasig2_dn)) + length(intersect(aasig1_dn, aasig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
aaout <- data.frame(total,common,discordant,uncommon)
aaout$p_comm <- aaout$common / aaout$total
aaout$p_disc <- aaout$discordant / aaout$total
aaout$p_uncom <-aaout$uncommon / aaout$total
aaout
}
almcompare <-function(almres){
almres1 <- almres[[1]]
almres2 <- almres[[2]]
almsig1 <- subset(almres1,p.adjustANOVA<0.05)
almsig2 <- subset(almres2,p.adjustANOVA<0.05)
almsig1_up <- rownames(subset(almsig1,s.dist>0))
almsig1_dn <- rownames(subset(almsig1,s.dist<0))
almsig2_up <- rownames(subset(almsig2,s.dist>0))
almsig2_dn <- rownames(subset(almsig2,s.dist<0))
total <- length(unique(c(almsig1_up,almsig2_up,almsig1_dn,almsig2_dn)))
common <- length(intersect(almsig1_up, almsig2_up)) + length(intersect(almsig1_dn, almsig2_dn))
discordant <- length(intersect(almsig1_up, almsig2_dn)) + length(intersect(almsig1_dn, almsig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
almout <- data.frame(total,common,discordant,uncommon)
almout$p_comm <-almout$common / almout$total
almout$p_disc <-almout$discordant / almout$total
almout$p_uncom <-almout$uncommon / almout$total
almout
}
laout <- do.call(rbind,lapply(lares,lacompare))
alout <- do.call(rbind,lapply(alres,alcompare))
aaout <- do.call(rbind,lapply(aares,aacompare))
almout <- do.call(rbind,lapply(almres,almcompare))
laout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1421 866 0 555 0.6094299789 0.000000000 0.39057002
## 2 1620 1242 1 377 0.7666666667 0.000617284 0.23271605
## 3 931 475 9 447 0.5102040816 0.009667025 0.48012889
## 4 935 483 12 440 0.5165775401 0.012834225 0.47058824
## 5 1271 483 0 788 0.3800157356 0.000000000 0.61998426
## 6 1277 554 0 723 0.4338292874 0.000000000 0.56617071
## 7 1592 0 1320 272 0.0000000000 0.829145729 0.17085427
## 8 1417 1 791 625 0.0007057163 0.558221595 0.44107269
## 9 904 419 11 474 0.4634955752 0.012168142 0.52433628
## 10 1611 1492 0 119 0.9261328367 0.000000000 0.07386716
## 11 1550 1 603 946 0.0006451613 0.389032258 0.61032258
## 12 1603 2 918 683 0.0012476606 0.572676232 0.42607611
## 13 1604 90 12 1502 0.0561097257 0.007481297 0.93640898
## 14 1519 2 726 791 0.0013166557 0.477946017 0.52073733
## 15 1562 1221 0 341 0.7816901408 0.000000000 0.21830986
## 16 1308 1 736 571 0.0007645260 0.562691131 0.43654434
## 17 1527 181 0 1346 0.1185330714 0.000000000 0.88146693
## 18 1603 12 838 753 0.0074859638 0.522769807 0.46974423
## 19 1260 389 0 871 0.3087301587 0.000000000 0.69126984
## 20 1588 0 1312 276 0.0000000000 0.826196474 0.17380353
## 21 1605 11 805 789 0.0068535826 0.501557632 0.49158879
## 22 1620 1521 0 99 0.9388888889 0.000000000 0.06111111
## 23 634 9 59 566 0.0141955836 0.093059937 0.89274448
## 24 923 441 12 470 0.4777898158 0.013001083 0.50920910
## 25 1597 0 1327 270 0.0000000000 0.830932999 0.16906700
## 26 1561 628 1 932 0.4023062140 0.000640615 0.59705317
## 27 1260 415 0 845 0.3293650794 0.000000000 0.67063492
## 28 1585 0 1304 281 0.0000000000 0.822712934 0.17728707
## 29 1568 1 1238 329 0.0006377551 0.789540816 0.20982143
## 30 1548 36 491 1021 0.0232558140 0.317183463 0.65956072
## 31 1557 1 1102 454 0.0006422608 0.707771355 0.29158638
## 32 1594 1435 0 159 0.9002509410 0.000000000 0.09974906
## 33 1525 195 0 1330 0.1278688525 0.000000000 0.87213115
## 34 903 1 100 802 0.0011074197 0.110741971 0.88815061
## 35 977 3 60 914 0.0030706244 0.061412487 0.93551689
## 36 1049 6 520 523 0.0057197331 0.495710200 0.49857007
## 37 1507 557 0 950 0.3696084937 0.000000000 0.63039151
## 38 1613 33 526 1054 0.0204587725 0.326100434 0.65344079
## 39 1506 3 764 739 0.0019920319 0.507304117 0.49070385
## 40 1562 1 1142 419 0.0006402049 0.731113956 0.26824584
## 41 1555 28 577 950 0.0180064309 0.371061093 0.61093248
## 42 1598 0 1460 138 0.0000000000 0.913642053 0.08635795
## 43 1549 230 106 1213 0.1484828922 0.068431246 0.78308586
## 44 1633 0 1524 109 0.0000000000 0.933251684 0.06674832
## 45 1426 1 928 497 0.0007012623 0.650771388 0.34852735
## 46 1057 14 434 609 0.0132450331 0.410596026 0.57615894
## 47 982 3 409 570 0.0030549898 0.416496945 0.58044807
## 48 1567 1269 0 298 0.8098276962 0.000000000 0.19017230
## 49 1513 5 740 768 0.0033046927 0.489094514 0.50760079
## 50 909 4 163 742 0.0044004400 0.179317932 0.81628163
alout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1380 819 0 561 0.5934783 0.0000000000 0.4065217
## 2 1196 1059 0 137 0.8854515 0.0000000000 0.1145485
## 3 1226 1088 0 138 0.8874388 0.0000000000 0.1125612
## 4 1447 761 0 686 0.5259157 0.0000000000 0.4740843
## 5 1223 951 0 272 0.7775961 0.0000000000 0.2224039
## 6 1219 997 0 222 0.8178835 0.0000000000 0.1821165
## 7 1263 851 0 412 0.6737926 0.0000000000 0.3262074
## 8 1361 745 0 616 0.5473916 0.0000000000 0.4526084
## 9 1306 992 0 314 0.7595712 0.0000000000 0.2404288
## 10 1377 785 0 592 0.5700799 0.0000000000 0.4299201
## 11 1223 912 0 311 0.7457073 0.0000000000 0.2542927
## 12 1290 960 0 330 0.7441860 0.0000000000 0.2558140
## 13 1299 997 0 302 0.7675135 0.0000000000 0.2324865
## 14 1504 362 4 1138 0.2406915 0.0026595745 0.7566489
## 15 1255 1069 0 186 0.8517928 0.0000000000 0.1482072
## 16 1221 1055 0 166 0.8640459 0.0000000000 0.1359541
## 17 1230 1093 0 137 0.8886179 0.0000000000 0.1113821
## 18 1322 989 0 333 0.7481089 0.0000000000 0.2518911
## 19 1326 1015 0 311 0.7654600 0.0000000000 0.2345400
## 20 1401 622 0 779 0.4439686 0.0000000000 0.5560314
## 21 1238 1039 0 199 0.8392569 0.0000000000 0.1607431
## 22 1265 1057 0 208 0.8355731 0.0000000000 0.1644269
## 23 1211 1067 0 144 0.8810900 0.0000000000 0.1189100
## 24 1415 684 0 731 0.4833922 0.0000000000 0.5166078
## 25 1242 1049 0 193 0.8446055 0.0000000000 0.1553945
## 26 1361 757 0 604 0.5562087 0.0000000000 0.4437913
## 27 1351 1026 0 325 0.7594375 0.0000000000 0.2405625
## 28 1259 906 0 353 0.7196187 0.0000000000 0.2803813
## 29 1338 858 0 480 0.6412556 0.0000000000 0.3587444
## 30 1343 967 0 376 0.7200298 0.0000000000 0.2799702
## 31 1201 1000 0 201 0.8326395 0.0000000000 0.1673605
## 32 1273 1037 0 236 0.8146112 0.0000000000 0.1853888
## 33 1357 1032 0 325 0.7605011 0.0000000000 0.2394989
## 34 1273 858 0 415 0.6739984 0.0000000000 0.3260016
## 35 1209 1058 0 151 0.8751034 0.0000000000 0.1248966
## 36 1354 950 0 404 0.7016248 0.0000000000 0.2983752
## 37 1196 989 0 207 0.8269231 0.0000000000 0.1730769
## 38 1309 644 0 665 0.4919786 0.0000000000 0.5080214
## 39 1351 852 0 499 0.6306440 0.0000000000 0.3693560
## 40 1250 691 0 559 0.5528000 0.0000000000 0.4472000
## 41 1329 789 0 540 0.5936795 0.0000000000 0.4063205
## 42 1284 1105 0 179 0.8605919 0.0000000000 0.1394081
## 43 1295 1061 0 234 0.8193050 0.0000000000 0.1806950
## 44 1299 955 0 344 0.7351809 0.0000000000 0.2648191
## 45 1209 1043 0 166 0.8626964 0.0000000000 0.1373036
## 46 1296 1032 0 264 0.7962963 0.0000000000 0.2037037
## 47 1472 571 1 900 0.3879076 0.0006793478 0.6114130
## 48 1374 980 0 394 0.7132460 0.0000000000 0.2867540
## 49 1330 921 0 409 0.6924812 0.0000000000 0.3075188
## 50 1342 888 0 454 0.6616990 0.0000000000 0.3383010
aaout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1078 465 0 613 0.4313544 0 0.5686456
## 2 843 545 0 298 0.6465006 0 0.3534994
## 3 812 554 0 258 0.6822660 0 0.3177340
## 4 1307 257 0 1050 0.1966335 0 0.8033665
## 5 760 484 0 276 0.6368421 0 0.3631579
## 6 801 343 0 458 0.4282147 0 0.5717853
## 7 1010 360 0 650 0.3564356 0 0.6435644
## 8 1151 263 0 888 0.2284970 0 0.7715030
## 9 1169 424 0 745 0.3627032 0 0.6372968
## 10 1049 460 0 589 0.4385129 0 0.5614871
## 11 930 385 0 545 0.4139785 0 0.5860215
## 12 909 417 0 492 0.4587459 0 0.5412541
## 13 1093 332 0 761 0.3037511 0 0.6962489
## 14 1402 347 0 1055 0.2475036 0 0.7524964
## 15 916 432 0 484 0.4716157 0 0.5283843
## 16 764 504 0 260 0.6596859 0 0.3403141
## 17 937 476 0 461 0.5080043 0 0.4919957
## 18 928 392 0 536 0.4224138 0 0.5775862
## 19 1183 311 0 872 0.2628910 0 0.7371090
## 20 1258 187 0 1071 0.1486486 0 0.8513514
## 21 824 549 0 275 0.6662621 0 0.3337379
## 22 993 419 0 574 0.4219537 0 0.5780463
## 23 867 518 0 349 0.5974625 0 0.4025375
## 24 1222 315 0 907 0.2577741 0 0.7422259
## 25 906 581 0 325 0.6412804 0 0.3587196
## 26 1060 378 0 682 0.3566038 0 0.6433962
## 27 1104 617 0 487 0.5588768 0 0.4411232
## 28 959 331 0 628 0.3451512 0 0.6548488
## 29 1059 295 0 764 0.2785647 0 0.7214353
## 30 956 475 0 481 0.4968619 0 0.5031381
## 31 887 315 0 572 0.3551297 0 0.6448703
## 32 881 612 0 269 0.6946652 0 0.3053348
## 33 1202 468 0 734 0.3893511 0 0.6106489
## 34 1223 252 0 971 0.2060507 0 0.7939493
## 35 876 537 0 339 0.6130137 0 0.3869863
## 36 865 513 0 352 0.5930636 0 0.4069364
## 37 929 356 0 573 0.3832078 0 0.6167922
## 38 1053 341 0 712 0.3238367 0 0.6761633
## 39 1086 297 0 789 0.2734807 0 0.7265193
## 40 1038 139 0 899 0.1339114 0 0.8660886
## 41 1085 245 0 840 0.2258065 0 0.7741935
## 42 900 586 0 314 0.6511111 0 0.3488889
## 43 949 549 0 400 0.5785037 0 0.4214963
## 44 1057 373 0 684 0.3528855 0 0.6471145
## 45 930 409 0 521 0.4397849 0 0.5602151
## 46 909 587 0 322 0.6457646 0 0.3542354
## 47 1309 265 0 1044 0.2024446 0 0.7975554
## 48 1105 496 0 609 0.4488688 0 0.5511312
## 49 1140 197 0 943 0.1728070 0 0.8271930
## 50 1074 391 0 683 0.3640596 0 0.6359404
almout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 169 51 0 118 0.3017751 0 0.6982249
## 2 154 83 0 71 0.5389610 0 0.4610390
## 3 166 72 0 94 0.4337349 0 0.5662651
## 4 168 72 0 96 0.4285714 0 0.5714286
## 5 175 59 0 116 0.3371429 0 0.6628571
## 6 218 53 0 165 0.2431193 0 0.7568807
## 7 166 80 0 86 0.4819277 0 0.5180723
## 8 161 74 0 87 0.4596273 0 0.5403727
## 9 147 64 0 83 0.4353741 0 0.5646259
## 10 190 91 0 99 0.4789474 0 0.5210526
## 11 244 57 0 187 0.2336066 0 0.7663934
## 12 177 69 0 108 0.3898305 0 0.6101695
## 13 148 66 0 82 0.4459459 0 0.5540541
## 14 165 41 0 124 0.2484848 0 0.7515152
## 15 172 61 0 111 0.3546512 0 0.6453488
## 16 173 71 0 102 0.4104046 0 0.5895954
## 17 139 69 0 70 0.4964029 0 0.5035971
## 18 175 45 0 130 0.2571429 0 0.7428571
## 19 149 93 0 56 0.6241611 0 0.3758389
## 20 141 72 0 69 0.5106383 0 0.4893617
## 21 128 80 0 48 0.6250000 0 0.3750000
## 22 149 61 0 88 0.4093960 0 0.5906040
## 23 143 65 0 78 0.4545455 0 0.5454545
## 24 156 61 0 95 0.3910256 0 0.6089744
## 25 137 63 0 74 0.4598540 0 0.5401460
## 26 156 68 0 88 0.4358974 0 0.5641026
## 27 131 62 0 69 0.4732824 0 0.5267176
## 28 141 66 0 75 0.4680851 0 0.5319149
## 29 179 54 0 125 0.3016760 0 0.6983240
## 30 144 70 0 74 0.4861111 0 0.5138889
## 31 128 62 0 66 0.4843750 0 0.5156250
## 32 184 72 0 112 0.3913043 0 0.6086957
## 33 180 49 0 131 0.2722222 0 0.7277778
## 34 177 54 0 123 0.3050847 0 0.6949153
## 35 231 46 0 185 0.1991342 0 0.8008658
## 36 156 46 0 110 0.2948718 0 0.7051282
## 37 173 74 0 99 0.4277457 0 0.5722543
## 38 159 81 0 78 0.5094340 0 0.4905660
## 39 163 63 0 100 0.3865031 0 0.6134969
## 40 143 75 0 68 0.5244755 0 0.4755245
## 41 132 61 0 71 0.4621212 0 0.5378788
## 42 148 73 0 75 0.4932432 0 0.5067568
## 43 183 68 0 115 0.3715847 0 0.6284153
## 44 151 53 0 98 0.3509934 0 0.6490066
## 45 140 81 0 59 0.5785714 0 0.4214286
## 46 154 72 0 82 0.4675325 0 0.5324675
## 47 150 52 0 98 0.3466667 0 0.6533333
## 48 171 70 0 101 0.4093567 0 0.5906433
## 49 171 71 0 100 0.4152047 0 0.5847953
## 50 151 67 0 84 0.4437086 0 0.5562914
res <- do.call(rbind,lapply(list(laout,alout,aaout,almout),colMeans))
rownames(res) <- c("la","al","aa","alm")
res <- as.data.frame(res)
res %>%
kbl(caption="overall summary") %>%
kable_paper("hover", full_width = F)
total | common | discordant | uncommon | p_comm | p_disc | p_uncom | |
---|---|---|---|---|---|---|---|
la | 1391.72 | 295.30 | 461.62 | 634.80 | 0.2101851 | 0.3104579 | 0.4793570 |
al | 1301.90 | 919.76 | 0.10 | 382.04 | 0.7132613 | 0.0000668 | 0.2866719 |
aa | 1014.96 | 406.88 | 0.00 | 608.08 | 0.4194747 | 0.0000000 | 0.5805253 |
alm | 162.12 | 65.66 | 0.00 | 96.46 | 0.4149891 | 0.0000000 | 0.5850109 |
apply(laout,2,mean)
## total common discordant uncommon p_comm p_disc
## 1391.7200000 295.3000000 461.6200000 634.8000000 0.2101851 0.3104579
## p_uncom
## 0.4793570
apply(alout,2,mean)
## total common discordant uncommon p_comm p_disc
## 1.301900e+03 9.197600e+02 1.000000e-01 3.820400e+02 7.132613e-01 6.677845e-05
## p_uncom
## 2.866719e-01
apply(aaout,2,mean)
## total common discordant uncommon p_comm p_disc
## 1014.9600000 406.8800000 0.0000000 608.0800000 0.4194747 0.0000000
## p_uncom
## 0.5805253
apply(almout,2,mean)
## total common discordant uncommon p_comm p_disc
## 162.1200000 65.6600000 0.0000000 96.4600000 0.4149891 0.0000000
## p_uncom
## 0.5850109
Plot some data.
Raw counts and proportions.
par(mfrow=c(1,3))
barplot(res$common,main="no. common",names.arg=c("LA","AL","AA","ALM"))
barplot(res$uncommon,main="no. uncommon",names.arg=c("LA","AL","AA","ALM"))
barplot(res$discordant,main="no. discordant",names.arg=c("LA","AL","AA","ALM"))
boxplot(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common,"ALM"=almout$common),
main="no. common",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common,"ALM"=almout$common), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon,"ALM"=almout$uncommon),
main="no. uncommon",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon,"ALM"=almout$uncommon), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant,"ALM"=almout$discordant),
main="no. discordant",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant,"ALM"=almout$discordant), add=TRUE,cex=1.2,pch=19)
barplot(res$p_comm,main="proportion common",names.arg=c("LA","AL","AA","ALM"))
barplot(res$p_uncom,main="proportion uncommon",names.arg=c("LA","AL","AA","ALM"))
barplot(res$p_disc,main="proportion discordant",names.arg=c("LA","AL","AA","ALM"))
boxplot(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm,"ALM"=almout$p_comm),
main="proportion common",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm,"ALM"=almout$p_comm), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom,"ALM"=almout$p_uncom),
main="proportion uncommon",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom,"ALM"=almout$p_uncom), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc,"ALM"=almout$p_disc),
main="proportion discordant",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc,"ALM"=almout$p_disc), add=TRUE,cex=1.2,pch=19)
save.image("GSE158422_split.Rdata")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] beeswarm_0.4.0
## [2] kableExtra_1.3.4
## [3] mitch_1.12.0
## [4] tictoc_1.2
## [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [6] IlluminaHumanMethylation450kmanifest_0.4.0
## [7] minfi_1.46.0
## [8] bumphunter_1.42.0
## [9] locfit_1.5-9.8
## [10] iterators_1.0.14
## [11] foreach_1.5.2
## [12] Biostrings_2.68.1
## [13] XVector_0.40.0
## [14] SummarizedExperiment_1.30.2
## [15] Biobase_2.60.0
## [16] MatrixGenerics_1.12.3
## [17] matrixStats_1.0.0
## [18] GenomicRanges_1.52.0
## [19] GenomeInfoDb_1.36.3
## [20] IRanges_2.34.1
## [21] S4Vectors_0.38.2
## [22] BiocGenerics_0.46.0
## [23] eulerr_7.0.0
## [24] limma_3.56.2
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.1 later_1.3.1
## [3] BiocIO_1.10.0 bitops_1.0-7
## [5] filelock_1.0.2 tibble_3.2.1
## [7] preprocessCore_1.62.1 XML_3.99-0.14
## [9] lifecycle_1.0.3 lattice_0.21-9
## [11] MASS_7.3-60 base64_2.0.1
## [13] scrime_1.3.5 magrittr_2.0.3
## [15] sass_0.4.7 rmarkdown_2.25
## [17] jquerylib_0.1.4 yaml_2.3.7
## [19] httpuv_1.6.11 doRNG_1.8.6
## [21] askpass_1.2.0 DBI_1.1.3
## [23] RColorBrewer_1.1-3 abind_1.4-5
## [25] zlibbioc_1.46.0 rvest_1.0.3
## [27] quadprog_1.5-8 purrr_1.0.2
## [29] RCurl_1.98-1.12 rappdirs_0.3.3
## [31] GenomeInfoDbData_1.2.10 genefilter_1.82.1
## [33] annotate_1.78.0 svglite_2.1.1
## [35] DelayedMatrixStats_1.22.6 codetools_0.2-19
## [37] DelayedArray_0.26.7 xml2_1.3.5
## [39] tidyselect_1.2.0 beanplot_1.3.1
## [41] BiocFileCache_2.8.0 webshot_0.5.5
## [43] illuminaio_0.42.0 GenomicAlignments_1.36.0
## [45] jsonlite_1.8.7 multtest_2.56.0
## [47] ellipsis_0.3.2 survival_3.5-7
## [49] systemfonts_1.0.4 tools_4.3.1
## [51] progress_1.2.2 Rcpp_1.0.11
## [53] glue_1.6.2 gridExtra_2.3
## [55] xfun_0.40 dplyr_1.1.3
## [57] HDF5Array_1.28.1 fastmap_1.1.1
## [59] GGally_2.1.2 rhdf5filters_1.12.1
## [61] fansi_1.0.4 openssl_2.1.1
## [63] caTools_1.18.2 digest_0.6.33
## [65] R6_2.5.1 mime_0.12
## [67] colorspace_2.1-0 gtools_3.9.4
## [69] biomaRt_2.56.1 RSQLite_2.3.1
## [71] utf8_1.2.3 tidyr_1.3.0
## [73] generics_0.1.3 data.table_1.14.8
## [75] rtracklayer_1.60.1 prettyunits_1.2.0
## [77] httr_1.4.7 htmlwidgets_1.6.2
## [79] S4Arrays_1.0.6 pkgconfig_2.0.3
## [81] gtable_0.3.4 blob_1.2.4
## [83] siggenes_1.74.0 htmltools_0.5.6
## [85] echarts4r_0.4.5 scales_1.2.1
## [87] png_0.1-8 knitr_1.44
## [89] rstudioapi_0.15.0 reshape2_1.4.4
## [91] tzdb_0.4.0 rjson_0.2.21
## [93] nlme_3.1-163 curl_5.0.2
## [95] cachem_1.0.8 rhdf5_2.44.0
## [97] stringr_1.5.0 KernSmooth_2.23-22
## [99] AnnotationDbi_1.62.2 restfulr_0.0.15
## [101] GEOquery_2.68.0 pillar_1.9.0
## [103] grid_4.3.1 reshape_0.8.9
## [105] vctrs_0.6.3 gplots_3.1.3
## [107] promises_1.2.1 dbplyr_2.3.4
## [109] xtable_1.8-4 evaluate_0.22
## [111] readr_2.1.4 GenomicFeatures_1.52.2
## [113] cli_3.6.1 compiler_4.3.1
## [115] Rsamtools_2.16.0 rlang_1.1.1
## [117] crayon_1.5.2 rngtools_1.5.2
## [119] nor1mix_1.3-0 mclust_6.0.0
## [121] plyr_1.8.8 stringi_1.7.12
## [123] viridisLite_0.4.2 BiocParallel_1.34.2
## [125] munsell_0.5.0 Matrix_1.6-1.1
## [127] hms_1.1.3 sparseMatrixStats_1.12.2
## [129] bit64_4.0.5 ggplot2_3.4.3
## [131] Rhdf5lib_1.22.1 KEGGREST_1.40.1
## [133] shiny_1.7.5 highr_0.10
## [135] memoise_2.0.1 bslib_0.5.1
## [137] bit_4.0.5