Here we are splitting the dataset and testing concordance between LA and AL approaches.
suppressPackageStartupMessages({
library("limma")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("tictoc")
library("mitch")
library("kableExtra")
library("beeswarm")
})
CORES=8
annotations
probe sets
gene sets
design matrix
mval matrix
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")
boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
main="mean probe methylation mval")
Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.
gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")
gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")
There are three approaches. First, limma can be done first, followed by aggregation by gene and then enrichment test (called LA). Secondly, probe values can be aggregated by gene, followed by limma, followed by enrichment test (AL). Third, probe values can be aggregated to gene level, and then aggregated to gene set level followed by limma test (AA).
This process runs limma first and then aggregates the results before doing an enrichment test.
# limma
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
dm <- topTable(fit.reduced,coef=4, number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
# aggregate
agg <- function(dm,cores=1) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a["t"],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=cores)
df <- do.call(rbind,l)
keep <- names(which(table(df$genes)>1))
df <- df[df$genes %in% keep,]
gn <- unique(df$genes)
gme_res <- lapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
if ( length(tstats) > 2 ) {
ttest <- t.test(tstats)
pval <- ttest$p.value
} else {
pval = 1
}
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
"median"=mymedian, pval=pval)
} )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,4])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$fdr <- p.adjust(gme_res_df$pval)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
# enrich
ttenrich <- function(m,genesets,cores=1) {
res <- mclapply( 1:length(genesets), function(i) {
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- median(tstats)
wt <- t.test(tstats)
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
Functions for aggregate-limma-enrich approach.
# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
myann$gnl <- gnl
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
mymed <- function(g) {
probes <- rownames(myann[grep(g,myann$gnl),])
rows <- which(rownames(mx) %in% probes)
if ( length(rows) > 1 ) {
b <- mx[rows,]
med <- apply(b,2,median)
med <- matrix(med,nrow=1)
colnames(med) <- colnames(b)
rownames(med) <- g
return(med)
}
}
med <- mclapply(gn,mymed,mc.cores=cores)
med <- med[lapply(med,length)>0]
medf <- do.call(rbind,med)
return(medf)
}
chragg <- function(mval,myann,cores=1){
annodf <- as.data.frame(anno)
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
chrs <- unique(anno$chr)
myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
chrs <- chrs[order(-myorder)]
leadercores <- floor(sqrt(cores))
workercores <- ceiling(sqrt(cores))
chrmedf <- mclapply(chrs,function(chr) {
chrfrag <- annodf[annodf$chr==chr,]
chrprobes <-rownames(chrfrag)
chrmx <- mx[rownames(mx) %in% chrprobes,]
chranno <- myann[rownames(myann) %in% chrprobes,]
chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
return(chrmedf)
},mc.cores=leadercores)
medf <- do.call(rbind, chrmedf)
return(medf)
}
agglimma <- function(medf,design) {
fit.reduced <- lmFit(medf,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
nondup <- !duplicated(dmagg$ID)
dmagg <- dmagg[nondup,]
rownames(dmagg) <- dmagg$ID
dmagg$ID = NULL
return(dmagg)
}
gsagg <- function(x,genesets,cores=1) {
meds <- mclapply(1:length(genesets), function(i) {
gs = genesets[[i]]
xx <- x[rownames(x) %in% gs,]
med <- apply(xx,2,median)
},mc.cores=cores)
mymed <- do.call(rbind,meds)
rownames(mymed) <- names(genesets)
as.data.frame(mymed)
}
aalimma <- function(agag,design) {
fit.reduced <- lmFit(agag,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
return(dmagg)
}
aal <- function(mval,myann,genesets,design,cores=1) {
medf <- chragg(mval,myann,cores=CORES)
agag <- gsagg(x=medf,genesets=gs_symbols,cores=CORES)
aalres <- aalimma(agag=agag,design=design)
return(aalres)
}
#aalres <- aal(mval=mval,myann=myann,genesets=gs_symbols,design=design,cores=CORES)
# prep
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
rownames(design) <- colnames(mval)
split 74 samples 37 patients split into 18 + 18 Conduct LA workflow analysis. Then make overlap.
splitanalyze_la <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
# la1
dm1 <- runlimma(mval_a,design_a,myann)
dmagg1 <- agg(dm1,cores=floor(CORES/3))
m1 <- dmagg1$gme_res_df[,"median",drop=FALSE]
lares1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)
# la2
dm2 <- runlimma(mval_b,design_b,myann)
dmagg2 <- agg(dm2,cores=floor(CORES/3))
m2 <- dmagg2$gme_res_df[,"median",drop=FALSE]
lares2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)
list("lares1"=lares1,"lares2"=lares2)
}
lares <- mclapply( seq(100,5000,100) ,splitanalyze_la,mc.cores=CORES)
splitanalyze_al <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
#al
medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
magg1 <- agglimma(medf1,design_a)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
alres1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)
#al
medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
magg2 <- agglimma(medf2,design_b)
m2 <- as.data.frame(magg2$t)
rownames(m2) <- rownames(magg2)
colnames(m2) <- "t"
alres2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)
list("alres1"=alres1,"alres2"=alres2)
}
alres <- mclapply( seq(100,5000,100) ,splitanalyze_al,mc.cores=CORES)
splitanalyze_aa <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
#aa1
aares1 <- aal(mval=mval_a,myann=myann,genesets=gs_symbols,
design=design_a,cores=floor(CORES/3))
#aa2
aares2 <- aal(mval=mval_b,myann=myann,genesets=gs_symbols,
design=design_b,cores=floor(CORES/3))
list("aares1"=aares1,"aares2"=aares2)
}
aares <- mclapply( seq(100,5000,100) ,splitanalyze_aa,mc.cores=CORES)
lacompare <- function(lares){
lares1 <- lares[[1]]
lares2 <- lares[[2]]
lasig1 <- subset(lares1,fdr<0.05)
lasig2 <- subset(lares2,fdr<0.05)
lasig1_up <- rownames(subset(lasig1,t_median>0))
lasig1_dn <- rownames(subset(lasig1,t_median<0))
lasig2_up <- rownames(subset(lasig2,t_median>0))
lasig2_dn <- rownames(subset(lasig2,t_median<0))
total <- length(unique(c(lasig1_up,lasig2_up,lasig1_dn,lasig2_dn)))
common <- length(intersect(lasig1_up, lasig2_up)) + length(intersect(lasig1_dn, lasig2_dn))
discordant <- length(intersect(lasig1_up, lasig2_dn)) + length(intersect(lasig1_dn, lasig2_up))
uncommon = total - common - discordant
laout <- data.frame(total,common,discordant,uncommon)
laout$p_comm <-laout$common / laout$total
laout$p_disc <-laout$discordant / laout$total
laout$p_uncom <-laout$uncommon / laout$total
laout
}
alcompare <-function(alres){
alres1 <- alres[[1]]
alres2 <- alres[[2]]
alsig1 <- subset(alres1,fdr<0.05)
alsig2 <- subset(alres2,fdr<0.05)
alsig1_up <- rownames(subset(alsig1,t_median>0))
alsig1_dn <- rownames(subset(alsig1,t_median<0))
alsig2_up <- rownames(subset(alsig2,t_median>0))
alsig2_dn <- rownames(subset(alsig2,t_median<0))
total <- length(unique(c(alsig1_up,alsig2_up,alsig1_dn,alsig2_dn)))
common <- length(intersect(alsig1_up, alsig2_up)) + length(intersect(alsig1_dn, alsig2_dn))
discordant <- length(intersect(alsig1_up, alsig2_dn)) + length(intersect(alsig1_dn, alsig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
alout <- data.frame(total,common,discordant,uncommon)
alout$p_comm <-alout$common / alout$total
alout$p_disc <-alout$discordant / alout$total
alout$p_uncom <-alout$uncommon / alout$total
alout
}
aacompare <-function(aa){
aa1 <- aa[[1]]
aa2 <- aa[[2]]
aasig1 <- subset(aa1,adj.P.Val<0.05)
aasig2 <- subset(aa2,adj.P.Val<0.05)
aasig1_up <- rownames(subset(aasig1,logFC>0))
aasig1_dn <- rownames(subset(aasig1,logFC<0))
aasig2_up <- rownames(subset(aasig2,logFC>0))
aasig2_dn <- rownames(subset(aasig2,logFC<0))
total <- length(unique(c(aasig1_up,aasig2_up,aasig1_dn,aasig2_dn)))
common <- length(intersect(aasig1_up, aasig2_up)) + length(intersect(aasig1_dn, aasig2_dn))
discordant <- length(intersect(aasig1_up, aasig2_dn)) + length(intersect(aasig1_dn, aasig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
aaout <- data.frame(total,common,discordant,uncommon)
aaout$p_comm <- aaout$common / aaout$total
aaout$p_disc <- aaout$discordant / aaout$total
aaout$p_uncom <-aaout$uncommon / aaout$total
aaout
}
laout <- do.call(rbind,lapply(lares,lacompare))
alout <- do.call(rbind,lapply(alres,alcompare))
aaout <- do.call(rbind,lapply(aares,aacompare))
laout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1420 908 2 510 0.6394366197 0.0014084507 0.35915493
## 2 1613 1423 0 190 0.8822070676 0.0000000000 0.11779293
## 3 920 461 8 451 0.5010869565 0.0086956522 0.49021739
## 4 921 475 7 439 0.5157437568 0.0076004343 0.47665581
## 5 1434 370 0 1064 0.2580195258 0.0000000000 0.74198047
## 6 1452 577 0 875 0.3973829201 0.0000000000 0.60261708
## 7 1582 0 1306 276 0.0000000000 0.8255372946 0.17446271
## 8 1393 1 720 672 0.0007178751 0.5168700646 0.48241206
## 9 917 460 8 449 0.5016357688 0.0087241003 0.48964013
## 10 1594 1462 0 132 0.9171894605 0.0000000000 0.08281054
## 11 1558 1 517 1040 0.0006418485 0.3318356868 0.66752246
## 12 1591 1 809 781 0.0006285355 0.5084852294 0.49088624
## 13 1614 81 73 1460 0.0501858736 0.0452292441 0.90458488
## 14 1558 1 699 858 0.0006418485 0.4486521181 0.55070603
## 15 1571 1220 0 351 0.7765754297 0.0000000000 0.22342457
## 16 1311 1 659 651 0.0007627765 0.5026697178 0.49656751
## 17 1555 137 1 1417 0.0881028939 0.0006430868 0.91125402
## 18 1585 7 748 830 0.0044164038 0.4719242902 0.52365931
## 19 1436 371 0 1065 0.2583565460 0.0000000000 0.74164345
## 20 1581 0 1294 287 0.0000000000 0.8184693232 0.18153068
## 21 1586 9 743 834 0.0056746532 0.4684741488 0.52585120
## 22 1616 1507 0 109 0.9325495050 0.0000000000 0.06745050
## 23 702 12 53 637 0.0170940171 0.0754985755 0.90740741
## 24 914 453 8 453 0.4956236324 0.0087527352 0.49562363
## 25 1586 0 1289 297 0.0000000000 0.8127364439 0.18726356
## 26 1556 592 0 964 0.3804627249 0.0000000000 0.61953728
## 27 1431 371 0 1060 0.2592592593 0.0000000000 0.74074074
## 28 1573 0 1256 317 0.0000000000 0.7984742530 0.20152575
## 29 1554 0 1217 337 0.0000000000 0.7831402831 0.21685972
## 30 1550 28 610 912 0.0180645161 0.3935483871 0.58838710
## 31 1560 0 1212 348 0.0000000000 0.7769230769 0.22307692
## 32 1593 1443 0 150 0.9058380414 0.0000000000 0.09416196
## 33 1548 135 1 1412 0.0872093023 0.0006459948 0.91214470
## 34 842 28 78 736 0.0332541568 0.0926365796 0.87410926
## 35 1028 3 100 925 0.0029182879 0.0972762646 0.89980545
## 36 1090 6 566 518 0.0055045872 0.5192660550 0.47522936
## 37 1507 562 0 945 0.3729263437 0.0000000000 0.62707366
## 38 1617 27 613 977 0.0166975881 0.3790970934 0.60420532
## 39 1507 2 685 820 0.0013271400 0.4545454545 0.54412741
## 40 1560 0 1213 347 0.0000000000 0.7775641026 0.22243590
## 41 1553 26 611 916 0.0167417901 0.3934320670 0.58982614
## 42 1583 0 1444 139 0.0000000000 0.9121920404 0.08780796
## 43 1546 98 139 1309 0.0633893920 0.0899094437 0.84670116
## 44 1623 0 1490 133 0.0000000000 0.9180529883 0.08194701
## 45 1399 0 961 438 0.0000000000 0.6869192280 0.31308077
## 46 1090 10 543 537 0.0091743119 0.4981651376 0.49266055
## 47 905 1 372 532 0.0011049724 0.4110497238 0.58784530
## 48 1576 1151 1 424 0.7303299492 0.0006345178 0.26903553
## 49 1502 2 686 814 0.0013315579 0.4567243675 0.54194407
## 50 805 3 84 718 0.0037267081 0.1043478261 0.89192547
alout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1049 367 0 682 0.3498570 0.0000000000 0.6501430
## 2 811 625 0 186 0.7706535 0.0000000000 0.2293465
## 3 865 598 1 266 0.6913295 0.0011560694 0.3075145
## 4 1147 295 0 852 0.2571927 0.0000000000 0.7428073
## 5 859 487 0 372 0.5669383 0.0000000000 0.4330617
## 6 853 570 0 283 0.6682298 0.0000000000 0.3317702
## 7 885 454 0 431 0.5129944 0.0000000000 0.4870056
## 8 1054 189 1 864 0.1793169 0.0009487666 0.8197343
## 9 904 589 0 315 0.6515487 0.0000000000 0.3484513
## 10 1059 353 0 706 0.3333333 0.0000000000 0.6666667
## 11 859 469 0 390 0.5459837 0.0000000000 0.4540163
## 12 986 493 0 493 0.5000000 0.0000000000 0.5000000
## 13 891 653 0 238 0.7328844 0.0000000000 0.2671156
## 14 1271 135 2 1134 0.1062156 0.0015735641 0.8922109
## 15 839 677 0 162 0.8069130 0.0000000000 0.1930870
## 16 865 539 0 326 0.6231214 0.0000000000 0.3768786
## 17 839 659 0 180 0.7854589 0.0000000000 0.2145411
## 18 984 557 0 427 0.5660569 0.0000000000 0.4339431
## 19 1023 514 0 509 0.5024438 0.0000000000 0.4975562
## 20 1064 219 0 845 0.2058271 0.0000000000 0.7941729
## 21 916 581 0 335 0.6342795 0.0000000000 0.3657205
## 22 896 639 0 257 0.7131696 0.0000000000 0.2868304
## 23 875 592 0 283 0.6765714 0.0000000000 0.3234286
## 24 1101 250 0 851 0.2270663 0.0000000000 0.7729337
## 25 865 623 0 242 0.7202312 0.0000000000 0.2797688
## 26 1007 295 0 712 0.2929494 0.0000000000 0.7070506
## 27 1025 597 4 424 0.5824390 0.0039024390 0.4136585
## 28 903 427 0 476 0.4728682 0.0000000000 0.5271318
## 29 964 337 1 626 0.3495851 0.0010373444 0.6493776
## 30 953 502 0 451 0.5267576 0.0000000000 0.4732424
## 31 796 573 0 223 0.7198492 0.0000000000 0.2801508
## 32 956 569 1 386 0.5951883 0.0010460251 0.4037657
## 33 980 642 0 338 0.6551020 0.0000000000 0.3448980
## 34 887 465 0 422 0.5242390 0.0000000000 0.4757610
## 35 844 630 0 214 0.7464455 0.0000000000 0.2535545
## 36 959 533 0 426 0.5557873 0.0000000000 0.4442127
## 37 842 462 0 380 0.5486936 0.0000000000 0.4513064
## 38 914 282 0 632 0.3085339 0.0000000000 0.6914661
## 39 966 407 0 559 0.4213251 0.0000000000 0.5786749
## 40 896 290 0 606 0.3236607 0.0000000000 0.6763393
## 41 937 410 0 527 0.4375667 0.0000000000 0.5624333
## 42 920 703 0 217 0.7641304 0.0000000000 0.2358696
## 43 887 671 0 216 0.7564825 0.0000000000 0.2435175
## 44 935 445 0 490 0.4759358 0.0000000000 0.5240642
## 45 825 653 1 171 0.7915152 0.0012121212 0.2072727
## 46 987 518 1 468 0.5248227 0.0010131712 0.4741641
## 47 1223 161 1 1061 0.1316435 0.0008176615 0.8675388
## 48 1056 532 0 524 0.5037879 0.0000000000 0.4962121
## 49 931 536 0 395 0.5757250 0.0000000000 0.4242750
## 50 962 468 0 494 0.4864865 0.0000000000 0.5135135
aaout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 996 527 0 469 0.5291165 0.000000000 0.4708835
## 2 888 602 0 286 0.6779279 0.000000000 0.3220721
## 3 897 593 0 304 0.6610925 0.000000000 0.3389075
## 4 1151 365 0 786 0.3171156 0.000000000 0.6828844
## 5 818 490 0 328 0.5990220 0.000000000 0.4009780
## 6 904 426 0 478 0.4712389 0.000000000 0.5287611
## 7 960 446 0 514 0.4645833 0.000000000 0.5354167
## 8 1123 332 0 791 0.2956367 0.000000000 0.7043633
## 9 957 573 1 383 0.5987461 0.001044932 0.4002090
## 10 986 564 0 422 0.5720081 0.000000000 0.4279919
## 11 1003 427 0 576 0.4257228 0.000000000 0.5742772
## 12 1001 454 0 547 0.4535465 0.000000000 0.5464535
## 13 923 454 0 469 0.4918743 0.000000000 0.5081257
## 14 1208 453 3 752 0.3750000 0.002483444 0.6225166
## 15 840 551 0 289 0.6559524 0.000000000 0.3440476
## 16 853 574 0 279 0.6729191 0.000000000 0.3270809
## 17 921 600 0 321 0.6514658 0.000000000 0.3485342
## 18 915 476 0 439 0.5202186 0.000000000 0.4797814
## 19 1042 426 0 616 0.4088292 0.000000000 0.5911708
## 20 1101 253 0 848 0.2297911 0.000000000 0.7702089
## 21 969 556 0 413 0.5737874 0.000000000 0.4262126
## 22 912 539 0 373 0.5910088 0.000000000 0.4089912
## 23 877 598 0 279 0.6818700 0.000000000 0.3181300
## 24 1121 452 0 669 0.4032114 0.000000000 0.5967886
## 25 951 610 0 341 0.6414301 0.000000000 0.3585699
## 26 985 455 0 530 0.4619289 0.000000000 0.5380711
## 27 934 637 0 297 0.6820128 0.000000000 0.3179872
## 28 898 429 0 469 0.4777283 0.000000000 0.5222717
## 29 1069 304 0 765 0.2843779 0.000000000 0.7156221
## 30 973 492 0 481 0.5056526 0.000000000 0.4943474
## 31 908 380 0 528 0.4185022 0.000000000 0.5814978
## 32 974 636 0 338 0.6529774 0.000000000 0.3470226
## 33 963 599 0 364 0.6220145 0.000000000 0.3779855
## 34 989 382 0 607 0.3862487 0.000000000 0.6137513
## 35 972 556 0 416 0.5720165 0.000000000 0.4279835
## 36 881 487 0 394 0.5527809 0.000000000 0.4472191
## 37 908 422 0 486 0.4647577 0.000000000 0.5352423
## 38 965 406 0 559 0.4207254 0.000000000 0.5792746
## 39 985 400 0 585 0.4060914 0.000000000 0.5939086
## 40 1036 179 0 857 0.1727799 0.000000000 0.8272201
## 41 982 333 0 649 0.3391039 0.000000000 0.6608961
## 42 938 623 0 315 0.6641791 0.000000000 0.3358209
## 43 950 588 0 362 0.6189474 0.000000000 0.3810526
## 44 949 450 0 499 0.4741834 0.000000000 0.5258166
## 45 873 497 0 376 0.5693013 0.000000000 0.4306987
## 46 1049 553 0 496 0.5271687 0.000000000 0.4728313
## 47 1097 389 0 708 0.3546035 0.000000000 0.6453965
## 48 1004 609 0 395 0.6065737 0.000000000 0.3934263
## 49 1013 318 0 695 0.3139191 0.000000000 0.6860809
## 50 950 554 0 396 0.5831579 0.000000000 0.4168421
res <- do.call(rbind,lapply(list(laout,alout,aaout),colMeans))
rownames(res) <- c("la","al","aa")
res <- as.data.frame(res)
res %>%
kbl(caption="overall summary") %>%
kable_paper("hover", full_width = F)
total | common | discordant | uncommon | p_comm | p_disc | p_uncom | |
---|---|---|---|---|---|---|---|
la | 1402.16 | 288.52 | 456.52 | 657.12 | 0.2030787 | 0.3081350 | 0.4887863 |
al | 946.30 | 484.70 | 0.26 | 461.34 | 0.5279827 | 0.0002541 | 0.4717631 |
aa | 971.24 | 480.38 | 0.08 | 490.78 | 0.5018970 | 0.0000706 | 0.4980325 |
apply(laout,2,median)
## total common discordant uncommon p_comm p_disc
## 1.551500e+03 1.900000e+01 2.555000e+02 6.440000e+02 1.671969e-02 2.180918e-01
## p_uncom
## 4.960956e-01
apply(alout,2,median)
## total common discordant uncommon p_comm p_disc
## 925.5000000 516.0000000 0.0000000 425.0000000 0.5473386 0.0000000
## p_uncom
## 0.4526614
apply(aaout,2,median)
## total common discordant uncommon p_comm p_disc
## 964.0000000 481.5000000 0.0000000 469.0000000 0.5129356 0.0000000
## p_uncom
## 0.4870644
Plot some data.
Raw counts and proportions.
par(mfrow=c(1,3))
barplot(res$common,main="no. common",names.arg=c("LA","AL","AA"))
barplot(res$uncommon,main="no. uncommon",names.arg=c("LA","AL","AA"))
barplot(res$discordant,main="no. discordant",names.arg=c("LA","AL","AA"))
boxplot(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common),
main="no. common",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon),
main="no. uncommon",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant),
main="no. discordant",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant), add=TRUE,cex=1.2,pch=19)
barplot(res$p_comm,main="proportion common",names.arg=c("LA","AL","AA"))
barplot(res$p_uncom,main="proportion uncommon",names.arg=c("LA","AL","AA"))
barplot(res$p_disc,main="proportion discordant",names.arg=c("LA","AL","AA"))
boxplot(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm),
main="proportion common",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom),
main="proportion uncommon",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc),
main="proportion discordant",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc), add=TRUE,cex=1.2,pch=19)
save.image("GSE158422_split.Rdata")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] beeswarm_0.4.0
## [2] kableExtra_1.3.4
## [3] mitch_1.12.0
## [4] tictoc_1.2
## [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [6] IlluminaHumanMethylation450kmanifest_0.4.0
## [7] minfi_1.46.0
## [8] bumphunter_1.42.0
## [9] locfit_1.5-9.8
## [10] iterators_1.0.14
## [11] foreach_1.5.2
## [12] Biostrings_2.68.1
## [13] XVector_0.40.0
## [14] SummarizedExperiment_1.30.2
## [15] Biobase_2.60.0
## [16] MatrixGenerics_1.12.3
## [17] matrixStats_1.0.0
## [18] GenomicRanges_1.52.0
## [19] GenomeInfoDb_1.36.3
## [20] IRanges_2.34.1
## [21] S4Vectors_0.38.2
## [22] BiocGenerics_0.46.0
## [23] eulerr_7.0.0
## [24] limma_3.56.2
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.1 later_1.3.1
## [3] BiocIO_1.10.0 bitops_1.0-7
## [5] filelock_1.0.2 tibble_3.2.1
## [7] preprocessCore_1.62.1 XML_3.99-0.14
## [9] lifecycle_1.0.3 lattice_0.21-9
## [11] MASS_7.3-60 base64_2.0.1
## [13] scrime_1.3.5 magrittr_2.0.3
## [15] sass_0.4.7 rmarkdown_2.25
## [17] jquerylib_0.1.4 yaml_2.3.7
## [19] httpuv_1.6.11 doRNG_1.8.6
## [21] askpass_1.2.0 DBI_1.1.3
## [23] RColorBrewer_1.1-3 abind_1.4-5
## [25] zlibbioc_1.46.0 rvest_1.0.3
## [27] quadprog_1.5-8 purrr_1.0.2
## [29] RCurl_1.98-1.12 rappdirs_0.3.3
## [31] GenomeInfoDbData_1.2.10 genefilter_1.82.1
## [33] annotate_1.78.0 svglite_2.1.1
## [35] DelayedMatrixStats_1.22.6 codetools_0.2-19
## [37] DelayedArray_0.26.7 xml2_1.3.5
## [39] tidyselect_1.2.0 beanplot_1.3.1
## [41] BiocFileCache_2.8.0 webshot_0.5.5
## [43] illuminaio_0.42.0 GenomicAlignments_1.36.0
## [45] jsonlite_1.8.7 multtest_2.56.0
## [47] ellipsis_0.3.2 survival_3.5-7
## [49] systemfonts_1.0.4 tools_4.3.1
## [51] progress_1.2.2 Rcpp_1.0.11
## [53] glue_1.6.2 gridExtra_2.3
## [55] xfun_0.40 dplyr_1.1.3
## [57] HDF5Array_1.28.1 fastmap_1.1.1
## [59] GGally_2.1.2 rhdf5filters_1.12.1
## [61] fansi_1.0.4 openssl_2.1.1
## [63] caTools_1.18.2 digest_0.6.33
## [65] R6_2.5.1 mime_0.12
## [67] colorspace_2.1-0 gtools_3.9.4
## [69] biomaRt_2.56.1 RSQLite_2.3.1
## [71] utf8_1.2.3 tidyr_1.3.0
## [73] generics_0.1.3 data.table_1.14.8
## [75] rtracklayer_1.60.1 prettyunits_1.2.0
## [77] httr_1.4.7 htmlwidgets_1.6.2
## [79] S4Arrays_1.0.6 pkgconfig_2.0.3
## [81] gtable_0.3.4 blob_1.2.4
## [83] siggenes_1.74.0 htmltools_0.5.6
## [85] echarts4r_0.4.5 scales_1.2.1
## [87] png_0.1-8 knitr_1.44
## [89] rstudioapi_0.15.0 reshape2_1.4.4
## [91] tzdb_0.4.0 rjson_0.2.21
## [93] nlme_3.1-163 curl_5.0.2
## [95] cachem_1.0.8 rhdf5_2.44.0
## [97] stringr_1.5.0 KernSmooth_2.23-22
## [99] AnnotationDbi_1.62.2 restfulr_0.0.15
## [101] GEOquery_2.68.0 pillar_1.9.0
## [103] grid_4.3.1 reshape_0.8.9
## [105] vctrs_0.6.3 gplots_3.1.3
## [107] promises_1.2.1 dbplyr_2.3.4
## [109] xtable_1.8-4 evaluate_0.22
## [111] readr_2.1.4 GenomicFeatures_1.52.2
## [113] cli_3.6.1 compiler_4.3.1
## [115] Rsamtools_2.16.0 rlang_1.1.1
## [117] crayon_1.5.2 rngtools_1.5.2
## [119] nor1mix_1.3-0 mclust_6.0.0
## [121] plyr_1.8.8 stringi_1.7.12
## [123] viridisLite_0.4.2 BiocParallel_1.34.2
## [125] munsell_0.5.0 Matrix_1.6-1.1
## [127] hms_1.1.3 sparseMatrixStats_1.12.2
## [129] bit64_4.0.5 ggplot2_3.4.3
## [131] Rhdf5lib_1.22.1 KEGGREST_1.40.1
## [133] shiny_1.7.5 highr_0.10
## [135] memoise_2.0.1 bslib_0.5.1
## [137] bit_4.0.5