Here we are splitting the dataset and testing concordance between LA and AL approaches.
suppressPackageStartupMessages({
library("limma")
library("eulerr")
library("IlluminaHumanMethylation450kmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
library("tictoc")
library("mitch")
library("kableExtra")
library("beeswarm")
})
CORES=8
annotations
probe sets
gene sets
design matrix
mval matrix
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
gp <- myann[,"UCSC_RefGene_Name",drop=FALSE]
gp2 <- strsplit(gp$UCSC_RefGene_Name,";")
names(gp2) <- rownames(gp)
sets <- split(rep(names(gp2), lengths(gp2)), unlist(gp2))
summary(unlist(lapply(sets,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 24.00 49.68 55.00 6778.00
genesets <- gmt_import("https://ziemann-lab.net/public/gmea_prototype/ReactomePathways.gmt")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_design.rds", "GSE158422_design.rds")
}
design <- readRDS("GSE158422_design.rds")
if (!file.exists("GSE158422_design.rds")) {
download.file("https://ziemann-lab.net/public/gmea_prototype/GSE158422_mx.rds","GSE158422_mx.rds")
}
mval <- readRDS("GSE158422_mx.rds")
boxplot(list("normal"=matrix(colMeans(mval),ncol=2)[,2],"tumor"=matrix(colMeans(mval),ncol=2)[,1]),
main="mean probe methylation mval")
Reactome pathways were downloaded on the 14th Sept 2023 from MsigDB.
gs_entrez <- gmt_import("c2.cp.reactome.v2023.1.Hs.entrez.gmt")
gs_symbols <- gmt_import("c2.cp.reactome.v2023.1.Hs.symbols.gmt")
There are three approaches. First, limma can be done first, followed by aggregation by gene and then enrichment test (called LA). Secondly, probe values can be aggregated by gene, followed by limma, followed by enrichment test (AL). Third, probe values can be aggregated to gene level, and then aggregated to gene set level followed by limma test (AA).
This process runs limma first and then aggregates the results before doing an enrichment test.
# limma
runlimma <- function(mval,design,myann) {
fit.reduced <- lmFit(mval,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
dm <- topTable(fit.reduced,coef=4, number = Inf)
dm <- merge(myann,dm,by=0)
dm <- dm[order(dm$P.Value),]
rownames(dm) <- dm$Row.names
dm$Row.names=NULL
return(dm)
}
# aggregate
agg <- function(dm,cores=1) {
gn <- unique(unlist(strsplit( dm$UCSC_RefGene_Name ,";")))
gnl <- strsplit( dm$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
dm$UCSC_RefGene_Name <- gnl
l <- mclapply(1:nrow(dm), function(i) {
a <- dm[i,]
len <- length(a[[1]][[1]])
tvals <- as.numeric(rep(a["t"],len))
genes <- a[[1]][[1]]
data.frame(genes,tvals)
},mc.cores=cores)
df <- do.call(rbind,l)
keep <- names(which(table(df$genes)>1))
df <- df[df$genes %in% keep,]
gn <- unique(df$genes)
gme_res <- lapply( 1:length(gn), function(i) {
g <- gn[i]
tstats <- df[which(df$genes==g),"tvals"]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- mean(tstats)
if ( length(tstats) > 2 ) {
ttest <- t.test(tstats)
pval <- ttest$p.value
} else {
pval = 1
}
res <- c("gene"=g,"nprobes"=myn,"mean"=mymean,
"median"=mymedian, pval=pval)
} )
gme_res_df <- do.call(rbind, gme_res)
rownames(gme_res_df) <- gme_res_df[,1]
gme_res_df <- gme_res_df[,-1]
tmp <- apply(gme_res_df,2,as.numeric)
rownames(tmp) <- rownames(gme_res_df)
gme_res_df <- as.data.frame(tmp)
gme_res_df$sig <- -log10(gme_res_df[,4])
gme_res_df <- gme_res_df[order(-gme_res_df$sig),]
gme_res_df$fdr <- p.adjust(gme_res_df$pval)
out <- list("df"=df,"gme_res_df"=gme_res_df)
return(out)
}
# enrich
ttenrich <- function(m,genesets,cores=1) {
res <- mclapply( 1:length(genesets), function(i) {
gs <- genesets[i]
name <- names(gs)
n_members <- length(which(rownames(m) %in% gs[[1]]))
if ( n_members > 4 ) {
tstats <- m[which(rownames(m) %in% gs[[1]]),]
myn <- length(tstats)
mymean <- mean(tstats)
mymedian <- mean(tstats)
wt <- t.test(tstats)
res <- c(name,myn,mymean,mymedian,wt$p.value)
}
} , mc.cores = cores)
res_df <- do.call(rbind, res)
rownames(res_df) <- res_df[,1]
res_df <- res_df[,-1]
colnames(res_df) <- c("n_genes","t_mean","t_median","pval")
tmp <- apply(res_df,2,as.numeric)
rownames(tmp) <- rownames(res_df)
res_df <- tmp
res_df <- as.data.frame(res_df)
res_df <- res_df[order(res_df$pval),]
res_df$logp <- -log10(res_df$pval )
res_df$fdr <- p.adjust(res_df$pval,method="fdr")
res_df[order(abs(res_df$pval)),]
return(res_df)
}
Functions for aggregate-limma-enrich approach.
# chromosome by chromosome will be much faster
magg <- function(mval,myann,cores=1){
gn <- unique(unlist(strsplit( myann$UCSC_RefGene_Name ,";")))
gnl <- strsplit( myann$UCSC_RefGene_Name ,";")
gnl <- mclapply(gnl,unique,mc.cores=cores)
myann$gnl <- gnl
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
mymed <- function(g) {
probes <- rownames(myann[grep(g,myann$gnl),])
rows <- which(rownames(mx) %in% probes)
if ( length(rows) > 1 ) {
b <- mx[rows,]
med <- apply(b,2,mean)
med <- matrix(med,nrow=1)
colnames(med) <- colnames(b)
rownames(med) <- g
return(med)
}
}
med <- mclapply(gn,mymed,mc.cores=cores)
med <- med[lapply(med,length)>0]
medf <- do.call(rbind,med)
return(medf)
}
chragg <- function(mval,myann,cores=1){
annodf <- as.data.frame(anno)
keep <- rownames(subset(myann,UCSC_RefGene_Name!=""))
mx <- mval[rownames(mval) %in% keep,]
chrs <- unique(anno$chr)
myorder <- unlist(lapply(chrs,function(mychr) { nrow( annodf[annodf$chr==mychr,] ) } ))
chrs <- chrs[order(-myorder)]
leadercores <- floor(sqrt(cores))
workercores <- ceiling(sqrt(cores))
chrmedf <- mclapply(chrs,function(chr) {
chrfrag <- annodf[annodf$chr==chr,]
chrprobes <-rownames(chrfrag)
chrmx <- mx[rownames(mx) %in% chrprobes,]
chranno <- myann[rownames(myann) %in% chrprobes,]
chrmedf <- magg(mval=chrmx,myann=chranno,cores=workercores)
return(chrmedf)
},mc.cores=leadercores)
medf <- do.call(rbind, chrmedf)
return(medf)
}
agglimma <- function(medf,design) {
fit.reduced <- lmFit(medf,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
nondup <- !duplicated(dmagg$ID)
dmagg <- dmagg[nondup,]
rownames(dmagg) <- dmagg$ID
dmagg$ID = NULL
return(dmagg)
}
gsagg <- function(x,genesets,cores=1) {
meds <- mclapply(1:length(genesets), function(i) {
gs = genesets[[i]]
xx <- x[rownames(x) %in% gs,]
med <- apply(xx,2,mean)
},mc.cores=cores)
mymed <- do.call(rbind,meds)
rownames(mymed) <- names(genesets)
as.data.frame(mymed)
}
aalimma <- function(agag,design) {
fit.reduced <- lmFit(agag,design)
fit.reduced <- eBayes(fit.reduced)
dmagg <- topTable(fit.reduced,coef=ncol(design), number = Inf)
return(dmagg)
}
aal <- function(mval,myann,genesets,design,cores=1) {
medf <- chragg(mval,myann,cores=CORES)
agag <- gsagg(x=medf,genesets=gs_symbols,cores=CORES)
aalres <- aalimma(agag=agag,design=design)
return(aalres)
}
#aalres <- aal(mval=mval,myann=myann,genesets=gs_symbols,design=design,cores=CORES)
# prep
sex <- as.data.frame(design)$sex
tumor <- as.data.frame(design)$tumor
patient <- as.character(unlist(lapply(1:ncol(mval),function(i) {c(i,i)})))
patient <- head(patient,ncol(mval))
design <- model.matrix(~ patient + tumor )
rownames(design) <- colnames(mval)
split 74 samples 37 patients split into 18 + 18 Conduct LA workflow analysis. Then make overlap.
splitanalyze_la <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
# la1
dm1 <- runlimma(mval_a,design_a,myann)
dmagg1 <- agg(dm1,cores=floor(CORES/3))
m1 <- dmagg1$gme_res_df[,"mean",drop=FALSE]
lares1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)
# la2
dm2 <- runlimma(mval_b,design_b,myann)
dmagg2 <- agg(dm2,cores=floor(CORES/3))
m2 <- dmagg2$gme_res_df[,"mean",drop=FALSE]
lares2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)
list("lares1"=lares1,"lares2"=lares2)
}
lares <- mclapply( seq(100,5000,100) ,splitanalyze_la,mc.cores=CORES)
splitanalyze_al <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
#al
medf1 <- chragg(mval_a,myann,cores=floor(CORES/3))
magg1 <- agglimma(medf1,design_a)
m1 <- as.data.frame(magg1$t)
rownames(m1) <- rownames(magg1)
colnames(m1) <- "t"
alres1 <- ttenrich(m=m1,genesets=gs_symbols,cores=CORES)
#al
medf2 <- chragg(mval_b,myann,cores=floor(CORES/3))
magg2 <- agglimma(medf2,design_b)
m2 <- as.data.frame(magg2$t)
rownames(m2) <- rownames(magg2)
colnames(m2) <- "t"
alres2 <- ttenrich(m=m2,genesets=gs_symbols,cores=CORES)
list("alres1"=alres1,"alres2"=alres2)
}
alres <- mclapply( seq(100,5000,100) ,splitanalyze_al,mc.cores=CORES)
splitanalyze_aa <- function(seed){
set.seed(seed)
a <- sample(1:37,18)
a <- a[order(a)]
nona <- setdiff(1:37,a)
set.seed(seed)
b <- sample(nona,18)
b <- b[order(b)]
a <- c( (a*2)-1 ,a*2 )
a <- a[order(a)]
b <- c( (b*2)-1 ,b*2 )
b <- b[order(b)]
design_a <- design[a,]
design_a <- design_a[,colSums(design_a)>0]
mval_a <- mval[,a]
design_b <- design[b,]
design_b <- design_b[,colSums(design_b)>0]
mval_b <- mval[,b]
#aa1
aares1 <- aal(mval=mval_a,myann=myann,genesets=gs_symbols,
design=design_a,cores=floor(CORES/3))
#aa2
aares2 <- aal(mval=mval_b,myann=myann,genesets=gs_symbols,
design=design_b,cores=floor(CORES/3))
list("aares1"=aares1,"aares2"=aares2)
}
aares <- mclapply( seq(100,5000,100) ,splitanalyze_aa,mc.cores=CORES)
lacompare <- function(lares){
lares1 <- lares[[1]]
lares2 <- lares[[2]]
lasig1 <- subset(lares1,fdr<0.05)
lasig2 <- subset(lares2,fdr<0.05)
lasig1_up <- rownames(subset(lasig1,t_median>0))
lasig1_dn <- rownames(subset(lasig1,t_median<0))
lasig2_up <- rownames(subset(lasig2,t_median>0))
lasig2_dn <- rownames(subset(lasig2,t_median<0))
total <- length(unique(c(lasig1_up,lasig2_up,lasig1_dn,lasig2_dn)))
common <- length(intersect(lasig1_up, lasig2_up)) + length(intersect(lasig1_dn, lasig2_dn))
discordant <- length(intersect(lasig1_up, lasig2_dn)) + length(intersect(lasig1_dn, lasig2_up))
uncommon = total - common - discordant
laout <- data.frame(total,common,discordant,uncommon)
laout$p_comm <-laout$common / laout$total
laout$p_disc <-laout$discordant / laout$total
laout$p_uncom <-laout$uncommon / laout$total
laout
}
alcompare <-function(alres){
alres1 <- alres[[1]]
alres2 <- alres[[2]]
alsig1 <- subset(alres1,fdr<0.05)
alsig2 <- subset(alres2,fdr<0.05)
alsig1_up <- rownames(subset(alsig1,t_median>0))
alsig1_dn <- rownames(subset(alsig1,t_median<0))
alsig2_up <- rownames(subset(alsig2,t_median>0))
alsig2_dn <- rownames(subset(alsig2,t_median<0))
total <- length(unique(c(alsig1_up,alsig2_up,alsig1_dn,alsig2_dn)))
common <- length(intersect(alsig1_up, alsig2_up)) + length(intersect(alsig1_dn, alsig2_dn))
discordant <- length(intersect(alsig1_up, alsig2_dn)) + length(intersect(alsig1_dn, alsig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
alout <- data.frame(total,common,discordant,uncommon)
alout$p_comm <-alout$common / alout$total
alout$p_disc <-alout$discordant / alout$total
alout$p_uncom <-alout$uncommon / alout$total
alout
}
aacompare <-function(aa){
aa1 <- aa[[1]]
aa2 <- aa[[2]]
aasig1 <- subset(aa1,adj.P.Val<0.05)
aasig2 <- subset(aa2,adj.P.Val<0.05)
aasig1_up <- rownames(subset(aasig1,logFC>0))
aasig1_dn <- rownames(subset(aasig1,logFC<0))
aasig2_up <- rownames(subset(aasig2,logFC>0))
aasig2_dn <- rownames(subset(aasig2,logFC<0))
total <- length(unique(c(aasig1_up,aasig2_up,aasig1_dn,aasig2_dn)))
common <- length(intersect(aasig1_up, aasig2_up)) + length(intersect(aasig1_dn, aasig2_dn))
discordant <- length(intersect(aasig1_up, aasig2_dn)) + length(intersect(aasig1_dn, aasig2_up))
uncommon = total - common - discordant
result <- c("total"=total,"common"=common,"discordant"=discordant,"uncommon"=uncommon)
aaout <- data.frame(total,common,discordant,uncommon)
aaout$p_comm <- aaout$common / aaout$total
aaout$p_disc <- aaout$discordant / aaout$total
aaout$p_uncom <-aaout$uncommon / aaout$total
aaout
}
laout <- do.call(rbind,lapply(lares,lacompare))
alout <- do.call(rbind,lapply(alres,alcompare))
aaout <- do.call(rbind,lapply(aares,aacompare))
laout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1421 866 0 555 0.6094299789 0.000000000 0.39057002
## 2 1620 1242 1 377 0.7666666667 0.000617284 0.23271605
## 3 931 475 9 447 0.5102040816 0.009667025 0.48012889
## 4 935 483 12 440 0.5165775401 0.012834225 0.47058824
## 5 1271 483 0 788 0.3800157356 0.000000000 0.61998426
## 6 1277 554 0 723 0.4338292874 0.000000000 0.56617071
## 7 1592 0 1320 272 0.0000000000 0.829145729 0.17085427
## 8 1417 1 791 625 0.0007057163 0.558221595 0.44107269
## 9 904 419 11 474 0.4634955752 0.012168142 0.52433628
## 10 1611 1492 0 119 0.9261328367 0.000000000 0.07386716
## 11 1550 1 603 946 0.0006451613 0.389032258 0.61032258
## 12 1603 2 918 683 0.0012476606 0.572676232 0.42607611
## 13 1604 91 11 1502 0.0567331671 0.006857855 0.93640898
## 14 1519 2 726 791 0.0013166557 0.477946017 0.52073733
## 15 1562 1221 0 341 0.7816901408 0.000000000 0.21830986
## 16 1308 1 736 571 0.0007645260 0.562691131 0.43654434
## 17 1527 181 0 1346 0.1185330714 0.000000000 0.88146693
## 18 1603 12 838 753 0.0074859638 0.522769807 0.46974423
## 19 1260 389 0 871 0.3087301587 0.000000000 0.69126984
## 20 1588 0 1312 276 0.0000000000 0.826196474 0.17380353
## 21 1605 11 805 789 0.0068535826 0.501557632 0.49158879
## 22 1620 1521 0 99 0.9388888889 0.000000000 0.06111111
## 23 634 8 60 566 0.0126182965 0.094637224 0.89274448
## 24 923 441 12 470 0.4777898158 0.013001083 0.50920910
## 25 1597 0 1327 270 0.0000000000 0.830932999 0.16906700
## 26 1561 628 1 932 0.4023062140 0.000640615 0.59705317
## 27 1260 415 0 845 0.3293650794 0.000000000 0.67063492
## 28 1585 0 1304 281 0.0000000000 0.822712934 0.17728707
## 29 1568 1 1238 329 0.0006377551 0.789540816 0.20982143
## 30 1548 36 491 1021 0.0232558140 0.317183463 0.65956072
## 31 1557 1 1102 454 0.0006422608 0.707771355 0.29158638
## 32 1594 1435 0 159 0.9002509410 0.000000000 0.09974906
## 33 1525 195 0 1330 0.1278688525 0.000000000 0.87213115
## 34 903 1 100 802 0.0011074197 0.110741971 0.88815061
## 35 977 3 60 914 0.0030706244 0.061412487 0.93551689
## 36 1049 6 520 523 0.0057197331 0.495710200 0.49857007
## 37 1507 557 0 950 0.3696084937 0.000000000 0.63039151
## 38 1613 35 524 1054 0.0216986981 0.324860508 0.65344079
## 39 1506 3 764 739 0.0019920319 0.507304117 0.49070385
## 40 1562 1 1142 419 0.0006402049 0.731113956 0.26824584
## 41 1555 28 577 950 0.0180064309 0.371061093 0.61093248
## 42 1598 0 1460 138 0.0000000000 0.913642053 0.08635795
## 43 1549 230 106 1213 0.1484828922 0.068431246 0.78308586
## 44 1633 0 1524 109 0.0000000000 0.933251684 0.06674832
## 45 1426 1 928 497 0.0007012623 0.650771388 0.34852735
## 46 1057 14 434 609 0.0132450331 0.410596026 0.57615894
## 47 982 2 410 570 0.0020366599 0.417515275 0.58044807
## 48 1567 1269 0 298 0.8098276962 0.000000000 0.19017230
## 49 1513 5 740 768 0.0033046927 0.489094514 0.50760079
## 50 909 4 163 742 0.0044004400 0.179317932 0.81628163
alout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1380 819 0 561 0.5934783 0.0000000000 0.4065217
## 2 1196 1059 0 137 0.8854515 0.0000000000 0.1145485
## 3 1226 1088 0 138 0.8874388 0.0000000000 0.1125612
## 4 1447 761 0 686 0.5259157 0.0000000000 0.4740843
## 5 1223 951 0 272 0.7775961 0.0000000000 0.2224039
## 6 1219 997 0 222 0.8178835 0.0000000000 0.1821165
## 7 1263 851 0 412 0.6737926 0.0000000000 0.3262074
## 8 1361 745 0 616 0.5473916 0.0000000000 0.4526084
## 9 1306 992 0 314 0.7595712 0.0000000000 0.2404288
## 10 1377 785 0 592 0.5700799 0.0000000000 0.4299201
## 11 1223 912 0 311 0.7457073 0.0000000000 0.2542927
## 12 1290 960 0 330 0.7441860 0.0000000000 0.2558140
## 13 1299 997 0 302 0.7675135 0.0000000000 0.2324865
## 14 1504 362 4 1138 0.2406915 0.0026595745 0.7566489
## 15 1255 1069 0 186 0.8517928 0.0000000000 0.1482072
## 16 1221 1055 0 166 0.8640459 0.0000000000 0.1359541
## 17 1230 1093 0 137 0.8886179 0.0000000000 0.1113821
## 18 1322 989 0 333 0.7481089 0.0000000000 0.2518911
## 19 1326 1015 0 311 0.7654600 0.0000000000 0.2345400
## 20 1401 622 0 779 0.4439686 0.0000000000 0.5560314
## 21 1238 1039 0 199 0.8392569 0.0000000000 0.1607431
## 22 1265 1057 0 208 0.8355731 0.0000000000 0.1644269
## 23 1211 1067 0 144 0.8810900 0.0000000000 0.1189100
## 24 1415 684 0 731 0.4833922 0.0000000000 0.5166078
## 25 1242 1049 0 193 0.8446055 0.0000000000 0.1553945
## 26 1361 757 0 604 0.5562087 0.0000000000 0.4437913
## 27 1351 1026 0 325 0.7594375 0.0000000000 0.2405625
## 28 1259 906 0 353 0.7196187 0.0000000000 0.2803813
## 29 1338 858 0 480 0.6412556 0.0000000000 0.3587444
## 30 1343 967 0 376 0.7200298 0.0000000000 0.2799702
## 31 1201 1000 0 201 0.8326395 0.0000000000 0.1673605
## 32 1273 1037 0 236 0.8146112 0.0000000000 0.1853888
## 33 1357 1032 0 325 0.7605011 0.0000000000 0.2394989
## 34 1273 858 0 415 0.6739984 0.0000000000 0.3260016
## 35 1209 1058 0 151 0.8751034 0.0000000000 0.1248966
## 36 1354 950 0 404 0.7016248 0.0000000000 0.2983752
## 37 1196 989 0 207 0.8269231 0.0000000000 0.1730769
## 38 1309 644 0 665 0.4919786 0.0000000000 0.5080214
## 39 1351 852 0 499 0.6306440 0.0000000000 0.3693560
## 40 1250 691 0 559 0.5528000 0.0000000000 0.4472000
## 41 1329 789 0 540 0.5936795 0.0000000000 0.4063205
## 42 1284 1105 0 179 0.8605919 0.0000000000 0.1394081
## 43 1295 1061 0 234 0.8193050 0.0000000000 0.1806950
## 44 1299 955 0 344 0.7351809 0.0000000000 0.2648191
## 45 1209 1043 0 166 0.8626964 0.0000000000 0.1373036
## 46 1296 1032 0 264 0.7962963 0.0000000000 0.2037037
## 47 1472 571 1 900 0.3879076 0.0006793478 0.6114130
## 48 1374 980 0 394 0.7132460 0.0000000000 0.2867540
## 49 1330 921 0 409 0.6924812 0.0000000000 0.3075188
## 50 1342 888 0 454 0.6616990 0.0000000000 0.3383010
aaout
## total common discordant uncommon p_comm p_disc p_uncom
## 1 1324 370 0 954 0.279456193 0.0000000000 0.7205438
## 2 939 549 0 390 0.584664537 0.0000000000 0.4153355
## 3 809 590 0 219 0.729295426 0.0000000000 0.2707046
## 4 1504 120 1 1383 0.079787234 0.0006648936 0.9195479
## 5 684 443 0 241 0.647660819 0.0000000000 0.3523392
## 6 873 121 0 752 0.138602520 0.0000000000 0.8613975
## 7 1211 17 0 1194 0.014037985 0.0000000000 0.9859620
## 8 1355 77 0 1278 0.056826568 0.0000000000 0.9431734
## 9 1426 271 0 1155 0.190042076 0.0000000000 0.8099579
## 10 1251 307 0 944 0.245403677 0.0000000000 0.7545963
## 11 1126 186 0 940 0.165186501 0.0000000000 0.8348135
## 12 1071 159 0 912 0.148459384 0.0000000000 0.8515406
## 13 1326 137 0 1189 0.103318250 0.0000000000 0.8966817
## 14 1553 160 0 1393 0.103026401 0.0000000000 0.8969736
## 15 1090 297 0 793 0.272477064 0.0000000000 0.7275229
## 16 779 382 0 397 0.490372272 0.0000000000 0.5096277
## 17 1151 289 0 862 0.251086012 0.0000000000 0.7489140
## 18 1077 222 0 855 0.206128134 0.0000000000 0.7938719
## 19 1424 101 0 1323 0.070926966 0.0000000000 0.9290730
## 20 1468 83 0 1385 0.056539510 0.0000000000 0.9434605
## 21 898 547 0 351 0.609131403 0.0000000000 0.3908686
## 22 1222 311 0 911 0.254500818 0.0000000000 0.7454992
## 23 952 459 0 493 0.482142857 0.0000000000 0.5178571
## 24 1458 146 0 1312 0.100137174 0.0000000000 0.8998628
## 25 1084 663 0 421 0.611623616 0.0000000000 0.3883764
## 26 1313 166 0 1147 0.126428027 0.0000000000 0.8735720
## 27 1355 624 0 731 0.460516605 0.0000000000 0.5394834
## 28 1194 148 0 1046 0.123953099 0.0000000000 0.8760469
## 29 1262 116 0 1146 0.091917591 0.0000000000 0.9080824
## 30 1187 343 0 844 0.288963774 0.0000000000 0.7110362
## 31 1032 130 0 902 0.125968992 0.0000000000 0.8740310
## 32 1025 783 0 242 0.763902439 0.0000000000 0.2360976
## 33 1476 351 0 1125 0.237804878 0.0000000000 0.7621951
## 34 1483 89 0 1394 0.060013486 0.0000000000 0.9399865
## 35 962 537 0 425 0.558212058 0.0000000000 0.4417879
## 36 961 518 0 443 0.539021852 0.0000000000 0.4609781
## 37 1195 171 0 1024 0.143096234 0.0000000000 0.8569038
## 38 1287 128 0 1159 0.099456099 0.0000000000 0.9005439
## 39 1317 153 0 1164 0.116173121 0.0000000000 0.8838269
## 40 1224 0 0 1224 0.000000000 0.0000000000 1.0000000
## 41 1304 7 0 1297 0.005368098 0.0000000000 0.9946319
## 42 1040 632 0 408 0.607692308 0.0000000000 0.3923077
## 43 1149 559 0 590 0.486510009 0.0000000000 0.5134900
## 44 1311 160 0 1151 0.122044241 0.0000000000 0.8779558
## 45 1061 283 0 778 0.266729500 0.0000000000 0.7332705
## 46 999 689 0 310 0.689689690 0.0000000000 0.3103103
## 47 1555 103 0 1452 0.066237942 0.0000000000 0.9337621
## 48 1351 338 0 1013 0.250185048 0.0000000000 0.7498150
## 49 1351 96 0 1255 0.071058475 0.0000000000 0.9289415
## 50 1306 206 0 1100 0.157733538 0.0000000000 0.8422665
res <- do.call(rbind,lapply(list(laout,alout,aaout),colMeans))
rownames(res) <- c("la","al","aa")
res <- as.data.frame(res)
res %>%
kbl(caption="overall summary") %>%
kable_paper("hover", full_width = F)
total | common | discordant | uncommon | p_comm | p_disc | p_uncom | |
---|---|---|---|---|---|---|---|
la | 1391.72 | 295.32 | 461.60 | 634.80 | 0.2101705 | 0.3104725 | 0.4793570 |
al | 1301.90 | 919.76 | 0.10 | 382.04 | 0.7132613 | 0.0000668 | 0.2866719 |
aa | 1195.10 | 286.74 | 0.02 | 908.34 | 0.2669902 | 0.0000133 | 0.7329965 |
apply(laout,2,median)
## total common discordant uncommon p_comm p_disc
## 1.537500e+03 1.300000e+01 2.865000e+02 5.900000e+02 1.293166e-02 2.482507e-01
## p_uncom
## 4.950794e-01
apply(alout,2,median)
## total common discordant uncommon p_comm p_disc
## 1297.5000000 973.5000000 0.0000000 327.5000000 0.7469081 0.0000000
## p_uncom
## 0.2530919
apply(aaout,2,median)
## total common discordant uncommon p_comm p_disc
## 1216.5000000 214.0000000 0.0000000 949.0000000 0.1776143 0.0000000
## p_uncom
## 0.8223857
Plot some data.
Raw counts and proportions.
par(mfrow=c(1,3))
barplot(res$common,main="no. common",names.arg=c("LA","AL","AA"))
barplot(res$uncommon,main="no. uncommon",names.arg=c("LA","AL","AA"))
barplot(res$discordant,main="no. discordant",names.arg=c("LA","AL","AA"))
boxplot(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common),
main="no. common",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$common,"AL"=alout$common,"AA"=aaout$common), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon),
main="no. uncommon",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$uncommon,"AL"=alout$uncommon,"AA"=aaout$uncommon), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant),
main="no. discordant",col="white",ylim=c(0,1550))
beeswarm(list("LA"=laout$discordant,"AL"=alout$discordant,"AA"=aaout$discordant), add=TRUE,cex=1.2,pch=19)
barplot(res$p_comm,main="proportion common",names.arg=c("LA","AL","AA"))
barplot(res$p_uncom,main="proportion uncommon",names.arg=c("LA","AL","AA"))
barplot(res$p_disc,main="proportion discordant",names.arg=c("LA","AL","AA"))
boxplot(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm),
main="proportion common",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_comm,"AL"=alout$p_comm,"AA"=aaout$p_comm), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom),
main="proportion uncommon",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_uncom,"AL"=alout$p_uncom,"AA"=aaout$p_uncom), add=TRUE,cex=1.2,pch=19)
boxplot(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc),
main="proportion discordant",col="white",ylim=c(0,1))
beeswarm(list("LA"=laout$p_disc,"AL"=alout$p_disc,"AA"=aaout$p_disc), add=TRUE,cex=1.2,pch=19)
save.image("GSE158422_split.Rdata")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] beeswarm_0.4.0
## [2] kableExtra_1.3.4
## [3] mitch_1.12.0
## [4] tictoc_1.2
## [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [6] IlluminaHumanMethylation450kmanifest_0.4.0
## [7] minfi_1.46.0
## [8] bumphunter_1.42.0
## [9] locfit_1.5-9.8
## [10] iterators_1.0.14
## [11] foreach_1.5.2
## [12] Biostrings_2.68.1
## [13] XVector_0.40.0
## [14] SummarizedExperiment_1.30.2
## [15] Biobase_2.60.0
## [16] MatrixGenerics_1.12.3
## [17] matrixStats_1.0.0
## [18] GenomicRanges_1.52.0
## [19] GenomeInfoDb_1.36.3
## [20] IRanges_2.34.1
## [21] S4Vectors_0.38.2
## [22] BiocGenerics_0.46.0
## [23] eulerr_7.0.0
## [24] limma_3.56.2
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.1 later_1.3.1
## [3] BiocIO_1.10.0 bitops_1.0-7
## [5] filelock_1.0.2 tibble_3.2.1
## [7] preprocessCore_1.62.1 XML_3.99-0.14
## [9] lifecycle_1.0.3 lattice_0.21-9
## [11] MASS_7.3-60 base64_2.0.1
## [13] scrime_1.3.5 magrittr_2.0.3
## [15] sass_0.4.7 rmarkdown_2.25
## [17] jquerylib_0.1.4 yaml_2.3.7
## [19] httpuv_1.6.11 doRNG_1.8.6
## [21] askpass_1.2.0 DBI_1.1.3
## [23] RColorBrewer_1.1-3 abind_1.4-5
## [25] zlibbioc_1.46.0 rvest_1.0.3
## [27] quadprog_1.5-8 purrr_1.0.2
## [29] RCurl_1.98-1.12 rappdirs_0.3.3
## [31] GenomeInfoDbData_1.2.10 genefilter_1.82.1
## [33] annotate_1.78.0 svglite_2.1.1
## [35] DelayedMatrixStats_1.22.6 codetools_0.2-19
## [37] DelayedArray_0.26.7 xml2_1.3.5
## [39] tidyselect_1.2.0 beanplot_1.3.1
## [41] BiocFileCache_2.8.0 webshot_0.5.5
## [43] illuminaio_0.42.0 GenomicAlignments_1.36.0
## [45] jsonlite_1.8.7 multtest_2.56.0
## [47] ellipsis_0.3.2 survival_3.5-7
## [49] systemfonts_1.0.4 tools_4.3.1
## [51] progress_1.2.2 Rcpp_1.0.11
## [53] glue_1.6.2 gridExtra_2.3
## [55] xfun_0.40 dplyr_1.1.3
## [57] HDF5Array_1.28.1 fastmap_1.1.1
## [59] GGally_2.1.2 rhdf5filters_1.12.1
## [61] fansi_1.0.4 openssl_2.1.1
## [63] caTools_1.18.2 digest_0.6.33
## [65] R6_2.5.1 mime_0.12
## [67] colorspace_2.1-0 gtools_3.9.4
## [69] biomaRt_2.56.1 RSQLite_2.3.1
## [71] utf8_1.2.3 tidyr_1.3.0
## [73] generics_0.1.3 data.table_1.14.8
## [75] rtracklayer_1.60.1 prettyunits_1.2.0
## [77] httr_1.4.7 htmlwidgets_1.6.2
## [79] S4Arrays_1.0.6 pkgconfig_2.0.3
## [81] gtable_0.3.4 blob_1.2.4
## [83] siggenes_1.74.0 htmltools_0.5.6
## [85] echarts4r_0.4.5 scales_1.2.1
## [87] png_0.1-8 knitr_1.44
## [89] rstudioapi_0.15.0 reshape2_1.4.4
## [91] tzdb_0.4.0 rjson_0.2.21
## [93] nlme_3.1-163 curl_5.0.2
## [95] cachem_1.0.8 rhdf5_2.44.0
## [97] stringr_1.5.0 KernSmooth_2.23-22
## [99] AnnotationDbi_1.62.2 restfulr_0.0.15
## [101] GEOquery_2.68.0 pillar_1.9.0
## [103] grid_4.3.1 reshape_0.8.9
## [105] vctrs_0.6.3 gplots_3.1.3
## [107] promises_1.2.1 dbplyr_2.3.4
## [109] xtable_1.8-4 evaluate_0.22
## [111] readr_2.1.4 GenomicFeatures_1.52.2
## [113] cli_3.6.1 compiler_4.3.1
## [115] Rsamtools_2.16.0 rlang_1.1.1
## [117] crayon_1.5.2 rngtools_1.5.2
## [119] nor1mix_1.3-0 mclust_6.0.0
## [121] plyr_1.8.8 stringi_1.7.12
## [123] viridisLite_0.4.2 BiocParallel_1.34.2
## [125] munsell_0.5.0 Matrix_1.6-1.1
## [127] hms_1.1.3 sparseMatrixStats_1.12.2
## [129] bit64_4.0.5 ggplot2_3.4.3
## [131] Rhdf5lib_1.22.1 KEGGREST_1.40.1
## [133] shiny_1.7.5 highr_0.10
## [135] memoise_2.0.1 bslib_0.5.1
## [137] bit_4.0.5