library("knitr")
library("kableExtra")
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("stringr")
library("tidyr")
library("eulerr")
# Load the DAVID web-server chart export (tab-delimited) for this study.
# To analyse a different study, change the PMID/PMCID in the object name and
# the file name after "output_lists/".
#PMID33947848_results <- read.table("../output_lists/DAVID_PMID33947848.txt", header = TRUE, sep = "\t") #Change PMID/PMCID
# quote = "" and comment.char = "" make read.table() treat apostrophes, double
# quotes and "#" inside fields as literal text. With the default quote
# characters, an unmatched apostrophe swallows the following lines into one
# field, which raises "EOF within quoted string" and silently drops rows.
# NOTE(review): assumes the export does not wrap fields in quotes - confirm.
PMID36344834_results <- read.table(
  "../../output_lists/DAVID_PMID36344834.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = ""
) #Change PMID/PMCID and text after "output_lists/"
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : number of items read is not a multiple of the number of columns
# Split each DAVID "Term" ("<GO ID>~<GO term name>") into separate GOID and
# GO.Term columns, keep the columns used later, and preview the top rows.
#PMID33947848_results <- read.csv("../output_lists/PMID33947848_webDAVID_20240513.tsv", header = TRUE, sep = "\t")
#split_terms <- str_split(PMID33947848_results$Term, pattern = "~")
split_terms <- str_split(PMID36344834_results$Term, pattern = "~")
#PMID33947848_results$GOID <- sapply(split_terms, "[", 1)
#PMID33947848_results$GO.Term <- sapply(split_terms, "[", 2)
# vapply() guarantees a character vector of the right length even when the
# input is empty; a term without "~" yields NA for GO.Term.
PMID36344834_results$GOID <- vapply(
  split_terms, function(parts) parts[1], character(1)
)
PMID36344834_results$GO.Term <- vapply(
  split_terms, function(parts) parts[2], character(1)
)
#PMID33947848_results <- select(PMID33947848_results, GOID, GO.Term, Category, Count, X., PValue, Genes, List.Total, Pop.Hits, Pop.Total, Fold.Enrichment, Bonferroni, Benjamini, FDR)
# "X." is the name read.table() gives the export's "%" column.
PMID36344834_results <- select(
  PMID36344834_results,
  GOID, GO.Term, Category, Count, X., PValue, Genes, List.Total, Pop.Hits,
  Pop.Total, Fold.Enrichment, Bonferroni, Benjamini, FDR
)
#tmp <- PMID33947848_results
# Drop the long gene list from the display copy only.
tmp <- PMID36344834_results
tmp$Genes <- NULL
head(tmp, 20) %>% #Change PMID/PMCID
  kbl(caption = "Top results from the DAVID Website") %>%
  kable_paper("hover", full_width = FALSE)
GOID | GO.Term | Category | Count | X. | PValue | List.Total | Pop.Hits | Pop.Total | Fold.Enrichment | Bonferroni | Benjamini | FDR |
---|---|---|---|---|---|---|---|---|---|---|---|---|
GO:0005829 | cytosol | GOTERM_CC_DIRECT | 498 | 50.920245 | 0 | 971 | 5537 | 20521 | 1.900790 | 0 | 0 | 0 |
GO:0005515 | protein binding | GOTERM_MF_DIRECT | 849 | 86.809816 | 0 | 966 | 12707 | 18883 | 1.306046 | 0 | 0 | 0 |
GO:0005654 | nucleoplasm | GOTERM_CC_DIRECT | 379 | 38.752556 | 0 | 971 | 3972 | 20521 | 2.016551 | 0 | 0 | 0 |
GO:0005737 | cytoplasm | GOTERM_CC_DIRECT | 431 | 44.069530 | 0 | 971 | 5636 | 20521 | 1.616165 | 0 | 0 | 0 |
GO:0005634 | nucleus | GOTERM_CC_DIRECT | 433 | 44.274029 | 0 | 971 | 6021 | 20521 | 1.519842 | 0 | 0 | 0 |
GO:0019901 | protein kinase binding | GOTERM_MF_DIRECT | 76 | 7.770961 | 0 | 966 | 483 | 18883 | 3.075816 | 0 | 0 | 0 |
GO:0070062 | extracellular exosome | GOTERM_CC_DIRECT | 193 | 19.734151 | 0 | 971 | 2241 | 20521 | 1.820098 | 0 | 0 | 0 |
GO:0042802 | identical protein binding | GOTERM_MF_DIRECT | 168 | 17.177914 | 0 | 966 | 1734 | 18883 | 1.893887 | 0 | 0 | 0 |
GO:0005739 | mitochondrion | GOTERM_CC_DIRECT | 137 | 14.008180 | 0 | 971 | 1427 | 20521 | 2.028971 | 0 | 0 | 0 |
GO:0016310 | phosphorylation | GOTERM_BP_DIRECT | 80 | 8.179959 | 0 | 947 | 623 | 19256 | 2.611067 | 0 | 0 | 0 |
GO:0019899 | enzyme binding | GOTERM_MF_DIRECT | 60 | 6.134969 | 0 | 966 | 374 | 18883 | 3.135982 | 0 | 0 | 0 |
GO:0005524 | ATP binding | GOTERM_MF_DIRECT | 150 | 15.337423 | 0 | 966 | 1540 | 18883 | 1.903989 | 0 | 0 | 0 |
GO:0051301 | cell division | GOTERM_BP_DIRECT | 59 | 6.032720 | 0 | 947 | 386 | 19256 | 3.107998 | 0 | 0 | 0 |
GO:0006468 | protein phosphorylation | GOTERM_BP_DIRECT | 58 | 5.930470 | 0 | 947 | 379 | 19256 | 3.111751 | 0 | 0 | 0 |
GO:0007049 | cell cycle | GOTERM_BP_DIRECT | 54 | 5.521472 | 0 | 947 | 351 | 19256 | 3.128259 | 0 | 0 | 0 |
GO:0043065 | positive regulation of apoptotic process | GOTERM_BP_DIRECT | 51 | 5.214724 | 0 | 947 | 321 | 19256 | 3.230586 | 0 | 0 | 0 |
GO:0045893 | positive regulation of transcription, DNA-templated | GOTERM_BP_DIRECT | 83 | 8.486708 | 0 | 947 | 718 | 19256 | 2.350551 | 0 | 0 | 0 |
GO:0032991 | macromolecular complex | GOTERM_CC_DIRECT | 79 | 8.077710 | 0 | 971 | 692 | 20521 | 2.412683 | 0 | 0 | 0 |
GO:0031625 | ubiquitin protein ligase binding | GOTERM_MF_DIRECT | 49 | 5.010225 | 0 | 966 | 312 | 18883 | 3.069979 | 0 | 0 | 0 |
GO:0004672 | protein kinase activity | GOTERM_MF_DIRECT | 55 | 5.623722 | 0 | 966 | 385 | 18883 | 2.792517 | 0 | 0 | 0 |
# Load the enrichment results saved by the script ("DAVID Reborn") run.
script_results <- readRDS("../../DAVID_vScripts/Validation_2_PMID36344834/ora_arranged2023.RDS")
# Description is "<GO ID> <remaining words>": the first space-separated token
# becomes GOID and the rest is re-joined as GO.Term. Split once and reuse the
# pieces; vapply() keeps both columns plain (unnamed) character vectors.
description_parts <- strsplit(script_results$Description, " ")
script_results$GOID <- vapply(
  description_parts, function(parts) parts[1], character(1)
)
script_results$GO.Term <- vapply(
  description_parts,
  function(parts) paste(parts[-1], collapse = " "),
  character(1)
)
script_results <- select(
  script_results,
  S_No, GOID, GO.Term, GeneRatio, BgRatio, pvalue, p.adjust, qvalue, geneID,
  Count, gr, br, EnrichmentScore
)
# minimum of 2 genes
script_results <- subset(script_results, Count >= 2)
# Drop the long gene list from the display copy only.
tmp <- script_results
tmp$geneID <- NULL
head(tmp, 20) %>%
  kbl(caption = "Top results from DAVID Reborn Tool") %>%
  kable_paper("hover", full_width = FALSE)
S_No | GOID | GO.Term | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | Count | gr | br | EnrichmentScore |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | GO:0005515 | MF protein binding | 849/977 | 12707/20758 | 0 | 0 | 0 | 849 | 0.8689867 | 0.6121495 | 1.419566 |
2 | GO:0005829 | CC cytosol | 496/977 | 5447/20758 | 0 | 0 | 0 | 496 | 0.5076766 | 0.2624049 | 1.934707 |
3 | GO:0005654 | CC nucleoplasm | 378/977 | 3905/20758 | 0 | 0 | 0 | 378 | 0.3868987 | 0.1881202 | 2.056656 |
4 | GO:0005634 | CC nucleus | 423/977 | 5680/20758 | 0 | 0 | 0 | 423 | 0.4329580 | 0.2736294 | 1.582279 |
5 | GO:0005737 | CC cytoplasm | 392/977 | 5111/20758 | 0 | 0 | 0 | 392 | 0.4012282 | 0.2462183 | 1.629563 |
6 | GO:0042802 | MF identical protein binding | 158/977 | 1545/20758 | 0 | 0 | 0 | 158 | 0.1617195 | 0.0744291 | 2.172799 |
7 | GO:0019901 | MF protein kinase binding | 75/977 | 471/20758 | 0 | 0 | 0 | 75 | 0.0767656 | 0.0226900 | 3.383228 |
8 | GO:0005524 | MF ATP binding | 148/977 | 1479/20758 | 0 | 0 | 0 | 148 | 0.1514841 | 0.0712496 | 2.126104 |
9 | GO:0070062 | CC extracellular exosome | 192/977 | 2212/20758 | 0 | 0 | 0 | 192 | 0.1965200 | 0.1065613 | 1.844196 |
10 | GO:0019899 | MF enzyme binding | 59/977 | 355/20758 | 0 | 0 | 0 | 59 | 0.0603889 | 0.0171018 | 3.531137 |
11 | GO:0006468 | BP protein phosphorylation | 61/977 | 384/20758 | 0 | 0 | 0 | 61 | 0.0624360 | 0.0184989 | 3.375123 |
12 | GO:0005739 | CC mitochondrion | 134/977 | 1363/20758 | 0 | 0 | 0 | 134 | 0.1371546 | 0.0656614 | 2.088815 |
13 | GO:0051301 | BP cell division | 53/977 | 351/20758 | 0 | 0 | 0 | 53 | 0.0542477 | 0.0169091 | 3.208187 |
14 | GO:0045893 | BP positive regulation of DNA-templated transcription | 79/977 | 688/20758 | 0 | 0 | 0 | 79 | 0.0808598 | 0.0331438 | 2.439662 |
15 | GO:0031625 | MF ubiquitin protein ligase binding | 47/977 | 305/20758 | 0 | 0 | 0 | 47 | 0.0481064 | 0.0146931 | 3.274078 |
16 | GO:0043066 | BP negative regulation of apoptotic process | 61/977 | 477/20758 | 0 | 0 | 0 | 61 | 0.0624360 | 0.0229791 | 2.717080 |
17 | GO:0071456 | BP cellular response to hypoxia | 29/977 | 134/20758 | 0 | 0 | 0 | 29 | 0.0296827 | 0.0064553 | 4.598161 |
18 | GO:1902895 | BP positive regulation of miRNA transcription | 19/977 | 56/20758 | 0 | 0 | 0 | 19 | 0.0194473 | 0.0026978 | 7.208693 |
19 | GO:0005783 | CC endoplasmic reticulum | 103/977 | 1081/20758 | 0 | 0 | 0 | 103 | 0.1054248 | 0.0520763 | 2.024429 |
20 | GO:0043065 | BP positive regulation of apoptotic process | 45/977 | 303/20758 | 0 | 0 | 0 | 45 | 0.0460594 | 0.0145968 | 3.155447 |
Extract the gene lists from the results of the web tool and the reborn tool to see where genes are being excluded.
#web_genes <- unique(gsub(" ","",unlist(strsplit(PMID33947848_results$Genes,","))))
#web_genes <- sort(web_genes)
#str(web_genes)
#481 genes
# Web tool: genes are comma-separated per term; strip spaces, de-duplicate and
# sort to get the set of genes that appear in any term.
web_genes <- sort(
  unique(
    gsub(" ", "", unlist(strsplit(PMID36344834_results$Genes, ",")))
  )
)
str(web_genes)
## chr [1:976] "AARS1" "ABCB6" "ABCC5" "ABCF1" "ABCF3" "ABHD4" "ABHD6" "ABL1" ...
# Script: genes are space-separated per term.
script_genes <- sort(unique(unlist(strsplit(script_results$geneID, " "))))
str(script_genes)
## chr [1:977] "AARS1" "ABCB6" "ABCC5" "ABCF1" "ABCF3" "ABHD4" "ABHD6" "ABL1" ...
# NOTE(review): this comment used to read "499 genes looks right", but str()
# above reports 977 genes for this dataset - confirm the expected count.
# Euler diagram of the overlap between the two gene sets.
v1 <- list("web" = web_genes, "script" = script_genes)
plot(
  euler(v1),
  quantities = list(cex = 1.0),
  labels = list(cex = 1.5)
)
message("Genes unique to web")
## Genes unique to web
setdiff(web_genes, script_genes)
## character(0)
message("Genes unique to script")
## Genes unique to script
setdiff(script_genes, web_genes)
## [1] "NT5DC2"
# Build a per-GO-term comparison of the web tool and the script.
# From DAVID webserver table output:
#web_split <- sapply(strsplit(PMID33947848_results$Term,"~"),"[[",1)
#web_split <- sapply(strsplit(PMID36344834_results$Term,"~"),"[[",1)
#DAVID_webserver <- PMID33947848_results %>% #Change PMID/PMCID
#select(GOID, Count, PValue, Fold.Enrichment, FDR)
DAVID_webserver <- PMID36344834_results %>% #Change PMID/PMCID
  select(GOID, Count, PValue, Fold.Enrichment, FDR)
DAVID_reborn <- script_results %>%
  select(
    S_No, GOID, GO.Term, GeneRatio, pvalue, p.adjust, Count, EnrichmentScore
  )
# Full outer join on GOID: terms found by only one tool are kept, with NA in
# the other tool's columns. Both inputs have a Count column, so merge() names
# them Count.x (web tool) and Count.y (script).
comparison_table <- merge(
  DAVID_webserver, DAVID_reborn,
  by = "GOID", all = TRUE
)
# Order by the web tool's p-value; order() places NA (script-only terms) last.
comparison_table <- comparison_table[order(comparison_table$PValue), ]
# Neither input carries a geneID column, so there is nothing to drop here.
head(comparison_table, 50) %>%
  kbl(caption = "Top replication results") %>%
  kable_paper("hover", full_width = FALSE)
GOID | Count.x | PValue | Fold.Enrichment | FDR | S_No | GO.Term | GeneRatio | pvalue | p.adjust | Count.y | EnrichmentScore | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
487 | GO:0005829 | 498 | 0e+00 | 1.900790 | 0.00e+00 | 2 | CC cytosol | 496/977 | 0.0000000 | 0.0000000 | 496 | 1.934707 |
402 | GO:0005515 | 849 | 0e+00 | 1.306046 | 0.00e+00 | 1 | MF protein binding | 849/977 | 0.0000000 | 0.0000000 | 849 | 1.419566 |
427 | GO:0005654 | 379 | 0e+00 | 2.016551 | 0.00e+00 | 3 | CC nucleoplasm | 378/977 | 0.0000000 | 0.0000000 | 378 | 2.056656 |
446 | GO:0005737 | 431 | 0e+00 | 1.616165 | 0.00e+00 | 5 | CC cytoplasm | 392/977 | 0.0000000 | 0.0000000 | 392 | 1.629563 |
420 | GO:0005634 | 433 | 0e+00 | 1.519842 | 0.00e+00 | 4 | CC nucleus | 423/977 | 0.0000000 | 0.0000000 | 423 | 1.582279 |
1206 | GO:0019901 | 76 | 0e+00 | 3.075816 | 0.00e+00 | 7 | MF protein kinase binding | 75/977 | 0.0000000 | 0.0000000 | 75 | 3.383228 |
2677 | GO:0070062 | 193 | 0e+00 | 1.820098 | 0.00e+00 | 9 | CC extracellular exosome | 192/977 | 0.0000000 | 0.0000000 | 192 | 1.844196 |
1915 | GO:0042802 | 168 | 0e+00 | 1.893887 | 0.00e+00 | 6 | MF identical protein binding | 158/977 | 0.0000000 | 0.0000000 | 158 | 2.172799 |
447 | GO:0005739 | 137 | 0e+00 | 2.028971 | 0.00e+00 | 12 | CC mitochondrion | 134/977 | 0.0000000 | 0.0000000 | 134 | 2.088815 |
1101 | GO:0016310 | 80 | 0e+00 | 2.611067 | 0.00e+00 | 342 | BP phosphorylation | 27/977 | 0.0007437 | 0.0150603 | 27 | 1.951225 |
1204 | GO:0019899 | 60 | 0e+00 | 3.135982 | 0.00e+00 | 10 | MF enzyme binding | 59/977 | 0.0000000 | 0.0000000 | 59 | 3.531137 |
408 | GO:0005524 | 150 | 0e+00 | 1.903989 | 0.00e+00 | 8 | MF ATP binding | 148/977 | 0.0000000 | 0.0000000 | 148 | 2.126104 |
2455 | GO:0051301 | 59 | 0e+00 | 3.107998 | 0.00e+00 | 13 | BP cell division | 53/977 | 0.0000000 | 0.0000000 | 53 | 3.208187 |
596 | GO:0006468 | 58 | 0e+00 | 3.111751 | 0.00e+00 | 11 | BP protein phosphorylation | 61/977 | 0.0000000 | 0.0000000 | 61 | 3.375123 |
713 | GO:0007049 | 54 | 0e+00 | 3.128259 | 0.00e+00 | 34 | BP cell cycle | 40/977 | 0.0000000 | 0.0000002 | 40 | 2.940716 |
1936 | GO:0043065 | 51 | 0e+00 | 3.230586 | 0.00e+00 | 20 | BP positive regulation of apoptotic process | 45/977 | 0.0000000 | 0.0000000 | 45 | 3.155447 |
2182 | GO:0045893 | 83 | 0e+00 | 2.350551 | 0.00e+00 | 14 | BP positive regulation of DNA-templated transcription | 79/977 | 0.0000000 | 0.0000000 | 79 | 2.439662 |
1603 | GO:0032991 | 79 | 0e+00 | 2.412683 | 0.00e+00 | 21 | CC protein-containing complex | 71/977 | 0.0000000 | 0.0000000 | 71 | 2.390672 |
1476 | GO:0031625 | 49 | 0e+00 | 3.069979 | 0.00e+00 | 15 | MF ubiquitin protein ligase binding | 47/977 | 0.0000000 | 0.0000000 | 47 | 3.274078 |
331 | GO:0004672 | 55 | 0e+00 | 2.792517 | 0.00e+00 | 55 | MF protein kinase activity | 30/977 | 0.0000000 | 0.0000058 | 30 | 3.064424 |
1937 | GO:0043066 | 63 | 0e+00 | 2.531664 | 0.00e+00 | 16 | BP negative regulation of apoptotic process | 61/977 | 0.0000000 | 0.0000000 | 61 | 2.717080 |
3133 | GO:1902895 | 20 | 0e+00 | 6.777895 | 0.00e+00 | 18 | BP positive regulation of miRNA transcription | 19/977 | 0.0000000 | 0.0000000 | 19 | 7.208693 |
905 | GO:0009410 | 40 | 0e+00 | 3.253390 | 1.00e-07 | 30 | BP response to xenobiotic stimulus | 36/977 | 0.0000000 | 0.0000001 | 36 | 3.200336 |
510 | GO:0005925 | 54 | 0e+00 | 2.666424 | 0.00e+00 | 22 | CC focal adhesion | 55/977 | 0.0000000 | 0.0000000 | 55 | 2.756054 |
2808 | GO:0071456 | 29 | 0e+00 | 4.242280 | 1.00e-07 | 17 | BP cellular response to hypoxia | 29/977 | 0.0000000 | 0.0000000 | 29 | 4.598161 |
469 | GO:0005783 | 104 | 0e+00 | 1.916237 | 0.00e+00 | 19 | CC endoplasmic reticulum | 103/977 | 0.0000000 | 0.0000000 | 103 | 2.024429 |
2189 | GO:0045944 | 112 | 0e+00 | 1.851523 | 1.00e-07 | 23 | BP positive regulation of transcription by RNA polymerase II | 108/977 | 0.0000000 | 0.0000000 | 108 | 1.923421 |
2650 | GO:0061629 | 34 | 0e+00 | 3.592535 | 0.00e+00 | 24 | MF RNA polymerase II-specific DNA-binding transcription factor binding | 32/977 | 0.0000000 | 0.0000000 | 32 | 3.885106 |
3265 | GO:2000045 | 18 | 0e+00 | 6.777895 | 2.00e-07 | 27 | BP regulation of G1/S transition of mitotic cell cycle | 17/977 | 0.0000000 | 0.0000000 | 17 | 6.946028 |
2100 | GO:0045296 | 45 | 0e+00 | 2.748884 | 3.00e-07 | 25 | MF cadherin binding | 45/977 | 0.0000000 | 0.0000000 | 45 | 3.016089 |
673 | GO:0006915 | 65 | 0e+00 | 2.225067 | 1.10e-06 | 43 | BP apoptotic process | 58/977 | 0.0000000 | 0.0000019 | 58 | 2.216380 |
2187 | GO:0045931 | 14 | 0e+00 | 8.133474 | 1.60e-06 | 31 | BP positive regulation of mitotic cell cycle | 13/977 | 0.0000000 | 0.0000002 | 13 | 8.631461 |
7 | GO:0000079 | 16 | 0e+00 | 6.777895 | 1.60e-06 | 28 | BP regulation of cyclin-dependent protein serine/threonine kinase activity | 16/977 | 0.0000000 | 0.0000001 | 16 | 7.082224 |
2906 | GO:0090398 | 17 | 0e+00 | 6.064432 | 2.60e-06 | 29 | BP cellular senescence | 17/977 | 0.0000000 | 0.0000001 | 17 | 6.336727 |
851 | GO:0008284 | 57 | 0e+00 | 2.299643 | 2.80e-06 | 32 | BP positive regulation of cell population proliferation | 56/977 | 0.0000000 | 0.0000002 | 56 | 2.428191 |
332 | GO:0004674 | 48 | 0e+00 | 2.424511 | 3.90e-06 | 33 | MF protein serine/threonine kinase activity | 46/977 | 0.0000000 | 0.0000002 | 46 | 2.699854 |
2166 | GO:0045786 | 16 | 0e+00 | 5.915254 | 1.03e-05 | 42 | BP negative regulation of cell cycle | 15/977 | 0.0000000 | 0.0000016 | 15 | 6.128848 |
482 | GO:0005813 | 64 | 1e-07 | 2.077678 | 4.20e-06 | 41 | CC centrosome | 64/977 | 0.0000000 | 0.0000015 | 64 | 2.134674 |
12 | GO:0000122 | 90 | 1e-07 | 1.810120 | 1.41e-05 | 26 | BP negative regulation of transcription by RNA polymerase II | 89/977 | 0.0000000 | 0.0000000 | 89 | 2.062109 |
2462 | GO:0051402 | 19 | 1e-07 | 4.711464 | 1.58e-05 | 38 | BP neuron apoptotic process | 19/977 | 0.0000000 | 0.0000007 | 19 | 5.046085 |
2296 | GO:0048471 | 70 | 1e-07 | 1.985734 | 4.90e-06 | 73 | CC perinuclear region of cytoplasm | 65/977 | 0.0000003 | 0.0000292 | 65 | 1.928818 |
10 | GO:0000086 | 15 | 1e-07 | 6.100106 | 1.58e-05 | 52 | BP G2/M transition of mitotic cell cycle | 14/977 | 0.0000000 | 0.0000034 | 14 | 6.196946 |
1169 | GO:0018108 | 15 | 1e-07 | 6.100106 | 1.58e-05 | 40 | BP peptidyl-tyrosine phosphorylation | 15/977 | 0.0000000 | 0.0000012 | 15 | 6.249022 |
2261 | GO:0048013 | 15 | 1e-07 | 6.100106 | 1.58e-05 | 39 | BP ephrin receptor signaling pathway | 15/977 | 0.0000000 | 0.0000007 | 15 | 6.504084 |
2790 | GO:0071364 | 14 | 1e-07 | 6.469809 | 2.19e-05 | 37 | BP cellular response to epidermal growth factor stimulus | 14/977 | 0.0000000 | 0.0000007 | 14 | 7.082224 |
2181 | GO:0045892 | 59 | 1e-07 | 2.097356 | 2.62e-05 | 36 | BP negative regulation of DNA-templated transcription | 60/977 | 0.0000000 | 0.0000003 | 60 | 2.301084 |
1345 | GO:0030335 | 35 | 1e-07 | 2.769179 | 2.62e-05 | 49 | BP positive regulation of cell migration | 34/977 | 0.0000000 | 0.0000031 | 34 | 2.912850 |
173 | GO:0001934 | 31 | 2e-07 | 2.959363 | 3.44e-05 | 62 | BP positive regulation of protein phosphorylation | 28/977 | 0.0000001 | 0.0000142 | 28 | 3.066530 |
1538 | GO:0032465 | 13 | 2e-07 | 6.777895 | 3.53e-05 | 44 | BP regulation of cytokinesis | 13/977 | 0.0000000 | 0.0000021 | 13 | 7.082224 |
2008 | GO:0043524 | 25 | 2e-07 | 3.434744 | 3.85e-05 | 82 | BP negative regulation of neuron apoptotic process | 22/977 | 0.0000006 | 0.0000478 | 22 | 3.362783 |
Compare adjusted p-values (web tool FDR vs script p.adjust) directly.
# Compare the adjusted p-values of the two tools on the -log10 scale.
# fdrdiff: absolute difference between -log10(web FDR) and -log10(script p.adjust).
comparison_table$fdrdiff <- abs(-log10(comparison_table$FDR) - -log10(comparison_table$p.adjust))
# Distribution of the difference relative to the web tool's -log10(FDR).
# The ratio is written out in full in each call because hist() and table()
# label their output from the argument expression.
hist( comparison_table$fdrdiff / -log10(comparison_table$FDR) )
# Tally of terms whose relative difference is below 1.
table(comparison_table$fdrdiff / -log10(comparison_table$FDR) < 1)
##
## FALSE TRUE
## 1596 494
# Store the same test as a per-term flag (TRUE = relative difference < 1).
# NOTE(review): the flag is NA wherever FDR or p.adjust is NA, i.e. for terms
# reported by only one tool; keep this expression in sync with the two above.
comparison_table$fdrcheck <- comparison_table$fdrdiff / -log10(comparison_table$FDR) < 1
Compare fold enrichments directly.
# Compare the enrichment magnitudes of the two tools.
# folddiff: absolute difference between the web tool's Fold.Enrichment and the
# script's EnrichmentScore.
comparison_table$folddiff <- abs( comparison_table$Fold.Enrichment - comparison_table$EnrichmentScore)
# Distribution of the difference relative to the web tool's Fold.Enrichment.
# The ratio is written out in full in each call because hist() and table()
# label their output from the argument expression.
hist( comparison_table$folddiff / comparison_table$Fold.Enrichment )
# Tally of terms whose relative difference is below 0.3.
table( comparison_table$folddiff / comparison_table$Fold.Enrichment <0.3)
##
## FALSE TRUE
## 60 2030
# Store the same test as a per-term flag (TRUE = relative difference < 0.3).
# NOTE(review): the flag is NA wherever either enrichment value is NA, i.e. for
# terms reported by only one tool; keep this expression in sync with the two above.
comparison_table$foldcheck <- comparison_table$folddiff / comparison_table$Fold.Enrichment <0.3
Now look at the significant sets and see if they classify as replicated.
# Terms the web tool reports as significant (FDR < 0.05). subset() drops rows
# whose FDR is NA, so script-only terms are excluded here.
sig <- subset(comparison_table, FDR < 0.05)
# A term is replicated only when both checks are TRUE. `%in% TRUE` treats NA
# as "not TRUE", so a significant term with a missing check (for example one
# the script did not report) is counted as not replicated instead of being
# dropped from both sets; repro and notrepro always partition sig.
repro <- subset(sig, fdrcheck %in% TRUE & foldcheck %in% TRUE)
notrepro <- subset(sig, !(fdrcheck %in% TRUE & foldcheck %in% TRUE))
message("No. significant results from paper")
## No. significant results from paper
nrow(sig)
## [1] 266
message("No. replicated results")
## No. replicated results
nrow(repro)
## [1] 263
message("No. non-replicated results")
## No. non-replicated results
nrow(notrepro)
## [1] 3
message("Success rate")
## Success rate
# Fraction of significant web-tool terms that were replicated.
nrow(repro) / nrow(sig)
## [1] 0.9887218
head(repro, 50) %>%
  kbl(caption = "Top reproduced results") %>%
  kable_paper("hover", full_width = FALSE)
GOID | Count.x | PValue | Fold.Enrichment | FDR | S_No | GO.Term | GeneRatio | pvalue | p.adjust | Count.y | EnrichmentScore | fdrdiff | fdrcheck | folddiff | foldcheck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
487 | GO:0005829 | 498 | 0e+00 | 1.900790 | 0.00e+00 | 2 | CC cytosol | 496/977 | 0.0000000 | 0.0000000 | 496 | 1.934707 | 1.5334373 | TRUE | 0.0339173 | TRUE |
402 | GO:0005515 | 849 | 0e+00 | 1.306046 | 0.00e+00 | 1 | MF protein binding | 849/977 | 0.0000000 | 0.0000000 | 849 | 1.419566 | 20.8675813 | TRUE | 0.1135199 | TRUE |
427 | GO:0005654 | 379 | 0e+00 | 2.016551 | 0.00e+00 | 3 | CC nucleoplasm | 378/977 | 0.0000000 | 0.0000000 | 378 | 2.056656 | 1.2538178 | TRUE | 0.0401050 | TRUE |
446 | GO:0005737 | 431 | 0e+00 | 1.616165 | 0.00e+00 | 5 | CC cytoplasm | 392/977 | 0.0000000 | 0.0000000 | 392 | 1.629563 | 3.8640991 | TRUE | 0.0133984 | TRUE |
420 | GO:0005634 | 433 | 0e+00 | 1.519842 | 0.00e+00 | 4 | CC nucleus | 423/977 | 0.0000000 | 0.0000000 | 423 | 1.582279 | 1.9669073 | TRUE | 0.0624363 | TRUE |
1206 | GO:0019901 | 76 | 0e+00 | 3.075816 | 0.00e+00 | 7 | MF protein kinase binding | 75/977 | 0.0000000 | 0.0000000 | 75 | 3.383228 | 2.2731719 | TRUE | 0.3074124 | TRUE |
2677 | GO:0070062 | 193 | 0e+00 | 1.820098 | 0.00e+00 | 9 | CC extracellular exosome | 192/977 | 0.0000000 | 0.0000000 | 192 | 1.844196 | 0.2188497 | TRUE | 0.0240980 | TRUE |
1915 | GO:0042802 | 168 | 0e+00 | 1.893887 | 0.00e+00 | 6 | MF identical protein binding | 158/977 | 0.0000000 | 0.0000000 | 158 | 2.172799 | 4.3582496 | TRUE | 0.2789120 | TRUE |
447 | GO:0005739 | 137 | 0e+00 | 2.028971 | 0.00e+00 | 12 | CC mitochondrion | 134/977 | 0.0000000 | 0.0000000 | 134 | 2.088815 | 0.3200390 | TRUE | 0.0598434 | TRUE |
1101 | GO:0016310 | 80 | 0e+00 | 2.611067 | 0.00e+00 | 342 | BP phosphorylation | 27/977 | 0.0007437 | 0.0150603 | 27 | 1.951225 | 8.6238206 | TRUE | 0.6598420 | TRUE |
1204 | GO:0019899 | 60 | 0e+00 | 3.135982 | 0.00e+00 | 10 | MF enzyme binding | 59/977 | 0.0000000 | 0.0000000 | 59 | 3.531137 | 2.2421750 | TRUE | 0.3951556 | TRUE |
408 | GO:0005524 | 150 | 0e+00 | 1.903989 | 0.00e+00 | 8 | MF ATP binding | 148/977 | 0.0000000 | 0.0000000 | 148 | 2.126104 | 3.6334081 | TRUE | 0.2221150 | TRUE |
2455 | GO:0051301 | 59 | 0e+00 | 3.107998 | 0.00e+00 | 13 | BP cell division | 53/977 | 0.0000000 | 0.0000000 | 53 | 3.208187 | 0.3157693 | TRUE | 0.1001886 | TRUE |
596 | GO:0006468 | 58 | 0e+00 | 3.111751 | 0.00e+00 | 11 | BP protein phosphorylation | 61/977 | 0.0000000 | 0.0000000 | 61 | 3.375123 | 3.2731178 | TRUE | 0.2633713 | TRUE |
713 | GO:0007049 | 54 | 0e+00 | 3.128259 | 0.00e+00 | 34 | BP cell cycle | 40/977 | 0.0000000 | 0.0000002 | 40 | 2.940716 | 2.7934141 | TRUE | 0.1875432 | TRUE |
1936 | GO:0043065 | 51 | 0e+00 | 3.230586 | 0.00e+00 | 20 | BP positive regulation of apoptotic process | 45/977 | 0.0000000 | 0.0000000 | 45 | 3.155447 | 0.8661148 | TRUE | 0.0751390 | TRUE |
2182 | GO:0045893 | 83 | 0e+00 | 2.350551 | 0.00e+00 | 14 | BP positive regulation of DNA-templated transcription | 79/977 | 0.0000000 | 0.0000000 | 79 | 2.439662 | 0.8143112 | TRUE | 0.0891103 | TRUE |
1603 | GO:0032991 | 79 | 0e+00 | 2.412683 | 0.00e+00 | 21 | CC protein-containing complex | 71/977 | 0.0000000 | 0.0000000 | 71 | 2.390672 | 1.4944858 | TRUE | 0.0220115 | TRUE |
1476 | GO:0031625 | 49 | 0e+00 | 3.069979 | 0.00e+00 | 15 | MF ubiquitin protein ligase binding | 47/977 | 0.0000000 | 0.0000000 | 47 | 3.274078 | 0.6664312 | TRUE | 0.2040989 | TRUE |
331 | GO:0004672 | 55 | 0e+00 | 2.792517 | 0.00e+00 | 55 | MF protein kinase activity | 30/977 | 0.0000000 | 0.0000058 | 30 | 3.064424 | 3.3846929 | TRUE | 0.2719071 | TRUE |
1937 | GO:0043066 | 63 | 0e+00 | 2.531664 | 0.00e+00 | 16 | BP negative regulation of apoptotic process | 61/977 | 0.0000000 | 0.0000000 | 61 | 2.717080 | 1.5735299 | TRUE | 0.1854155 | TRUE |
3133 | GO:1902895 | 20 | 0e+00 | 6.777895 | 0.00e+00 | 18 | BP positive regulation of miRNA transcription | 19/977 | 0.0000000 | 0.0000000 | 19 | 7.208693 | 1.1039919 | TRUE | 0.4307977 | TRUE |
905 | GO:0009410 | 40 | 0e+00 | 3.253390 | 1.00e-07 | 30 | BP response to xenobiotic stimulus | 36/977 | 0.0000000 | 0.0000001 | 36 | 3.200336 | 0.3286086 | TRUE | 0.0530539 | TRUE |
510 | GO:0005925 | 54 | 0e+00 | 2.666424 | 0.00e+00 | 22 | CC focal adhesion | 55/977 | 0.0000000 | 0.0000000 | 55 | 2.756054 | 0.6742218 | TRUE | 0.0896299 | TRUE |
2808 | GO:0071456 | 29 | 0e+00 | 4.242280 | 1.00e-07 | 17 | BP cellular response to hypoxia | 29/977 | 0.0000000 | 0.0000000 | 29 | 4.598161 | 1.6393738 | TRUE | 0.3558810 | TRUE |
469 | GO:0005783 | 104 | 0e+00 | 1.916237 | 0.00e+00 | 19 | CC endoplasmic reticulum | 103/977 | 0.0000000 | 0.0000000 | 103 | 2.024429 | 0.9607952 | TRUE | 0.1081917 | TRUE |
2189 | GO:0045944 | 112 | 0e+00 | 1.851523 | 1.00e-07 | 23 | BP positive regulation of transcription by RNA polymerase II | 108/977 | 0.0000000 | 0.0000000 | 108 | 1.923421 | 1.0373080 | TRUE | 0.0718980 | TRUE |
2650 | GO:0061629 | 34 | 0e+00 | 3.592535 | 0.00e+00 | 24 | MF RNA polymerase II-specific DNA-binding transcription factor binding | 32/977 | 0.0000000 | 0.0000000 | 32 | 3.885106 | 0.6294467 | TRUE | 0.2925706 | TRUE |
3265 | GO:2000045 | 18 | 0e+00 | 6.777895 | 2.00e-07 | 27 | BP regulation of G1/S transition of mitotic cell cycle | 17/977 | 0.0000000 | 0.0000000 | 17 | 6.946028 | 0.7593623 | TRUE | 0.1681328 | TRUE |
2100 | GO:0045296 | 45 | 0e+00 | 2.748884 | 3.00e-07 | 25 | MF cadherin binding | 45/977 | 0.0000000 | 0.0000000 | 45 | 3.016089 | 1.4446050 | TRUE | 0.2672054 | TRUE |
673 | GO:0006915 | 65 | 0e+00 | 2.225067 | 1.10e-06 | 43 | BP apoptotic process | 58/977 | 0.0000000 | 0.0000019 | 58 | 2.216380 | 0.2579959 | TRUE | 0.0086870 | TRUE |
2187 | GO:0045931 | 14 | 0e+00 | 8.133474 | 1.60e-06 | 31 | BP positive regulation of mitotic cell cycle | 13/977 | 0.0000000 | 0.0000002 | 13 | 8.631461 | 0.9552565 | TRUE | 0.4979870 | TRUE |
7 | GO:0000079 | 16 | 0e+00 | 6.777895 | 1.60e-06 | 28 | BP regulation of cyclin-dependent protein serine/threonine kinase activity | 16/977 | 0.0000000 | 0.0000001 | 16 | 7.082224 | 1.3536469 | TRUE | 0.3043294 | TRUE |
2906 | GO:0090398 | 17 | 0e+00 | 6.064432 | 2.60e-06 | 29 | BP cellular senescence | 17/977 | 0.0000000 | 0.0000001 | 17 | 6.336727 | 1.2850156 | TRUE | 0.2722947 | TRUE |
851 | GO:0008284 | 57 | 0e+00 | 2.299643 | 2.80e-06 | 32 | BP positive regulation of cell population proliferation | 56/977 | 0.0000000 | 0.0000002 | 56 | 2.428191 | 1.1986957 | TRUE | 0.1285483 | TRUE |
332 | GO:0004674 | 48 | 0e+00 | 2.424511 | 3.90e-06 | 33 | MF protein serine/threonine kinase activity | 46/977 | 0.0000000 | 0.0000002 | 46 | 2.699854 | 1.3036646 | TRUE | 0.2753426 | TRUE |
2166 | GO:0045786 | 16 | 0e+00 | 5.915254 | 1.03e-05 | 42 | BP negative regulation of cell cycle | 15/977 | 0.0000000 | 0.0000016 | 15 | 6.128848 | 0.8090315 | TRUE | 0.2135942 | TRUE |
482 | GO:0005813 | 64 | 1e-07 | 2.077678 | 4.20e-06 | 41 | CC centrosome | 64/977 | 0.0000000 | 0.0000015 | 64 | 2.134674 | 0.4325447 | TRUE | 0.0569955 | TRUE |
12 | GO:0000122 | 90 | 1e-07 | 1.810120 | 1.41e-05 | 26 | BP negative regulation of transcription by RNA polymerase II | 89/977 | 0.0000000 | 0.0000000 | 89 | 2.062109 | 2.9321044 | TRUE | 0.2519886 | TRUE |
2462 | GO:0051402 | 19 | 1e-07 | 4.711464 | 1.58e-05 | 38 | BP neuron apoptotic process | 19/977 | 0.0000000 | 0.0000007 | 19 | 5.046085 | 1.3612400 | TRUE | 0.3346213 | TRUE |
2296 | GO:0048471 | 70 | 1e-07 | 1.985734 | 4.90e-06 | 73 | CC perinuclear region of cytoplasm | 65/977 | 0.0000003 | 0.0000292 | 65 | 1.928818 | 0.7752540 | TRUE | 0.0569158 | TRUE |
10 | GO:0000086 | 15 | 1e-07 | 6.100106 | 1.58e-05 | 52 | BP G2/M transition of mitotic cell cycle | 14/977 | 0.0000000 | 0.0000034 | 14 | 6.196946 | 0.6644158 | TRUE | 0.0968408 | TRUE |
1169 | GO:0018108 | 15 | 1e-07 | 6.100106 | 1.58e-05 | 40 | BP peptidyl-tyrosine phosphorylation | 15/977 | 0.0000000 | 0.0000012 | 15 | 6.249022 | 1.1044736 | TRUE | 0.1489160 | TRUE |
2261 | GO:0048013 | 15 | 1e-07 | 6.100106 | 1.58e-05 | 39 | BP ephrin receptor signaling pathway | 15/977 | 0.0000000 | 0.0000007 | 15 | 6.504084 | 1.3612400 | TRUE | 0.4039781 | TRUE |
2790 | GO:0071364 | 14 | 1e-07 | 6.469809 | 2.19e-05 | 37 | BP cellular response to epidermal growth factor stimulus | 14/977 | 0.0000000 | 0.0000007 | 14 | 7.082224 | 1.5038425 | TRUE | 0.6124155 | TRUE |
2181 | GO:0045892 | 59 | 1e-07 | 2.097356 | 2.62e-05 | 36 | BP negative regulation of DNA-templated transcription | 60/977 | 0.0000000 | 0.0000003 | 60 | 2.301084 | 1.9221630 | TRUE | 0.2037281 | TRUE |
1345 | GO:0030335 | 35 | 1e-07 | 2.769179 | 2.62e-05 | 49 | BP positive regulation of cell migration | 34/977 | 0.0000000 | 0.0000031 | 34 | 2.912850 | 0.9274529 | TRUE | 0.1436715 | TRUE |
173 | GO:0001934 | 31 | 2e-07 | 2.959363 | 3.44e-05 | 62 | BP positive regulation of protein phosphorylation | 28/977 | 0.0000001 | 0.0000142 | 28 | 3.066530 | 0.3831931 | TRUE | 0.1071675 | TRUE |
1538 | GO:0032465 | 13 | 2e-07 | 6.777895 | 3.53e-05 | 44 | BP regulation of cytokinesis | 13/977 | 0.0000000 | 0.0000021 | 13 | 7.082224 | 1.2255948 | TRUE | 0.3043294 | TRUE |
2008 | GO:0043524 | 25 | 2e-07 | 3.434744 | 3.85e-05 | 82 | BP negative regulation of neuron apoptotic process | 22/977 | 0.0000006 | 0.0000478 | 22 | 3.362783 | 0.0941283 | TRUE | 0.0719613 | TRUE |
# Show up to 50 significant terms that failed at least one replication check.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
head(notrepro, 50) %>%
  kbl(caption = "Top un-reproduced results") %>%
  kable_paper("hover", full_width = FALSE)
GOID | Count.x | PValue | Fold.Enrichment | FDR | S_No | GO.Term | GeneRatio | pvalue | p.adjust | Count.y | EnrichmentScore | fdrdiff | fdrcheck | folddiff | foldcheck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1099 | GO:0016301 | 30 | 0.0000131 | 2.463986 | 0.0009073 | 214 | MF kinase activity | 11/977 | 0.0001330 | 0.0043019 | 11 | 3.769571 | 0.6759028 | TRUE | 1.3055855 | FALSE |
278 | GO:0003723 | 111 | 0.0000336 | 1.471041 | 0.0017157 | 46 | MF RNA binding | 111/977 | 0.0000000 | 0.0000026 | 111 | 1.707734 | 2.8233451 | FALSE | 0.2366929 | TRUE |
2780 | GO:0071310 | 9 | 0.0001525 | 5.545551 | 0.0076096 | 1649 | BP cellular response to organic substance | 3/977 | 0.0431592 | 0.1507773 | 3 | 3.749413 | 1.2969713 | TRUE | 1.7961376 | FALSE |
For reproducibility
# Record the R version, platform and attached/loaded package versions used
# for this run.
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] eulerr_7.0.2 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [5] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [9] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 kableExtra_1.4.0
## [13] knitr_1.47
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3 xml2_1.3.6
## [5] polylabelr_0.2.0 stringi_1.8.4 hms_1.1.3 digest_0.6.35
## [9] magrittr_2.0.3 evaluate_0.24.0 grid_4.4.0 timechange_0.3.0
## [13] fastmap_1.2.0 jsonlite_1.8.8 fansi_1.0.6 viridisLite_0.4.2
## [17] scales_1.3.0 jquerylib_0.1.4 cli_3.6.2 rlang_1.1.4
## [21] polyclip_1.10-6 munsell_0.5.1 withr_3.0.0 cachem_1.1.0
## [25] yaml_2.3.8 tools_4.4.0 tzdb_0.4.0 colorspace_2.1-0
## [29] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4 pkgconfig_2.0.3
## [33] pillar_1.9.0 bslib_0.7.0 gtable_0.3.5 glue_1.7.0
## [37] Rcpp_1.0.12 systemfonts_1.1.0 xfun_0.44 tidyselect_1.2.1
## [41] highr_0.11 rstudioapi_0.16.0 htmltools_0.5.8.1 rmarkdown_2.27
## [45] svglite_2.1.3 compiler_4.4.0