Here we analyse 200 articles that were randomly selected from 1500 PMC articles. Each article was examined independently by two team members, the results were compared, and any inconsistencies were resolved.
The code shown here was used to generate Figure 2 of the manuscript.
# Default figure size for every knitr chunk in this report.
knitr::opts_chunk$set(fig.width = 7, fig.height = 5)
library("wordcloud")

# Load the tab-separated QC table. fill = TRUE pads rows that have fewer
# fields than the header instead of stopping with an error.
x <- read.table(
  file = "../data/QC-analysis2.tsv",
  sep = "\t",
  header = TRUE,
  fill = TRUE
)
head(x)
## Pubmed.Central.ID Article.number Allocated Journal
## 1 PMC6493771 691 - PLoS Comput Biol
## 2 PMC6442023 462 - Front Pharmacol
## 3 PMC6384238 213 - Front Oncol
## 4 PMC6594459 1172 - Ann Oncol
## 5 PMC6649552 1407 - Cell Cycle
## 6 PMC6478283 637 - PLoS One
## Omics.type Organism
## 1 EXCLUDE EXCLUDE
## 2 RNA-seq Homo sapiens, Rattus norvegicus
## 3 Gene expression array, RNA-seq Homo sapiens
## 4 Database Homo sapiens
## 5 RNA-seq Mus musculus
## 6 Database Homo sapiens
## Gene.set.library GS.version Statistical.test.used
## 1 EXCLUDE EXCLUDE EXCLUDE
## 2 GEO No GSEA
## 3 GO, KEGG No Not stated
## 4 MSigDB No GSEA
## 5 GO, KEGG No Not stated
## 6 KEGG, Reactome, PID, DisGeNET, GO No Not stated
## FDR.Correction App.used App.Version Code.availability
## 1 EXCLUDE EXCLUDE EXCLUDE EXCLUDE
## 2 No GSEA Yes <NA>
## 3 Yes clusterProfiler No <NA>
## 4 No GSEA Yes <NA>
## 5 No DAVID No <NA>
## 6 No ToppGene No <NA>
## Background.gene.set Assumptions.violated Gene.lists.provided
## 1 EXCLUDE EXCLUDE EXCLUDE
## 2 <NA> FDR No
## 3 Not stated Background Yes
## 4 <NA> FDR No
## 5 Not stated Background, FDR No
## 6 Not stated Background, FDR Yes
## Separated.up.and.down Parameters
## 1 - -
## 2 - rank=no, algo=no, weight=no
## 3 - -
## 4 - rank=yes, algo=yes, weight=no
## 5 Combined -
## 6 - -
# Column names and dimensions of the raw table.
colnames(x)
## [1] "Pubmed.Central.ID" "Article.number" "Allocated"
## [4] "Journal" "Omics.type" "Organism"
## [7] "Gene.set.library" "GS.version" "Statistical.test.used"
## [10] "FDR.Correction" "App.used" "App.Version"
## [13] "Code.availability" "Background.gene.set" "Assumptions.violated"
## [16] "Gene.lists.provided" "Separated.up.and.down" "Parameters"
dim(x)
## [1] 249 18

# Rows whose GS.version is "EXCLUDE" are set aside and counted, both as rows
# and as distinct PubMed Central IDs. which() drops NA comparisons, exactly
# as subset() does.
exclude <- x[which(x$GS.version == "EXCLUDE"), ]
nrow(exclude)
## [1] 14
length(unique(exclude$Pubmed.Central.ID))
## [1] 14

# Everything below works on the rows that were not excluded.
x <- x[which(x$GS.version != "EXCLUDE"), ]
nrow(x)
## [1] 235
length(unique(x$Pubmed.Central.ID))
## [1] 186
# Count analyses per journal. Cells are split on ", " first so that a cell
# listing more than one value contributes one count per value.
journal_split <- strsplit(x$Journal, ", ")
journal <- unlist(journal_split)
# Ascending order: the most frequent journals end up last.
res <- sort(table(journal))
length(res)
## [1] 96
res
## journal
## 3 Biotech Am J Physiol Gastrointest Liver Physiol
## 1 1
## Animals (Basel) Ann Oncol
## 1 1
## Appl Environ Microbiol Biosci Rep
## 1 1
## BMC Infect Dis BMC Med Genet
## 1 1
## BMC Med Genomics BMC Musculoskelet Disord
## 1 1
## Cancer Control Cancers (Basel)
## 1 1
## Cell Cycle Cell Death Discov
## 1 1
## Cells Chin Med
## 1 1
## Commun Biol Endocrinology
## 1 1
## FEBS Open Bio Genome Biol Evol
## 1 1
## Genomics Inform Heliyon
## 1 1
## Hepatology Int J Endocrinol
## 1 1
## Int J Nanomedicine Int J Ophthalmol
## 1 1
## J Assist Reprod Genet J Bacteriol
## 1 1
## J Cancer J Cardiovasc Dev Dis
## 1 1
## J Diabetes Investig J Immunother Cancer
## 1 1
## J Invest Dermatol J Ovarian Res
## 1 1
## J Res Med Sci J Transl Med
## 1 1
## J Virol Medicine (Baltimore)
## 1 1
## Mol Breed Mol Ther Nucleic Acids
## 1 1
## Neurobiol Dis Nutrients
## 1 1
## Radiat Res Respir Res
## 1 1
## Stem Cells Int Thyroid
## 1 1
## Toxicol Sci Virology
## 1 1
## World J Gastrointest Oncol Am J Transl Res
## 1 2
## Arch Med Sci BMC Genet
## 2 2
## Cell Commun Signal Cell Death Dis
## 2 2
## Clin Cancer Res Clin Epigenetics
## 2 2
## Clin Proteomics Diabetes Metab Syndr Obes
## 2 2
## Dis Markers Epigenetics
## 2 2
## Front Neurosci Front Physiol
## 2 2
## Genes (Basel) J Hematol Oncol
## 2 2
## Med Sci Monit Metabolomics
## 2 2
## Mol Med Rice (N Y)
## 2 2
## Transl Psychiatry Viruses
## 2 2
## Biol Open BMC Bioinformatics
## 3 3
## BMC Cancer Cancer Cell Int
## 3 3
## Front Immunol Int J Mol Sci
## 3 3
## J Clin Med Mol Autism
## 3 3
## Aging (Albany NY) Exp Ther Med
## 4 4
## Front Oncol Front Pharmacol
## 4 4
## Metabolites Mol Med Rep
## 4 4
## Oncotarget RNA Biol
## 4 4
## Cancer Manag Res Biomed Res Int
## 5 6
## Onco Targets Ther Oncol Rep
## 6 7
## Front Genet BMC Genomics
## 8 9
## Oncol Lett PeerJ
## 9 11
## Sci Rep PLoS One
## 12 15
# Word cloud of journal names, with word size driven by the counts in `res`.
par(mar = c(1, 1, 1, 1))
#names(res) <- gsub("Gene expression array","RNA array",names(res))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Med Sci Monit could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Metabolomics could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Transl Psychiatry could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Endocrinology could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : FEBS Open Bio could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Hepatology could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Endocrinol could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Nanomedicine could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Ophthalmol could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Assist Reprod Genet could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Cardiovasc Dev Dis could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Diabetes Investig could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Immunother Cancer could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Invest Dermatol could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Ovarian Res could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Medicine (Baltimore) could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Mol Ther Nucleic Acids could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Respir Res could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Stem Cells Int could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Thyroid could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words
## = 200, : World J Gastrointest Oncol could not be fit on page. It will not be
## plotted.
# Horizontal bar chart of the 20 most frequent journals.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 17))
grid()

# Condensed view: the 9 most frequent journals plus a single "Other" bar.
# `res` is sorted ascending, so head(res, -9) is exactly the complement of
# tail(res, 9). The earlier res[1:(nrow(res)-10)] stopped one entry short, so
# the 10th most frequent journal appeared neither as its own bar nor in
# "Other".
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 165))
# Hand-tuned label positions: one count per bar, nudged right of the bar end.
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 7, labels = res2,
  pos = 3, cex = 1, col = "black")

# Create the output directory only when it is missing, so re-running the
# report does not emit an "already exists" warning.
if (!dir.exists("images")) {
  dir.create("images")
}

# Same condensed chart written to PNG.
png("images/journals2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 170))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 10, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# Same condensed chart written to PDF.
pdf("images/journals2.pdf", width = 4, height = 4)
par(mar = c(5, 9, 3, 2))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 180))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 15, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Count analyses per omics type; a cell may list several types separated
# by ", ", so split before tabulating.
omics_split <- strsplit(x$Omics.type, ", ")
omics <- unlist(omics_split)
# Ascending order: the most frequent types end up last.
res <- sort(table(omics))
length(res)
## [1] 18
res
## omics
## Metgenomics miRNA-seq
## 1 1
## NanoString gene expression PCR Array
## 1 1
## PPI scRNA-seq
## 1 1
## DNA methylation sequencing miRNA array
## 2 2
## CNV array Genotyping array
## 3 5
## Protein array DNA methylation array
## 6 7
## Genome sequencing Proteomics
## 10 14
## Metabolomics Database
## 15 19
## RNA-seq Gene expression array
## 70 91
# Word cloud and top-20 bar chart of omics types. "Gene expression array" is
# temporarily shortened to "RNA array" for these two plots only.
par(mar = c(1, 1, 1, 1))
names(res) <- gsub("Gene expression array", "RNA array", names(res))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 100))
grid()
# Restore the full label for the condensed charts below.
names(res) <- gsub("RNA array", "Gene expression array", names(res))

# Condensed view: the 9 most frequent types plus a single "Other" bar.
# `res` is sorted ascending, so head(res, -9) is exactly the complement of
# tail(res, 9). The earlier res[1:(nrow(res)-10)] stopped one entry short and
# dropped the 10th most frequent type from the figure altogether.
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 100))
# Hand-tuned label positions: one count per bar, nudged right of the bar end.
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 5, labels = res2,
  pos = 3, cex = 1, col = "black")

# Same condensed chart written to PNG.
png("images/omics2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 100))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 6, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# Same condensed chart written to PDF.
pdf("images/omics2.pdf", width = 4, height = 4)
par(mar = c(5, 10, 3, 2))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 110))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 10, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Count analyses per organism; a cell may list several organisms separated
# by ", ", so split before tabulating.
org_split <- strsplit(x$Organism, ", ")
org <- unlist(org_split)
# Ascending order: the most frequent organisms end up last.
res <- sort(table(org))
length(res)
## [1] 31
res
## org
## Acropora cervicornis Aedes aegypti Ananas comosus
## 1 1 1
## Anas platyrhynchos Bos grunniens Brassica napus
## 1 1 1
## Candida albicans Canis lupus familiaris Clostridium scindens
## 1 1 1
## Coturnix japonica Moschus berezovskii Mycobacterium smegmatis
## 1 1 1
## Oreochromis niloticus Oryctolagus cuniculus Pygoscelis antarcticus
## 1 1 1
## Pygoscelis papua Salvelinus alpinus Suaeda salsa
## 1 1 1
## Triticum aestivum Vicia faba Bemisia tabaci
## 1 1 2
## Mauremys reevesii Mizuhopecten yessoensis Pagrus major
## 2 2 2
## Sclerotinia sclerotiorum Oryza sativa Bos taurus
## 2 5 7
## Sus scrofa Rattus norvegicus Mus musculus
## 7 10 24
## Homo sapiens
## 157
# Word cloud of organisms. "Homo sapiens" is shortened to "human" for the
# cloud only and restored before the bar charts.
par(mar = c(1, 1, 1, 1))
names(res) <- gsub("Homo sapiens", "human", names(res))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"), scale = c(4, .5))
par(mar = c(5, 12, 3, 5))
names(res) <- gsub("human", "Homo sapiens", names(res))

# Horizontal bar chart of the 20 most frequent organisms.
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
grid()

# Condensed view: the 9 most frequent organisms plus a single "Other" bar.
# `res` is sorted ascending, so head(res, -9) is exactly the complement of
# tail(res, 9). The earlier res[1:(nrow(res)-10)] stopped one entry short and
# dropped the 10th most frequent organism from the figure altogether.
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
# Hand-tuned label positions: one count per bar, nudged right of the bar end.
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 7, labels = res2,
  pos = 3, cex = 1, col = "black")

# Same condensed chart written to PNG.
png("images/organisms2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 15, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# Same condensed chart written to PDF.
pdf("images/organisms2.pdf", width = 4, height = 4)
par(mar = c(5, 11, 3, 2))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 25, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Count analyses per gene set library; a cell may list several libraries
# separated by ", ", so split before tabulating.
GSL <- x$Gene.set.library
GSL_split <- strsplit(GSL, ", ")
GSL <- unlist(GSL_split)
res <- table(GSL)
# Ascending order: the most frequent libraries end up last.
res <- res[order(res)]
length(res)
## [1] 26
# Percentage of library mentions that are "Not stated". The count has to be
# looked up with res[...]; dividing the bare which() result used the
# *position* of "Not stated" in the sorted table (21) instead of its count
# (14) and reported 6.325301.
res[which(names(res) == "Not stated")] / sum(res) * 100
## Not stated
## 4.216867
# NOTE(review): the table below lists "Ingenuity Knowledge base" and
# "Ingenuity Knowledge Base" as separate entries; they differ only in case
# and probably should be merged in the source data - confirm.
res
## GSL
## ChemRICH COG CYTOBAND
## 1 1 1
## DisGeNET GEO Human Metabolome Database
## 1 1 1
## Ingenuity Knowledge base InterPro Jensen Diseases database
## 1 1 1
## MetaCore Metascape OMIM
## 1 1 1
## Pathway commons PID SIGNOR
## 1 1 1
## TRANSFAC Vectorbase JASPAR
## 1 1 2
## MetaboAnalyst BioCarta Not stated
## 2 3 14
## Reactome MSigDB Ingenuity Knowledge Base
## 16 20 23
## KEGG GO
## 114 121
# Word cloud of gene set libraries, with word size driven by the counts.
# (A leftover gsub("Homo sapiens", "human", ...) copied from the organism
# section was removed here; it could never match a library name.)
par(mar = c(1, 1, 1, 1))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))

# Horizontal bar chart of the 20 most frequent libraries.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 140))
grid()

# Condensed view: the 9 most frequent libraries plus a single "Other" bar.
# `res` is sorted ascending, so head(res, -9) is exactly the complement of
# tail(res, 9). The earlier res[1:(nrow(res)-10)] stopped one entry short and
# dropped the 10th most frequent library from the figure altogether.
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 140))
# Hand-tuned label positions: one count per bar, nudged right of the bar end.
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 7, labels = res2,
  pos = 3, cex = 1, col = "black")

# Same condensed chart written to PNG.
png("images/genesetlib2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 150))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 15, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# Same condensed chart written to PDF.
pdf("images/genesetlib2.pdf", width = 4, height = 4)
par(mar = c(5, 11, 3, 2))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 150))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 16, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Tabulate the GS.version column (No / Yes).
GSV <- x$GS.version
res <- table(GSV)
res
## GSV
## No Yes
## 217 18
# Share of the first category ("No") as a percentage.
res[1] / sum(res) * 100
## No
## 92.34043

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(14, 12, 3, 10))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set version defined",
  xlim = c(0, 250)
)
text(
  x = res + 15, y = (seq_along(res) * 1.2) - 0.75,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/genesetvers2.png", width = 300, height = 150)
par(mar = c(5, 5, 3, 3))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set version defined",
  xlim = c(0, 250)
)
text(
  x = res + 20, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/genesetvers2.pdf", width = 3, height = 2)
par(mar = c(5, 4, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set version defined",
  xlim = c(0, 260)
)
text(
  x = res + 25, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# Count analyses per statistical test; a cell may list several tests
# separated by ", ", so split before tabulating.
test <- x$Statistical.test.used
test <- unlist(strsplit(test, ", "))
# Ascending order: the most frequent tests end up last.
res <- sort(table(test))
# Percentage of mentions where the test was "Not stated".
res[names(res) == "Not stated"] / sum(res) * 100
## Not stated
## 56.30252
length(res)
## [1] 12
res
## test
## Binomial EASE GSVA
## 1 1 1
## Kruskal-Wallis modified Chi-squared MSEA
## 1 1 1
## Kolmogorov–Smirnov No test Fisher
## 2 14 24
## GSEA Hypergeometric Not stated
## 29 29 134
# Word cloud of statistical tests, with word size driven by the counts.
par(mar = c(1, 1, 1, 1))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))

# Horizontal bar chart of the 20 most frequent tests.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Test used", xlim = c(0, 150))
grid()

# Condensed view: the 9 most frequent tests plus a single "Other" bar.
# `res` is sorted ascending, so head(res, -9) is exactly the complement of
# tail(res, 9). The earlier res[1:(nrow(res)-10)] stopped one entry short and
# dropped the 10th most frequent test from the figure altogether.
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Test used", xlim = c(0, 150))
# Hand-tuned label positions: one count per bar, nudged right of the bar end.
text(y = (seq_along(res2) * 1.2) - 1.1, x = res2 + 10, labels = res2,
  pos = 3, cex = 1, col = "black")

# Same condensed chart written to PNG.
png("images/stattest2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Test used", xlim = c(0, 160))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 15, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# Same condensed chart written to PDF. The pdf() device could not encode the
# en dash in "Kolmogorov–Smirnov": it emitted "conversion failure ... in
# 'mbcsToSbcs'" warnings and substituted dots in the axis label. The PDF
# labels therefore use an ASCII hyphen; names(res2) itself is left untouched.
pdf("images/stattest2.pdf", width = 4, height = 4)
par(mar = c(5, 11, 3, 2))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  names.arg = gsub("\u2013", "-", names(res2), fixed = TRUE),
  xlab = "no. analyses", main = "Test used", xlim = c(0, 170))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 20, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Tabulate the FDR.Correction column after splitting cells on ", ".
fdr <- x$FDR.Correction
fdr <- unlist(strsplit(fdr, ", "))
# Ascending order: the most frequent answers end up last.
res <- sort(table(fdr))
# Percentage of each answer other than "Yes".
res[names(res) != "Yes"] / sum(res) * 100
## fdr
## Not stated No test No
## 3.846154 5.982906 39.316239
res
## fdr
## Not stated No test No Yes
## 9 14 92 119

# Word cloud of the answers.
par(mar = c(1, 1, 1, 5))
wordcloud(
  words = names(res), freq = res,
  min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(10, 12, 3, 5))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "FDR correction performed",
  xlim = c(0, 150)
)
text(
  x = res + 10, y = (seq_along(res) * 1.2) - 0.8,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/fdr2.png", width = 300, height = 200)
par(mar = c(5, 8, 3, 3))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "FDR correction performed",
  xlim = c(0, 160)
)
text(
  x = res + 15, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/fdr2.pdf", width = 3, height = 3)
par(mar = c(5, 5, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "FDR correction performed",
  xlim = c(0, 160)
)
text(
  x = res + 15, y = (seq_along(res) * 1.2) - 1.0,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# Count analyses per application; a cell may list several applications
# separated by ", ", so split before tabulating.
App_split <- strsplit(x$App.used, ", ")
App <- unlist(App_split)
# Ascending order: the most frequent applications end up last.
res <- sort(table(App))
# Percentage of mentions where the application was "Not stated".
res[names(res) == "Not stated"] / sum(res) * 100
## Not stated
## 6.382979
length(res)
## [1] 50
res
## App
## anamiR ChemRICH
## 1 1
## Custom MATLAB script Cytoscape (No plugin stated)
## 1 1
## EggNOG fgsea
## 1 1
## FunRich g:GOSt
## 1 1
## GAGE GENCLIP
## 1 1
## GeneCodis GO: TermFinder
## 1 1
## GO::TermFinder GSVA
## 1 1
## KSEA Limma
## 1 1
## Metascape Molecule Annotation System
## 1 1
## MSEA NetworkAnalyst
## 1 1
## Pascal PathVisio
## 1 1
## R script ReactomePA
## 1 1
## SNP2GO R package topGO
## 1 1
## ToppGene webMeV
## 1 1
## agriGO BiNGO
## 2 2
## g:Profiler MetaCore
## 2 2
## STRING Custom R script
## 2 3
## GOrilla KAAS
## 3 3
## Mummichog Blast2GO
## 3 4
## WebGestalt ClueGO/Cytoscape
## 4 5
## Enrichr GOseq
## 5 5
## MetaboAnalyst KOBAS
## 7 9
## clusterProfiler PANTHER
## 10 10
## Not stated Ingenuity Pathway Analysis
## 15 26
## GSEA DAVID
## 30 55
# Word cloud of applications, with word size driven by the counts.
par(mar = c(1, 1, 1, 1))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))

# Horizontal bar chart of the 20 most frequent applications.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "App used", xlim = c(0, 60))
grid()

# Condensed view: the 9 most frequent applications plus a single "Other" bar.
# `res` is sorted ascending, so head(res, -9) is exactly the complement of
# tail(res, 9). The earlier res[1:(nrow(res)-10)] stopped one entry short and
# dropped the 10th most frequent application from the figure altogether.
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "App used", xlim = c(0, 80))
# Hand-tuned label positions: one count per bar, nudged right of the bar end.
text(y = (seq_along(res2) * 1.2) - 1.1, x = res2 + 5, labels = res2,
  pos = 3, cex = 1, col = "black")

# Same condensed chart written to PNG.
png("images/app2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "App used", xlim = c(0, 80))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 5, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# Same condensed chart written to PDF.
pdf("images/app2.pdf", width = 4, height = 4)
par(mar = c(5, 12, 3, 2))
barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "App used", xlim = c(0, 80))
text(y = (seq_along(res2) * 1.2) - 1.2, x = res2 + 10, labels = res2,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Tabulate the App.Version column (No / Yes).
APV <- x$App.Version
res <- table(APV)
res
## APV
## No Yes
## 167 68
# Share of the first category ("No") as a percentage.
res[1] / sum(res) * 100
## No
## 71.06383

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(14, 12, 3, 5))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "App version defined",
  xlim = c(0, 200)
)
text(
  x = res + 15, y = (seq_along(res) * 1.2) - 0.75,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/appvers2.png", width = 300, height = 150)
par(mar = c(5, 5, 3, 3))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "App version defined",
  xlim = c(0, 200)
)
text(
  x = res + 15, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/appvers2.pdf", width = 3, height = 2)
par(mar = c(5, 4, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "App version defined",
  xlim = c(0, 200)
)
text(
  x = res + 19, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# Tabulate the Code.availability column (No / Yes); table() leaves out NA
# entries.
code <- x$Code.availability
res <- table(code)
res
## code
## No Yes
## 44 3
# Share of the first category ("No") as a percentage.
res[1] / sum(res) * 100
## No
## 93.61702

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(14, 12, 3, 5))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Code availability",
  xlim = c(0, 50)
)
text(
  x = res + 2, y = (seq_along(res) * 1.2) - 0.75,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/code2.png", width = 300, height = 150)
par(mar = c(5, 5, 3, 3))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Code availability",
  xlim = c(0, 50)
)
text(
  x = res + 3, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/code2.pdf", width = 3, height = 2)
par(mar = c(5, 4, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Code availability",
  xlim = c(0, 55)
)
text(
  x = res + 5, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# Tabulate how the background gene set was reported. "Yes" is relabelled
# "Yes, correct" so it reads clearly next to "Stated, but incorrect".
BG <- x$Background.gene.set
BG <- gsub("Yes", "Yes, correct", BG)
res <- table(BG)
res
## BG
## No Not stated Stated, but incorrect
## 5 178 6
## Yes, correct
## 8
# Percentage of analyses without a correctly specified background. The
# comparison must use the relabelled name: after the gsub() above no category
# is called "Yes" any more, so the earlier names(res) != "Yes" filter kept
# every category and always reported 100.
sum(res[which(names(res) != "Yes, correct")]) / sum(res) * 100
## [1] 95.93909

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(10, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Background list specification",
  xlim = c(0, 200))
text(y = (seq_along(res) * 1.2) - 0.85, x = res + 10, labels = res,
  pos = 3, cex = 1, col = "black")

# PNG copy.
png("images/bg2.png", width = 300, height = 200)
par(mar = c(5, 9, 3, 2))
barplot(res, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Background list specified",
  xlim = c(0, 220))
text(y = (seq_along(res) * 1.2) - 1.1, x = res + 18, labels = res,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# PDF copy.
pdf("images/bg2.pdf", width = 4, height = 3)
par(mar = c(5, 10, 3, 2))
barplot(res, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Background list specified",
  xlim = c(0, 220))
text(y = (seq_along(res) * 1.2) - 1.1, x = res + 20, labels = res,
  pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Tabulate the Gene.lists.provided column (No / Yes).
GL <- x$Gene.lists.provided
res <- table(GL)
res
## GL
## No Yes
## 142 93
# Percentage of analyses in any category other than "Yes".
sum(res[names(res) != "Yes"]) / sum(res) * 100
## [1] 60.42553

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(14, 12, 3, 5))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene lists provided",
  xlim = c(0, 160)
)
text(
  x = res + 8, y = (seq_along(res) * 1.2) - 0.75,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/genelists2.png", width = 300, height = 150)
par(mar = c(5, 5, 3, 3))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene lists provided",
  xlim = c(0, 160)
)
text(
  x = res + 9, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/genelists2.pdf", width = 3, height = 2)
par(mar = c(5, 4, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene lists provided",
  xlim = c(0, 170)
)
text(
  x = res + 15, y = (seq_along(res) * 1.2) - 1.25,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# Number of analyses whose Assumptions.violated entry is exactly "No", and
# number with any other non-missing entry.
ok <- sum(x$Assumptions.violated == "No", na.rm = TRUE)
ok
## [1] 35
bad <- sum(x$Assumptions.violated != "No", na.rm = TRUE)
bad
## [1] 200
# Percentage of analyses with no violation recorded.
ok / (bad + ok) * 100
## [1] 14.89362

# Tally the individual violation types. "No" is relabelled "None", and cells
# are split on ", " so each listed violation is counted once.
ass <- gsub("^No$", "None", x$Assumptions.violated)
ass <- unlist(strsplit(ass, ", "))
# Ascending order: the most frequent categories end up last.
res <- sort(table(ass))
res
## ass
## Misinterpreted FDR values Inference without test No data shown
## 2 11 13
## None FDR Background
## 35 94 179

# Word cloud of the categories.
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res), freq = res,
  min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(8, 12, 3, 5))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Methodological flaws",
  xlim = c(0, 200)
)
text(
  x = res + 8, y = (seq_along(res) * 1.2) - 0.9,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/assumptions2.png", width = 400, height = 250)
par(mar = c(5, 12, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Methodological flaws",
  xlim = c(0, 220)
)
text(
  x = res + 18, y = (seq_along(res) * 1.2) - 1.1,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/assumptions2.pdf", width = 4, height = 3)
par(mar = c(5, 11, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Methodological flaws",
  xlim = c(0, 230)
)
text(
  x = res + 25, y = (seq_along(res) * 1.2) - 1.2,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# Tabulate the Separated.up.and.down column.
comb <- x$Separated.up.and.down
res <- table(comb)
res
## comb
## - Combined Not stated Separated
## 126 76 5 28
# Percentage of analyses in any category other than "Separated".
# NOTE(review): this is computed before the "-" rows are dropped below, so
# "-" counts towards both numerator and denominator - confirm that is the
# intended denominator.
sum(res[names(res) != "Separated"]) / sum(res) * 100
## [1] 88.08511

# The "-" category is left out of the charts.
res <- res[names(res) != "-"]

# On-screen bar chart with the counts printed next to the bars.
par(mar = c(10, 12, 3, 5))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses",
  main = "Separated or combined ORA (differential expression)",
  xlim = c(0, 90)
)
grid()
text(
  x = res + 5, y = (seq_along(res) * 1.2) - 0.9,
  labels = res, pos = 3, cex = 1, col = "black"
)

# PNG copy.
png("images/comb2.png", width = 300, height = 200)
par(mar = c(5, 8, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Separated or combined ORA",
  xlim = c(0, 100)
)
text(
  x = res + 5, y = (seq_along(res) * 1.2) - 1.1,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2

# PDF copy.
pdf("images/comb2.pdf", width = 3, height = 2.5)
par(mar = c(5, 5, 3, 2))
barplot(
  res,
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Separated or combined ORA",
  xlim = c(0, 100)
)
text(
  x = res + 10, y = (seq_along(res) * 1.2) - 1.1,
  labels = res, pos = 3, cex = 1, col = "black"
)
dev.off()
## png
## 2
# GSEA parameter reporting. Rows whose Parameters entry is "-" are skipped;
# the remaining entries look like "rank=no, algo=no, weight=no".
param <- x$Parameters
param <- param[which(param != "-")]
param <- gsub(" ", "", param)
str(param)
## chr [1:28] "rank=no,algo=no,weight=no" "rank=yes,algo=yes,weight=no" ...

# The three settings are read by position (1 = rank, 2 = algo, 3 = weight).
# vapply() guarantees a character vector whatever the input length, unlike
# sapply().
yes_no <- c("no", "yes")
fields <- strsplit(param, ",")
rankspec <- gsub("rank=", "", vapply(fields, "[[", character(1), 1))
algospec <- gsub("algo=", "", vapply(fields, "[[", character(1), 2))
weightspec <- gsub("weight=", "", vapply(fields, "[[", character(1), 3))

# Fail loudly on an unexpected value rather than mis-tabulating it.
stopifnot(
  rankspec %in% yes_no,
  algospec %in% yes_no,
  weightspec %in% yes_no
)

# Fixed factor levels keep both columns present, in "no", "yes" order, even
# when one answer never occurs; the column labels assigned below rely on it.
rankspec <- table(factor(rankspec, levels = yes_no), dnn = "rankspec")
rankspec
## rankspec
## no yes
## 16 12
algospec <- table(factor(algospec, levels = yes_no), dnn = "algospec")
algospec
## algospec
## no yes
## 19 9
weightspec <- table(factor(weightspec, levels = yes_no), dnn = "weightspec")
weightspec
## weightspec
## no yes
## 24 4

# One row per parameter, one column per answer. Built once and reused for the
# on-screen, PNG and PDF charts.
params <- rbind(rankspec, algospec, weightspec)
rownames(params) <- c("rank method", "test type", "weight type")
colnames(params) <- c("not stated", "stated")

# On-screen stacked bar chart. The "not stated" count is printed in white
# inside the first segment and the "stated" count in black just past it.
par(mar = c(10, 12, 3, 4))
barplot(t(params), horiz = TRUE, las = 1, xlim = c(0, 30),
  legend.text = colnames(params),
  main = "GSEA parameter reporting", xlab = "no. analyses",
  args.legend = list(x = "topleft", inset = c(0.05, 0.1)))
text(y = (seq_len(nrow(params)) * 1.2) - 0.7, x = params[, 1] - 3,
  labels = params[, 1], pos = 3, cex = 1, col = "white")
text(y = (seq_len(nrow(params)) * 1.2) - 0.7, x = params[, 1] + 2,
  labels = params[, 2], pos = 3, cex = 1, col = "black")

# PNG copy.
png("images/gseaparam2.png", width = 350, height = 250)
par(mar = c(5, 8, 3, 2))
barplot(t(params), horiz = TRUE, las = 1, xlim = c(0, 30),
  legend.text = colnames(params),
  main = "GSEA parameter reporting", xlab = "no. analyses",
  args.legend = list(x = "topleft", inset = c(0.05, 0.1)))
text(y = (seq_len(nrow(params)) * 1.2) - 0.7, x = params[, 1] - 3,
  labels = params[, 1], pos = 3, cex = 1, col = "white")
text(y = (seq_len(nrow(params)) * 1.2) - 0.7, x = params[, 1] + 2,
  labels = params[, 2], pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2

# PDF copy; the legend gets a white background here.
pdf("images/gseaparam2.pdf", width = 4.5, height = 3)
par(mar = c(5, 6, 3, 2))
barplot(t(params), horiz = TRUE, las = 1, xlim = c(0, 30),
  legend.text = colnames(params),
  main = "GSEA parameter reporting", xlab = "no. analyses",
  args.legend = list(x = "topleft", bg = "white", inset = c(0.05, 0.1)))
text(y = (seq_len(nrow(params)) * 1.2) - 0.8, x = params[, 1] - 3,
  labels = params[, 1], pos = 3, cex = 1, col = "white")
text(y = (seq_len(nrow(params)) * 1.2) - 0.8, x = params[, 1] + 2,
  labels = params[, 2], pos = 3, cex = 1, col = "black")
dev.off()
## png
## 2
# Print the R version, platform, locale and attached/loaded packages of the
# session that produced this report.
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] rmdformats_1.0.3 beeswarm_0.4.0
## [3] eulerr_6.1.1 mitch_1.5.1
## [5] clusterProfiler_4.0.5 DESeq2_1.32.0
## [7] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [9] MatrixGenerics_1.4.3 matrixStats_0.61.0
## [11] GenomicRanges_1.44.0 GenomeInfoDb_1.28.4
## [13] IRanges_2.26.0 S4Vectors_0.30.0
## [15] BiocGenerics_0.38.0 getDEE2_1.2.0
## [17] anytime_0.3.9 kableExtra_1.3.4
## [19] XML_3.99-0.8 reutils_0.2.3
## [21] vioplot_0.3.7 zoo_1.8-9
## [23] sm_2.2-5.7 wordcloud_2.6
## [25] RColorBrewer_1.1-2 rsvg_2.1.2
## [27] DiagrammeRsvg_0.1 DiagrammeR_1.0.6.1
## [29] forcats_0.5.1 stringr_1.4.0
## [31] dplyr_1.0.7 purrr_0.3.4
## [33] readr_2.0.2 tidyr_1.1.4
## [35] tibble_3.1.5 ggplot2_3.3.5
## [37] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 tidyselect_1.1.1 RSQLite_2.2.8
## [4] AnnotationDbi_1.54.1 htmlwidgets_1.5.4 grid_4.1.2
## [7] BiocParallel_1.26.2 scatterpie_0.1.7 munsell_0.5.0
## [10] withr_2.4.2 colorspace_2.0-2 GOSemSim_2.18.1
## [13] highr_0.9 knitr_1.36 rstudioapi_0.13
## [16] DOSE_3.18.3 GenomeInfoDbData_1.2.6 polyclip_1.10-0
## [19] bit64_4.0.5 farver_2.1.0 downloader_0.4
## [22] vctrs_0.3.8 treeio_1.16.2 generics_0.1.0
## [25] xfun_0.26 R6_2.5.1 graphlayouts_0.7.2
## [28] locfit_1.5-9.4 bitops_1.0-7 cachem_1.0.6
## [31] reshape_0.8.8 fgsea_1.18.0 gridGraphics_0.5-1
## [34] DelayedArray_0.18.0 assertthat_0.2.1 promises_1.2.0.1
## [37] scales_1.1.1 ggraph_2.0.5 enrichplot_1.12.3
## [40] gtable_0.3.0 tidygraph_1.2.0 rlang_0.4.11
## [43] genefilter_1.74.0 systemfonts_1.0.2 splines_4.1.2
## [46] lazyeval_0.2.2 htm2txt_2.1.1 broom_0.7.9
## [49] yaml_2.2.1 reshape2_1.4.4 modelr_0.1.8
## [52] backports_1.2.1 httpuv_1.6.3 qvalue_2.24.0
## [55] tools_4.1.2 bookdown_0.24 ggplotify_0.1.0
## [58] gplots_3.1.1 ellipsis_0.3.2 jquerylib_0.1.4
## [61] Rcpp_1.0.7 plyr_1.8.6 visNetwork_2.1.0
## [64] zlibbioc_1.38.0 RCurl_1.98-1.5 viridis_0.6.1
## [67] cowplot_1.1.1 haven_2.4.3 ggrepel_0.9.1
## [70] fs_1.5.0 magrittr_2.0.1 data.table_1.14.2
## [73] DO.db_2.9 reprex_2.0.1 hms_1.1.1
## [76] patchwork_1.1.1 mime_0.12 evaluate_0.14
## [79] xtable_1.8-4 readxl_1.3.1 gridExtra_2.3
## [82] compiler_4.1.2 KernSmooth_2.23-20 V8_3.6.0
## [85] crayon_1.4.1 shadowtext_0.0.9 htmltools_0.5.2
## [88] ggfun_0.0.4 later_1.3.0 tzdb_0.1.2
## [91] geneplotter_1.70.0 aplot_0.1.1 lubridate_1.8.0
## [94] DBI_1.1.1 tweenr_1.0.2 dbplyr_2.1.1
## [97] MASS_7.3-54 Matrix_1.3-4 cli_3.0.1
## [100] igraph_1.2.6 pkgconfig_2.0.3 xml2_1.3.2
## [103] ggtree_3.0.4 svglite_2.0.0 annotate_1.70.0
## [106] bslib_0.3.1 webshot_0.5.2 XVector_0.32.0
## [109] rvest_1.0.1 yulab.utils_0.0.4 digest_0.6.28
## [112] Biostrings_2.60.2 rmarkdown_2.11 cellranger_1.1.0
## [115] fastmatch_1.1-3 tidytree_0.3.6 curl_4.3.2
## [118] gtools_3.9.2 shiny_1.7.1 lifecycle_1.0.1
## [121] nlme_3.1-153 jsonlite_1.7.2 echarts4r_0.4.2
## [124] viridisLite_0.4.0 fansi_0.5.0 pillar_1.6.3
## [127] lattice_0.20-45 GGally_2.1.2 KEGGREST_1.32.0
## [130] fastmap_1.1.0 httr_1.4.2 survival_3.2-13
## [133] GO.db_3.13.0 glue_1.4.2 png_0.1-7
## [136] bit_4.0.4 ggforce_0.3.3 stringi_1.7.5
## [139] sass_0.4.0 blob_1.2.2 caTools_1.18.2
## [142] memoise_2.0.0 ape_5.5