Intro

Here we analyse 200 articles that were randomly selected from a set of 1500 PMC articles. Each article was examined independently by two team members, the results were compared, and any inconsistencies were resolved.

The code shown here was used to generate Figure 2 of the manuscript.

# Set the default figure width and height used by the plots in this document.
knitr::opts_chunk$set(
  fig.width = 7,
  fig.height = 5
)

library("wordcloud")

Overview of included and excluded analyses

# Load the curated QC table: a tab-separated file with a header row. An
# article (Pubmed.Central.ID) can appear in more than one row.
#  - quote = "\"" keeps apostrophes in free-text fields from being treated as
#    quote characters (the read.table() default is "\"'").
#  - comment.char = "" keeps a "#" inside a field from truncating the line.
#    Both matter because fill = TRUE pads short rows silently instead of
#    raising an error, so a mis-split line would otherwise go unnoticed.
#  - stringsAsFactors = FALSE keeps text columns as character vectors, which
#    the later strsplit() call requires.
x <- read.table(
  "../data/QC-analysis2.tsv",
  header = TRUE,
  sep = "\t",
  quote = "\"",
  comment.char = "",
  fill = TRUE,
  stringsAsFactors = FALSE
)
# Preview the first rows as a quick sanity check of the parse.
head(x)
##   Pubmed.Central.ID Article.number Allocated          Journal
## 1        PMC6493771            691         - PLoS Comput Biol
## 2        PMC6442023            462         -  Front Pharmacol
## 3        PMC6384238            213         -      Front Oncol
## 4        PMC6594459           1172         -        Ann Oncol
## 5        PMC6649552           1407         -       Cell Cycle
## 6        PMC6478283            637         -         PLoS One
##                       Omics.type                        Organism
## 1                        EXCLUDE                         EXCLUDE
## 2                        RNA-seq Homo sapiens, Rattus norvegicus
## 3 Gene expression array, RNA-seq                    Homo sapiens
## 4                       Database                    Homo sapiens
## 5                        RNA-seq                    Mus musculus
## 6                       Database                    Homo sapiens
##                    Gene.set.library GS.version Statistical.test.used
## 1                           EXCLUDE    EXCLUDE               EXCLUDE
## 2                               GEO         No                  GSEA
## 3                          GO, KEGG         No            Not stated
## 4                            MSigDB         No                  GSEA
## 5                          GO, KEGG         No            Not stated
## 6 KEGG, Reactome, PID, DisGeNET, GO         No            Not stated
##   FDR.Correction        App.used App.Version Code.availability
## 1        EXCLUDE         EXCLUDE     EXCLUDE           EXCLUDE
## 2             No            GSEA         Yes              <NA>
## 3            Yes clusterProfiler          No              <NA>
## 4             No            GSEA         Yes              <NA>
## 5             No           DAVID          No              <NA>
## 6             No        ToppGene          No              <NA>
##   Background.gene.set Assumptions.violated Gene.lists.provided
## 1             EXCLUDE              EXCLUDE             EXCLUDE
## 2                <NA>                  FDR                  No
## 3          Not stated           Background                 Yes
## 4                <NA>                  FDR                  No
## 5          Not stated      Background, FDR                  No
## 6          Not stated      Background, FDR                 Yes
##   Separated.up.and.down                    Parameters
## 1                     -                             -
## 2                     -   rank=no, algo=no, weight=no
## 3                     -                             -
## 4                     - rank=yes, algo=yes, weight=no
## 5              Combined                             -
## 6                     -                             -
# Column names and dimensions of the table as loaded.
colnames(x)
##  [1] "Pubmed.Central.ID"     "Article.number"        "Allocated"            
##  [4] "Journal"               "Omics.type"            "Organism"             
##  [7] "Gene.set.library"      "GS.version"            "Statistical.test.used"
## [10] "FDR.Correction"        "App.used"              "App.Version"          
## [13] "Code.availability"     "Background.gene.set"   "Assumptions.violated" 
## [16] "Gene.lists.provided"   "Separated.up.and.down" "Parameters"
dim(x)
## [1] 249  18
# Split the table into excluded and retained rows using a single logical mask.
# Rows are treated as excluded when GS.version holds the literal "EXCLUDE".
# `%in%` never returns NA, so a row with a missing GS.version stays in the
# retained set instead of silently disappearing from both halves (which is
# what `==` / `!=` inside subset() would do).
is_excluded <- x$GS.version %in% "EXCLUDE"
exclude <- x[is_excluded, ]
# Number of excluded rows, then number of distinct excluded articles.
nrow(exclude)
## [1] 14
length(unique(exclude$Pubmed.Central.ID))
## [1] 14
# From here on `x` holds only the retained rows.
x <- x[!is_excluded, ]
# Number of retained rows, then number of distinct retained articles.
nrow(x)
## [1] 235
length(unique(x$Pubmed.Central.ID))
## [1] 186

Journal

# Tally how often each journal name occurs in the retained rows of `x`.
# Entries are split on ", " so a cell listing several names contributes one
# count per name.
# NOTE(review): counts are per row of `x`, not per unique Pubmed.Central.ID,
# so an article with several rows is counted more than once - confirm this is
# the intended unit for the figure.
journal_split <- strsplit(x$Journal, ", ")
journal <- unlist(journal_split)
# Ascending sort by count; the most frequent journals end up last.
res <- sort(table(journal))
# Number of distinct journal names.
length(res)
## [1] 96
res
## journal
##                               3 Biotech Am J Physiol Gastrointest Liver Physiol 
##                                       1                                       1 
##                         Animals (Basel)                               Ann Oncol 
##                                       1                                       1 
##                  Appl Environ Microbiol                              Biosci Rep 
##                                       1                                       1 
##                          BMC Infect Dis                           BMC Med Genet 
##                                       1                                       1 
##                        BMC Med Genomics                BMC Musculoskelet Disord 
##                                       1                                       1 
##                          Cancer Control                         Cancers (Basel) 
##                                       1                                       1 
##                              Cell Cycle                       Cell Death Discov 
##                                       1                                       1 
##                                   Cells                                Chin Med 
##                                       1                                       1 
##                             Commun Biol                           Endocrinology 
##                                       1                                       1 
##                           FEBS Open Bio                        Genome Biol Evol 
##                                       1                                       1 
##                         Genomics Inform                                 Heliyon 
##                                       1                                       1 
##                              Hepatology                        Int J Endocrinol 
##                                       1                                       1 
##                      Int J Nanomedicine                        Int J Ophthalmol 
##                                       1                                       1 
##                   J Assist Reprod Genet                             J Bacteriol 
##                                       1                                       1 
##                                J Cancer                    J Cardiovasc Dev Dis 
##                                       1                                       1 
##                     J Diabetes Investig                     J Immunother Cancer 
##                                       1                                       1 
##                       J Invest Dermatol                           J Ovarian Res 
##                                       1                                       1 
##                           J Res Med Sci                            J Transl Med 
##                                       1                                       1 
##                                 J Virol                    Medicine (Baltimore) 
##                                       1                                       1 
##                               Mol Breed                  Mol Ther Nucleic Acids 
##                                       1                                       1 
##                           Neurobiol Dis                               Nutrients 
##                                       1                                       1 
##                              Radiat Res                              Respir Res 
##                                       1                                       1 
##                          Stem Cells Int                                 Thyroid 
##                                       1                                       1 
##                             Toxicol Sci                                Virology 
##                                       1                                       1 
##              World J Gastrointest Oncol                         Am J Transl Res 
##                                       1                                       2 
##                            Arch Med Sci                               BMC Genet 
##                                       2                                       2 
##                      Cell Commun Signal                          Cell Death Dis 
##                                       2                                       2 
##                         Clin Cancer Res                        Clin Epigenetics 
##                                       2                                       2 
##                         Clin Proteomics               Diabetes Metab Syndr Obes 
##                                       2                                       2 
##                             Dis Markers                             Epigenetics 
##                                       2                                       2 
##                          Front Neurosci                           Front Physiol 
##                                       2                                       2 
##                           Genes (Basel)                         J Hematol Oncol 
##                                       2                                       2 
##                           Med Sci Monit                            Metabolomics 
##                                       2                                       2 
##                                 Mol Med                              Rice (N Y) 
##                                       2                                       2 
##                       Transl Psychiatry                                 Viruses 
##                                       2                                       2 
##                               Biol Open                      BMC Bioinformatics 
##                                       3                                       3 
##                              BMC Cancer                         Cancer Cell Int 
##                                       3                                       3 
##                           Front Immunol                           Int J Mol Sci 
##                                       3                                       3 
##                              J Clin Med                              Mol Autism 
##                                       3                                       3 
##                       Aging (Albany NY)                            Exp Ther Med 
##                                       4                                       4 
##                             Front Oncol                         Front Pharmacol 
##                                       4                                       4 
##                             Metabolites                             Mol Med Rep 
##                                       4                                       4 
##                              Oncotarget                                RNA Biol 
##                                       4                                       4 
##                        Cancer Manag Res                          Biomed Res Int 
##                                       5                                       6 
##                       Onco Targets Ther                               Oncol Rep 
##                                       6                                       7 
##                             Front Genet                            BMC Genomics 
##                                       8                                       9 
##                              Oncol Lett                                   PeerJ 
##                                       9                                      11 
##                                 Sci Rep                                PLoS One 
##                                      12                                      15
# Word cloud of journal names, with word size driven by the counts in `res`.
# Narrow margins leave as much of the device as possible for the cloud.
par(mar = c(1, 1, 1, 1))
# Fix the RNG seed so the word layout is the same on every run.
set.seed(42)
# `scale` gives the largest and smallest word sizes. It is reduced from the
# wordcloud() default of c(4, 0.5) because at the default size many journal
# names "could not be fit on page" and were dropped from the figure.
# NOTE(review): re-knit and check that no such warnings remain; lower `scale`
# further or enlarge the figure if any name is still omitted.
wordcloud(
  words = names(res),
  freq = res,
  scale = c(2, 0.3),
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Med Sci Monit could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Metabolomics could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Transl Psychiatry could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Endocrinology could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : FEBS Open Bio could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Hepatology could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Endocrinol could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Nanomedicine could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Ophthalmol could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Assist Reprod Genet could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Cardiovasc Dev Dis could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Diabetes Investig could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Immunother Cancer could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Invest Dermatol could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Ovarian Res could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Medicine (Baltimore) could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Mol Ther Nucleic Acids could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Respir Res could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Stem Cells Int could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Thyroid could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words
## = 200, : World J Gastrointest Oncol could not be fit on page. It will not be
## plotted.