PMC Survey QC Analysis

Intro

Here we perform an analysis of 200 articles that were randomly selected from 1500 PMC articles.

knitr::opts_chunk$set(fig.width=7, fig.height=5) 

library("wordcloud")

## Loading required package: RColorBrewer

library("RColorBrewer")
library("wordcloud2")
library("reutils")
library("XML")
library("kableExtra")
library("Biobase")

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following object is masked from 'package:reutils':
## 
##     content

library("vioplot")

## Loading required package: sm

## Warning in fun(libname, pkgname): couldn't connect to display ":0"

## Package 'sm', version 2.2-5.6: type help(sm) for summary information

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Overview of included and excluded analyses

# Load the QC table: tab-separated with a header row. fill = TRUE pads rows
# that have fewer fields than the header.
x <- read.table(
  file = "data/QC-analysis2.tsv",
  sep = "\t",
  header = TRUE,
  fill = TRUE
)
# Preview the first rows.
head(x)

##   Pubmed.Central.ID Article.number Allocated          Journal
## 1        PMC6493771            691         - PLoS Comput Biol
## 2        PMC6442023            462         -  Front Pharmacol
## 3        PMC6384238            213         -      Front Oncol
## 4        PMC6594459           1172         -        Ann Oncol
## 5        PMC6649552           1407         -       Cell Cycle
## 6        PMC6478283            637         -         PLoS One
##                       Omics.type                        Organism
## 1                        EXCLUDE                         EXCLUDE
## 2                        RNA-seq Homo sapiens, Rattus norvegicus
## 3 Gene expression array, RNA-seq                    Homo sapiens
## 4                       Database                    Homo sapiens
## 5                        RNA-seq                    Mus musculus
## 6                       Database                    Homo sapiens
##                    Gene.set.library GS.version Statistical.test.used
## 1                           EXCLUDE    EXCLUDE               EXCLUDE
## 2                               GEO         No                  GSEA
## 3                          GO, KEGG         No            Not stated
## 4                            MSigDB         No                  GSEA
## 5                          GO, KEGG         No            Not stated
## 6 KEGG, Reactome, PID, DisGeNET, GO         No            Not stated
##   FDR.Correction        App.used App.Version Code.availability
## 1        EXCLUDE         EXCLUDE     EXCLUDE           EXCLUDE
## 2             No            GSEA         Yes              <NA>
## 3            Yes clusterProfiler          No              <NA>
## 4             No            GSEA         Yes              <NA>
## 5             No           DAVID          No              <NA>
## 6             No        ToppGene          No              <NA>
##   Background.gene.set Assumptions.violated Gene.lists.provided
## 1             EXCLUDE              EXCLUDE             EXCLUDE
## 2                <NA>                  FDR                  No
## 3          Not stated           Background                 Yes
## 4                <NA>                  FDR                  No
## 5          Not stated      Background, FDR                  No
## 6          Not stated      Background, FDR                 Yes
##   Separated.up.and.down                    Parameters
## 1                     -                             -
## 2                     -   rank=no, algo=no, weight=no
## 3                     -                             -
## 4                     - rank=yes, algo=yes, weight=no
## 5              Combined                             -
## 6                     -                             -

# List the column names of the QC table.
colnames(x)

##  [1] "Pubmed.Central.ID"     "Article.number"        "Allocated"            
##  [4] "Journal"               "Omics.type"            "Organism"             
##  [7] "Gene.set.library"      "GS.version"            "Statistical.test.used"
## [10] "FDR.Correction"        "App.used"              "App.Version"          
## [13] "Code.availability"     "Background.gene.set"   "Assumptions.violated" 
## [16] "Gene.lists.provided"   "Separated.up.and.down" "Parameters"

# Report the table's dimensions (rows, columns) before any filtering.
dim(x)

## [1] 249  18

# Split the table on the GS.version flag: rows marked "EXCLUDE" go to
# `exclude`, all remaining rows stay in `x`.
is_excluded <- x$GS.version == "EXCLUDE"
exclude <- subset(x, is_excluded)
# Number of excluded rows.
nrow(exclude)

## [1] 14

# Number of distinct articles among the excluded rows.
length(unique(exclude$Pubmed.Central.ID))

## [1] 14

x <- subset(x, !is_excluded)
# Number of retained rows.
nrow(x)

## [1] 235

# Number of distinct articles among the retained rows.
length(unique(x$Pubmed.Central.ID))

## [1] 186

Journal

# Count rows per journal. Cells are split on ", " so a cell listing several
# values contributes one count per value.
journal_split <- strsplit(x$Journal, split = ", ")
journal <- unlist(journal_split)
# table() is called on the symbol `journal` so the printed header stays
# "journal"; sort() puts the counts in ascending order.
res <- sort(table(journal))
# Number of distinct journals.
length(res)

## [1] 96

# Full frequency table, least to most frequent.
res

## journal
##                               3 Biotech Am J Physiol Gastrointest Liver Physiol 
##                                       1                                       1 
##                         Animals (Basel)                               Ann Oncol 
##                                       1                                       1 
##                  Appl Environ Microbiol                              Biosci Rep 
##                                       1                                       1 
##                          BMC Infect Dis                           BMC Med Genet 
##                                       1                                       1 
##                        BMC Med Genomics                BMC Musculoskelet Disord 
##                                       1                                       1 
##                          Cancer Control                         Cancers (Basel) 
##                                       1                                       1 
##                              Cell Cycle                       Cell Death Discov 
##                                       1                                       1 
##                                   Cells                                Chin Med 
##                                       1                                       1 
##                             Commun Biol                           Endocrinology 
##                                       1                                       1 
##                           FEBS Open Bio                        Genome Biol Evol 
##                                       1                                       1 
##                         Genomics Inform                                 Heliyon 
##                                       1                                       1 
##                              Hepatology                        Int J Endocrinol 
##                                       1                                       1 
##                      Int J Nanomedicine                        Int J Ophthalmol 
##                                       1                                       1 
##                   J Assist Reprod Genet                             J Bacteriol 
##                                       1                                       1 
##                                J Cancer                    J Cardiovasc Dev Dis 
##                                       1                                       1 
##                     J Diabetes Investig                     J Immunother Cancer 
##                                       1                                       1 
##                       J Invest Dermatol                           J Ovarian Res 
##                                       1                                       1 
##                           J Res Med Sci                            J Transl Med 
##                                       1                                       1 
##                                 J Virol                    Medicine (Baltimore) 
##                                       1                                       1 
##                               Mol Breed                  Mol Ther Nucleic Acids 
##                                       1                                       1 
##                           Neurobiol Dis                               Nutrients 
##                                       1                                       1 
##                              Radiat Res                              Respir Res 
##                                       1                                       1 
##                          Stem Cells Int                                 Thyroid 
##                                       1                                       1 
##                             Toxicol Sci                                Virology 
##                                       1                                       1 
##              World J Gastrointest Oncol                         Am J Transl Res 
##                                       1                                       2 
##                            Arch Med Sci                               BMC Genet 
##                                       2                                       2 
##                      Cell Commun Signal                          Cell Death Dis 
##                                       2                                       2 
##                         Clin Cancer Res                        Clin Epigenetics 
##                                       2                                       2 
##                         Clin Proteomics               Diabetes Metab Syndr Obes 
##                                       2                                       2 
##                             Dis Markers                             Epigenetics 
##                                       2                                       2 
##                          Front Neurosci                           Front Physiol 
##                                       2                                       2 
##                           Genes (Basel)                         J Hematol Oncol 
##                                       2                                       2 
##                           Med Sci Monit                            Metabolomics 
##                                       2                                       2 
##                                 Mol Med                              Rice (N Y) 
##                                       2                                       2 
##                       Transl Psychiatry                                 Viruses 
##                                       2                                       2 
##                               Biol Open                      BMC Bioinformatics 
##                                       3                                       3 
##                              BMC Cancer                         Cancer Cell Int 
##                                       3                                       3 
##                           Front Immunol                           Int J Mol Sci 
##                                       3                                       3 
##                              J Clin Med                              Mol Autism 
##                                       3                                       3 
##                       Aging (Albany NY)                            Exp Ther Med 
##                                       4                                       4 
##                             Front Oncol                         Front Pharmacol 
##                                       4                                       4 
##                             Metabolites                             Mol Med Rep 
##                                       4                                       4 
##                              Oncotarget                                RNA Biol 
##                                       4                                       4 
##                        Cancer Manag Res                          Biomed Res Int 
##                                       5                                       6 
##                       Onco Targets Ther                               Oncol Rep 
##                                       6                                       7 
##                             Front Genet                            BMC Genomics 
##                                       8                                       9 
##                              Oncol Lett                                   PeerJ 
##                                       9                                      11 
##                                 Sci Rep                                PLoS One 
##                                      12                                      15

# Word cloud of journal names sized by frequency, drawn with minimal margins.
# NOTE(review): the recorded warnings show that many names do not fit at the
# default scale; the Organism word cloud passes an explicit scale = c(4, .5).
# Consider setting a smaller scale here as well.
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Cell Commun Signal could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Am J Physiol Gastrointest Liver Physiol could not be fit on page. It will
## not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Appl Environ Microbiol could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words
## = 200, : BMC Musculoskelet Disord could not be fit on page. It will not be
## plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Cancers (Basel) could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Nanomedicine could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Int J Ophthalmol could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Assist Reprod Genet could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Cardiovasc Dev Dis could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Diabetes Investig could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Immunother Cancer could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Invest Dermatol could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : J Ovarian Res could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Medicine (Baltimore) could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Mol Ther Nucleic Acids could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Neurobiol Dis could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Nutrients could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Respir Res could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Stem Cells Int could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words =
## 200, : Virology could not be fit on page. It will not be plotted.

## Warning in wordcloud(words = names(res), freq = res, min.freq = 1, max.words
## = 200, : World J Gastrointest Oncol could not be fit on page. It will not be
## plotted.

# Horizontal bar chart of the 20 most frequent journals. The wide left margin
# leaves room for the journal names.
par(mar = c(5, 12, 3, 5))
top_journals <- tail(res, 20)
barplot(
  top_journals,
  horiz = TRUE,
  las = 1,
  cex.names = 0.7,
  xlab = "no. analyses",
  main = "Journal",
  xlim = c(0, 17)
)
grid()

# Collapse the long tail into one "Other" bar and keep the nine most frequent
# journals. head(res, -9) is every category except those nine, so "Other"
# plus the nine bars accounts for all counts (indexing up to nrow(res) - 10
# would leave the tenth-largest category out of the chart).
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
# barplot() returns the bar midpoints; they are used to centre each count
# label on its bar instead of hard-coding the bar spacing.
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 165))
text(x = res2 + 7, y = bar_mid, labels = res2, cex = 1, col = "black")

# Make sure the figure output directory exists. Only create it when missing,
# so re-running the report does not warn, while a genuine creation failure
# is still reported by dir.create().
if (!dir.exists("images")) {
  dir.create("images")
}

# Write the journal summary chart (`res2`) to PNG and PDF. In both devices
# the count labels are centred on the bars using the midpoints returned by
# barplot(), so they stay aligned whatever the device size.
png("images/journals2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 170))
text(x = res2 + 10, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/journals2.pdf", width = 4, height = 4)
par(mar = c(5, 9, 3, 2))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Journal", xlim = c(0, 180))
text(x = res2 + 15, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

Omics type

# Count rows per omics type. A cell may list several types separated by
# ", "; each listed type is counted once.
omics_split <- strsplit(x$Omics.type, split = ", ")
omics <- unlist(omics_split)
# table() is called on the symbol `omics` so the printed header stays
# "omics"; sort() puts the counts in ascending order.
res <- sort(table(omics))
# Number of distinct omics types.
length(res)

## [1] 18

# Full frequency table, least to most frequent.
res

## omics
##                Metgenomics                  miRNA-seq 
##                          1                          1 
## NanoString gene expression                  PCR Array 
##                          1                          1 
##                        PPI                  scRNA-seq 
##                          1                          1 
## DNA methylation sequencing                miRNA array 
##                          2                          2 
##                  CNV array           Genotyping array 
##                          3                          5 
##              Protein array      DNA methylation array 
##                          6                          7 
##          Genome sequencing                 Proteomics 
##                         10                         14 
##               Metabolomics                   Database 
##                         15                         19 
##                    RNA-seq      Gene expression array 
##                         70                         91

# Word cloud of omics types. "Gene expression array" is shortened to
# "RNA array" for the cloud only. The shortened labels live in a temporary
# vector, so `res` keeps its original names and the bar chart below uses the
# same category label as the later summary charts.
par(mar = c(1, 1, 1, 1))
cloud_words <- gsub("Gene expression array", "RNA array", names(res),
  fixed = TRUE)
wordcloud(words = cloud_words, freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))

# Horizontal bar chart of the (up to) 20 most frequent omics types.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 100))
grid()

# Collapse the long tail into one "Other" bar and keep the nine most frequent
# omics types. head(res, -9) is every category except those nine, so "Other"
# plus the nine bars accounts for all counts (indexing up to nrow(res) - 10
# would leave the tenth-largest category out of the chart).
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
# barplot() returns the bar midpoints; they are used to centre each count
# label on its bar instead of hard-coding the bar spacing.
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 100))
text(x = res2 + 5, y = bar_mid, labels = res2, cex = 1, col = "black")

# Write the omics summary chart (`res2`) to PNG and PDF. In both devices the
# count labels are centred on the bars using the midpoints returned by
# barplot(), so they stay aligned whatever the device size.
png("images/omics2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 100))
text(x = res2 + 6, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/omics2.pdf", width = 4, height = 4)
par(mar = c(5, 10, 3, 2))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Omics type", xlim = c(0, 110))
text(x = res2 + 10, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

Organism

# Count rows per organism. A cell may list several organisms separated by
# ", "; each listed organism is counted once.
org_split <- strsplit(x$Organism, split = ", ")
org <- unlist(org_split)
# table() is called on the symbol `org` so the printed header stays "org";
# sort() puts the counts in ascending order.
res <- sort(table(org))
# Number of distinct organisms.
length(res)

## [1] 31

# Full frequency table, least to most frequent.
res

## org
##     Acropora cervicornis            Aedes aegypti           Ananas comosus 
##                        1                        1                        1 
##       Anas platyrhynchos            Bos grunniens           Brassica napus 
##                        1                        1                        1 
##         Candida albicans   Canis lupus familiaris     Clostridium scindens 
##                        1                        1                        1 
##        Coturnix japonica      Moschus berezovskii  Mycobacterium smegmatis 
##                        1                        1                        1 
##    Oreochromis niloticus    Oryctolagus cuniculus   Pygoscelis antarcticus 
##                        1                        1                        1 
##         Pygoscelis papua       Salvelinus alpinus             Suaeda salsa 
##                        1                        1                        1 
##        Triticum aestivum               Vicia faba           Bemisia tabaci 
##                        1                        1                        2 
##        Mauremys reevesii  Mizuhopecten yessoensis             Pagrus major 
##                        2                        2                        2 
## Sclerotinia sclerotiorum             Oryza sativa               Bos taurus 
##                        2                        5                        7 
##               Sus scrofa        Rattus norvegicus             Mus musculus 
##                        7                       10                       24 
##             Homo sapiens 
##                      157

# Word cloud of organisms. "Homo sapiens" is shown as "human" in the cloud
# only. The relabelled words live in a temporary vector, so names(res) is
# never modified and no second gsub() is needed to restore it (a restore by
# pattern would also rewrite any other label that contains "human").
par(mar = c(1, 1, 1, 1))
cloud_words <- gsub("Homo sapiens", "human", names(res), fixed = TRUE)
wordcloud(words = cloud_words, freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"), scale = c(4, .5))

# Horizontal bar chart of the 20 most frequent organisms.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
grid()

# Collapse the long tail into one "Other" bar and keep the nine most frequent
# organisms. head(res, -9) is every category except those nine, so "Other"
# plus the nine bars accounts for all counts (indexing up to nrow(res) - 10
# would leave the tenth-largest category out of the chart).
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
# barplot() returns the bar midpoints; they are used to centre each count
# label on its bar instead of hard-coding the bar spacing.
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
text(x = res2 + 7, y = bar_mid, labels = res2, cex = 1, col = "black")

# Write the organism summary chart (`res2`) to PNG and PDF. In both devices
# the count labels are centred on the bars using the midpoints returned by
# barplot(), so they stay aligned whatever the device size.
png("images/organisms2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
text(x = res2 + 15, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/organisms2.pdf", width = 4, height = 4)
par(mar = c(5, 11, 3, 2))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Organism", xlim = c(0, 200))
text(x = res2 + 25, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

Gene set library

# Count rows per gene set library. A cell may list several libraries
# separated by ", "; each listed library is counted once.
# NOTE(review): the table printed below lists "Ingenuity Knowledge base" and
# "Ingenuity Knowledge Base" as separate categories. They look like the same
# library with inconsistent capitalisation; consider normalising the input.
GSL <- x$Gene.set.library
GSL_split <- strsplit(GSL, ", ")
GSL <- unlist(GSL_split)
res <- table(GSL)
res <- res[order(res)]
# Number of distinct gene set libraries.
length(res)

## [1] 26

# Percentage of library mentions that are "Not stated". which() returns the
# position of that category in `res`; it must be used to index `res` so the
# count, not the position, is divided by the total.
res[which(names(res) == "Not stated")] / sum(res) * 100

## Not stated 
##   4.216867

# Full frequency table, least to most frequent.
res

## GSL
##                  ChemRICH                       COG                  CYTOBAND 
##                         1                         1                         1 
##                  DisGeNET                       GEO Human Metabolome Database 
##                         1                         1                         1 
##  Ingenuity Knowledge base                  InterPro  Jensen Diseases database 
##                         1                         1                         1 
##                  MetaCore                 Metascape                      OMIM 
##                         1                         1                         1 
##           Pathway commons                       PID                    SIGNOR 
##                         1                         1                         1 
##                  TRANSFAC                Vectorbase                    JASPAR 
##                         1                         1                         2 
##             MetaboAnalyst                  BioCarta                Not stated 
##                         2                         3                        14 
##                  Reactome                    MSigDB  Ingenuity Knowledge Base 
##                        16                        20                        23 
##                      KEGG                        GO 
##                       114                       121

# Word cloud of gene set libraries sized by frequency. No relabelling is
# needed here: the library names contain no organism names, so the
# "Homo sapiens" substitution was a no-op and has been removed.
par(mar = c(1, 1, 1, 1))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))

# Horizontal bar chart of the 20 most frequent gene set libraries.
par(mar = c(5, 12, 3, 5))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 140))
grid()

# Collapse the long tail into one "Other" bar and keep the nine most frequent
# gene set libraries. head(res, -9) is every category except those nine, so
# "Other" plus the nine bars accounts for all counts (indexing up to
# nrow(res) - 10 would leave the tenth-largest category out of the chart).
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
# barplot() returns the bar midpoints; they are used to centre each count
# label on its bar instead of hard-coding the bar spacing.
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 140))
text(x = res2 + 7, y = bar_mid, labels = res2, cex = 1, col = "black")

# Write the gene set library summary chart (`res2`) to PNG and PDF. In both
# devices the count labels are centred on the bars using the midpoints
# returned by barplot(), so they stay aligned whatever the device size.
png("images/genesetlib2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 150))
text(x = res2 + 15, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/genesetlib2.pdf", width = 4, height = 4)
par(mar = c(5, 11, 3, 2))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set library", xlim = c(0, 150))
text(x = res2 + 16, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

Gene set version

# Tabulate whether the gene set version was reported (values of GS.version).
GSV <- x$GS.version
res <- table(GSV)
res

## GSV
##  No Yes 
## 217  18

# Percentage of rows with GS.version "No". The category is selected by name
# rather than by position, so the figure cannot silently refer to a
# different level if the set or order of levels changes.
res["No"] / sum(res) * 100

##       No 
## 92.34043

# Horizontal bar chart of the gene set version table. barplot() returns the
# bar midpoints, which are used to centre each count label on its bar
# instead of hard-coding the bar spacing.
par(mar = c(14, 12, 3, 10))
bar_mid <- barplot(res, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set version defined",
  xlim = c(0, 250))
text(x = res + 15, y = bar_mid, labels = res, cex = 1, col = "black")

# Write the gene set version chart (`res`) to PNG and PDF. In both devices
# the count labels are centred on the bars using the midpoints returned by
# barplot(), so they stay aligned whatever the device size.
png("images/genesetvers2.png", width = 300, height = 150)
par(mar = c(5, 5, 3, 3))
bar_mid <- barplot(res, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set version defined",
  xlim = c(0, 250))
text(x = res + 20, y = bar_mid, labels = res, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/genesetvers2.pdf", width = 3, height = 2)
par(mar = c(5, 4, 3, 2))
bar_mid <- barplot(res, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene set version defined",
  xlim = c(0, 260))
text(x = res + 25, y = bar_mid, labels = res, cex = 1, col = "black")
dev.off()

## png 
##   2

Statistical test used

# Count rows per statistical test. A cell may list several tests separated
# by ", "; each listed test is counted once.
test <- strsplit(x$Statistical.test.used, split = ", ")
test <- unlist(test)
# table() is called on the symbol `test` so the printed header stays "test";
# sort() puts the counts in ascending order.
res <- sort(table(test))
# Percentage of test mentions that are "Not stated".
res[names(res) == "Not stated"] / sum(res) * 100

## Not stated 
##   56.30252

# Number of distinct tests.
length(res)

## [1] 12

# Full frequency table, least to most frequent.
res

## test
##             Binomial                 EASE                 GSVA 
##                    1                    1                    1 
##       Kruskal-Wallis modified Chi-squared                 MSEA 
##                    1                    1                    1 
##   Kolmogorov–Smirnov              No test               Fisher 
##                    2                   14                   24 
##                 GSEA       Hypergeometric           Not stated 
##                   29                   29                  134

# Word cloud of statistical tests sized by frequency, drawn with minimal
# margins.
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the (up to) 20 most frequent tests. The wide left
# margin leaves room for the test names.
par(mar = c(5, 12, 3, 5))
top_tests <- tail(res, 20)
barplot(
  top_tests,
  horiz = TRUE,
  las = 1,
  cex.names = 0.7,
  xlab = "no. analyses",
  main = "Test used",
  xlim = c(0, 150)
)
grid()

# Collapse the long tail into one "Other" bar and keep the nine most frequent
# tests. head(res, -9) is every category except those nine, so "Other" plus
# the nine bars accounts for all counts (indexing up to nrow(res) - 10 would
# leave the tenth-largest category out of the chart).
other <- sum(head(res, -9))
res2 <- c(other, tail(res, 9))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 5))
# barplot() returns the bar midpoints; they are used to centre each count
# label on its bar instead of hard-coding the bar spacing.
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Test used", xlim = c(0, 150))
text(x = res2 + 10, y = bar_mid, labels = res2, cex = 1, col = "black")

# Write the statistical test summary chart (`res2`) to PNG and PDF. In both
# devices the count labels are centred on the bars using the midpoints
# returned by barplot(), so they stay aligned whatever the device size.
png("images/stattest2.png", width = 400, height = 300)
par(mar = c(5, 12, 3, 3))
bar_mid <- barplot(res2, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Test used", xlim = c(0, 160))
text(x = res2 + 15, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/stattest2.pdf", width = 4, height = 4)
par(mar = c(5, 11, 3, 2))
# The pdf() device cannot encode the en dash in "Kolmogorov–Smirnov" and
# substitutes dots for it (with a conversion warning per byte). Use an ASCII
# hyphen for the axis labels in the PDF only; `res2` itself is not modified.
pdf_labels <- gsub("\u2013", "-", names(res2), fixed = TRUE)
bar_mid <- barplot(res2, names.arg = pdf_labels, horiz = TRUE, las = 1,
  cex.names = 1, xlab = "no. analyses", main = "Test used",
  xlim = c(0, 170))
text(x = res2 + 20, y = bar_mid, labels = res2, cex = 1, col = "black")
dev.off()

## png 
##   2

FDR Correction

# Count rows per FDR-correction status; cells are split on ", " before
# counting.
fdr <- strsplit(x$FDR.Correction, split = ", ")
fdr <- unlist(fdr)
# table() is called on the symbol `fdr` so the printed header stays "fdr";
# sort() puts the counts in ascending order.
res <- sort(table(fdr))
# Percentage of each category other than "Yes".
res[names(res) != "Yes"] / sum(res) * 100

## fdr
## Not stated    No test         No 
##   3.846154   5.982906  39.316239

# Full frequency table, least to most frequent.
res

## fdr
## Not stated    No test         No        Yes 
##          9         14         92        119

# Word cloud of FDR-correction categories sized by frequency.
par(mar = c(1, 1, 1, 5))
wordcloud(words = names(res), freq = res, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"))

# Horizontal bar chart of all FDR-correction categories. barplot() returns
# the bar midpoints, which are used to centre each count label on its bar
# instead of hard-coding the bar spacing.
par(mar = c(10, 12, 3, 5))
bar_mid <- barplot(res, horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "FDR correction performed",
  xlim = c(0, 150))
text(x = res + 10, y = bar_mid, labels = res, cex = 1, col = "black")

png("images/fdr2.png",width=300,height=200)
par(mar=c(5,8,3,3))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "FDR correction performed", xlim=c(0,160))
text(y = (1:length(res)*1.2)-1.2  , x = res + 15, label = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

pdf("images/fdr2.pdf",width=3,height=3)
par(mar=c(5,5,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "FDR correction performed", xlim=c(0,160))
text(y = (1:length(res)*1.2)-1.0  , x = res + 15, label = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

App used

# Tabulate the enrichment tools used. A cell can list several tools separated
# by ", "; the per-analysis split is kept in App_split.
App_split <- strsplit(x$App.used, ", ")
App <- unlist(App_split)
res <- table(App)
res <- res[order(res)]
# Percentage of entries where the tool was not stated.
is_not_stated <- names(res) == "Not stated"
res[is_not_stated] / sum(res) * 100

## Not stated 
##   6.382979

# Number of distinct tool names.
length(res)

## [1] 50

res

## App
##                       anamiR                     ChemRICH 
##                            1                            1 
##         Custom MATLAB script Cytoscape (No plugin stated) 
##                            1                            1 
##                       EggNOG                        fgsea 
##                            1                            1 
##                      FunRich                       g:GOSt 
##                            1                            1 
##                         GAGE                      GENCLIP 
##                            1                            1 
##                    GeneCodis               GO: TermFinder 
##                            1                            1 
##               GO::TermFinder                         GSVA 
##                            1                            1 
##                         KSEA                        Limma 
##                            1                            1 
##                    Metascape   Molecule Annotation System 
##                            1                            1 
##                         MSEA               NetworkAnalyst 
##                            1                            1 
##                       Pascal                    PathVisio 
##                            1                            1 
##                     R script                   ReactomePA 
##                            1                            1 
##             SNP2GO R package                        topGO 
##                            1                            1 
##                     ToppGene                       webMeV 
##                            1                            1 
##                       agriGO                        BiNGO 
##                            2                            2 
##                   g:Profiler                     MetaCore 
##                            2                            2 
##                       STRING              Custom R script 
##                            2                            3 
##                      GOrilla                         KAAS 
##                            3                            3 
##                    Mummichog                     Blast2GO 
##                            3                            4 
##                   WebGestalt             ClueGO/Cytoscape 
##                            4                            5 
##                      Enrichr                        GOseq 
##                            5                            5 
##                MetaboAnalyst                        KOBAS 
##                            7                            9 
##              clusterProfiler                      PANTHER 
##                           10                           10 
##                   Not stated   Ingenuity Pathway Analysis 
##                           15                           26 
##                         GSEA                        DAVID 
##                           30                           55

# Word cloud of tool names, sized by how often each was used.
par(mar=c(1,1,1,1))
wordcloud(words = names(res), freq = res, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35,
          colors=brewer.pal(8, "Dark2"))

# Bar chart of the 20 most frequent tools (res is sorted ascending).
par(mar=c(5,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 0.7, xlab="no. analyses",
        main = "App used", xlim=c(0,60))
grid()

# Collapse everything except the n_top most frequent tools into "Other".
# The previous version summed all but the last 10 entries while keeping only
# the last 9, so the 10th most frequent tool was counted in neither group.
n_top <- 9
other <- sum(head(res, -n_top))
res2 <- c(Other = other, tail(res, n_top))
# Bars are 1.2 units apart (width 1 + default gap 0.2); the constant offsets
# used for the count labels are hand-tuned for each device size.
par(mar=c(5,12,3,5))
barplot(res2,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "App used", xlim=c(0,80))
text(y = seq_along(res2)*1.2 - 1.1, x = res2 + 5, labels = res2, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/app2.png",width=400,height=300)
par(mar=c(5,12,3,3))
barplot(res2,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "App used", xlim=c(0,80))
text(y = seq_along(res2)*1.2 - 1.2, x = res2 + 5, labels = res2, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/app2.pdf",width=4,height=4)
par(mar=c(5,12,3,2))
barplot(res2,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "App used", xlim=c(0,80))
text(y = seq_along(res2)*1.2 - 1.2, x = res2 + 10, labels = res2, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

App version

# Count how many analyses reported the version of the tool they used.
APV <- x[["App.Version"]]
res <- table(APV)
res

## APV
##  No Yes 
## 167  68

# Percentage in the first category of the table ("No" in the output above).
first_share <- res[1] / sum(res)
first_share * 100

##       No 
## 71.06383

# Horizontal bar chart of the version-reporting counts with the count written
# next to each bar. Bars are 1.2 units apart (width 1 + default gap 0.2); the
# constant offsets are hand-tuned for each device size.
par(mar=c(14,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "App version defined", xlim=c(0,200))
text(y = seq_along(res)*1.2 - 0.75, x = res + 15, labels = res, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/appvers2.png",width=300,height=150)
par(mar=c(5,5,3,3))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "App version defined", xlim=c(0,200))
text(y = seq_along(res)*1.2 - 1.2, x = res + 15, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/appvers2.pdf",width=3,height=2)
par(mar=c(5,4,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "App version defined", xlim=c(0,200))
text(y = seq_along(res)*1.2 - 1.2, x = res + 19, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

Code available

# Count how many analyses made their code available. table() ignores missing
# values, so only rows with a recorded answer are counted.
code <- x[["Code.availability"]]
res <- table(code)
res

## code
##  No Yes 
##  44   3

# Percentage in the first category of the table ("No" in the output above).
first_share <- res[1] / sum(res)
first_share * 100

##       No 
## 93.61702

# Horizontal bar chart of the code-availability counts with the count written
# next to each bar. Bars are 1.2 units apart (width 1 + default gap 0.2); the
# constant offsets are hand-tuned for each device size.
par(mar=c(14,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Code availability", xlim=c(0,50))
text(y = seq_along(res)*1.2 - 0.75, x = res + 2, labels = res, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/code2.png",width=300,height=150)
par(mar=c(5,5,3,3))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Code availability", xlim=c(0,50))
text(y = seq_along(res)*1.2 - 1.2, x = res + 3, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/code2.pdf",width=3,height=2)
par(mar=c(5,4,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Code availability", xlim=c(0,55))
text(y = seq_along(res)*1.2 - 1.2, x = res + 5, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

Background gene set

# Tabulate how the background gene list was reported. "Yes" is relabelled
# "Yes, correct" so it reads clearly next to "Stated, but incorrect".
BG <-x$Background.gene.set
BG <- gsub("Yes","Yes, correct",BG)
res <- table(BG)
res

## BG
##                    No            Not stated Stated, but incorrect 
##                     5                   178                     6 
##          Yes, correct 
##                     8

# Percentage of analyses without a correctly specified background list.
# The comparison must use the relabelled category name: comparing against the
# old label "Yes" matched every category and always reported 100%.
sum(res[which(names(res)!="Yes, correct")])/sum(res)*100

## [1] 95.93909

# Horizontal bar chart of the background-list categories with the count
# written next to each bar. Bars are 1.2 units apart (width 1 + default gap
# 0.2); the constant offsets are hand-tuned for each device size.
par(mar=c(10,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Background list specification", xlim=c(0,200))
text(y = seq_along(res)*1.2 - 0.85, x = res + 10, labels = res, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/bg2.png",width=300,height=200)
par(mar=c(5,9,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Background list specified", xlim=c(0,220))
text(y = seq_along(res)*1.2 - 1.1, x = res + 18, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/bg2.pdf",width=4,height=3)
par(mar=c(5,10,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Background list specified", xlim=c(0,220))
text(y = seq_along(res)*1.2 - 1.1, x = res + 20, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

Gene lists provided

# Count how many analyses supplied the gene lists they analysed.
GL <- x[["Gene.lists.provided"]]
res <- table(GL)
res

## GL
##  No Yes 
## 142  93

# Percentage of analyses whose answer is anything other than "Yes".
not_yes <- names(res) != "Yes"
sum(res[not_yes]) / sum(res) * 100

## [1] 60.42553

# Horizontal bar chart of the gene-list counts with the count written next to
# each bar. Bars are 1.2 units apart (width 1 + default gap 0.2); the constant
# offsets are hand-tuned for each device size.
par(mar=c(14,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Gene lists provided", xlim=c(0,160))
text(y = seq_along(res)*1.2 - 0.75, x = res + 8, labels = res, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/genelists2.png",width=300,height=150)
par(mar=c(5,5,3,3))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Gene lists provided", xlim=c(0,160))
text(y = seq_along(res)*1.2 - 1.2, x = res + 9, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/genelists2.pdf",width=3,height=2)
par(mar=c(5,4,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Gene lists provided", xlim=c(0,170))
text(y = seq_along(res)*1.2 - 1.25, x = res + 15, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

Methodological flaws

# Number of analyses recorded with no violated assumptions.
ok <- sum(x$Assumptions.violated == "No", na.rm = TRUE)
ok

## [1] 35

# Number of analyses recorded with at least one violated assumption.
bad <- sum(x$Assumptions.violated != "No", na.rm = TRUE)
bad

## [1] 200

# Percentage of analyses without a recorded flaw.
ok / (bad + ok) * 100

## [1] 14.89362

# Tabulate the individual flaws. A lone "No" is relabelled "None"; cells with
# several flaws are split on ", " before counting, then sorted ascending.
ass <- gsub("^No$", "None", x$Assumptions.violated)
ass <- unlist(strsplit(ass, ", "))
res <- table(ass)
res <- res[order(res)]
res

## ass
## Misinterpreted FDR values    Inference without test             No data shown 
##                         2                        11                        13 
##                      None                       FDR                Background 
##                        35                        94                       179

# Word cloud of the methodological flaws, sized by their counts.
par(mar=c(1,1,1,1))
wordcloud(words = names(res), freq = res, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35,
          colors=brewer.pal(8, "Dark2"))

# Horizontal bar chart with the count written next to each bar. Bars are 1.2
# units apart (width 1 + default gap 0.2); the constant offsets are hand-tuned
# for each device size.
par(mar=c(8,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Methodological flaws", xlim=c(0,200))
text(y = seq_along(res)*1.2 - 0.9, x = res + 8, labels = res, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/assumptions2.png",width=400,height=250)
par(mar=c(5,12,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Methodological flaws", xlim=c(0,220))
text(y = seq_along(res)*1.2 - 1.1, x = res + 18, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/assumptions2.pdf",width=4,height=3)
par(mar=c(5,11,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Methodological flaws", xlim=c(0,230))
text(y = seq_along(res)*1.2 - 1.2, x = res + 25, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

Separated or combined

# Tabulate whether up- and down-regulated genes were tested separately or as
# one combined list.
comb <-x$Separated.up.and.down
res <- table(comb)
res

## comb
##          -   Combined Not stated  Separated 
##        126         76          5         28

# Drop the "-" rows before computing the percentage so that they are not
# counted as "not separated" analyses.
# NOTE(review): "-" is presumed to mark analyses where the question does not
# apply (it is excluded from the plots as well) — confirm against the survey.
res <- res[which(names(res)!="-")]

# Percentage of the remaining analyses that did not separate up and down.
sum(res[which(names(res)!="Separated")])/sum(res)*100

## [1] 74.31193


# Horizontal bar chart of the separated/combined counts with the count written
# next to each bar. Bars are 1.2 units apart (width 1 + default gap 0.2); the
# constant offsets are hand-tuned for each device size.
par(mar=c(10,12,3,5))
barplot(tail(res,20),horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Separated or combined ORA (differential expression)", xlim=c(0,90))
grid()
text(y = seq_along(res)*1.2 - 0.9, x = res + 5, labels = res, pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/comb2.png",width=300,height=200)
par(mar=c(5,8,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Separated or combined ORA", xlim=c(0,100))
text(y = seq_along(res)*1.2 - 1.1, x = res + 5, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF.
pdf("images/comb2.pdf",width=3,height=2.5)
par(mar=c(5,5,3,2))
barplot(res,horiz=TRUE,las=1,cex.names = 1, xlab="no. analyses",
        main = "Separated or combined ORA", xlim=c(0,100))
text(y = seq_along(res)*1.2 - 1.1, x = res + 10, labels = res, pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

GSEA parameters

# Keep the rows that carry a parameter string (anything other than "-") and
# strip spaces so every entry has the form "rank=..,algo=..,weight=..".
param <-x$Parameters
param <- param[which(param!="-")]
param <- gsub(" ","",param)
str(param)

##  chr [1:28] "rank=no,algo=no,weight=no" "rank=yes,algo=yes,weight=no" ...

# Split each entry once and pull out a field by position. vapply() guarantees
# a character vector (sapply() would silently change type on empty input).
param_fields <- strsplit(param, ",")
nth_field <- function(i) {
  vapply(param_fields, function(fields) fields[[i]], character(1))
}

# Field 1: was the ranking method stated?
rankspec <- gsub("rank=","",nth_field(1))
rankspec <- table(rankspec)
rankspec

## rankspec
##  no yes 
##  16  12

# Field 2: was the algorithm / test type stated?
algospec <- gsub("algo=","",nth_field(2))
algospec <- table(algospec)
algospec

## algospec
##  no yes 
##  19   9

# Field 3: was the weighting stated?
weightspec <- gsub("weight=","",nth_field(3))
weightspec <- table(weightspec)
weightspec

## weightspec
##  no yes 
##  24   4

# Stack the three yes/no tables into one matrix (rows = parameter, columns =
# not stated / stated). It is built once and reused for all three devices.
params <- rbind(rankspec,algospec,weightspec)
rownames(params) <- c("rank method", "test type", "weight type")
colnames(params) <- c("not stated", "stated")

# Stacked horizontal bar chart. The white label sits inside the "not stated"
# segment and the black label just past its end, inside the "stated" segment.
# Bars are 1.2 units apart (width 1 + default gap 0.2); offsets are hand-tuned.
par(mar=c(10,12,3,4))
barplot(t(params),horiz=TRUE,las=1,xlim=c(0,30),legend.text = colnames(params),
        main="GSEA parameter reporting", xlab="no. analyses",
        args.legend = list(x = "topleft", inset = c(0.05, 0.1)))
text(y = seq_len(nrow(params))*1.2 - 0.7, x = params[,1]-3, labels = params[,1], pos = 3, cex = 1, col = "white")
text(y = seq_len(nrow(params))*1.2 - 0.7, x = params[,1]+2, labels = params[,2], pos = 3, cex = 1, col = "black")

# Same chart written to PNG.
png("images/gseaparam2.png",width=350,height=250)
par(mar=c(5,8,3,2))
barplot(t(params),horiz=TRUE,las=1,xlim=c(0,30),legend.text = colnames(params),
        main="GSEA parameter reporting", xlab="no. analyses",
        args.legend = list(x = "topleft", inset = c(0.05, 0.1)))
text(y = seq_len(nrow(params))*1.2 - 0.7, x = params[,1]-3, labels = params[,1], pos = 3, cex = 1, col = "white")
text(y = seq_len(nrow(params))*1.2 - 0.7, x = params[,1]+2, labels = params[,2], pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

# Same chart written to PDF (legend drawn on a white background).
pdf("images/gseaparam2.pdf",width=4.5,height=3)
par(mar=c(5,6,3,2))
barplot(t(params),horiz=TRUE,las=1,xlim=c(0,30),legend.text = colnames(params),
        main="GSEA parameter reporting", xlab="no. analyses",
        args.legend = list(x = "topleft", bg="white",inset = c(0.05, 0.1)))
text(y = seq_len(nrow(params))*1.2 - 0.8, x = params[,1]-3, labels = params[,1], pos = 3, cex = 1, col = "white")
text(y = seq_len(nrow(params))*1.2 - 0.8, x = params[,1]+2, labels = params[,2], pos = 3, cex = 1, col = "black")
dev.off()

## png 
##   2

Score

Here I propose a scoring scheme. If information is missing from an article, or basic mistakes are made, a point is deducted. If an article goes over and above basic reproducibility, it is awarded a point.

Gene set library origin not stated = -1

Gene set library version not stated = -1

Stat test not stated = -1

No stat test conducted = -1

FDR correction not conducted or not stated = -1

App used not stated = -1

App version not stated = -1

Background list not defined = -1

Inappropriate background list used = -1

Code availability = +1

Gene lists provided = +1

# Score one analysis (one row of the survey table, passed as a named vector).
# Missing values are replaced by 0 so that they match none of the rules.
# One point is deducted for every (field, value) pair listed in `deductions`
# and for an FDR.Correction entry other than "Yes"; one point is added for
# each field in `bonus_fields` that equals "Yes". Returns the total.
score <- function(r){
  r[is.na(r)] <- 0
  total <- 0

  # Field values that each cost one point.
  deductions <- list(
    Gene.set.library = "Not stated",
    GS.version = "No",
    Statistical.test.used = c("No test", "Not stated"),
    App.used = "Not stated",
    App.Version = "No",
    Background.gene.set = c("Not stated", "No", "Stated, but incorrect")
  )
  for (field in names(deductions)) {
    for (bad_value in deductions[[field]]) {
      if (r[field] == bad_value) {
        total <- total - 1
      }
    }
  }

  # Anything other than an explicit "Yes" for FDR correction costs a point.
  if (r["FDR.Correction"] != "Yes") {
    total <- total - 1
  }

  # Fields that earn a point when answered "Yes".
  bonus_fields <- c("Code.availability", "Gene.lists.provided")
  for (field in bonus_fields) {
    if (r[field] == "Yes") {
      total <- total + 1
    }
  }

  total
}

# Score every row of the survey table and show the score distribution.
scores <- apply(x, 1, score)
barplot(table(scores), xlab="analysis score",ylab="frequency")

# Store the scores alongside the survey data.
x[["scores"]] <- scores
# Number of analyses with a positive score.
sum(scores > 0)

## [1] 1

mean(x$scores)

## [1] -3.276596

sd(x$scores)

## [1] 1.442667

median(x$scores)

## [1] -3

Journal rank

# Load the journal metrics table (semicolon-separated, decimal comma) and
# upper-case the titles so they can be matched case-insensitively later.
jmetrics <- read.table(
  file = "scimagojr_2020.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)
jmetrics[["Title"]] <- toupper(jmetrics[["Title"]])
head(jmetrics, n = 3)

##   Rank Sourceid                                 Title    Type
## 1    1    28773    CA-A CANCER JOURNAL FOR CLINICIANS journal
## 2    2    19434      MMWR RECOMMENDATIONS AND REPORTS journal
## 3    3    20315 NATURE REVIEWS MOLECULAR CELL BIOLOGY journal
##                 Issn    SJR SJR.Best.Quartile H.index Total.Docs...2020.
## 1 15424863, 00079235 62.937                Q1     168                 47
## 2 10575987, 15458601 40.949                Q1     143                 10
## 3 14710072, 14710080 37.461                Q1     431                115
##   Total.Docs...3years. Total.Refs. Total.Cites..3years. Citable.Docs...3years.
## 1                  119        3452                15499                     80
## 2                    9        1292                  492                      9
## 3                  338        8439                10844                    167
##   Cites...Doc...2years. Ref....Doc.        Country           Region
## 1                126.34       73.45  United States Northern America
## 2                 50.00      129.20  United States Northern America
## 3                 32.83       73.38 United Kingdom   Western Europe
##                                          Publisher  Coverage
## 1                                  Wiley-Blackwell 1950-2020
## 2 Centers for Disease Control and Prevention (CDC) 1990-2020
## 3                          Nature Publishing Group 2000-2020
##                                                                                                                                                    Categories
## 1                                                                                                                              Hematology (Q1); Oncology (Q1)
## 2 Epidemiology (Q1); Health Information Management (Q1); Health (social science) (Q1); Health, Toxicology and Mutagenesis (Q1); Medicine (miscellaneous) (Q1)
## 3                                                                                                                   Cell Biology (Q1); Molecular Biology (Q1)

# Number of rows and columns in the journal metrics table.
dim(jmetrics)

## [1] 32952    20

# Read the NLM catalogue export and pull out, for each record, the full
# journal title and its abbreviation.
nlmcat <- readLines("nlmcatalog_result.txt")

# Full titles: drop the "Title(s): " label and a trailing full stop, then
# upper-case to match the journal metrics table.
journaltitle <- grep("Title\\(s\\):", nlmcat, value = TRUE)
journaltitle <- gsub("Title\\(s\\): ", "", journaltitle)
journaltitle <- toupper(gsub("\\.$", "", journaltitle))

# Abbreviations: take the text after the first colon and remove one
# trailing and one leading space.
journalabbrev <- grep("Title Abbreviation:", nlmcat, value = TRUE)
journalabbrev <- sapply(strsplit(journalabbrev, ":"), function(parts) parts[[2]])
journalabbrev <- gsub("^ ", "", gsub(" $", "", journalabbrev))

jdf <- data.frame(journalabbrev = journalabbrev, journaltitle = journaltitle)

# Attach the journal metrics to each abbreviation via the full title.
mjdf <- merge(jdf, jmetrics, by.x = "journaltitle", by.y = "Title")

# Attach the metrics to the survey rows; all.x keeps rows whose journal has
# no match in the metrics table.
xm <- merge(x, mjdf, by.x = "Journal", by.y = "journalabbrev", all.x = TRUE)

Analysis scores vs Journal rank

# Show the last few journal names in the merged table.
tail(xm$Journal)

## [1] "Transl Psychiatry"          "Transl Psychiatry"         
## [3] "Virology"                   "Viruses"                   
## [5] "Viruses"                    "World J Gastrointest Oncol"

# Linear fit of analysis score on the journal's SJR value, drawn over a
# scatter plot of the two, followed by a Pearson correlation test.
mylm1 <- lm(xm$scores ~ xm$SJR)
plot(
  x = xm$SJR, y = xm$scores, bty = "n",
  xlab = "SJR", ylab = "score",
  main = "Association of analysis scores with journal metrics"
)
abline(mylm1, col = "red")

cor.test(xm$scores,xm$SJR,method="pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  xm$scores and xm$SJR
## t = -0.30805, df = 174, p-value = 0.7584
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1706771  0.1250053
## sample estimates:
##        cor 
## -0.0233465

# Rank correlation between score and SJR. The data contain ties, for which an
# exact p-value cannot be computed; requesting the asymptotic p-value
# explicitly gives the same result without the warning.
cor.test(xm$scores,xm$SJR,method="spearman",exact=FALSE)

## 
##  Spearman's rank correlation rho
## 
## data:  xm$scores and xm$SJR
## S = 893414, p-value = 0.8257
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.01671343

Best and worst journals

Minimum of 4 analyses.

# Keep only the journal and score columns, then restrict to journals that
# contribute more than 3 analyses.
xmm <- xm[, c("Journal", "scores")]
tab <- table(xmm$Journal)
frequent <- names(tab)[as.vector(tab) > 3]
tab <- tab[frequent]
xmm <- xmm[xmm$Journal %in% frequent, ]
dim(xmm)

## [1] 120   2

# Mean and standard deviation of the scores per journal, sorted from the
# lowest to the highest mean score.
jres <- aggregate(scores ~ Journal, data = xmm, FUN = mean)
jsd <- aggregate(scores ~ Journal, data = xmm, FUN = sd)
jres[["sd"]] <- jsd[["scores"]]
jres <- jres[order(jres[["scores"]]), ]
dim(jres)

## [1] 18  3

head(jres)

##           Journal    scores        sd
## 17       RNA Biol -5.500000 0.5773503
## 5    Exp Ther Med -4.250000 0.9574271
## 7     Front Oncol -4.000000 0.8164966
## 10    Mol Med Rep -4.000000 0.8164966
## 12     Oncol Lett -3.888889 1.5365907
## 2  Biomed Res Int -3.833333 0.4082483

# Axis range derived from the data so that no bar runs past the axis (the
# previous fixed limits of -4 and -5 were narrower than the lowest mean).
score_xlim <- c(min(0, floor(min(jres$scores))), max(0, ceiling(max(jres$scores))))

# jres is sorted ascending by mean score, so tail() gives the best journals
# and head() the worst. With 20 or fewer journals both charts show all of them.
par(mar=c(5,12,3,1))
barplot(tail(jres$scores,20),names.arg = tail(jres$Journal,20),horiz=TRUE,las=1,cex.names = 0.7, xlab="mean score",
        main = "Highest scoring journals",xlim=score_xlim)
grid()

par(mar=c(5,12,3,1))
barplot(head(jres$scores,20),names.arg = head(jres$Journal,20),horiz=TRUE,las=1,cex.names = 0.7, xlab="mean score",
        main = "Lowest scoring journals",xlim=score_xlim)
grid()

# Per-journal score vectors as a named list. lapply() always returns a list;
# sapply() would collapse to a matrix if every journal had the same number of
# analyses, which breaks the ordering and the violin plots below.
j <- unique(xmm$Journal)
jscores <- lapply(j, function(jj) {
  xmm[which(xmm$Journal == jj), "scores"]
})
names(jscores) <- j

# Order journals by mean score, lowest first.
jscores <- jscores[order(vapply(jscores, mean, numeric(1)))]

par(mar=c(5,12,3,1))
vioplot(tail(jscores,20),horizontal = TRUE,las=1,cex.axis=0.75,main="Highest scoring journals")

par(mar=c(5,12,3,1))
vioplot(head(jscores,20),horizontal = TRUE,las=1,cex.axis=0.75,main="Lowest scoring journals")

Session information

# Record the R version and the attached/loaded packages used for this run.
sessionInfo()

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] vioplot_0.3.7       zoo_1.8-9           sm_2.2-5.6         
##  [4] Biobase_2.52.0      BiocGenerics_0.38.0 kableExtra_1.3.4   
##  [7] XML_3.99-0.6        reutils_0.2.3       wordcloud2_0.2.1   
## [10] wordcloud_2.6       RColorBrewer_1.1-2 
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.7        highr_0.9         bslib_0.2.5.1     compiler_4.1.0   
##  [5] jquerylib_0.1.4   bitops_1.0-7      tools_4.1.0       digest_0.6.27    
##  [9] lattice_0.20-44   viridisLite_0.4.0 jsonlite_1.7.2    evaluate_0.14    
## [13] lifecycle_1.0.0   rlang_0.4.11      rstudioapi_0.13   yaml_2.2.1       
## [17] xfun_0.25         fastmap_1.1.0     stringr_1.4.0     httr_1.4.2       
## [21] knitr_1.33        xml2_1.3.2        systemfonts_1.0.2 htmlwidgets_1.5.3
## [25] sass_0.4.0        grid_4.1.0        webshot_0.5.2     svglite_2.0.0    
## [29] glue_1.4.2        R6_2.5.1          tcltk_4.1.0       rmarkdown_2.10   
## [33] magrittr_2.0.1    scales_1.1.1      htmltools_0.5.2   rvest_1.0.1      
## [37] colorspace_2.0-2  stringi_1.7.4     RCurl_1.98-1.3    munsell_0.5.0