Intro

Here we analyse how enrichment analysis was performed in a sample of 1500 PMC articles from 2019.

# Default figure size applied to every chunk in this report.
knitr::opts_chunk$set(
  fig.width = 7,
  fig.height = 5
)

library("wordcloud")
## Loading required package: RColorBrewer
library("RColorBrewer")
library("wordcloud")

Overview of included and excluded analyses

# Read the tab-separated annotation table. fill = TRUE lets read.table()
# accept rows that have fewer fields than the header.
x <- read.table(
  file = "PMC_2019-analysis.tsv",
  sep = "\t",
  header = TRUE,
  fill = TRUE
)
head(x, n = 6L)
##   Pubmed.Central.ID Article.number Allocated          Journal
## 1        PMC6317205              1      Mark     J Transl Med
## 2        PMC6317219              2      Mark     J Transl Med
## 3        PMC6318854              3      Mark BMC Med Genomics
## 4        PMC6318897              4      Mark     BMC Genomics
## 5        PMC6322516              5      Mark Cancer Manag Res
## 6        PMC6323737              6      Mark   BMC Plant Biol
##              Omics.type                Organism         Gene.set.library
## 1               RNA-seq            Homo sapiens               Not stated
## 2 Gene expression array            Homo sapiens                 GO, KEGG
## 3 DNA methylation array            Homo sapiens                       GO
## 4               RNA-seq Gymnocypris przewalskii                     KEGG
## 5 Gene expression array            Homo sapiens Ingenuity Knowledge Base
## 6 Gene expression array             Arabidopsis                       GO
##   GS.version Statistical.test.used FDR.Correction                   App.used
## 1         No               No test           <NA>                      DAVID
## 2         No            Not stated             No                      DAVID
## 3         No               No test           <NA>                    PANTHER
## 4         No            Not stated            Yes                      KOBAS
## 5         No            Not stated             No Ingenuity Pathway Analysis
## 6        Yes            Not stated             No                      topGO
##   App.Version Code.availability Background.gene.set
## 1          No              <NA>          Not stated
## 2          No              <NA>          Not stated
## 3          No              <NA>          Not stated
## 4          No              <NA>          Not stated
## 5          No              <NA>          Not stated
## 6         Yes                No          Not stated
##                 Assumptions.violated Gene.lists.provided
## 1 Inference without test, Background                  No
## 2                    Background, FDR                  No
## 3 Inference without test, Background                  No
## 4                         Background                 Yes
## 5                    Background, FDR                  No
## 6                    Background, FDR                 Yes
# Column names of the annotation table.
names(x)
##  [1] "Pubmed.Central.ID"     "Article.number"        "Allocated"            
##  [4] "Journal"               "Omics.type"            "Organism"             
##  [7] "Gene.set.library"      "GS.version"            "Statistical.test.used"
## [10] "FDR.Correction"        "App.used"              "App.Version"          
## [13] "Code.availability"     "Background.gene.set"   "Assumptions.violated" 
## [16] "Gene.lists.provided"
# Dimensions of the table as read from disk.
dim(x)
## [1] 1762   16
# Rows marked "EXCLUDE" in GS.version are set aside and counted.
# which() skips rows where the comparison is NA, exactly as subset() does.
exclude <- x[which(x$GS.version == "EXCLUDE"), ]
nrow(exclude)
## [1] 133
sum(!duplicated(exclude$Pubmed.Central.ID))
## [1] 133
# Everything not marked "EXCLUDE" is carried forward into the analysis.
# NOTE(review): 133 + 1624 = 1757, not 1762, so 5 rows match neither filter
# (presumably GS.version is missing there) -- confirm dropping them is intended.
x <- x[which(x$GS.version != "EXCLUDE"), ]
nrow(x)
## [1] 1624
sum(!duplicated(x$Pubmed.Central.ID))
## [1] 1363

Omics type

# A row can list several omics types separated by ", "; split them so that
# each type is tallied on its own.
omics_split <- strsplit(x$Omics.type, ", ")
omics <- unlist(omics_split)
# Tally the types and sort ascending by count.
# NOTE(review): the printed table contains near-duplicate labels that are
# counted separately (e.g. a trailing space in "Genotyping array ", the
# spelling "Metablomics"); consider cleaning the labels in the source table.
res <- sort(table(omics))
res
## omics
##                         ATAC-seq                    CRISPR screen 
##                                1                                1 
##                   Cytokine array                   EST sequencing 
##                                1                                1 
##                     Metagenomics            Methyation sequencing 
##                                1                                1 
##                       proteomics                   pyrosequencing 
##                                1                                1 
##                      QTL mapping                      RNAi screen 
##                                1                                1 
##                    TCGA database                         ChIP-seq 
##                                1                                2 
##        DNA Gene expression array                Genotyping array  
##                                2                                2 
##                    Metabolomics                        Microbiome 
##                                2                                2 
##                       NanoString                              PPI 
##                                2                                2 
## RNA-seqand Gene expression array                 Genome sequening 
##                                2                                3 
##                      Metablomics                        scRNA-seq 
##                                4                                4 
##                        PCR array                    Protein array 
##                                6                                8 
##           miRNA expression array       DNA methylation sequencing 
##                               11                               13 
##                Genome sequencing                        miRNA-seq 
##                               30                               38 
##            DNA methylation array                 Genotyping array 
##                               45                               47 
##                     Metabolomics                       Proteomics 
##                               68                              149 
##                         Database            Gene expression array 
##                              273                              500 
##                          RNA-seq 
##                              562
# Word cloud of omics types, using the shorter label "RNA array" in place of
# "Gene expression array". The relabelling also carries over to the bar charts.
names(res) <- gsub("Gene expression array", "RNA array", names(res))
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the 20 most frequent omics types; the wide left
# margin leaves room for the category labels.
par(mar = c(5, 12, 3, 1))
barplot(
  height = tail(res, n = 20),
  horiz = TRUE,
  las = 1,
  cex.names = 0.7,
  xlab = "no. analyses",
  main = "Omics type",
  xlim = c(0, 600)
)
grid()

# Collapse everything outside the nine most frequent omics types into a single
# "Other" bar. The earlier index, 1:(nrow(res) - 10), stopped one element
# short, so the tenth most frequent type was in neither "Other" nor the top
# nine and silently vanished from the chart.
n_keep <- 9
# seq_len() with a floor of zero stays valid when there are <= 9 categories.
n_rest <- max(length(res) - n_keep, 0)
other <- sum(res[seq_len(n_rest)])
res2 <- c(Other = other, tail(res, n_keep))
par(mar = c(5, 12, 3, 1))
barplot(res2, horiz = TRUE, las = 1, cex.names = 0.7, xlab = "no. analyses",
        main = "Omics type", xlim = c(0, 600))
grid()

Organism

# A row can list several organisms separated by ", "; split them so that each
# organism is tallied on its own.
org_split <- strsplit(x$Organism, ", ")
org <- unlist(org_split)
# Tally the organisms and sort ascending by count.
# NOTE(review): the printed table contains label variants that are counted
# separately (e.g. "Saccharomyces cerevisiae\n", "Oryza Sativa"); consider
# cleaning the labels in the source table.
res <- sort(table(org))
res
## org
##          Acinetobacter baumannii             Acropora cervicornis 
##                                1                                1 
##                  Agave tequilana           Ailuropoda melanoleuca 
##                                1                                1 
##      Alternanthera philoxeroides                   Ananas comosus 
##                                1                                1 
##    Anas platyrhynchus domesticus                 Annona × atemoya 
##                                1                                1 
##           Apostichopus japonicus                      Arabidopsis 
##                                1                                1 
##              Artemia franciscana                  Bacillus cereus 
##                                1                                1 
##                           Bombus                  Bubalus bubalis 
##                                1                                1 
##                 Cairina moschata                Camellia oleifera 
##                                1                                1 
## Campylomormyrus compressirostris          Campylomormyrus tshokwe 
##                                1                                1 
##                 Candida albicans                     Capra hircus 
##                                1                                1 
##              Caragana intermedia               Careolus capreolus 
##                                1                                1 
##                 Catalpa fargesii              Catharanthus roseus 
##                                1                                1 
##            Chlorella pyrenoidosa       Chromochloris zofingiensis 
##                                1                                1 
##                Coturnix japonica          Ctenopharyngodon idella 
##                                1                                1 
##                  Cucumis sativus              Cupriavidus necator 
##                                1                                1 
##             Damnacanthus indicus                 Desmosdesmus sp. 
##                                1                                1 
##             Diabrotica virgifera                       Drosophila 
##                                1                                1 
##              Dugesia ryukyuensis                Dunaliella salina 
##                                1                                1 
##                   Eisenia fetida            Enterococcus faecalis 
##                                1                                1 
##             Equus ferus caballus              Eriobotrya japonica 
##                                1                                1 
##               Eriocheir sinensis                 Euphorbia kansui 
##                                1                                1 
##               Exaiptasia pallida                 Fundulus majalis 
##                                1                                1 
##                  Gerbera hybrida             Gnathonemus petersii 
##                                1                                1 
##               Gossypium arboreum             Gossypium barbadense 
##                                1                                1 
##          Gymnocypris przewalskii        Haemaphysalis longicornis 
##                                1                                1 
##           Haemophilus influenzae       Hypophthalmichthys nobilis 
##                                1                                1 
##           Incilaria fruhstorferi                   Juncus effusus 
##                                1                                1 
##           Legionella pneumophila                   Lilium pumilum 
##                                1                                1 
##              Linum usitatissimum                   Lolium perenne 
##                                1                                1 
##                Lolium temulentum             Longissimus thoracis 
##                                1                                1 
##                    Lupinus albus                Lycoris longituba 
##                                1                                1 
##          Lymantria dispar dispar          Malapterurus electricus 
##                                1                                1 
##                  Malus sieversii                       Microbiota 
##                                1                                1 
##              Moschus berezovskii          Mycobacterium smegmatis 
##                                1                                1 
##       Mycobacterium tuberculosis        Mytilus galloprovincialis 
##                                1                                1 
##                      Mytilus sp.            Nicotiana benthamiana 
##                                1                                1 
##               Ocimum tenuiflorum            Oreochromis niloticus 
##                                1                                1 
##                     Oryza Sativa                    Panax ginseng 
##                                1                                1 
##                     Papio anubis              Phellinus igniarius 
##                                1                                1 
##                     Phyllosticta            Plasmodium falciparum 
##                                1                                1 
##             Pleurotus tuoliensis                Populus deltoides 
##                                1                                1 
##           Pseudomonas aeruginosa           Pygoscelis antarcticus 
##                                1                                1 
##                 Pygoscelis papua                     Ribes nigrum 
##                                1                                1 
##         Saccharomyces cerevisiae       Saccharomyces cerevisiae\n 
##                                1                                1 
##           Saccharum officinarum                     Saccharum sp. 
##                                1                                1 
##               Salvelinus alpinus              Serratia marcescens 
##                                1                                1 
##                  Setaria italica                   Sillago sihama 
##                                1                                1 
##           Simiiformes catarrhini                  Sorghum bicolor 
##                                1                                1 
##                  Spica prunellae             Sternopygus macrurus 
##                                1                                1 
##                     Suaeda salsa             Taraxacum kok-saghyz 
##                                1                                1 
##                        Taxus sp.        Trachemys scripta elegans 
##                                1                                1 
##         Trollius chinensis Bunge         Various order Chiroptera 
##                                1                                1 
##                       Vertebrata                       Vicia faba 
##                                1                                1 
##                     Vicia sativa                Xanthomonas citri 
##                                1                                1 
##           Zonotrichia albicollis          Zootermopsis nevadensis 
##                                1                                1 
##          Zygnema circumcarinatum           Abelmoschus esculentus 
##                                1                                2 
##          Aequipecten opercularis               Anas platyrhynchos 
##                                2                                2 
##                   Apis mellifera               Auricularia cornea 
##                                2                                2 
##                   Bemisia tabaci                Brachymeria lasus 
##                                2                                2 
##              Brassica alboglabra                Brassica oleracea 
##                                2                                2 
##                    Brassica rapa           Caenorhabditis elegans 
##                                2                                2 
##                      Danio rerio                 Escherichia coli 
##                                2                                2 
##         Fenneropenaeus chinensis          Granulicatella adiacens 
##                                2                                2 
##                  Hordeum vulgare                  Humulus lupulus 
##                                2                                2 
##                  Ipomoea batatas             Magnolia wufengensis 
##                                2                                2 
##                  Malus domestica                Mauremys reevesii 
##                                2                                2 
##         Megalobrama amblycephala   Meleagris gallopavo silvestris 
##                                2                                2 
##             Mesocricetus auratus          Mizuhopecten yessoensis 
##                                2                                2 
##              Morchella importuna              Nicotiana glutinosa 
##                                2                                2 
##               Nicotiana tabaccum               Oreochromis aureus 
##                                2                                2 
##                     Pagrus major            Pseudotsuga menziesii 
##                                2                                2 
##                 Raphanus sativus           Rhacophorus omeimontis 
##                                2                                2 
##            Schistosoma japonicum         Sclerotinia sclerotiorum 
##                                2                                2 
##          Solanum sisymbriifolium            Staphylococcus aureus 
##                                2                                2 
##          Vibrio parahaemolyticus                   Vitis vinifera 
##                                2                                2 
##                    Aedes aegypti                    Carica papaya 
##                                3                                3 
##                   Macaca mulatta                         Zea mays 
##                                3                                3 
##                    Bos grunniens           Canis lupus familiaris 
##                                4                                4 
##            Capra aegagrus hircus                  Citrus sinensis 
##                                4                                4 
##                  Cyprinus carpio            Oryctolagus cuniculus 
##                                4                                4 
##                Camellia sinensis                   Equus caballus 
##                                5                                5 
##                      Glycine max                Triticum aestivum 
##                                5                                6 
##               Gossypium hirsutum                       Ovis aries 
##                                8                                8 
##             Arabidopsis thaliana                   Brassica napus 
##                                9                                9 
##                     Oryza sativa                    Gallus gallus 
##                               15                               16 
##                       Sus scrofa                       Bos taurus 
##                               27                               28 
##                Rattus norvegicus                     Mus musculus 
##                               54                              136 
##                     Homo sapiens 
##                             1104
# Word cloud of organisms; "Homo sapiens" is shortened to "human" for the cloud.
names(res) <- gsub("Homo sapiens", "human", names(res))
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2"),
  scale = c(4, .5)
)

# Restore the full species name, then chart the 20 most frequent organisms.
names(res) <- gsub("human", "Homo sapiens", names(res))
par(mar = c(5, 12, 3, 1))
barplot(
  height = tail(res, n = 20),
  horiz = TRUE,
  las = 1,
  cex.names = 0.7,
  xlab = "no. analyses",
  main = "Organism",
  xlim = c(0, 1200)
)
grid()

# Collapse everything outside the nine most frequent organisms into a single
# "Other" bar. The earlier index, 1:(nrow(res) - 10), stopped one element
# short, so the tenth most frequent organism was in neither "Other" nor the
# top nine and silently vanished from the chart.
n_keep <- 9
# seq_len() with a floor of zero stays valid when there are <= 9 categories.
n_rest <- max(length(res) - n_keep, 0)
other <- sum(res[seq_len(n_rest)])
res2 <- c(Other = other, tail(res, n_keep))
par(mar = c(5, 12, 3, 1))
barplot(res2, horiz = TRUE, las = 1, cex.names = 0.7, xlab = "no. analyses",
        main = "Organism", xlim = c(0, 1200))
grid()

Gene set library

# A row can list several gene set libraries separated by ", "; split them so
# that each library is tallied on its own.
GSL <- x$Gene.set.library
GSL_split <- strsplit(GSL, ", ")
GSL <- unlist(GSL_split)
res <- table(GSL)
res <- res[order(res)]
# Percentage of library mentions recorded as "Not stated". The count must be
# looked up in res; dividing the position returned by which() by the total
# (as was done before) reported 96 / 2497 instead of 85 / 2497.
res[names(res) == "Not stated"] / sum(res) * 100
## Not stated 
##   3.404085
res
## GSL
## Arabidopsis thaliana metabolic pathway                                 AraCyc 
##                                      1                                      1 
##                                   CCGA                                 ChEMBL 
##                                      1                                      1 
##                               ChemRICH                             ChemSpider 
##                                      1                                      1 
##                        ConsensusPathDB                                 Custom 
##                                      1                                      1 
##                               Cytoband                                  dbPTB 
##                                      1                                      1 
##                                  DGIdb                               DISEASES 
##                                      1                                      1 
##                                 DSigDB                                 EcoCyc 
##                                      1                                      1 
##                                 eggNOG                                   EHMN 
##                                      1                                      1 
##                                Enrichr                                   FUMA 
##                                      1                                      1 
##                                  FunDO                                  G2SBC 
##                                      1                                      1 
##                 Gene Set Knowledgebase                                GenMAPP 
##                                      1                                      1 
##                                    GEO                                 GOslim 
##                                      1                                      1 
##                                   GSEA                           GWAS-catalog 
##                                      1                                      1 
##              Human Metabolome Database                                 IMPaLA 
##                                      1                                      1 
##                              Ingenuity          Ingenuity Canonical Pathways  
##                                      1                                      1 
##                                   INOH               Jensen Diseases database 
##                                      1                                      1 
##                             MetaboLync                                 METLIN 
##                                      1                                      1 
##                                    MGI                                  miEAA 
##                                      1                                      1 
##                                miRBase                                   NCBI 
##                                      1                                      1 
##                  NCBI PubChem BioAssay                                    NCI 
##                                      1                                      1 
##                                   NDeX                              Omic Path 
##                                      1                                      1 
##                                pathDIP                       Pathway Ontology 
##                                      1                                      1 
##                         Pathway Studio                Pathway Studio Ontology 
##                                      1                                      1 
##                             PDSP Ki DB                               PharmGKB 
##                                      1                                      1 
##                                 PHAROS                  Phytozome annotations 
##                                      1                                      1 
##                               plantCyc                                PubChem 
##                                      1                                      1 
##                               REACTOME                              Schizo-Pi 
##                                      1                                      1 
##                                    SGD                              SignaLink 
##                                      1                                      1 
##                                 SIGNOR                                  SMART 
##                                      1                                      1 
##                                  SMPDB                                 STITCH 
##                                      1                                      1 
##                                 STRING                            ToppCluster 
##                                      1                                      1 
##                                ToppFun                               TRANSFAC 
##                                      1                                      1 
##                             Vectorbase                           Vesiclepedia 
##                                      1                                      1 
##                              YEASTRACT                               Hallmark 
##                                      1                                      2 
##                                    HPO                               HumanCyc 
##                                      2                                      2 
##                                 MapMan                               MetaCore 
##                                      2                                      2 
##                              Mummichog                                NetPath 
##                                      2                                      2 
##                                  NHGRI                        Pathway Commons 
##                                      2                                      2 
##                             Swiss-Prot                                    COG 
##                                      2                                      3 
##                                  CORUM                               DisGeNET 
##                                      3                                      3 
##                                     DO                                    KOG 
##                                      3                                      3 
##                                   OMIM                            WikiPathway 
##                                      3                                      4 
##                                   HMDB                                   PFAM 
##                                      5                                      6 
##                                 BioCyc                                UniProt 
##                                      7                                      7 
##                               InterPro                                    PID 
##                                      8                                     10 
##                               Metacore                           WikiPathways 
##                                     12                                     13 
##                          MetaboAnalyst                                PANTHER 
##                                     15                                     17 
##                               BioCarta                             Not stated 
##                                     28                                     85 
##                               Reactome                                 MSigDB 
##                                     85                                    103 
##               Ingenuity Knowledge Base                                   KEGG 
##                                    139                                    892 
##                                     GO 
##                                    956
# Word cloud of gene set libraries. The organism relabelling
# ("Homo sapiens" -> "human") that had been copied into this section was
# dropped: no library name contains that string, so it had no effect.
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the 20 most frequent gene set libraries.
par(mar = c(5, 12, 3, 1))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 0.7,
        xlab = "no. analyses", main = "Gene set library", xlim = c(0, 1000))
grid()

# Collapse everything outside the nine most frequent gene set libraries into a
# single "Other" bar. The earlier index, 1:(nrow(res) - 10), stopped one
# element short, so the tenth most frequent library was in neither "Other" nor
# the top nine and silently vanished from the chart.
n_keep <- 9
# seq_len() with a floor of zero stays valid when there are <= 9 categories.
n_rest <- max(length(res) - n_keep, 0)
other <- sum(res[seq_len(n_rest)])
res2 <- c(Other = other, tail(res, n_keep))
par(mar = c(5, 12, 3, 1))
barplot(res2, horiz = TRUE, las = 1, cex.names = 0.7, xlab = "no. analyses",
        main = "Gene set library", xlim = c(0, 1200))
grid()

Gene set version

# Tally whether the gene set version was reported.
GSV <- x$GS.version
res <- table(GSV)
res
## GSV
##   No  Yes 
## 1516  108
# Percentage for the first level of the table ("No" in the output above).
res[1L] / sum(res) * 100
##       No 
## 93.34975
# Horizontal bar chart of the counts; tail(res, 20) keeps every level here
# because the table has fewer than 20 entries.
par(mar = c(5, 12, 3, 1))
barplot(
  height = tail(res, n = 20),
  horiz = TRUE,
  las = 1,
  cex.names = 1,
  xlab = "no. analyses",
  main = "Gene set version defined",
  xlim = c(0, 1600)
)
grid()

Statistical test used

# A row can list several tests separated by ", "; split them so that each test
# is tallied on its own, then sort ascending by count.
test <- unlist(strsplit(x$Statistical.test.used, ", "))
res <- sort(table(test))
# Percentage of test mentions recorded as "Not stated".
# NOTE(review): the table below also has a separate "Not Stated" entry (4)
# that this exact-match comparison does not include.
res[names(res) == "Not stated"] / sum(res) * 100
## Not stated 
##   63.57535
res
## test
##                          Binomial                     edgeR R package 
##                                  1                                  1 
##                             fisher                             FIsher 
##                                  1                                  1 
##                               GSVA                 Kolmogorov–Smirnov 
##                                  1                                  1 
##               Kolmogorov–Smirnov\n                     Kruskal-Wallis 
##                                  1                                  1 
##                     Kruskal–Wallis                Mann-Whitney U-test 
##                                  1                                  1 
##          modified Chi-squared test                             PASCAL 
##                                  1                                  1 
## Singular Enrichment Analysis (SEA)                               SPIA 
##                                  1                                  1 
##                               GVSA                           Binomial 
##                                  2                                  3 
##                        Chi-squared                        Permutation 
##                                  3                                  3 
##                             ssGSEA                         Not Stated 
##                                  3                                  4 
##                               MSEA                              ANOVA 
##                                  5                                  6 
##                               EASE                            No test 
##                                 10                                 69 
##                     Hypergeometric                             Fisher 
##                                120                                171 
##                               GSEA                         Not stated 
##                                184                               1042
# Word cloud of the statistical tests reported.
par(mar = c(1, 1, 1, 1))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the 20 most frequent tests.
par(mar = c(5, 12, 3, 1))
barplot(
  height = tail(res, n = 20),
  horiz = TRUE,
  las = 1,
  cex.names = 0.7,
  xlab = "no. analyses",
  main = "Test used",
  xlim = c(0, 1200)
)
grid()

# Collapse everything outside the nine most frequent tests into a single
# "Other" bar. The earlier index, 1:(nrow(res) - 10), stopped one element
# short, so the tenth most frequent test was in neither "Other" nor the top
# nine and silently vanished from the chart.
n_keep <- 9
# seq_len() with a floor of zero stays valid when there are <= 9 categories.
n_rest <- max(length(res) - n_keep, 0)
other <- sum(res[seq_len(n_rest)])
res2 <- c(Other = other, tail(res, n_keep))
par(mar = c(5, 12, 3, 1))
barplot(res2, horiz = TRUE, las = 1, cex.names = 0.7, xlab = "no. analyses",
        main = "Test used", xlim = c(0, 1200))
grid()

FDR Correction

# A cell may hold several comma-separated values; split them so each value is
# counted on its own, then tabulate and order the counts ascending.
fdr <- unlist(strsplit(x$FDR.Correction, ", "))
res <- table(fdr)
res <- res[order(res)]
# Percentage of entries that are anything other than "Yes".
sum(res[names(res) != "Yes"]) / sum(res) * 100
## [1] 52.69762
res
## fdr
## Not stated        Yes         No 
##         48        754        792
# Word cloud of the FDR-correction categories, sized by frequency.
par(mar = rep(1, 4))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the same counts (at most the last 20 entries).
par(mar = c(5, 12, 3, 1))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "FDR", xlim = c(0, 800)
)
grid()

App used

# A cell may list several apps separated by ", "; split them so each app is
# counted on its own, then tabulate and order the counts ascending.
App_split <- strsplit(x$App.used, ", ")
App <- unlist(App_split)
res <- table(App)
res <- res[order(res)]
# Percentage of entries where the app was "Not stated".
res[names(res) == "Not stated"] / sum(res) * 100
## Not stated 
##   10.69453
res
## App
##                        anamiR  Basespace Correlation engine 
##                             1                             1 
##                      BioCloud                     Biosystem 
##                             1                             1 
##                    BlastKOALA                        BLASTx 
##                             1                             1 
##                        CAMERA                      ChemRICH 
##                             1                             1 
##                     CluePedia               ConsensuspathDB 
##                             1                             1 
##                          CPDB            Custom Perl script 
##                             1                             1 
##                        DEPICT                       DIAMOND 
##                             1                             1 
##                         ESGEA                   exRNA atlas 
##                             1                             1 
##                FUMA gene2func                        g:GOSt 
##                             1                             1 
##                      Genelibs                     GeneMANIA 
##                             1                             1 
##                    GeneSpring                    GeneTrail2 
##                             1                             1 
##                        GenFam                        GenGen 
##                             1                             1 
##                    globaltest                      GO-Elite 
##                             1                             1 
##                GO::TermFinder                  GOTermFinder 
##                             1                             1 
##                       GSA-SNP                          GVSA 
##                             1                             1 
##                   i‐Gsea4Gwas                          KSEA 
##                             1                             1 
##                         limma                   limma goana 
##                             1                             1 
##                   limma kegga                   LinkedOmics 
##                             1                             1 
##                       MAGENTA                         MATHT 
##                             1                             1 
##                Metabo Analyst                     Metabolon 
##                             1                             1 
##                    MetaboLync                        MiRNet 
##                             1                             1 
##                     miRSystem                    missMethyl 
##                             1                             1 
##          missMethyl R package    Molecule Annotation System 
##                             1                             1 
##                      MS Excel                   NOA web app 
##                             1                             1 
##                     Omicshare                        PANOGA 
##                             1                             1 
##                          PAPi                   Path-Finder 
##                             1                             1 
##                       pathDIP                     Pathifier 
##                             1                             1 
##                       pathVar                      Pathview 
##                             1                             1 
##                     PathVisio                Pathway Studio 
##                             1                             1 
##              PathwayConnector                          PIGE 
##                             1                             1 
##          Plant Pathway Studio                     PlantGSEA 
##                             1                             1 
##                   ProteINSIDE                          PSEA 
##                             1                             1 
##                       QuickGO                        RAVIGO 
##                             1                             1 
##                    ReactomePA                        REViGO 
##                             1                             1 
##                           SEA                       SetRank 
##                             1                             1 
## SGD Gene Ontology Slim Mapper                         Speed 
##                             1                             1 
##                          SPIA                          SPSS 
##                             1                             1 
##          ssGSEA (GenePattern)                         topGo 
##                             1                             1 
##                   ToppCluster                          VLAD 
##                             1                             1 
##                        webMeV                     Cluepedia 
##                             1                             2 
##               ConsensusPathDB          Custom Python script 
##                             2                             2 
##                          DOSE                         FGSEA 
##                             2                             2 
##                    g:Profiler                          GAGE 
##                             2                             2 
##                   GeneAnswers                        GOEAST 
##                             2                             2 
##                   i-GSEA4GWAS    Ingenuity pathway analysis 
##                             2                             2 
##                        MapMan                      MetaCore 
##                             2                             2 
##                         MeTPA                NetworkAnalyst 
##                             2                             2 
##                     OmicsBean                         PIANO 
##                             2                             2 
##         Reactome FI/Cytoscape                       ToppFun 
##                             2                             2 
##                   Uniprot/GOA                          FUMA 
##                             2                             3 
##                       GenCLiP                     GeneCodis 
##                             3                             3 
##               GeneGo Metacore                       GOstats 
##                             3                             3 
##                         GREAT                          GSVA 
##                             3                             3 
##                         MetPA                          MSEA 
##                             3                             3 
##                        Partek                        Pascal 
##                             3                             3 
##                        REVIGO                         AmiGO 
##                             3                             4 
##                 Custom script                          WEGO 
##                             4                             4 
##                  GSEA web app              Reactome web app 
##                             5                             5 
##  Cytoscape (No plugin stated)                      GOATOOLS 
##                             6                             6 
##                   KEGG mapper                         MAGMA 
##                             6                             6 
##                     Mummichog                       GOrilla 
##                             6                             7 
##                 DIANA-miRPath               Custom R script 
##                             9                            10 
##                          KAAS                         topGO 
##                            10                            10 
##                      ToppGene               BiNGO/Cytoscape 
##                            10                            12 
##                     Metascape                      Metacore 
##                            12                            13 
##                       FunRich                        STRING 
##                            14                            18 
##                        agriGO                      Blast2GO 
##                            19                            23 
##                       Enrichr                 MetaboAnalyst 
##                            28                            30 
##                    WebGestalt              ClueGO/Cytoscape 
##                            30                            33 
##                         GOseq                       PANTHER 
##                            38                            41 
##                         KOBAS               clusterProfiler 
##                            69                            87 
##    Ingenuity Pathway Analysis                    Not stated 
##                           158                           174 
##                          GSEA                         DAVID 
##                           186                           379
# Word cloud of the app names in `res`, sized by how often each occurs.
par(mar = rep(1, 4))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the 20 most frequent apps (`res` is sorted
# ascending, so these are its last 20 entries).
par(mar = c(5, 12, 3, 1))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 0.7,
  xlab = "no. analyses", main = "App used", xlim = c(0, 400)
)
grid()

# Collapse everything except the nine most frequent apps into one "Other"
# bar. `res` is sorted ascending, so the nine kept categories are its last
# nine entries and "Other" must be the sum of every entry before them. (The
# previous version summed only the first n - 10 entries while keeping the
# last 9, which silently dropped the tenth most frequent app from the plot.)
n_top <- 9
n_other <- max(length(res) - n_top, 0)
# seq_len() yields an empty index (sum 0) when there are n_top or fewer
# categories, unlike 1:0 which would count backwards.
other <- sum(res[seq_len(n_other)])
res2 <- c(other, tail(res, n_top))
names(res2)[1] <- "Other"
par(mar = c(5, 12, 3, 1))
barplot(res2, horiz = TRUE, las = 1, cex.names = 0.7, xlab = "no. analyses",
        main = "App used", xlim = c(0, 1200))
grid()

App version

# Tabulate whether the app version was reported.
APV <- x$App.Version
res <- table(APV)
res
## APV
##   No  Yes 
## 1190  421
# Percentage of entries with no app version. Look the category up by name
# rather than by position so the figure cannot silently switch to another
# category if the table's levels or their order ever change.
res["No"] / sum(res) * 100
##       No 
## 73.86716
par(mar = c(5, 12, 3, 1))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 1,
        xlab = "no. analyses", main = "App version defined",
        xlim = c(0, 1600))
grid()

Code available

# Tabulate whether analysis code was made available.
code <- x$Code.availability
res <- table(code)
res
## code
##  No Yes 
## 283  10
# Percentage of entries without code. Look the category up by name rather
# than by position so the figure cannot silently switch to another category
# if the table's levels or their order ever change.
res["No"] / sum(res) * 100
##       No 
## 96.58703
par(mar = c(5, 12, 3, 1))
barplot(tail(res, 20), horiz = TRUE, las = 1, cex.names = 1,
        xlab = "no. analyses", main = "Code availability",
        xlim = c(0, 300))
grid()

Background gene set

# Tabulate how the background gene set was reported.
BG <- x$Background.gene.set
res <- table(BG)
res
## BG
##                    No            Not stated Stated, but incorrect 
##                   219                  1159                    15 
##                   Yes 
##                    67
# Percentage of entries that are anything other than "Yes".
sum(res[names(res) != "Yes"]) / sum(res) * 100
## [1] 95.41096
par(mar = c(5, 12, 3, 1))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Background list defined", xlim = c(0, 1200)
)
grid()

Gene lists provided

# Tabulate whether the gene lists were provided.
GL <- x$Gene.lists.provided
res <- table(GL)
res
## GL
##        No  Yes 
##    3 1039  579
# Percentage of entries that are anything other than "Yes".
# NOTE(review): the table above has a blank category (3 entries) which this
# calculation counts as not "Yes" - confirm that is intended.
sum(res[names(res) != "Yes"]) / sum(res) * 100
## [1] 64.28131
par(mar = c(5, 12, 3, 1))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Gene lists provided", xlim = c(0, 1200)
)
grid()

# List the columns available in the data frame.
names(x)
##  [1] "Pubmed.Central.ID"     "Article.number"        "Allocated"            
##  [4] "Journal"               "Omics.type"            "Organism"             
##  [7] "Gene.set.library"      "GS.version"            "Statistical.test.used"
## [10] "FDR.Correction"        "App.used"              "App.Version"          
## [13] "Code.availability"     "Background.gene.set"   "Assumptions.violated" 
## [16] "Gene.lists.provided"

Assumptions violated

# Count rows whose entry is exactly "No" (no assumptions violated) and rows
# with any other non-missing entry.
ok <- sum(x$Assumptions.violated == "No", na.rm = TRUE)
ok
## [1] 160
bad <- sum(x$Assumptions.violated != "No", na.rm = TRUE)
bad
## [1] 1464
# Percentage of rows with no violation.
ok / (bad + ok) * 100
## [1] 9.852217
# A cell may list several violations separated by ", "; split them so each is
# counted on its own, then tabulate and order the counts ascending.
# NOTE(review): the printed table shows a separate `Background\n` category,
# which suggests stray trailing characters in some cells - consider cleaning
# these before tabulating.
ass <- unlist(strsplit(x$Assumptions.violated, ", "))
res <- table(ass)
res <- res[order(res)]
res
## ass
##              Background\n Misinterpreted FDR values             No data shown 
##                         2                         3                        32 
##    Inference without test                        No                       FDR 
##                        61                       160                       761 
##                Background 
##                      1361
# Word cloud of the violation categories, sized by frequency.
par(mar = rep(1, 4))
wordcloud(
  words = names(res),
  freq = res,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Horizontal bar chart of the same counts (at most the last 20 entries).
par(mar = c(5, 12, 3, 1))
barplot(
  tail(res, 20),
  horiz = TRUE, las = 1, cex.names = 1,
  xlab = "no. analyses", main = "Assumptions violated", xlim = c(0, 1400)
)
grid()

Session information

# Print the R version, platform, locale and attached/loaded package versions
# used for this run.
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] wordcloud_2.6      RColorBrewer_1.1-2
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.7        digest_0.6.27     R6_2.5.0          jsonlite_1.7.2   
##  [5] magrittr_2.0.1    evaluate_0.14     highr_0.9         rlang_0.4.11     
##  [9] stringi_1.7.3     jquerylib_0.1.4   bslib_0.2.5.1     rmarkdown_2.9    
## [13] tools_4.1.0       stringr_1.4.0     xfun_0.24         yaml_2.2.1       
## [17] compiler_4.1.0    htmltools_0.5.1.1 knitr_1.33        sass_0.4.0