The following code is a sample enrichment analysis performed on a pre-clustered set of Saccharomyces cerevisiae genes from the DEE2 repository, using the clusterProfiler package from Bioconductor.

1. Prerequisite libraries

# Attach the yeast annotation database and the enrichment/plotting packages.
# Each library() call is its own statement: wrapping them in c(...) turned
# their (normally invisible) return values into a visible character vector,
# which printed the full list of attached packages into the report.
suppressPackageStartupMessages({
  library(org.Sc.sgd.db)
  library(clusterProfiler)
  library(enrichplot)
})


2. Gene Enrichment Analysis

# Character vector holding the IDs of the genes of interest. In this example
# the IDs are in the Ensembl gene ID format, i.e. the format matched by the
# "ENSEMBL" keyType passed to enrichGO() further down.
genesOfInterest <- c(
  "YDL248W", "YDR542W", "YOL161C", "YOL155C", "YOR032C", "YGL261C",
  "RDN5-1", "YLR349W", "YPL062W", "YPL021W", "YMR321C", "YMR322C",
  "YMR323W", "YMR325W", "YBL108C-A", "YBL029W", "YBR090C", "YBR196C-A",
  "YNL143C", "YNL067W-A", "YJR159W", "YJR160C", "YJR161C", "YKL097C",
  "YER091C-A", "YHR145C", "YIL169C", "YIR043C", "YIR044C", "YFL062W",
  "YAL068C", "YAR066W", "YAR068W"
)

# org.Sc.sgd.db is the organism-specific annotation database (OrgDb) for yeast. You can find the list of all OrgDbs by organism using *1 and use one directly, as shown here, or you can query for the organism from within your code using a package called AnnotationHub. Tutorials can be found in chapters 4 and 5 using *2 above.



# List every keyType the database supports. One of these values is passed as
# the `keyType` argument of enrichGO() and must match the ID format of the
# input genes. Since our genesOfInterest object uses the Ensembl gene ID
# format, the appropriate keyType is "ENSEMBL", as used in the yeast_CP_go
# line below.
keytypes(org.Sc.sgd.db)
##  [1] "ALIAS"        "COMMON"       "DESCRIPTION"  "ENSEMBL"      "ENSEMBLPROT" 
##  [6] "ENSEMBLTRANS" "ENTREZID"     "ENZYME"       "EVIDENCE"     "EVIDENCEALL" 
## [11] "GENENAME"     "GO"           "GOALL"        "INTERPRO"     "ONTOLOGY"    
## [16] "ONTOLOGYALL"  "ORF"          "PATH"         "PFAM"         "PMID"        
## [21] "REFSEQ"       "SGD"          "SMART"        "UNIPROT"
# --------- GO analysis using clusterProfiler

# Run the GO enrichment test on the genes of interest. The OrgDb is given by
# package name, keyType matches the ID format of genesOfInterest, and
# ont = "ALL" requests every ontology (the result carries an ONTOLOGY column).
yeast_CP_go <- enrichGO(
  gene = genesOfInterest,
  OrgDb = "org.Sc.sgd.db",
  keyType = "ENSEMBL",
  ont = "ALL"
)

# Convert the enrichment result into a plain data frame for easier viewing,
# then print it.
yeast_CP_go_df <- as.data.frame(yeast_CP_go)
yeast_CP_go_df
##            ONTOLOGY         ID
## GO:0043328       BP GO:0043328
## GO:0043162       BP GO:0043162
## GO:0032511       BP GO:0032511
## GO:0032509       BP GO:0032509
## GO:0071985       BP GO:0071985
## GO:0045324       BP GO:0045324
##                                                                                                                                   Description
## GO:0043328 protein transport to vacuole involved in ubiquitin-dependent protein catabolic process via the multivesicular body sorting pathway
## GO:0043162                                          ubiquitin-dependent protein catabolic process via the multivesicular body sorting pathway
## GO:0032511                                                         late endosome to vacuole transport via multivesicular body sorting pathway
## GO:0032509                                                                         endosome transport via multivesicular body sorting pathway
## GO:0071985                                                                                                multivesicular body sorting pathway
## GO:0045324                                                                                                 late endosome to vacuole transport
##            GeneRatio BgRatio       pvalue    p.adjust      qvalue
## GO:0043328      3/23 22/5798 7.996522e-05 0.009595827 0.009090783
## GO:0043162      3/23 33/5798 2.753495e-04 0.016520968 0.015651443
## GO:0032511      3/23 43/5798 6.069027e-04 0.018940948 0.017944056
## GO:0032509      3/23 47/5798 7.892062e-04 0.018940948 0.017944056
## GO:0071985      3/23 47/5798 7.892062e-04 0.018940948 0.017944056
## GO:0045324      3/23 59/5798 1.533898e-03 0.030677951 0.029063322
##                             geneID Count
## GO:0043328 YDL248W/YJR161C/YFL062W     3
## GO:0043162 YDL248W/YJR161C/YFL062W     3
## GO:0032511 YDL248W/YJR161C/YFL062W     3
## GO:0032509 YDL248W/YJR161C/YFL062W     3
## GO:0071985 YDL248W/YJR161C/YFL062W     3
## GO:0045324 YDL248W/YJR161C/YFL062W     3
# ------- Visualization using an enrichment map
# emapplot() is enrichplot's enrichment-map plot, not a dot plot; dotplot()
# would be the call for an actual dot plot.
# NOTE(review): this call form matches the enrichplot 1.8.1 recorded in the
# session information; newer enrichplot releases may require the result to be
# passed through pairwise_termsim() first - confirm before upgrading.
emapplot(yeast_CP_go)


Session Information

## R version 4.0.2 (2020-06-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] enrichplot_1.8.1       clusterProfiler_3.16.1 org.Sc.sgd.db_3.11.4  
## [4] AnnotationDbi_1.50.3   IRanges_2.22.2         S4Vectors_0.26.1      
## [7] Biobase_2.48.0         BiocGenerics_0.34.0   
## 
## loaded via a namespace (and not attached):
##  [1] bit64_0.9-7.1       RColorBrewer_1.1-2  progress_1.2.2     
##  [4] httr_1.4.2          tools_4.0.2         R6_2.4.1           
##  [7] DBI_1.1.0           colorspace_1.4-1    tidyselect_1.1.0   
## [10] gridExtra_2.3       prettyunits_1.1.1   bit_1.1-15.2       
## [13] compiler_4.0.2      scatterpie_0.1.5    xml2_1.3.2         
## [16] labeling_0.3        triebeard_0.3.0     scales_1.1.1       
## [19] ggridges_0.5.2      stringr_1.4.0       digest_0.6.25      
## [22] rmarkdown_2.3       DOSE_3.14.0         pkgconfig_2.0.3    
## [25] htmltools_0.5.0     rlang_0.4.7         RSQLite_2.2.0      
## [28] gridGraphics_0.5-0  farver_2.0.3        generics_0.0.2     
## [31] jsonlite_1.7.0      BiocParallel_1.22.0 GOSemSim_2.14.2    
## [34] dplyr_1.0.0         magrittr_1.5        ggplotify_0.0.5    
## [37] GO.db_3.11.4        Matrix_1.2-18       Rcpp_1.0.5         
## [40] munsell_0.5.0       viridis_0.5.1       lifecycle_0.2.0    
## [43] stringi_1.4.6       yaml_2.2.1          ggraph_2.0.3       
## [46] MASS_7.3-53         plyr_1.8.6          qvalue_2.20.0      
## [49] grid_4.0.2          blob_1.2.1          ggrepel_0.8.2      
## [52] DO.db_2.9           crayon_1.3.4        lattice_0.20-41    
## [55] graphlayouts_0.7.0  cowplot_1.0.0       splines_4.0.2      
## [58] hms_0.5.3           knitr_1.29          pillar_1.4.6       
## [61] fgsea_1.14.0        igraph_1.2.5        reshape2_1.4.4     
## [64] fastmatch_1.1-0     glue_1.4.1          evaluate_0.14      
## [67] downloader_0.4      data.table_1.13.0   BiocManager_1.30.10
## [70] vctrs_0.3.2         tweenr_1.0.1        urltools_1.7.3     
## [73] gtable_0.3.0        purrr_0.3.4         polyclip_1.10-0    
## [76] tidyr_1.1.0         ggplot2_3.3.2       xfun_0.16          
## [79] ggforce_0.3.2       europepmc_0.4       tidygraph_1.2.0    
## [82] viridisLite_0.3.0   tibble_3.0.3        rvcheck_0.1.8      
## [85] memoise_1.1.0       ellipsis_0.3.1