Source: https://github.com/markziemann/combined_enrichment

Intro

Here we are performing an analysis of some gene expression data to demonstrate the difference between ORA and FCS methods and to highlight the differences caused by improper background gene set use.

The dataset being used is SRP068733 and we are comparing the healthy endothelial cells with a scrambled siRNA to cells treated with a p300 targeting siRNA.

Data are obtained from http://dee2.io/

suppressPackageStartupMessages({
library("getDEE2") 
library("DESeq2")
library("clusterProfiler")
library("mitch")
library("kableExtra")
library("eulerr")
})

Get expression data

I’m using some RNA-seq data looking at the difference in healthy endothelial cell gene expression between vehicle and C646 samples.

# Study accession; used for the metadata lookup here and as the plot title
# further down.
name <- "SRP068733"
# Run-level metadata for human from DEE2 (needs network access).
mdat <- getDEE2Metadata("hsapiens")
# Keep the runs of this study only. An exact comparison against `name` is used
# so accessions that merely contain the study ID as a substring cannot match.
samplesheet <- mdat[which(mdat$SRP_accession == name), ]
samplesheet <- samplesheet[order(samplesheet$SRR_accession), ]
SRRvec <- c("SRR3112216", "SRR3112217", "SRR3112218",
  "SRR3112219", "SRR3112220", "SRR3112221")
samplesheet <- samplesheet[which(samplesheet$SRR_accession %in% SRRvec), ]
# The group labels below are assigned by position, so stop early if the
# metadata does not contain exactly these six runs in this order.
stopifnot(identical(as.character(samplesheet$SRR_accession), SRRvec))
# First three runs are coded 0, last three are coded 1.
samplesheet$trt <- as.factor(c(0, 0, 0, 1, 1, 1))
s1 <- samplesheet

s1 %>% kbl(caption = "sample sheet") %>% kable_paper("hover", full_width = FALSE)
sample sheet
SRR_accession QC_summary SRX_accession SRS_accession SRP_accession Sample_name GEO_series Library_name trt
299400 SRR3112216 PASS SRX1540348 SRS1256815 SRP068733 GSM2044428 GSE77108 0
299401 SRR3112217 PASS SRX1540349 SRS1256814 SRP068733 GSM2044429 GSE77108 0
299402 SRR3112218 PASS SRX1540350 SRS1256812 SRP068733 GSM2044430 GSE77108 0
299403 SRR3112219 PASS SRX1540351 SRS1256813 SRP068733 GSM2044431 GSE77108 1
299404 SRR3112220 PASS SRX1540352 SRS1256811 SRP068733 GSM2044432 GSE77108 1
299405 SRR3112221 PASS SRX1540353 SRS1256810 SRP068733 GSM2044433 GSE77108 1
# Fetch the DEE2 data for the selected runs (needs network access).
w <- getDEE2("hsapiens", SRRvec, metadata = mdat, legacy = TRUE)
## For more information about DEE2 QC metrics, visit
##     https://github.com/markziemann/dee2/blob/master/qc/qc_metrics.md
# Keep the `Tx2Gene` element of the Tx2Gene() result; it is used below as the
# count table (genes in rows, runs in columns).
x <- Tx2Gene(w)$Tx2Gene

# save the genetable for later
gt <- w$GeneInfo[, 1, drop = FALSE]
gt$accession <- rownames(gt)

# counts
# Pick the run columns in sample-sheet order. `x1` is later handed to DESeq2
# as countData with the sample sheet as colData, so column i must be the run
# described by sample-sheet row i; fail loudly if a run has no count column.
run_cols <- match(samplesheet$SRR_accession, colnames(x))
stopifnot(!anyNA(run_cols))
x1 <- x[, run_cols]

Here we show the number of genes in the annotation set and the number detected above the detection threshold.

# Remove lowly expressed genes: keep rows whose mean count across the
# selected runs is at least 10.
mean_count <- rowSums(x1) / ncol(x1)
keep_rows <- which(mean_count >= 10)
x1 <- x1[keep_rows, ]
# number of rows in the unfiltered table
nrow(x)
## [1] 39297
# number of rows remaining after the filter
nrow(x1)
## [1] 14255

Next, a multidimensional scaling (MDS) plot shows how similar the samples are to one another. If the control and case samples cluster separately, then it is likely that there will be many differentially expressed genes with FDR<0.05.

# Transpose so that runs are rows, compute pairwise distances between runs,
# project them with cmdscale() and colour each point by its `trt` level.
mds_coords <- cmdscale(dist(t(x1)))
plot(mds_coords,
  xlab = "Coordinate 1", ylab = "Coordinate 2",
  pch = 19, col = s1$trt, main = "MDS")

Differential expression

Now run DESeq2 for control vs case.

# Build the DESeq2 dataset from the rounded counts and the sample sheet, with
# `trt` as the only term in the design.
y <- DESeqDataSetFromMatrix(
  countData = round(x1),
  colData = s1,
  design = ~ trt
)
## converting counts to integer mode
y <- DESeq(y)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
de <- results(y)
# Order by p-value (smallest first) and convert to a plain data frame.
de <- de[order(de$pvalue), ]
de <- as.data.frame(de)
# Keep only the part of each row name before the first "." .
rownames(de) <- vapply(
  strsplit(rownames(de), "\\."),
  function(parts) parts[[1]],
  character(1)
)
head(de) %>% kbl() %>% kable_paper("hover", full_width = FALSE)
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000049449 10375.678 -1.319179 0.0346992 -38.01753 0 0
ENSG00000065308 9062.133 -1.354569 0.0335259 -40.40362 0 0
ENSG00000066056 10429.917 -1.273349 0.0333485 -38.18314 0 0
ENSG00000068001 6526.375 -1.635372 0.0423455 -38.61975 0 0
ENSG00000076706 25433.097 -1.625573 0.0301945 -53.83679 0 0
ENSG00000087245 19077.593 -1.541325 0.0312561 -49.31281 0 0

Now let’s have a look at some of the charts showing differential expression. In particular, an MA plot and volcano plot.

# Draw an MA plot (log2 baseMean vs log2 fold change) for one contrast.
#
# de:            data frame with columns `baseMean`, `log2FoldChange` and
#                `padj`, one row per gene.
# contrast_name: string used as the plot title.
#
# All genes are drawn in grey and genes with padj < 0.05 are overdrawn in red.
# The subtitle reports how many significant genes have a positive / negative
# fold change and how many rows `de` has. Rows with a missing padj are never
# counted as significant. Called for its plotting side effect; returns NULL
# invisibly.
maplot <- function(de, contrast_name) {
  sig <- subset(de, padj < 0.05)
  n_up <- sum(sig$log2FoldChange > 0, na.rm = TRUE)
  n_dn <- sum(sig$log2FoldChange < 0, na.rm = TRUE)
  n_det <- nrow(de)
  subheader <- paste0(n_up, " up, ", n_dn, " down, ", n_det, " detected")
  plot(log2(de$baseMean), de$log2FoldChange,
       xlab = "log2 basemean", ylab = "log2 foldchange",
       pch = 19, cex = 0.5, col = "dark gray",
       main = contrast_name, cex.main = 0.7)
  points(log2(sig$baseMean), sig$log2FoldChange,
         pch = 19, cex = 0.5, col = "red")
  mtext(subheader, cex = 0.7)
  invisible(NULL)
}

# Draw a volcano plot (log2 fold change vs -log10 adjusted p-value).
#
# de:   data frame with columns `log2FoldChange` and `padj`, one row per gene.
# name: string used as the plot title.
# xlim: x-axis limits; defaults to the previously fixed range c(-6, 6).
#
# All genes are drawn in grey and genes with padj < 0.05 are overdrawn in red.
# The header reports the number of significant genes, how many of them have a
# positive / negative fold change, and how many rows `de` has. Called for its
# plotting side effect; returns NULL invisibly.
make_volcano <- function(de, name, xlim = c(-6, 6)) {
    sig <- subset(de, padj < 0.05)
    n_sig <- nrow(sig)
    n_up <- nrow(subset(sig, log2FoldChange > 0))
    n_dn <- nrow(subset(sig, log2FoldChange < 0))
    n_det <- nrow(de)
    header <- paste(n_sig, "@5%FDR,", n_up, "up", n_dn, "dn", n_det, "detected")
    # An adjusted p-value of exactly 0 would give -log10 = Inf and the point
    # would not be drawn even though the gene is counted in the header. Clamp
    # to the smallest positive double so such genes stay visible at the top.
    # NA values are left as NA and are simply not drawn.
    neg_log10 <- function(p) -log10(pmax(p, .Machine$double.xmin))
    plot(de$log2FoldChange, neg_log10(de$padj), cex = 0.5, pch = 19,
        col = "darkgray", main = name, xlab = "log2 FC",
        ylab = "-log10 adjusted p-value", xlim = xlim)
    mtext(header)
    grid()
    points(sig$log2FoldChange, neg_log10(sig$padj), cex = 0.5, pch = 19,
        col = "red")
    invisible(NULL)
}

# MA plot of the DESeq2 results, titled with the study accession.
maplot(de = de, contrast_name = name)

# Volcano plot of the same results.
make_volcano(de = de, name = name)

Gene sets from Reactome

In order to perform gene set analysis, we need some gene sets.

# Download and unpack the Reactome GMT archive unless the unpacked file is
# already in the working directory, then read it with mitch's gmt_import().
gmt_file <- "ReactomePathways.gmt"
if (!file.exists(gmt_file)) {
  zip_file <- "ReactomePathways.gmt.zip"
  download.file(
    url = "https://reactome.org/download/current/ReactomePathways.gmt.zip",
    destfile = zip_file
  )
  unzip(zip_file)
}
genesets <- gmt_import(gmt_file)

FCS with Mitch

Mitch uses rank-ANOVA statistics for enrichment detection.

Here I’m using the standard approach

# Import the DESeq2 table into mitch, using `gt` as the gene table.
m <- mitch_import(de, DEtype = "DEseq2", geneTable = gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 14255
## Note: no. genes in output = 13309
## Note: estimated proportion of input genes in output = 0.934
# Enrichment on the signed scores.
msep <- mitch_calc(m, genesets = genesets)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
# First column (set name) of the rows with adjusted p-value below 0.05,
# split by the sign of s.dist. Rows with NA in the condition are dropped.
sep_res <- msep$enrichment_result
sep_sig <- sep_res$p.adjustANOVA < 0.05
ms_up <- sep_res[which(sep_sig & sep_res$s.dist > 0), 1]
ms_dn <- sep_res[which(sep_sig & sep_res$s.dist < 0), 1]
message(paste("Number of up-regulated pathways:", length(ms_up)))
## Number of up-regulated pathways: 73
message(paste("Number of down-regulated pathways:", length(ms_dn)))
## Number of down-regulated pathways: 183
head(msep$enrichment_result, 10)  #%>% kbl() %>% kable_paper("hover", full_width = F)
##                                                                    set setSize
## 339                                  Extracellular matrix organization     191
## 1021       SRP-dependent cotranslational protein targeting to membrane     110
## 373                           Formation of a pool of free 40S subunits      99
## 333                                  Eukaryotic Translation Elongation      91
## 1300                                            Viral mRNA Translation      87
## 563  L13a-mediated translational silencing of Ceruloplasmin expression     109
## 413            GTP hydrolysis and joining of the 60S ribosomal subunit     110
## 774                                           Peptide chain elongation      87
## 130                               Cap-dependent Translation Initiation     117
## 334                                  Eukaryotic Translation Initiation     117
##            pANOVA     s.dist p.adjustANOVA
## 339  7.536703e-42 -0.5684105  1.012179e-38
## 1021 1.541149e-27 -0.5997810  1.034881e-24
## 373  5.045565e-26 -0.6132743  2.258731e-23
## 333  6.135975e-25 -0.6251432  2.060153e-22
## 1300 1.951027e-24 -0.6323578  5.240457e-22
## 563  2.722319e-24 -0.5636308  6.093458e-22
## 413  3.567581e-24 -0.5596347  6.844659e-22
## 774  4.829156e-24 -0.6269001  8.106945e-22
## 130  1.309151e-23 -0.5359684  1.758190e-21
## 334  1.309151e-23 -0.5359684  1.758190e-21
#mitch_report(msep,outfile="mitch_separate.html",overwrite=TRUE)

Here I’m using the combined approach.

# Enrichment on the absolute values of the scores, so the sign of each gene's
# score is discarded before testing.
# NOTE(review): with abs(m), s.dist > 0 presumably means "larger magnitude
# than background" rather than up-regulation, although the messages below
# still say up/down-regulated; confirm the intended wording.
mcom <- mitch_calc(abs(m), genesets = genesets)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
# First column (set name) of the rows with adjusted p-value below 0.05,
# split by the sign of s.dist. Rows with NA in the condition are dropped.
comb_res <- mcom$enrichment_result
comb_sig <- comb_res$p.adjustANOVA < 0.05
mc_up <- comb_res[which(comb_sig & comb_res$s.dist > 0), 1]
mc_dn <- comb_res[which(comb_sig & comb_res$s.dist < 0), 1]
message(paste("Number of up-regulated pathways:", length(mc_up)))
## Number of up-regulated pathways: 225
message(paste("Number of down-regulated pathways:", length(mc_dn)))
## Number of down-regulated pathways: 15
head(mcom$enrichment_result, 10)  #%>% kbl() %>% kable_paper("hover", full_width = F)
##                                    set setSize       pANOVA    s.dist
## 1052               Signal Transduction    1741 3.636200e-28 0.1629695
## 271                            Disease    1201 4.666935e-23 0.1724173
## 339  Extracellular matrix organization     191 1.755053e-22 0.4097850
## 475                         Hemostasis     391 2.995162e-21 0.2800019
## 268              Developmental Biology     643 9.113310e-19 0.2061632
## 496                      Immune System    1320 5.130524e-17 0.1401958
## 86                       Axon guidance     415 2.280323e-16 0.2360126
## 697         Nervous system development     433 2.370334e-16 0.2310857
## 503                 Infectious disease     629 3.932209e-14 0.1782001
## 793             Platelet degranulation      84 1.578786e-13 0.4659591
##      p.adjustANOVA
## 1052  4.883416e-25
## 271   3.133847e-20
## 339   7.856788e-20
## 475   1.005626e-18
## 268   2.447835e-16
## 496   1.148382e-14
## 86    3.979199e-14
## 697   3.979199e-14
## 503   5.867730e-12
## 793   2.120310e-11
#mitch_report(mcom,outfile="mitch_combined.html",overwrite=TRUE)

Let’s look at the significant ones based on the combined analysis. There weren’t many gene sets classed as significant. Let’s see how many have a direction-agnostic enrichment score which is larger in magnitude than the direction-informed enrichment score. There are only 11 such sets which would benefit from such combined analysis.

Euler diagram of the significant pathways found with each approach.

# Significant set names from the separate and combined analyses, one list
# element per analysis and s.dist sign.
l0 <- list(
  "sep up" = ms_up,
  "sep dn" = ms_dn,
  "comb up" = mc_up,
  "comb dn" = mc_dn
)
par(cex.main = 0.5)
euler_fit <- euler(l0)
plot(euler_fit, quantities = TRUE, edges = "gray",
  main = "FCS: combined vs separated")

# Counts per list, totals per analysis, and the separate/combined ratio.
length(ms_up)
## [1] 73
length(ms_dn)
## [1] 183
length(c(ms_up, ms_dn))
## [1] 256
length(mc_up)
## [1] 225
length(mc_dn)
## [1] 15
length(c(mc_up, mc_dn))
## [1] 240
length(c(ms_up, ms_dn)) / length(c(mc_up, mc_dn))
## [1] 1.066667

List gene sets which are specific to each approach.

# Pool the significant sets from both s.dist signs of the separate analysis.
ms <- c(ms_up, ms_dn)

# in sep but not comb
# NOTE(review): only mc_up is subtracted; mc_dn is not part of this
# comparison. Confirm that is intended.
setdiff(x = ms, y = mc_up)
##   [1] "Gene expression (Transcription)"                                                                         
##   [2] "RNA Polymerase II Transcription"                                                                         
##   [3] "Generic Transcription Pathway"                                                                           
##   [4] "Antigen processing: Ubiquitination & Proteasome degradation"                                             
##   [5] "Chromatin modifying enzymes"                                                                             
##   [6] "Chromatin organization"                                                                                  
##   [7] "HDMs demethylate histones"                                                                               
##   [8] "Ub-specific processing proteases"                                                                        
##   [9] "Class I MHC mediated antigen processing & presentation"                                                  
##  [10] "Neddylation"                                                                                             
##  [11] "Separation of Sister Chromatids"                                                                         
##  [12] "Peroxisomal lipid metabolism"                                                                            
##  [13] "Deubiquitination"                                                                                        
##  [14] "Kinesins"                                                                                                
##  [15] "HATs acetylate histones"                                                                                 
##  [16] "Peroxisomal protein import"                                                                              
##  [17] "Retrograde transport at the Trans-Golgi-Network"                                                         
##  [18] "Cilium Assembly"                                                                                         
##  [19] "PKMTs methylate histone lysines"                                                                         
##  [20] "Class I peroxisomal membrane protein import"                                                             
##  [21] "Processing of Capped Intron-Containing Pre-mRNA"                                                         
##  [22] "mRNA Splicing"                                                                                           
##  [23] "Signaling by Insulin receptor"                                                                           
##  [24] "M Phase"                                                                                                 
##  [25] "Mitotic Anaphase"                                                                                        
##  [26] "Cell Cycle Checkpoints"                                                                                  
##  [27] "Synthesis of glycosylphosphatidylinositol (GPI)"                                                         
##  [28] "Protein localization"                                                                                    
##  [29] "Rab regulation of trafficking"                                                                           
##  [30] "Mitotic Metaphase and Anaphase"                                                                          
##  [31] "Intraflagellar transport"                                                                                
##  [32] "Endosomal Sorting Complex Required For Transport (ESCRT)"                                                
##  [33] "mRNA Splicing - Major Pathway"                                                                           
##  [34] "Resolution of Sister Chromatid Cohesion"                                                                 
##  [35] "RNA Polymerase II Pre-transcription Events"                                                              
##  [36] "Insulin receptor recycling"                                                                              
##  [37] "Synthesis of PIPs at the Golgi membrane"                                                                 
##  [38] "RAB GEFs exchange GTP for GDP on RABs"                                                                   
##  [39] "Cohesin Loading onto Chromatin"                                                                          
##  [40] "HIV Infection"                                                                                           
##  [41] "Regulation of PTEN stability and activity"                                                               
##  [42] "APC/C:Cdh1 mediated degradation of Cdc20 and other APC/C:Cdh1 targeted proteins in late mitosis/early G1"
##  [43] "Beta-oxidation of very long chain fatty acids"                                                           
##  [44] "Organelle biogenesis and maintenance"                                                                    
##  [45] "APC/C-mediated degradation of cell cycle proteins"                                                       
##  [46] "Regulation of mitotic cell cycle"                                                                        
##  [47] "PTEN Regulation"                                                                                         
##  [48] "Amino acids regulate mTORC1"                                                                             
##  [49] "Autodegradation of Cdh1 by Cdh1:APC/C"                                                                   
##  [50] "Formation of RNA Pol II elongation complex"                                                              
##  [51] "RNA Polymerase II Transcription Elongation"                                                              
##  [52] "Hedgehog 'off' state"                                                                                    
##  [53] "Cell Cycle"                                                                                              
##  [54] "Mitotic Spindle Checkpoint"                                                                              
##  [55] "NIK-->noncanonical NF-kB signaling"                                                                      
##  [56] "Regulation of TP53 Activity"                                                                             
##  [57] "Mitochondrial tRNA aminoacylation"                                                                       
##  [58] "Negative regulation of NOTCH4 signaling"                                                                 
##  [59] "Mitotic Telophase/Cytokinesis"                                                                           
##  [60] "Macroautophagy"                                                                                          
##  [61] "Dectin-1 mediated noncanonical NF-kB signaling"                                                          
##  [62] "Degradation of DVL"                                                                                      
##  [63] "Constitutive Signaling by AKT1 E17K in Cancer"                                                           
##  [64] "Synthesis of PIPs at the late endosome membrane"                                                         
##  [65] "Biotin transport and metabolism"                                                                         
##  [66] "RNA polymerase II transcribes snRNA genes"                                                               
##  [67] "Major pathway of rRNA processing in the nucleolus and cytosol"                                           
##  [68] "rRNA processing in the nucleus and cytosol"                                                              
##  [69] "rRNA processing"                                                                                         
##  [70] "GPCR ligand binding"                                                                                     
##  [71] "Class A/1 (Rhodopsin-like receptors)"                                                                    
##  [72] "Diseases associated with glycosaminoglycan metabolism"                                                   
##  [73] "Chondroitin sulfate/dermatan sulfate metabolism"                                                         
##  [74] "Cardiac conduction"                                                                                      
##  [75] "HS-GAG degradation"                                                                                      
##  [76] "G alpha (q) signalling events"                                                                           
##  [77] "Scavenging by Class A Receptors"                                                                         
##  [78] "Peptide ligand-binding receptors"                                                                        
##  [79] "Other semaphorin interactions"                                                                           
##  [80] "Ephrin signaling"                                                                                        
##  [81] "Pre-NOTCH Processing in Golgi"                                                                           
##  [82] "Interleukin-10 signaling"                                                                                
##  [83] "Keratinization"                                                                                          
##  [84] "Plasma lipoprotein assembly, remodeling, and clearance"                                                  
##  [85] "Ion homeostasis"                                                                                         
##  [86] "Hyaluronan uptake and degradation"                                                                       
##  [87] "HS-GAG biosynthesis"                                                                                     
##  [88] "A tetrasaccharide linker sequence is required for GAG synthesis"                                         
##  [89] "Defective B4GALT7 causes EDS, progeroid type"                                                            
##  [90] "Defective B3GALT6 causes EDSP2 and SEMDJL1"                                                              
##  [91] "Chemokine receptors bind chemokines"                                                                     
##  [92] "Defective EXT1 causes exostoses 1, TRPS2 and CHDS"                                                       
##  [93] "Defective EXT2 causes exostoses 2"                                                                       
##  [94] "Glutamate and glutamine metabolism"                                                                      
##  [95] "DNA strand elongation"                                                                                   
##  [96] "Class B/2 (Secretin family receptors)"                                                                   
##  [97] "TNFs bind their physiological receptors"                                                                 
##  [98] "Synthesis of substrates in N-glycan biosythesis"                                                         
##  [99] "Defective B3GAT3 causes JDSSDHD"                                                                         
## [100] "Plasma lipoprotein remodeling"                                                                           
## [101] "Striated Muscle Contraction"                                                                             
## [102] "Sensory Perception"                                                                                      
## [103] "Sema3A PAK dependent Axon repulsion"                                                                     
## [104] "Repression of WNT target genes"                                                                          
## [105] "Mucopolysaccharidoses"                                                                                   
## [106] "p130Cas linkage to MAPK signaling for integrins"                                                         
## [107] "rRNA modification in the nucleus and cytosol"                                                            
## [108] "Caspase activation via extrinsic apoptotic signalling pathway"                                           
## [109] "CS/DS degradation"                                                                                       
## [110] "Signal transduction by L1"                                                                               
## [111] "Apoptotic cleavage of cellular proteins"                                                                 
## [112] "ADORA2B mediated anti-inflammatory cytokines production"                                                 
## [113] "Plasma lipoprotein assembly"                                                                             
## [114] "Metabolism of nucleotides"                                                                               
## [115] "tRNA processing in the mitochondrion"                                                                    
## [116] "ATF6 (ATF6-alpha) activates chaperone genes"                                                             
## [117] "EPHB-mediated forward signaling"                                                                         
## [118] "Unwinding of DNA"                                                                                        
## [119] "Sialic acid metabolism"                                                                                  
## [120] "Other interleukin signaling"                                                                             
## [121] "Diseases of carbohydrate metabolism"                                                                     
## [122] "G alpha (12/13) signalling events"                                                                       
## [123] "Neuronal System"
# in comb but not sep
# NOTE(review): only mc_up is compared against `ms`; mc_dn is not listed
# here. Confirm that is intended.
setdiff(x = mc_up, y = ms)
##  [1] "Diseases of signal transduction by growth factor receptors and second messengers"                    
##  [2] "Vesicle-mediated transport"                                                                          
##  [3] "Signaling by Rho GTPases, Miro GTPases and RHOBTB3"                                                  
##  [4] "Signaling by Rho GTPases"                                                                            
##  [5] "Membrane Trafficking"                                                                                
##  [6] "Metabolism"                                                                                          
##  [7] "Post-translational protein modification"                                                             
##  [8] "Signaling by ALK fusions and activated point mutants"                                                
##  [9] "Signaling by ALK in cancer"                                                                          
## [10] "Signaling by NOTCH"                                                                                  
## [11] "Signaling by NOTCH1"                                                                                 
## [12] "Signaling by WNT"                                                                                    
## [13] "Constitutive Signaling by NOTCH1 HD+PEST Domain Mutants"                                             
## [14] "Constitutive Signaling by NOTCH1 PEST Domain Mutants"                                                
## [15] "Signaling by NOTCH1 HD+PEST Domain Mutants in Cancer"                                                
## [16] "Signaling by NOTCH1 PEST Domain Mutants in Cancer"                                                   
## [17] "Signaling by NOTCH1 in Cancer"                                                                       
## [18] "Opioid Signalling"                                                                                   
## [19] "NOTCH1 Intracellular Domain Regulates Transcription"                                                 
## [20] "CRMPs in Sema3A signaling"                                                                           
## [21] "Signaling by TGFB family members"                                                                    
## [22] "Adaptive Immune System"                                                                              
## [23] "Intracellular signaling by second messengers"                                                        
## [24] "SARS-CoV-1 Infection"                                                                                
## [25] "Signaling by Interleukins"                                                                           
## [26] "Signaling by NOTCH3"                                                                                 
## [27] "G-protein mediated events"                                                                           
## [28] "TCF dependent signaling in response to WNT"                                                          
## [29] "RUNX3 regulates NOTCH signaling"                                                                     
## [30] "Integration of energy metabolism"                                                                    
## [31] "PLC beta mediated events"                                                                            
## [32] "Signaling by NOTCH2"                                                                                 
## [33] "Transport to the Golgi and subsequent modification"                                                  
## [34] "SARS-CoV Infections"                                                                                 
## [35] "RAF/MAP kinase cascade"                                                                              
## [36] "Clathrin-mediated endocytosis"                                                                       
## [37] "MAPK family signaling cascades"                                                                      
## [38] "Regulation of gene expression in late stage (branching morphogenesis) pancreatic bud precursor cells"
## [39] "Negative regulation of the PI3K/AKT network"                                                         
## [40] "Platelet sensitization by LDL"                                                                       
## [41] "PI5P, PP2A and IER3 Regulate PI3K/AKT Signaling"                                                     
## [42] "Cargo recognition for clathrin-mediated endocytosis"                                                 
## [43] "MAPK1/MAPK3 signaling"                                                                               
## [44] "Signaling by Non-Receptor Tyrosine Kinases"                                                          
## [45] "Signaling by PTK6"                                                                                   
## [46] "Notch-HLH transcription pathway"                                                                     
## [47] "Regulation of beta-cell development"                                                                 
## [48] "Pre-NOTCH Transcription and Translation"                                                             
## [49] "Signaling by TGF-beta Receptor Complex"                                                              
## [50] "Pexophagy"                                                                                           
## [51] "Formation of the beta-catenin:TCF transactivating complex"                                           
## [52] "Deactivation of the beta-catenin transactivating complex"                                            
## [53] "Uptake and actions of bacterial toxins"                                                              
## [54] "Arachidonic acid metabolism"                                                                         
## [55] "ER to Golgi Anterograde Transport"                                                                   
## [56] "Amyloid fiber formation"                                                                             
## [57] "Cytokine Signaling in Immune system"                                                                 
## [58] "PINK1-PRKN Mediated Mitophagy"                                                                       
## [59] "Transcriptional regulation by RUNX3"                                                                 
## [60] "Keratan sulfate biosynthesis"                                                                        
## [61] "CLEC7A (Dectin-1) induces NFAT activation"                                                           
## [62] "Extra-nuclear estrogen signaling"                                                                    
## [63] "Transcriptional activity of SMAD2/SMAD3:SMAD4 heterotrimer"                                          
## [64] "Cargo concentration in the ER"                                                                       
## [65] "ESR-mediated signaling"                                                                              
## [66] "Downregulation of SMAD2/3:SMAD4 transcriptional activity"                                            
## [67] "Caspase-mediated cleavage of cytoskeletal proteins"                                                  
## [68] "NOTCH3 Intracellular Domain Regulates Transcription"                                                 
## [69] "RHOQ GTPase cycle"                                                                                   
## [70] "Fatty acid metabolism"                                                                               
## [71] "SMAD2/SMAD3:SMAD4 heterotrimer regulates transcription"                                              
## [72] "FOXO-mediated transcription"                                                                         
## [73] "Transport of small molecules"                                                                        
## [74] "PI3K/AKT Signaling in Cancer"                                                                        
## [75] "RHOH GTPase cycle"                                                                                   
## [76] "RAC2 GTPase cycle"                                                                                   
## [77] "DAG and IP3 signaling"                                                                               
## [78] "Signaling by Nuclear Receptors"                                                                      
## [79] "Signaling by MET"                                                                                    
## [80] "ERKs are inactivated"                                                                                
## [81] "Signaling by ERBB2"                                                                                  
## [82] "Constitutive Signaling by Aberrant PI3K in Cancer"                                                   
## [83] "Phospholipid metabolism"                                                                             
## [84] "TGF-beta receptor signaling activates SMADs"                                                         
## [85] "Golgi-to-ER retrograde transport"                                                                    
## [86] "Common Pathway of Fibrin Clot Formation"                                                             
## [87] "Activated NOTCH1 Transmits Signal to the Nucleus"                                                    
## [88] "RHOJ GTPase cycle"                                                                                   
## [89] "Signaling by NOTCH4"                                                                                 
## [90] "Deregulated CDK5 triggers multiple neurodegenerative pathways in Alzheimer's disease models"         
## [91] "Neurodegenerative Diseases"                                                                          
## [92] "COPI-mediated anterograde transport"
# Gene sets reported by both the combined analysis (mc_up) and `ms`
# NOTE(review): `ms` is defined earlier; presumably the pooled separate-analysis
# sets (ms_up and ms_dn) -- confirm.
intersect(x = mc_up, y = ms)
##   [1] "Signal Transduction"                                                                                                        
##   [2] "Disease"                                                                                                                    
##   [3] "Extracellular matrix organization"                                                                                          
##   [4] "Hemostasis"                                                                                                                 
##   [5] "Developmental Biology"                                                                                                      
##   [6] "Immune System"                                                                                                              
##   [7] "Axon guidance"                                                                                                              
##   [8] "Nervous system development"                                                                                                 
##   [9] "Infectious disease"                                                                                                         
##  [10] "Platelet degranulation"                                                                                                     
##  [11] "Response to elevated platelet cytosolic Ca2+"                                                                               
##  [12] "Platelet activation, signaling and aggregation"                                                                             
##  [13] "Metabolism of proteins"                                                                                                     
##  [14] "SRP-dependent cotranslational protein targeting to membrane"                                                                
##  [15] "Innate Immune System"                                                                                                       
##  [16] "Neutrophil degranulation"                                                                                                   
##  [17] "Viral mRNA Translation"                                                                                                     
##  [18] "Formation of a pool of free 40S subunits"                                                                                   
##  [19] "Eukaryotic Translation Elongation"                                                                                          
##  [20] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
##  [21] "Peptide chain elongation"                                                                                                   
##  [22] "RHO GTPase cycle"                                                                                                           
##  [23] "Integrin cell surface interactions"                                                                                         
##  [24] "Selenocysteine synthesis"                                                                                                   
##  [25] "L13a-mediated translational silencing of Ceruloplasmin expression"                                                          
##  [26] "Cap-dependent Translation Initiation"                                                                                       
##  [27] "Eukaryotic Translation Initiation"                                                                                          
##  [28] "Cellular responses to stimuli"                                                                                              
##  [29] "GTP hydrolysis and joining of the 60S ribosomal subunit"                                                                    
##  [30] "Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)"                                               
##  [31] "Post-translational protein phosphorylation"                                                                                 
##  [32] "Eukaryotic Translation Termination"                                                                                         
##  [33] "Signaling by Receptor Tyrosine Kinases"                                                                                     
##  [34] "Cellular responses to stress"                                                                                               
##  [35] "Cell-Cell communication"                                                                                                    
##  [36] "Selenoamino acid metabolism"                                                                                                
##  [37] "Collagen formation"                                                                                                         
##  [38] "Non-integrin membrane-ECM interactions"                                                                                     
##  [39] "Response of EIF2AK4 (GCN2) to amino acid deficiency"                                                                        
##  [40] "Cell junction organization"                                                                                                 
##  [41] "Cell surface interactions at the vascular wall"                                                                             
##  [42] "Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)"                                                  
##  [43] "Nonsense-Mediated Decay (NMD)"                                                                                              
##  [44] "Laminin interactions"                                                                                                       
##  [45] "RHOA GTPase cycle"                                                                                                          
##  [46] "Elastic fibre formation"                                                                                                    
##  [47] "IRE1alpha activates chaperones"                                                                                             
##  [48] "Unfolded Protein Response (UPR)"                                                                                            
##  [49] "XBP1(S) activates chaperone genes"                                                                                          
##  [50] "Degradation of the extracellular matrix"                                                                                    
##  [51] "Signaling by ROBO receptors"                                                                                                
##  [52] "Molecules associated with elastic fibres"                                                                                   
##  [53] "RAC1 GTPase cycle"                                                                                                          
##  [54] "RHOC GTPase cycle"                                                                                                          
##  [55] "Asparagine N-linked glycosylation"                                                                                          
##  [56] "Leishmania infection"                                                                                                       
##  [57] "Influenza Infection"                                                                                                        
##  [58] "Collagen biosynthesis and modifying enzymes"                                                                                
##  [59] "Cellular response to starvation"                                                                                            
##  [60] "Platelet homeostasis"                                                                                                       
##  [61] "Influenza Viral RNA Transcription and Replication"                                                                          
##  [62] "Glycosaminoglycan metabolism"                                                                                               
##  [63] "RHOB GTPase cycle"                                                                                                          
##  [64] "Signaling by GPCR"                                                                                                          
##  [65] "Regulation of expression of SLITs and ROBOs"                                                                                
##  [66] "Semaphorin interactions"                                                                                                    
##  [67] "ECM proteoglycans"                                                                                                          
##  [68] "O-linked glycosylation"                                                                                                     
##  [69] "Formation of the ternary complex, and subsequently, the 43S complex"                                                        
##  [70] "Signaling by VEGF"                                                                                                          
##  [71] "NOTCH4 Intracellular Domain Regulates Transcription"                                                                        
##  [72] "SARS-CoV-2 Infection"                                                                                                       
##  [73] "GPCR downstream signalling"                                                                                                 
##  [74] "Metabolism of lipids"                                                                                                       
##  [75] "Syndecan interactions"                                                                                                      
##  [76] "Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S"                     
##  [77] "Metabolism of amino acids and derivatives"                                                                                  
##  [78] "Cell-extracellular matrix interactions"                                                                                     
##  [79] "Ribosomal scanning and start codon recognition"                                                                             
##  [80] "Translation initiation complex formation"                                                                                   
##  [81] "RAC3 GTPase cycle"                                                                                                          
##  [82] "Translation of Structural Proteins"                                                                                         
##  [83] "Pre-NOTCH Expression and Processing"                                                                                        
##  [84] "CDC42 GTPase cycle"                                                                                                         
##  [85] "Regulation of cholesterol biosynthesis by SREBP (SREBF)"                                                                    
##  [86] "Ca2+ pathway"                                                                                                               
##  [87] "Diseases of metabolism"                                                                                                     
##  [88] "Fcgamma receptor (FCGR) dependent phagocytosis"                                                                             
##  [89] "MET activates PTK2 signaling"                                                                                               
##  [90] "Anti-inflammatory response favouring Leishmania parasite infection"                                                         
##  [91] "Leishmania parasite growth and survival"                                                                                    
##  [92] "Assembly of collagen fibrils and other multimeric structures"                                                               
##  [93] "Smooth Muscle Contraction"                                                                                                  
##  [94] "G alpha (i) signalling events"                                                                                              
##  [95] "Metabolism of carbohydrates"                                                                                                
##  [96] "SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion"                                                          
##  [97] "Diseases of glycosylation"                                                                                                  
##  [98] "PIP3 activates AKT signaling"                                                                                               
##  [99] "Adherens junctions interactions"                                                                                            
## [100] "L1CAM interactions"                                                                                                         
## [101] "Muscle contraction"                                                                                                         
## [102] "Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell"                                                   
## [103] "Binding and Uptake of Ligands by Scavenger Receptors"                                                                       
## [104] "MET promotes cell motility"                                                                                                 
## [105] "Collagen chain trimerization"                                                                                               
## [106] "Cell-cell junction organization"                                                                                            
## [107] "Basigin interactions"                                                                                                       
## [108] "VEGFA-VEGFR2 Pathway"                                                                                                       
## [109] "O-linked glycosylation of mucins"                                                                                           
## [110] "Interleukin-4 and Interleukin-13 signaling"                                                                                 
## [111] "Cholesterol biosynthesis"                                                                                                   
## [112] "Formation of Fibrin Clot (Clotting Cascade)"                                                                                
## [113] "Collagen degradation"                                                                                                       
## [114] "Metabolism of steroids"                                                                                                     
## [115] "Heparan sulfate/heparin (HS-GAG) metabolism"                                                                                
## [116] "Regulation of insulin secretion"                                                                                            
## [117] "Antigen Presentation: Folding, assembly and peptide loading of class I MHC"                                                 
## [118] "Intra-Golgi and retrograde Golgi-to-ER traffic"                                                                             
## [119] "Transcriptional regulation by the AP-2 (TFAP2) family of transcription factors"                                             
## [120] "Keratan sulfate/keratin metabolism"                                                                                         
## [121] "EPH-Ephrin signaling"                                                                                                       
## [122] "Diseases associated with O-glycosylation of proteins"                                                                       
## [123] "Platelet calcium homeostasis"                                                                                               
## [124] "O-glycosylation of TSR domain-containing proteins"                                                                          
## [125] "Amino acid transport across the plasma membrane"                                                                            
## [126] "Role of phospholipids in phagocytosis"                                                                                      
## [127] "Activation of gene expression by SREBF (SREBP)"                                                                             
## [128] "Constitutive Signaling by NOTCH1 HD Domain Mutants"                                                                         
## [129] "Signaling by NOTCH1 HD Domain Mutants in Cancer"                                                                            
## [130] "Translation"                                                                                                                
## [131] "Defective B3GALTL causes PpS"                                                                                               
## [132] "Activation of Matrix Metalloproteinases"                                                                                    
## [133] "G alpha (s) signalling events"

If we consider both strategies to be valid, then we can define the sets that are significant in either one as dysregulated. We can then calculate the sensitivity of each approach as the fraction of these dysregulated sets that it detects.

# Every gene set called significant by either strategy, counted once; this is
# the denominator for both sensitivity values below.
# (The variable name `all` is kept as-is; calls to base all() still work.)
all <- unique(c(ms_up,ms_dn,mc_up))

message("Sensitivity: separate only")
## Sensitivity: separate only
# Use the union so a set that is significant in both the up and down runs is
# counted once; summing the two lengths could push the fraction above 1.
length(union(ms_up, ms_dn)) / length(all)
## [1] 0.7356322
message("Sensitivity: combined only")
## Sensitivity: combined only
length(mc_up) / length(all)
## [1] 0.6465517

ORA with clusterProfiler

clusterProfiler uses a hypergeometric test. First, I will conduct the analysis separately for up- and down-regulated genes with the correct background (as intended by the developers), and then on the combined list of up- and down-regulated genes for comparison.

# Reactome gene sets loaded from the GMT file for use as TERM2GENE
genesets2 <- read.gmt("ReactomePathways.gmt")

# Up-regulated foreground: rows of `de` with padj < 0.05 and positive fold
# change (which() drops rows where either value is NA), then mapped through
# the first column of `gt` and de-duplicated.
# NOTE(review): `gt` is presumably the gene ID -> symbol table keyed by row
# name -- confirm against where it is built.
de_up <- rownames(de)[which(de$padj < 0.05 & de$log2FoldChange > 0)]
de_up <- unique(gt[rownames(gt) %in% de_up, 1])

# Down-regulated foreground: same filter with negative fold change
de_dn <- rownames(de)[which(de$padj < 0.05 & de$log2FoldChange < 0)]
de_dn <- unique(gt[rownames(gt) %in% de_dn, 1])

# Background: every gene present in the differential expression table
de_bg <- rownames(de)
de_bg <- unique(gt[rownames(gt) %in% de_bg, 1])

# Run over-representation analysis for one foreground gene list and return the
# names of the gene sets whose adjusted p-value is below `alpha`.
#   genes      character vector of foreground gene identifiers
#   universe   character vector of background gene identifiers
#   term2gene  TERM2GENE table (here the result of read.gmt())
#   alpha      adjusted p-value threshold
# Returns character(0) when nothing passes, including the case where
# enricher() returns NULL (as.data.frame(NULL) has no p.adjust column, which
# made the previous subset() call fail).
# NOTE(review): enricher() also applies its own default pvalueCutoff and
# qvalueCutoff before this filter -- confirm that is intended.
ora_sig_sets <- function(genes, universe, term2gene, alpha = 0.05) {
  res <- as.data.frame(enricher(gene = genes, universe = universe,
    maxGSSize = 5000, TERM2GENE = term2gene, pAdjustMethod = "fdr"))
  if (nrow(res) == 0) {
    return(character(0))
  }
  rownames(res)[which(res$p.adjust < alpha)]
}

# Separate analyses: up- and down-regulated genes tested on their own
o_up <- ora_sig_sets(de_up, de_bg, genesets2)
o_dn <- ora_sig_sets(de_dn, de_bg, genesets2)

# Combined analysis: both directions pooled into a single foreground list
o_com <- ora_sig_sets(union(de_up, de_dn), de_bg, genesets2)

# Number of significant gene sets from each ORA run
length(o_up)
## [1] 29
length(o_dn)
## [1] 149
length(o_up) + length(o_dn)
## [1] 178
length(o_com)
## [1] 76
# Ratio of separate (up + down) results to combined results
( length(o_up) + length(o_dn) ) / length(o_com)
## [1] 2.342105
# Every gene set found by any of the three ORA runs, counted once; this is the
# denominator for both sensitivity values below.
all <- unique(c(o_up,o_dn,o_com))

message("Sensitivity: separate only")
## Sensitivity: separate only
# Use the union so a set that is significant in both the up and down runs is
# counted once; summing the two lengths could push the fraction above 1.
length(union(o_up, o_dn)) / length(all)
## [1] 0.9222798
message("Sensitivity: combined only")
## Sensitivity: combined only
length(o_com) / length(all)
## [1] 0.3937824

Euler diagram of the significant pathways found with each approach.

# Significant ORA gene sets, one named element per analysis
l2 <- setNames(list(o_up, o_dn, o_com), c("sep up", "sep dn", "comb"))

# Euler diagram of the three result sets
plot(
  euler(l2),
  quantities = TRUE,
  edges = "gray",
  main = "ORA: combined vs separated"
)

List gene sets which are specific to each approach.

# Pool the up and down results of the separate ORA runs
o_sep <- c(o_up, o_dn)

# Gene sets significant in the separate analysis but not in the combined one
setdiff(x = o_sep, y = o_com)
##   [1] "Antigen processing: Ubiquitination & Proteasome degradation"                
##   [2] "Chromatin modifying enzymes"                                                
##   [3] "Chromatin organization"                                                     
##   [4] "HDMs demethylate histones"                                                  
##   [5] "Cholesterol biosynthesis"                                                   
##   [6] "Ub-specific processing proteases"                                           
##   [7] "Class I MHC mediated antigen processing & presentation"                     
##   [8] "Regulation of cholesterol biosynthesis by SREBP (SREBF)"                    
##   [9] "Metabolism of steroids"                                                     
##  [10] "Peroxisomal lipid metabolism"                                               
##  [11] "Metabolism of lipids"                                                       
##  [12] "Deubiquitination"                                                           
##  [13] "Gene expression (Transcription)"                                            
##  [14] "Activation of gene expression by SREBF (SREBP)"                             
##  [15] "Autophagy"                                                                  
##  [16] "Macroautophagy"                                                             
##  [17] "RNA Polymerase II Transcription"                                            
##  [18] "Rab regulation of trafficking"                                              
##  [19] "Peroxisomal protein import"                                                 
##  [20] "Endosomal Sorting Complex Required For Transport (ESCRT)"                   
##  [21] "Intra-Golgi and retrograde Golgi-to-ER traffic"                             
##  [22] "Protein localization"                                                       
##  [23] "Generic Transcription Pathway"                                              
##  [24] "Beta-oxidation of very long chain fatty acids"                              
##  [25] "RAB GEFs exchange GTP for GDP on RABs"                                      
##  [26] "Neddylation"                                                                
##  [27] "Retrograde transport at the Trans-Golgi-Network"                            
##  [28] "Major pathway of rRNA processing in the nucleolus and cytosol"              
##  [29] "rRNA processing in the nucleus and cytosol"                                 
##  [30] "rRNA processing"                                                            
##  [31] "Post-translational protein phosphorylation"                                 
##  [32] "Translation"                                                                
##  [33] "Collagen formation"                                                         
##  [34] "Cell surface interactions at the vascular wall"                             
##  [35] "Muscle contraction"                                                         
##  [36] "Degradation of the extracellular matrix"                                    
##  [37] "Glycosaminoglycan metabolism"                                               
##  [38] "ECM proteoglycans"                                                          
##  [39] "Signaling by GPCR"                                                          
##  [40] "Collagen biosynthesis and modifying enzymes"                                
##  [41] "Smooth Muscle Contraction"                                                  
##  [42] "Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell"   
##  [43] "Leishmania infection"                                                       
##  [44] "Assembly of collagen fibrils and other multimeric structures"               
##  [45] "Metabolism of amino acids and derivatives"                                  
##  [46] "GPCR downstream signalling"                                                 
##  [47] "Unfolded Protein Response (UPR)"                                            
##  [48] "SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion"          
##  [49] "MET activates PTK2 signaling"                                               
##  [50] "Platelet calcium homeostasis"                                               
##  [51] "Cellular responses to stimuli"                                              
##  [52] "GPCR ligand binding"                                                        
##  [53] "Cellular responses to stress"                                               
##  [54] "Anti-inflammatory response favouring Leishmania parasite infection"         
##  [55] "Leishmania parasite growth and survival"                                    
##  [56] "O-linked glycosylation"                                                     
##  [57] "Cardiac conduction"                                                         
##  [58] "Metabolism of proteins"                                                     
##  [59] "L1CAM interactions"                                                         
##  [60] "CDC42 GTPase cycle"                                                         
##  [61] "Basigin interactions"                                                       
##  [62] "Syndecan interactions"                                                      
##  [63] "Amino acid transport across the plasma membrane"                            
##  [64] "Diseases of glycosylation"                                                  
##  [65] "G alpha (i) signalling events"                                              
##  [66] "EPH-Ephrin signaling"                                                       
##  [67] "O-linked glycosylation of mucins"                                           
##  [68] "Metabolism of carbohydrates"                                                
##  [69] "Cell-extracellular matrix interactions"                                     
##  [70] "Adherens junctions interactions"                                            
##  [71] "Pre-NOTCH Processing in Golgi"                                              
##  [72] "Signaling by VEGF"                                                          
##  [73] "Signaling by Receptor Tyrosine Kinases"                                     
##  [74] "Ca2+ pathway"                                                               
##  [75] "Synthesis of substrates in N-glycan biosythesis"                            
##  [76] "Collagen degradation"                                                       
##  [77] "Pre-NOTCH Expression and Processing"                                        
##  [78] "Defective B3GALTL causes PpS"                                               
##  [79] "Sema3A PAK dependent Axon repulsion"                                        
##  [80] "Heparan sulfate/heparin (HS-GAG) metabolism"                                
##  [81] "Ion homeostasis"                                                            
##  [82] "Diseases of metabolism"                                                     
##  [83] "Diseases associated with glycosaminoglycan metabolism"                      
##  [84] "Integration of energy metabolism"                                           
##  [85] "MET promotes cell motility"                                                 
##  [86] "Interleukin-4 and Interleukin-13 signaling"                                 
##  [87] "Binding and Uptake of Ligands by Scavenger Receptors"                       
##  [88] "Sialic acid metabolism"                                                     
##  [89] "DAG and IP3 signaling"                                                      
##  [90] "Class A/1 (Rhodopsin-like receptors)"                                       
##  [91] "O-glycosylation of TSR domain-containing proteins"                          
##  [92] "Glutamate and glutamine metabolism"                                         
##  [93] "G alpha (s) signalling events"                                              
##  [94] "Fcgamma receptor (FCGR) dependent phagocytosis"                             
##  [95] "HS-GAG degradation"                                                         
##  [96] "Unwinding of DNA"                                                           
##  [97] "Chondroitin sulfate/dermatan sulfate metabolism"                            
##  [98] "Constitutive Signaling by NOTCH1 HD Domain Mutants"                         
##  [99] "Regulation of FZD by ubiquitination"                                        
## [100] "Signaling by NOTCH1 HD Domain Mutants in Cancer"                            
## [101] "Innate Immune System"                                                       
## [102] "Collagen chain trimerization"                                               
## [103] "Keratan sulfate/keratin metabolism"                                         
## [104] "Transport to the Golgi and subsequent modification"                         
## [105] "DNA strand elongation"                                                      
## [106] "PLC beta mediated events"                                                   
## [107] "RAC3 GTPase cycle"                                                          
## [108] "Interleukin-10 signaling"                                                   
## [109] "Nucleotide salvage"                                                         
## [110] "Regulation of insulin secretion"                                            
## [111] "Amyloid fiber formation"                                                    
## [112] "Insertion of tail-anchored proteins into the endoplasmic reticulum membrane"
## [113] "Immune System"                                                              
## [114] "G-protein mediated events"                                                  
## [115] "Formation of Fibrin Clot (Clotting Cascade)"                                
## [116] "NOTCH4 Intracellular Domain Regulates Transcription"                        
## [117] "Opioid Signalling"
# ORA gene sets reported by the combined analysis (o_com) that are absent
# from the separate-direction results (o_sep).
setdiff(x = o_com, y = o_sep)
##  [1] "Signaling by Rho GTPases, Miro GTPases and RHOBTB3"                              
##  [2] "Signaling by ALK fusions and activated point mutants"                            
##  [3] "Signaling by ALK in cancer"                                                      
##  [4] "Diseases of signal transduction by growth factor receptors and second messengers"
##  [5] "Signaling by Rho GTPases"                                                        
##  [6] "Platelet sensitization by LDL"                                                   
##  [7] "SARS-CoV-1 Infection"                                                            
##  [8] "Constitutive Signaling by NOTCH1 HD+PEST Domain Mutants"                         
##  [9] "Constitutive Signaling by NOTCH1 PEST Domain Mutants"                            
## [10] "Signaling by NOTCH1 HD+PEST Domain Mutants in Cancer"                            
## [11] "Signaling by NOTCH1 in Cancer"                                                   
## [12] "Signaling by NOTCH1 PEST Domain Mutants in Cancer"                               
## [13] "PINK1-PRKN Mediated Mitophagy"                                                   
## [14] "NOTCH1 Intracellular Domain Regulates Transcription"                             
## [15] "Signaling by WNT"
# intersection
# Gene sets present in both mc_up and ms.
# NOTE(review): the preceding comparison uses the ORA vectors o_com and o_sep,
# whereas this call uses mc_up (the vector later saved as "FCS_com") and ms.
# This may be a copy-paste slip for intersect(o_com, o_sep) -- confirm intent
# before relying on the printed list below as an ORA result.
intersect(mc_up,ms)
##   [1] "Signal Transduction"                                                                                                        
##   [2] "Disease"                                                                                                                    
##   [3] "Extracellular matrix organization"                                                                                          
##   [4] "Hemostasis"                                                                                                                 
##   [5] "Developmental Biology"                                                                                                      
##   [6] "Immune System"                                                                                                              
##   [7] "Axon guidance"                                                                                                              
##   [8] "Nervous system development"                                                                                                 
##   [9] "Infectious disease"                                                                                                         
##  [10] "Platelet degranulation"                                                                                                     
##  [11] "Response to elevated platelet cytosolic Ca2+"                                                                               
##  [12] "Platelet activation, signaling and aggregation"                                                                             
##  [13] "Metabolism of proteins"                                                                                                     
##  [14] "SRP-dependent cotranslational protein targeting to membrane"                                                                
##  [15] "Innate Immune System"                                                                                                       
##  [16] "Neutrophil degranulation"                                                                                                   
##  [17] "Viral mRNA Translation"                                                                                                     
##  [18] "Formation of a pool of free 40S subunits"                                                                                   
##  [19] "Eukaryotic Translation Elongation"                                                                                          
##  [20] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
##  [21] "Peptide chain elongation"                                                                                                   
##  [22] "RHO GTPase cycle"                                                                                                           
##  [23] "Integrin cell surface interactions"                                                                                         
##  [24] "Selenocysteine synthesis"                                                                                                   
##  [25] "L13a-mediated translational silencing of Ceruloplasmin expression"                                                          
##  [26] "Cap-dependent Translation Initiation"                                                                                       
##  [27] "Eukaryotic Translation Initiation"                                                                                          
##  [28] "Cellular responses to stimuli"                                                                                              
##  [29] "GTP hydrolysis and joining of the 60S ribosomal subunit"                                                                    
##  [30] "Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)"                                               
##  [31] "Post-translational protein phosphorylation"                                                                                 
##  [32] "Eukaryotic Translation Termination"                                                                                         
##  [33] "Signaling by Receptor Tyrosine Kinases"                                                                                     
##  [34] "Cellular responses to stress"                                                                                               
##  [35] "Cell-Cell communication"                                                                                                    
##  [36] "Selenoamino acid metabolism"                                                                                                
##  [37] "Collagen formation"                                                                                                         
##  [38] "Non-integrin membrane-ECM interactions"                                                                                     
##  [39] "Response of EIF2AK4 (GCN2) to amino acid deficiency"                                                                        
##  [40] "Cell junction organization"                                                                                                 
##  [41] "Cell surface interactions at the vascular wall"                                                                             
##  [42] "Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)"                                                  
##  [43] "Nonsense-Mediated Decay (NMD)"                                                                                              
##  [44] "Laminin interactions"                                                                                                       
##  [45] "RHOA GTPase cycle"                                                                                                          
##  [46] "Elastic fibre formation"                                                                                                    
##  [47] "IRE1alpha activates chaperones"                                                                                             
##  [48] "Unfolded Protein Response (UPR)"                                                                                            
##  [49] "XBP1(S) activates chaperone genes"                                                                                          
##  [50] "Degradation of the extracellular matrix"                                                                                    
##  [51] "Signaling by ROBO receptors"                                                                                                
##  [52] "Molecules associated with elastic fibres"                                                                                   
##  [53] "RAC1 GTPase cycle"                                                                                                          
##  [54] "RHOC GTPase cycle"                                                                                                          
##  [55] "Asparagine N-linked glycosylation"                                                                                          
##  [56] "Leishmania infection"                                                                                                       
##  [57] "Influenza Infection"                                                                                                        
##  [58] "Collagen biosynthesis and modifying enzymes"                                                                                
##  [59] "Cellular response to starvation"                                                                                            
##  [60] "Platelet homeostasis"                                                                                                       
##  [61] "Influenza Viral RNA Transcription and Replication"                                                                          
##  [62] "Glycosaminoglycan metabolism"                                                                                               
##  [63] "RHOB GTPase cycle"                                                                                                          
##  [64] "Signaling by GPCR"                                                                                                          
##  [65] "Regulation of expression of SLITs and ROBOs"                                                                                
##  [66] "Semaphorin interactions"                                                                                                    
##  [67] "ECM proteoglycans"                                                                                                          
##  [68] "O-linked glycosylation"                                                                                                     
##  [69] "Formation of the ternary complex, and subsequently, the 43S complex"                                                        
##  [70] "Signaling by VEGF"                                                                                                          
##  [71] "NOTCH4 Intracellular Domain Regulates Transcription"                                                                        
##  [72] "SARS-CoV-2 Infection"                                                                                                       
##  [73] "GPCR downstream signalling"                                                                                                 
##  [74] "Metabolism of lipids"                                                                                                       
##  [75] "Syndecan interactions"                                                                                                      
##  [76] "Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S"                     
##  [77] "Metabolism of amino acids and derivatives"                                                                                  
##  [78] "Cell-extracellular matrix interactions"                                                                                     
##  [79] "Ribosomal scanning and start codon recognition"                                                                             
##  [80] "Translation initiation complex formation"                                                                                   
##  [81] "RAC3 GTPase cycle"                                                                                                          
##  [82] "Translation of Structural Proteins"                                                                                         
##  [83] "Pre-NOTCH Expression and Processing"                                                                                        
##  [84] "CDC42 GTPase cycle"                                                                                                         
##  [85] "Regulation of cholesterol biosynthesis by SREBP (SREBF)"                                                                    
##  [86] "Ca2+ pathway"                                                                                                               
##  [87] "Diseases of metabolism"                                                                                                     
##  [88] "Fcgamma receptor (FCGR) dependent phagocytosis"                                                                             
##  [89] "MET activates PTK2 signaling"                                                                                               
##  [90] "Anti-inflammatory response favouring Leishmania parasite infection"                                                         
##  [91] "Leishmania parasite growth and survival"                                                                                    
##  [92] "Assembly of collagen fibrils and other multimeric structures"                                                               
##  [93] "Smooth Muscle Contraction"                                                                                                  
##  [94] "G alpha (i) signalling events"                                                                                              
##  [95] "Metabolism of carbohydrates"                                                                                                
##  [96] "SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion"                                                          
##  [97] "Diseases of glycosylation"                                                                                                  
##  [98] "PIP3 activates AKT signaling"                                                                                               
##  [99] "Adherens junctions interactions"                                                                                            
## [100] "L1CAM interactions"                                                                                                         
## [101] "Muscle contraction"                                                                                                         
## [102] "Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell"                                                   
## [103] "Binding and Uptake of Ligands by Scavenger Receptors"                                                                       
## [104] "MET promotes cell motility"                                                                                                 
## [105] "Collagen chain trimerization"                                                                                               
## [106] "Cell-cell junction organization"                                                                                            
## [107] "Basigin interactions"                                                                                                       
## [108] "VEGFA-VEGFR2 Pathway"                                                                                                       
## [109] "O-linked glycosylation of mucins"                                                                                           
## [110] "Interleukin-4 and Interleukin-13 signaling"                                                                                 
## [111] "Cholesterol biosynthesis"                                                                                                   
## [112] "Formation of Fibrin Clot (Clotting Cascade)"                                                                                
## [113] "Collagen degradation"                                                                                                       
## [114] "Metabolism of steroids"                                                                                                     
## [115] "Heparan sulfate/heparin (HS-GAG) metabolism"                                                                                
## [116] "Regulation of insulin secretion"                                                                                            
## [117] "Antigen Presentation: Folding, assembly and peptide loading of class I MHC"                                                 
## [118] "Intra-Golgi and retrograde Golgi-to-ER traffic"                                                                             
## [119] "Transcriptional regulation by the AP-2 (TFAP2) family of transcription factors"                                             
## [120] "Keratan sulfate/keratin metabolism"                                                                                         
## [121] "EPH-Ephrin signaling"                                                                                                       
## [122] "Diseases associated with O-glycosylation of proteins"                                                                       
## [123] "Platelet calcium homeostasis"                                                                                               
## [124] "O-glycosylation of TSR domain-containing proteins"                                                                          
## [125] "Amino acid transport across the plasma membrane"                                                                            
## [126] "Role of phospholipids in phagocytosis"                                                                                      
## [127] "Activation of gene expression by SREBF (SREBP)"                                                                             
## [128] "Constitutive Signaling by NOTCH1 HD Domain Mutants"                                                                         
## [129] "Signaling by NOTCH1 HD Domain Mutants in Cancer"                                                                            
## [130] "Translation"                                                                                                                
## [131] "Defective B3GALTL causes PpS"                                                                                               
## [132] "Activation of Matrix Metalloproteinases"                                                                                    
## [133] "G alpha (s) signalling events"

Euler diagrams comparing FCS and ORA methods

# Base-graphics settings: smaller main title and 2-line margins on all sides.
par(cex.main = 0.5, mar = c(2, 2, 2, 2))

# Gene-set name vectors from each analysis, keyed by method (ORA / FCS)
# and direction (up, dn, comb). These names become the diagram labels.
l3 <- list(
  "ORA up" = o_up,
  "ORA dn" = o_dn,
  "ORA comb" = o_com,
  "FCS up" = ms_up,
  "FCS dn" = ms_dn,
  "FCS comb" = mc_up
)

# Fit an Euler diagram to the six sets and draw it with region counts shown.
plot(
  euler(l3),
  quantities = TRUE,
  edges = "gray",
  main = "FCS compared to ORA"
)

Save data

# Collect the gene-set name vectors from every analysis into one named list
# (FCS first, then ORA; up, down, combined within each method).
dat <- setNames(
  list(ms_up, ms_dn, mc_up, o_up, o_dn, o_com),
  c("FCS_up", "FCS_dn", "FCS_com", "ORA_up", "ORA_dn", "ORA_com")
)

# Print a compact overview of the list contents.
str(dat)
## List of 6
##  $ FCS_up : chr [1:73] "Gene expression (Transcription)" "RNA Polymerase II Transcription" "Generic Transcription Pathway" "Antigen processing: Ubiquitination & Proteasome degradation" ...
##  $ FCS_dn : chr [1:183] "Extracellular matrix organization" "SRP-dependent cotranslational protein targeting to membrane" "Formation of a pool of free 40S subunits" "Eukaryotic Translation Elongation" ...
##  $ FCS_com: chr [1:225] "Signal Transduction" "Disease" "Extracellular matrix organization" "Hemostasis" ...
##  $ ORA_up : chr [1:29] "Antigen processing: Ubiquitination & Proteasome degradation" "Chromatin modifying enzymes" "Chromatin organization" "HDMs demethylate histones" ...
##  $ ORA_dn : chr [1:149] "Formation of a pool of free 40S subunits" "Eukaryotic Translation Elongation" "Peptide chain elongation" "Viral mRNA Translation" ...
##  $ ORA_com: chr [1:76] "Formation of a pool of free 40S subunits" "Peptide chain elongation" "Viral mRNA Translation" "Eukaryotic Translation Elongation" ...
# Serialise the collected results to an RDS file in the working directory.
saveRDS(object = dat, file = "ex7dat.rds")

Conclusion

For mitch, performing direction-informed (DI) analysis appears to yield more differentially regulated pathways (413) than the direction-agnostic (DA) method (55).

That being said, 18 pathways were identified only by the DA method, and these appeared to be related to the physiology of the model. These gene sets are likely to contain a mix of genes affected by the stimulus in different ways, for example a mix of up- and downregulated genes. Whether these represent genuine findings is unclear.

This pattern was consistent with ORA, where 80 sets were identified with separate analysis and only 23 with the combined analysis.

When comparing ORA to FCS, we found that FCS identified many more sets than ORA. In fact, all gene sets that were identified by ORA were also identified by FCS, except for one that was specific to the ORA up set.

Let’s look at it now.

# Pool the FCS "up" and FCS "combined" gene sets into a single vector.
myfcs <- append(ms_up, mc_up)

# ORA "up" gene sets that appear in neither of those FCS results.
setdiff(x = o_up, y = myfcs)
## [1] "Autophagy"

Session information

# Report the R version, platform and attached/loaded package versions.
utils::sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] eulerr_6.1.1                mitch_1.4.1                
##  [3] clusterProfiler_4.0.5       DESeq2_1.32.0              
##  [5] SummarizedExperiment_1.22.0 Biobase_2.52.0             
##  [7] MatrixGenerics_1.4.3        matrixStats_0.61.0         
##  [9] GenomicRanges_1.44.0        GenomeInfoDb_1.28.4        
## [11] IRanges_2.26.0              S4Vectors_0.30.2           
## [13] BiocGenerics_0.38.0         getDEE2_1.2.0              
## [15] beeswarm_0.4.0              kableExtra_1.3.4           
## 
## loaded via a namespace (and not attached):
##   [1] shadowtext_0.1.1       fastmatch_1.1-3        systemfonts_1.0.3     
##   [4] plyr_1.8.6             igraph_1.2.11          lazyeval_0.2.2        
##   [7] polylabelr_0.2.0       splines_4.1.2          BiocParallel_1.26.2   
##  [10] ggplot2_3.3.5          digest_0.6.29          yulab.utils_0.0.4     
##  [13] htmltools_0.5.2        GOSemSim_2.18.1        viridis_0.6.2         
##  [16] GO.db_3.13.0           fansi_1.0.0            magrittr_2.0.1        
##  [19] memoise_2.0.1          Biostrings_2.60.2      annotate_1.70.0       
##  [22] graphlayouts_0.8.0     svglite_2.0.0          enrichplot_1.12.3     
##  [25] colorspace_2.0-2       blob_1.2.2             rvest_1.0.2           
##  [28] ggrepel_0.9.1          xfun_0.29              dplyr_1.0.7           
##  [31] crayon_1.4.2           RCurl_1.98-1.5         jsonlite_1.7.2        
##  [34] scatterpie_0.1.7       genefilter_1.74.1      survival_3.2-13       
##  [37] ape_5.6-1              glue_1.6.0             polyclip_1.10-0       
##  [40] gtable_0.3.0           zlibbioc_1.38.0        XVector_0.32.0        
##  [43] webshot_0.5.2          htm2txt_2.1.1          DelayedArray_0.18.0   
##  [46] scales_1.1.1           DOSE_3.18.3            DBI_1.1.2             
##  [49] GGally_2.1.2           Rcpp_1.0.7             viridisLite_0.4.0     
##  [52] xtable_1.8-4           gridGraphics_0.5-1     tidytree_0.3.7        
##  [55] bit_4.0.4              htmlwidgets_1.5.4      httr_1.4.2            
##  [58] fgsea_1.18.0           gplots_3.1.1           RColorBrewer_1.1-2    
##  [61] ellipsis_0.3.2         reshape_0.8.8          pkgconfig_2.0.3       
##  [64] XML_3.99-0.8           farver_2.1.0           sass_0.4.0            
##  [67] locfit_1.5-9.4         utf8_1.2.2             later_1.3.0           
##  [70] ggplotify_0.1.0        tidyselect_1.1.1       rlang_0.4.12          
##  [73] reshape2_1.4.4         AnnotationDbi_1.54.1   munsell_0.5.0         
##  [76] tools_4.1.2            cachem_1.0.6           downloader_0.4        
##  [79] generics_0.1.1         RSQLite_2.2.9          evaluate_0.14         
##  [82] stringr_1.4.0          fastmap_1.1.0          yaml_2.2.1            
##  [85] ggtree_3.0.4           knitr_1.37             bit64_4.0.5           
##  [88] tidygraph_1.2.0        caTools_1.18.2         purrr_0.3.4           
##  [91] KEGGREST_1.32.0        ggraph_2.0.5           nlme_3.1-153          
##  [94] mime_0.12              aplot_0.1.2            DO.db_2.9             
##  [97] xml2_1.3.3             compiler_4.1.2         rstudioapi_0.13       
## [100] png_0.1-7              treeio_1.16.2          tibble_3.1.6          
## [103] tweenr_1.0.2           geneplotter_1.70.0     bslib_0.3.1           
## [106] stringi_1.7.6          highr_0.9              lattice_0.20-45       
## [109] Matrix_1.4-0           vctrs_0.3.8            pillar_1.6.4          
## [112] lifecycle_1.0.1        jquerylib_0.1.4        data.table_1.14.2     
## [115] cowplot_1.1.1          bitops_1.0-7           httpuv_1.6.5          
## [118] patchwork_1.1.1        qvalue_2.24.0          R6_2.5.1              
## [121] promises_1.2.0.1       KernSmooth_2.23-20     echarts4r_0.4.3       
## [124] gridExtra_2.3          gtools_3.9.2           MASS_7.3-54           
## [127] assertthat_0.2.1       GenomeInfoDbData_1.2.6 grid_4.1.2            
## [130] ggfun_0.0.4            tidyr_1.1.4            rmarkdown_2.11        
## [133] ggforce_0.3.3          shiny_1.7.1