Source: https://github.com/markziemann/combined_enrichment

Intro

Here we are performing an analysis of some gene expression data to understand whether direction agnostic (DA) or direction informed (DI) enrichment analysis is recommended.

This will be demonstrated by using two approaches: mitch and clusterprofiler. Mitch is a FCS method while clusterprofiler is an ORA method.

The dataset being used is SRP128998 and we are comparing the cells grown in normal glucose condition (control) to the high glucose condition (case).

Data are obtained from http://dee2.io/

suppressPackageStartupMessages({
library("getDEE2") 
library("DESeq2")
library("clusterProfiler")
library("mitch")
library("kableExtra")
library("eulerr")
library("gplots")
})

dir.create("images")
## Warning in dir.create("images"): 'images' already exists

Get expression data and make an MDS plot

I’m using some RNA-seq data looking at the effect of hyperglycemia on hepatocytes.

name="SRP128998"
mdat<-getDEE2Metadata("hsapiens")
samplesheet <- mdat[grep("SRP128998",mdat$SRP_accession),]
samplesheet<-samplesheet[order(samplesheet$SRR_accession),]
samplesheet$trt<-as.factor(c(1,1,1,1,1,1,0,0,0,0,0,0))
samplesheet$VPA<-as.factor(c(0,0,0,1,1,1,0,0,0,1,1,1))
s1 <- subset(samplesheet,VPA==0)

s1 #%>% kbl(caption = "sample sheet") %>% kable_paper("hover", full_width = F)
##        SRR_accession QC_summary SRX_accession SRS_accession SRP_accession
## 475895    SRR6467479       PASS    SRX3557428    SRS2830728     SRP128998
## 475896    SRR6467480       PASS    SRX3557429    SRS2830730     SRP128998
## 475897    SRR6467481       PASS    SRX3557430    SRS2830729     SRP128998
## 475901    SRR6467485       PASS    SRX3557434    SRS2830733     SRP128998
## 475902    SRR6467486       PASS    SRX3557435    SRS2830734     SRP128998
## 475903    SRR6467487       PASS    SRX3557436    SRS2830735     SRP128998
##        Sample_name GEO_series Library_name trt VPA
## 475895  GSM2932791  GSE109140                1   0
## 475896  GSM2932792  GSE109140                1   0
## 475897  GSM2932793  GSE109140                1   0
## 475901  GSM2932797  GSE109140                0   0
## 475902  GSM2932798  GSE109140                0   0
## 475903  GSM2932799  GSE109140                0   0
w<-getDEE2("hsapiens",samplesheet$SRR_accession,metadata=mdat,legacy = TRUE)
## For more information about DEE2 QC metrics, visit
##     https://github.com/markziemann/dee2/blob/master/qc/qc_metrics.md
x<-Tx2Gene(w)
x<-x$Tx2Gene

# save the genetable for later
gt<-w$GeneInfo[,1,drop=FALSE]
gt$accession<-rownames(gt)

# counts 
x1<-x[,which(colnames(x) %in% s1$SRR_accession)]

# calculate RPM
rpm <- x1/colSums(x1) * 1000000
rownames(rpm) <- sapply(strsplit(rownames(rpm),"\\."),"[[",1)

# convert to gene symbols
rpm2 <- rpm
rpm2$symbol <- w$GeneInfo[match(rownames(rpm),rownames(w$GeneInfo)),"GeneSymbol"]
rpm2 <- aggregate(. ~ symbol, rpm2, sum)
rownames(rpm2) <- rpm2$symbol
rpm2$symbol=NULL

Here show the number of genes in the annotation set, and those detected above the detection threshold.

# filter out lowly expressed genes
x1<-x1[which(rowMeans(x1)>=(10)),]
nrow(x)
## [1] 39297
nrow(x1)
## [1] 15635

Now multidimensional scaling (MDS) plot to show the correlation between the datasets. If the control and case datasets are clustered separately, then it is likely that there will be many differentially expressed genes with FDR<0.05.

plot(cmdscale(dist(t(x1))), xlab="Coordinate 1", ylab="Coordinate 2", pch=19, col=s1$trt, main="MDS")

Differential expression

Now run DESeq2 for control vs case.

y <- DESeqDataSetFromMatrix(countData = round(x1), colData = s1, design = ~ trt)
## converting counts to integer mode
y <- DESeq(y)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
de <- results(y)
de<-as.data.frame(de[order(de$pvalue),])
rownames(de)<-sapply(strsplit(rownames(de),"\\."),"[[",1)
head(de) #%>% kbl() %>% kable_paper("hover", full_width = F)
##                   baseMean log2FoldChange     lfcSE      stat       pvalue
## ENSG00000145050   5839.731      -2.753692 0.1518338 -18.13623 1.649761e-73
## ENSG00000149131   1346.633       2.161115 0.1427218  15.14215 8.537621e-52
## ENSG00000044574 124889.027      -2.033391 0.1343040 -15.14021 8.793666e-52
## ENSG00000128228   1676.368      -2.836358 0.1895729 -14.96183 1.303975e-50
## ENSG00000179218  78785.663      -2.227516 0.1586383 -14.04148 8.688158e-45
## ENSG00000090520   6751.044      -2.138112 0.1538129 -13.90073 6.270171e-44
##                         padj
## ENSG00000145050 2.427458e-69
## ENSG00000149131 4.313000e-48
## ENSG00000044574 4.313000e-48
## ENSG00000128228 4.796673e-47
## ENSG00000179218 2.556751e-41
## ENSG00000090520 1.537655e-40

Now let’s have a look at some of the charts showing differential expression. In particular, an MA plot and volcano plot.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=0.7)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 0.7)
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=0.5,pch=19,col="red")
}

make_volcano2 <- function(de,name,genes) {
  myens <- rownames(w$GeneInfo)[match(genes,w$GeneInfo$GeneSymbol)]
  sig <- de[which(rownames(de) %in% myens),]
    N_SIG=nrow(subset(sig,padj<0.05))
    N_UP=nrow(subset(sig,log2FoldChange>0 & padj<0.05))
    N_DN=nrow(subset(sig,log2FoldChange<0 & padj<0.05))
    DET=nrow(sig)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=0.5,pch=19,col="red")
}

maplot(de,name)

make_volcano(de,name)

Gene sets from Reactome

In order to perform gene set analysis, we need some gene sets.

if (! file.exists("ReactomePathways.gmt")) {
  download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", 
    destfile="ReactomePathways.gmt.zip")
  unzip("ReactomePathways.gmt.zip")
}
genesets<-gmt_import("ReactomePathways.gmt")

FCS with Mitch

Mitch uses rank-ANOVA statistics for enrichment detection.

Here I’m using the standard approach

m <- mitch_import(de,DEtype = "DEseq2", geneTable = gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15635
## Note: no. genes in output = 14533
## Note: estimated proportion of input genes in output = 0.93
msep <- mitch_calc(m,genesets = genesets)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
ms_up <- subset(msep$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0)[,1]
ms_dn <- subset(msep$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0)[,1]
message(paste("Number of up-regulated pathways:",length(ms_up) ))
## Number of up-regulated pathways: 96
message(paste("Number of down-regulated pathways:",length(ms_dn) ))
## Number of down-regulated pathways: 317
head(msep$enrichment_result,10)  #%>% kbl() %>% kable_paper("hover", full_width = F)
##                                                                    set setSize
## 155                                      Cellular responses to stimuli     641
## 156                                       Cellular responses to stress     633
## 509                                                 Infectious disease     637
## 1038       SRP-dependent cotranslational protein targeting to membrane     109
## 74                                   Asparagine N-linked glycosylation     252
## 87                                                       Axon guidance     424
## 154                                    Cellular response to starvation     149
## 630                                             Metabolism of proteins    1550
## 708                                         Nervous system development     444
## 573  L13a-mediated translational silencing of Ceruloplasmin expression     108
##            pANOVA     s.dist p.adjustANOVA
## 155  1.360591e-21 -0.2223004  1.873534e-18
## 156  2.199278e-20 -0.2167957  1.514203e-17
## 509  1.062198e-18 -0.2062708  4.875487e-16
## 1038 1.478693e-17 -0.4728714  5.090401e-15
## 74   6.699566e-17 -0.3060943  1.845061e-14
## 87   2.943290e-16 -0.2324048  6.754851e-14
## 154  2.022808e-14 -0.3633278  3.979152e-12
## 630  3.249987e-14 -0.1176239  5.594040e-12
## 708  6.246008e-14 -0.2086060  9.556392e-12
## 573  4.147553e-13 -0.4039887  5.711180e-11
#mitch_report(msep,outfile="mitch_separate.html",overwrite=TRUE)

Here I’m using the combined approach.

mcom <- mitch_calc(abs(m),genesets = genesets)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
mc_up <- subset(mcom$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0)[,1]
mc_dn <- subset(mcom$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0)[,1]
message(paste("Number of up-regulated pathways:",length(mc_up) ))
## Number of up-regulated pathways: 55
message(paste("Number of down-regulated pathways:",length(mc_dn) ))
## Number of down-regulated pathways: 1
head(mcom$enrichment_result,10)  #%>% kbl() %>% kable_paper("hover", full_width = F)
##                                                          set setSize
## 1318                         Unfolded Protein Response (UPR)      89
## 74                         Asparagine N-linked glycosylation     252
## 498                           IRE1alpha activates chaperones      50
## 520                                     Innate Immune System     740
## 162                                 Cholesterol biosynthesis      24
## 977  Regulation of cholesterol biosynthesis by SREBP (SREBF)      54
## 1341                       XBP1(S) activates chaperone genes      48
## 1072                                     Signal Transduction    1748
## 617                                               Metabolism    1696
## 502                                            Immune System    1374
##            pANOVA     s.dist p.adjustANOVA
## 1318 9.255221e-10 0.37558770  1.274444e-06
## 74   7.652970e-09 0.21181045  5.120234e-06
## 498  1.478028e-08 0.46305323  5.120234e-06
## 520  1.487359e-08 0.12331539  5.120234e-06
## 162  2.322430e-08 0.65857399  6.395972e-06
## 977  5.074137e-08 0.42870517  1.164514e-05
## 1341 1.207871e-07 0.44158037  2.210290e-05
## 1072 1.284119e-07 0.07772293  2.210290e-05
## 617  1.497471e-07 0.07832496  2.291130e-05
## 502  1.746973e-07 0.08548386  2.405582e-05
#mitch_report(mcom,outfile="mitch_combined.html",overwrite=TRUE)

Let’s look at the significant ones based on the combined analysis. There weren’t many gene sets classed as significant. Let’s see how many have a direction agnostic enrichment score which is larger in magnitude than the direction informed enrichment score. There are only 11 such sets which would benefit from such combined analysis.

Euler diagram of the significant pathways found with each approach.

l0 <- list("sep-DE up"=ms_up,"sep-DE dn"=ms_dn,"all-DE up"=mc_up,"all-DE dn"=mc_dn)
par(cex.main=0.5)
plot(euler(l0),quantities = TRUE, edges = "gray", main="FCS: combined vs separated")

length(ms_up)
## [1] 96
length(ms_dn)
## [1] 317
length(ms_up)+length(ms_dn)
## [1] 413
length(mc_up)
## [1] 55
length(mc_dn)
## [1] 1
length(mc_up)+length(mc_dn)
## [1] 56
( length(ms_up)+length(ms_dn) ) / ( length(mc_up)+length(mc_dn) )
## [1] 7.375

List gene sets which are specific to each approach.

ms <- c(ms_up,ms_dn)

# in sep but not comb
sep <- setdiff(ms,mc_up)
sep
##   [1] "Diseases of DNA repair"                                                                                                     
##   [2] "Homologous DNA Pairing and Strand Exchange"                                                                                 
##   [3] "Mitotic Prometaphase"                                                                                                       
##   [4] "Resolution of D-loop Structures through Holliday Junction Intermediates"                                                    
##   [5] "Biological oxidations"                                                                                                      
##   [6] "Resolution of D-Loop Structures"                                                                                            
##   [7] "Homology Directed Repair"                                                                                                   
##   [8] "Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function"                                         
##   [9] "Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function"                                         
##  [10] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function"                    
##  [11] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA2/RAD51/RAD51C binding function"       
##  [12] "Diseases of DNA Double-Strand Break Repair"                                                                                 
##  [13] "Presynaptic phase of homologous DNA pairing and strand exchange"                                                            
##  [14] "HDR through Homologous Recombination (HRR)"                                                                                 
##  [15] "Chromosome Maintenance"                                                                                                     
##  [16] "Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)"                                        
##  [17] "HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA)"                                                
##  [18] "Unwinding of DNA"                                                                                                           
##  [19] "Activation of ATR in response to replication stress"                                                                        
##  [20] "Phase II - Conjugation of compounds"                                                                                        
##  [21] "DNA Double-Strand Break Repair"                                                                                             
##  [22] "Cilium Assembly"                                                                                                            
##  [23] "Deposition of new CENPA-containing nucleosomes at the centromere"                                                           
##  [24] "Nucleosome assembly"                                                                                                        
##  [25] "DNA Repair"                                                                                                                 
##  [26] "HDR through Single Strand Annealing (SSA)"                                                                                  
##  [27] "Telomere C-strand (Lagging Strand) Synthesis"                                                                               
##  [28] "Cell Cycle Checkpoints"                                                                                                     
##  [29] "MyD88 deficiency (TLR2/4)"                                                                                                  
##  [30] "Lagging Strand Synthesis"                                                                                                   
##  [31] "Mitotic Spindle Checkpoint"                                                                                                 
##  [32] "IRAK4 deficiency (TLR2/4)"                                                                                                  
##  [33] "Activation of the pre-replicative complex"                                                                                  
##  [34] "Base Excision Repair"                                                                                                       
##  [35] "Processive synthesis on the lagging strand"                                                                                 
##  [36] "Anchoring of the basal body to the plasma membrane"                                                                         
##  [37] "AURKA Activation by TPX2"                                                                                                   
##  [38] "Glutathione conjugation"                                                                                                    
##  [39] "Extension of Telomeres"                                                                                                     
##  [40] "Resolution of Sister Chromatid Cohesion"                                                                                    
##  [41] "Regulation of TLR by endogenous ligand"                                                                                     
##  [42] "Leading Strand Synthesis"                                                                                                   
##  [43] "Polymerase switching"                                                                                                       
##  [44] "Collagen chain trimerization"                                                                                               
##  [45] "Formation of Fibrin Clot (Clotting Cascade)"                                                                                
##  [46] "Removal of the Flap Intermediate"                                                                                           
##  [47] "Recruitment of NuMA to mitotic centrosomes"                                                                                 
##  [48] "Polymerase switching on the C-strand of the telomere"                                                                       
##  [49] "Resolution of Abasic Sites (AP sites)"                                                                                      
##  [50] "Loss of Nlp from mitotic centrosomes"                                                                                       
##  [51] "Loss of proteins required for interphase microtubule organization from the centrosome"                                      
##  [52] "Meiotic recombination"                                                                                                      
##  [53] "Centrosome maturation"                                                                                                      
##  [54] "Recruitment of mitotic centrosome proteins and complexes"                                                                   
##  [55] "Fanconi Anemia Pathway"                                                                                                     
##  [56] "Telomere Maintenance"                                                                                                       
##  [57] "G2/M DNA damage checkpoint"                                                                                                 
##  [58] "Intraflagellar transport"                                                                                                   
##  [59] "Base-Excision Repair, AP Site Formation"                                                                                    
##  [60] "Mismatch repair (MMR) directed by MSH2:MSH6 (MutSalpha)"                                                                    
##  [61] "Mismatch repair (MMR) directed by MSH2:MSH3 (MutSbeta)"                                                                     
##  [62] "Peroxisomal protein import"                                                                                                 
##  [63] "Cleavage of the damaged pyrimidine"                                                                                         
##  [64] "Depyrimidination"                                                                                                           
##  [65] "Recognition and association of DNA glycosylase with site containing an affected pyrimidine"                                 
##  [66] "Mismatch Repair"                                                                                                            
##  [67] "Phase I - Functionalization of compounds"                                                                                   
##  [68] "Processive synthesis on the C-strand of the telomere"                                                                       
##  [69] "Processing of DNA double-strand break ends"                                                                                 
##  [70] "Organelle biogenesis and maintenance"                                                                                       
##  [71] "Post-translational modification: synthesis of GPI-anchored proteins"                                                        
##  [72] "Inactivation of APC/C via direct inhibition of the APC/C complex"                                                           
##  [73] "Inhibition of the proteolytic activity of APC/C required for the onset of anaphase by mitotic spindle checkpoint components"
##  [74] "Defective pyroptosis"                                                                                                       
##  [75] "Carboxyterminal post-translational modifications of tubulin"                                                                
##  [76] "Ethanol oxidation"                                                                                                          
##  [77] "Integrin cell surface interactions"                                                                                         
##  [78] "M Phase"                                                                                                                    
##  [79] "Regulation of PLK1 Activity at G2/M Transition"                                                                             
##  [80] "Peroxisomal lipid metabolism"                                                                                               
##  [81] "Glyoxylate metabolism and glycine degradation"                                                                              
##  [82] "Common Pathway of Fibrin Clot Formation"                                                                                    
##  [83] "Metabolism of vitamins and cofactors"                                                                                       
##  [84] "Post-chaperonin tubulin folding pathway"                                                                                    
##  [85] "Infectious disease"                                                                                                         
##  [86] "SRP-dependent cotranslational protein targeting to membrane"                                                                
##  [87] "Axon guidance"                                                                                                              
##  [88] "Cellular response to starvation"                                                                                            
##  [89] "Nervous system development"                                                                                                 
##  [90] "L13a-mediated translational silencing of Ceruloplasmin expression"                                                          
##  [91] "GTP hydrolysis and joining of the 60S ribosomal subunit"                                                                    
##  [92] "Developmental Biology"                                                                                                      
##  [93] "Translation"                                                                                                                
##  [94] "Cap-dependent Translation Initiation"                                                                                       
##  [95] "Eukaryotic Translation Initiation"                                                                                          
##  [96] "Regulation of expression of SLITs and ROBOs"                                                                                
##  [97] "Signaling by ROBO receptors"                                                                                                
##  [98] "Diseases of signal transduction by growth factor receptors and second messengers"                                           
##  [99] "Response of EIF2AK4 (GCN2) to amino acid deficiency"                                                                        
## [100] "Formation of a pool of free 40S subunits"                                                                                   
## [101] "Metabolism of RNA"                                                                                                          
## [102] "Eukaryotic Translation Elongation"                                                                                          
## [103] "Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)"                                               
## [104] "Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)"                                                  
## [105] "Nonsense-Mediated Decay (NMD)"                                                                                              
## [106] "Viral mRNA Translation"                                                                                                     
## [107] "Influenza Infection"                                                                                                        
## [108] "rRNA processing in the nucleus and cytosol"                                                                                 
## [109] "Peptide chain elongation"                                                                                                   
## [110] "SARS-CoV Infections"                                                                                                        
## [111] "Major pathway of rRNA processing in the nucleolus and cytosol"                                                              
## [112] "Eukaryotic Translation Termination"                                                                                         
## [113] "Vesicle-mediated transport"                                                                                                 
## [114] "Signaling by Receptor Tyrosine Kinases"                                                                                     
## [115] "Membrane Trafficking"                                                                                                       
## [116] "Selenocysteine synthesis"                                                                                                   
## [117] "Transport to the Golgi and subsequent modification"                                                                         
## [118] "PIP3 activates AKT signaling"                                                                                               
## [119] "Ribosomal scanning and start codon recognition"                                                                             
## [120] "ER to Golgi Anterograde Transport"                                                                                          
## [121] "Selenoamino acid metabolism"                                                                                                
## [122] "Translation initiation complex formation"                                                                                   
## [123] "Influenza Viral RNA Transcription and Replication"                                                                          
## [124] "Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S"                     
## [125] "rRNA processing"                                                                                                            
## [126] "Intracellular signaling by second messengers"                                                                               
## [127] "Formation of the ternary complex, and subsequently, the 43S complex"                                                        
## [128] "Signaling by NTRK1 (TRKA)"                                                                                                  
## [129] "SARS-CoV-2 Infection"                                                                                                       
## [130] "Autophagy"                                                                                                                  
## [131] "Signaling by the B Cell Receptor (BCR)"                                                                                     
## [132] "MAPK family signaling cascades"                                                                                             
## [133] "TCR signaling"                                                                                                              
## [134] "RHO GTPase cycle"                                                                                                           
## [135] "Fc epsilon receptor (FCERI) signaling"                                                                                      
## [136] "Macroautophagy"                                                                                                             
## [137] "RAF/MAP kinase cascade"                                                                                                     
## [138] "MyD88 cascade initiated on plasma membrane"                                                                                 
## [139] "Toll Like Receptor 10 (TLR10) Cascade"                                                                                      
## [140] "Toll Like Receptor 5 (TLR5) Cascade"                                                                                        
## [141] "Downstream signaling events of B Cell Receptor (BCR)"                                                                       
## [142] "COPI-mediated anterograde transport"                                                                                        
## [143] "Signaling by WNT"                                                                                                           
## [144] "Toll Like Receptor 3 (TLR3) Cascade"                                                                                        
## [145] "Cytokine Signaling in Immune system"                                                                                        
## [146] "ER Quality Control Compartment (ERQC)"                                                                                      
## [147] "Hh mutants are degraded by ERAD"                                                                                            
## [148] "Signaling by NTRKs"                                                                                                         
## [149] "MyD88 dependent cascade initiated on endosome"                                                                              
## [150] "TRAF6 mediated induction of NFkB and MAP kinases upon TLR7/8 or 9 activation"                                               
## [151] "Toll Like Receptor 7/8 (TLR7/8) Cascade"                                                                                    
## [152] "Hh mutants abrogate ligand secretion"                                                                                       
## [153] "Hedgehog ligand biogenesis"                                                                                                 
## [154] "MAPK1/MAPK3 signaling"                                                                                                      
## [155] "Signaling by Insulin receptor"                                                                                              
## [156] "Toll Like Receptor 9 (TLR9) Cascade"                                                                                        
## [157] "Defective CFTR causes cystic fibrosis"                                                                                      
## [158] "MyD88-independent TLR4 cascade"                                                                                             
## [159] "TRIF(TICAM1)-mediated TLR4 signaling"                                                                                       
## [160] "Downstream TCR signaling"                                                                                                   
## [161] "Chromatin modifying enzymes"                                                                                                
## [162] "Chromatin organization"                                                                                                     
## [163] "Degradation of beta-catenin by the destruction complex"                                                                     
## [164] "MAP kinase activation"                                                                                                      
## [165] "Activation of NF-kappaB in B cells"                                                                                         
## [166] "PTEN Regulation"                                                                                                            
## [167] "Regulation of HMOX1 expression and activity"                                                                                
## [168] "Adaptive Immune System"                                                                                                     
## [169] "Insulin receptor recycling"                                                                                                 
## [170] "Interleukin-17 signaling"                                                                                                   
## [171] "RAC3 GTPase cycle"                                                                                                          
## [172] "trans-Golgi Network Vesicle Budding"                                                                                        
## [173] "Interleukin-1 signaling"                                                                                                    
## [174] "Deubiquitination"                                                                                                           
## [175] "UCH proteinases"                                                                                                            
## [176] "COPII-mediated vesicle transport"                                                                                           
## [177] "Cytoprotection by HMOX1"                                                                                                    
## [178] "Regulation of RUNX2 expression and activity"                                                                                
## [179] "RAC2 GTPase cycle"                                                                                                          
## [180] "Signaling by NOTCH"                                                                                                         
## [181] "ROS sensing by NFE2L2"                                                                                                      
## [182] "FCERI mediated NF-kB activation"                                                                                            
## [183] "Degradation of AXIN"                                                                                                        
## [184] "Circadian Clock"                                                                                                            
## [185] "Potential therapeutics for SARS"                                                                                            
## [186] "ABC transporter disorders"                                                                                                  
## [187] "ABC-family proteins mediated transport"                                                                                     
## [188] "Regulation of PTEN stability and activity"                                                                                  
## [189] "C-type lectin receptors (CLRs)"                                                                                             
## [190] "Nuclear Events (kinase and transcription factor activation)"                                                                
## [191] "TCF dependent signaling in response to WNT"                                                                                 
## [192] "Negative regulation of MAPK pathway"                                                                                        
## [193] "Regulation of mRNA stability by proteins that bind AU-rich elements"                                                        
## [194] "Cellular response to chemical stress"                                                                                       
## [195] "Interleukin-1 family signaling"                                                                                             
## [196] "mRNA Splicing - Major Pathway"                                                                                              
## [197] "Cargo concentration in the ER"                                                                                              
## [198] "CLEC7A (Dectin-1) signaling"                                                                                                
## [199] "Signaling by VEGF"                                                                                                          
## [200] "Dectin-1 mediated noncanonical NF-kB signaling"                                                                             
## [201] "NIK-->noncanonical NF-kB signaling"                                                                                         
## [202] "Regulation of RUNX3 expression and activity"                                                                                
## [203] "TGF-beta receptor signaling activates SMADs"                                                                                
## [204] "RAC1 GTPase cycle"                                                                                                          
## [205] "mRNA Splicing"                                                                                                              
## [206] "MAPK6/MAPK4 signaling"                                                                                                      
## [207] "Ubiquitin-dependent degradation of Cyclin D"                                                                                
## [208] "RHOG GTPase cycle"                                                                                                          
## [209] "Degradation of GLI1 by the proteasome"                                                                                      
## [210] "AUF1 (hnRNP D0) binds and destabilizes mRNA"                                                                                
## [211] "Degradation of GLI2 by the proteasome"                                                                                      
## [212] "GLI3 is processed to GLI3R by the proteasome"                                                                               
## [213] "Transcriptional regulation by RUNX3"                                                                                        
## [214] "Transport of inorganic cations/anions and amino acids/oligopeptides"                                                        
## [215] "Lysosome Vesicle Biogenesis"                                                                                                
## [216] "Degradation of DVL"                                                                                                         
## [217] "Heme signaling"                                                                                                             
## [218] "Intra-Golgi and retrograde Golgi-to-ER traffic"                                                                             
## [219] "PCP/CE pathway"                                                                                                             
## [220] "Regulation of activated PAK-2p34 by proteasome mediated degradation"                                                        
## [221] "FBXL7 down-regulates AURKA during mitotic entry and in early mitosis"                                                       
## [222] "Signaling by TGF-beta Receptor Complex"                                                                                     
## [223] "Golgi Associated Vesicle Biogenesis"                                                                                        
## [224] "Ub-specific processing proteases"                                                                                           
## [225] "Amino acids regulate mTORC1"                                                                                                
## [226] "Processing of Capped Intron-Containing Pre-mRNA"                                                                            
## [227] "PI3K/AKT Signaling in Cancer"                                                                                               
## [228] "ROS and RNS production in phagocytes"                                                                                       
## [229] "Signalling to ERKs"                                                                                                         
## [230] "Toll-like Receptor Cascades"                                                                                                
## [231] "Signaling by EGFR"                                                                                                          
## [232] "Transcriptional regulation by RUNX2"                                                                                        
## [233] "Cellular response to hypoxia"                                                                                               
## [234] "Autodegradation of the E3 ubiquitin ligase COP1"                                                                            
## [235] "Negative regulation of NOTCH4 signaling"                                                                                    
## [236] "Amino acid transport across the plasma membrane"                                                                            
## [237] "VEGFA-VEGFR2 Pathway"                                                                                                       
## [238] "Oxygen-dependent proline hydroxylation of Hypoxia-inducible Factor Alpha"                                                   
## [239] "Transcriptional regulation by RUNX1"                                                                                        
## [240] "Uptake and actions of bacterial toxins"                                                                                     
## [241] "RUNX1 regulates transcription of genes involved in differentiation of HSCs"                                                 
## [242] "Regulation of RAS by GAPs"                                                                                                  
## [243] "Signaling by NOTCH4"                                                                                                        
## [244] "Regulation of TP53 Activity through Acetylation"                                                                            
## [245] "PI Metabolism"                                                                                                              
## [246] "Downregulation of TGF-beta receptor signaling"                                                                              
## [247] "Vif-mediated degradation of APOBEC3G"                                                                                       
## [248] "Regulation of ornithine decarboxylase (ODC)"                                                                                
## [249] "SCF-beta-TrCP mediated degradation of Emi1"                                                                                 
## [250] "Clathrin-mediated endocytosis"                                                                                              
## [251] "Class I MHC mediated antigen processing & presentation"                                                                     
## [252] "Hedgehog 'on' state"                                                                                                        
## [253] "AKT phosphorylates targets in the cytosol"                                                                                  
## [254] "Regulation of Apoptosis"                                                                                                    
## [255] "Beta-catenin independent WNT signaling"                                                                                     
## [256] "HDMs demethylate histones"                                                                                                  
## [257] "Host Interactions of HIV factors"                                                                                           
## [258] "MyD88:MAL(TIRAP) cascade initiated on plasma membrane"                                                                      
## [259] "Toll Like Receptor 2 (TLR2) Cascade"                                                                                        
## [260] "Toll Like Receptor TLR1:TLR2 Cascade"                                                                                       
## [261] "Toll Like Receptor TLR6:TLR2 Cascade"                                                                                       
## [262] "Cross-presentation of soluble exogenous antigens (endosomes)"                                                               
## [263] "RHOV GTPase cycle"                                                                                                          
## [264] "Regulation of actin dynamics for phagocytic cup formation"                                                                  
## [265] "MAP3K8 (TPL2)-dependent MAPK1/3 activation"                                                                                 
## [266] "Vpu mediated degradation of CD4"                                                                                            
## [267] "Negative regulation of the PI3K/AKT network"                                                                                
## [268] "MAPK targets/ Nuclear events mediated by MAP kinases"                                                                       
## [269] "Infection with Mycobacterium tuberculosis"                                                                                  
## [270] "TBC/RABGAPs"                                                                                                                
## [271] "HIV Infection"                                                                                                              
## [272] "Toll Like Receptor 4 (TLR4) Cascade"                                                                                        
## [273] "Fcgamma receptor (FCGR) dependent phagocytosis"                                                                             
## [274] "EPH-Ephrin signaling"                                                                                                       
## [275] "Signaling by EGFR in Cancer"                                                                                                
## [276] "Nuclear Receptor transcription pathway"                                                                                     
## [277] "Signaling by NOTCH1"                                                                                                        
## [278] "RHOQ GTPase cycle"                                                                                                          
## [279] "Response of Mtb to phagocytosis"                                                                                            
## [280] "Retrograde transport at the Trans-Golgi-Network"                                                                            
## [281] "RHOBTB GTPase Cycle"                                                                                                        
## [282] "TNFR2 non-canonical NF-kB pathway"                                                                                          
## [283] "rRNA modification in the nucleus and cytosol"                                                                               
## [284] "ESR-mediated signaling"                                                                                                     
## [285] "The role of GTSE1 in G2/M progression after G2 checkpoint"                                                                  
## [286] "SARS-CoV-1 Infection"                                                                                                       
## [287] "Disorders of transmembrane transporters"                                                                                    
## [288] "Folding of actin by CCT/TriC"                                                                                               
## [289] "Stabilization of p53"                                                                                                       
## [290] "Cell-extracellular matrix interactions"                                                                                     
## [291] "Uptake and function of anthrax toxins"                                                                                      
## [292] "NGF-stimulated transcription"                                                                                               
## [293] "Response of EIF2AK1 (HRI) to heme deficiency"                                                                               
## [294] "Metabolism of polyamines"                                                                                                   
## [295] "Signaling by FGFR4"                                                                                                         
## [296] "EGFR downregulation"                                                                                                        
## [297] "Ion channel transport"                                                                                                      
## [298] "Extra-nuclear estrogen signaling"                                                                                           
## [299] "Cargo recognition for clathrin-mediated endocytosis"                                                                        
## [300] "TP53 Regulates Metabolic Genes"                                                                                             
## [301] "Signaling by TGFB family members"                                                                                           
## [302] "Apoptosis"                                                                                                                  
## [303] "Antigen Presentation: Folding, assembly and peptide loading of class I MHC"                                                 
## [304] "Signaling by Hedgehog"                                                                                                      
## [305] "Negative regulation of FGFR1 signaling"                                                                                     
## [306] "Constitutive Signaling by Ligand-Responsive EGFR Cancer Variants"                                                           
## [307] "Signaling by Ligand-Responsive EGFR Variants in Cancer"                                                                     
## [308] "CDT1 association with the CDC6:ORC:origin complex"                                                                          
## [309] "Signaling by FGFR3"                                                                                                         
## [310] "RND2 GTPase cycle"                                                                                                          
## [311] "Negative regulation of FGFR4 signaling"                                                                                     
## [312] "Antigen processing-Cross presentation"                                                                                      
## [313] "Ubiquitin Mediated Degradation of Phosphorylated Cdc25A"                                                                    
## [314] "p53-Independent DNA Damage Response"                                                                                        
## [315] "p53-Independent G1/S DNA damage checkpoint"                                                                                 
## [316] "PI5P, PP2A and IER3 Regulate PI3K/AKT Signaling"                                                                            
## [317] "p53-Dependent G1 DNA Damage Response"                                                                                       
## [318] "p53-Dependent G1/S DNA damage checkpoint"                                                                                   
## [319] "Cytosolic sensors of pathogen-associated DNA"                                                                               
## [320] "Regulation of signaling by CBL"                                                                                             
## [321] "Scavenging by Class A Receptors"                                                                                            
## [322] "RHOBTB2 GTPase cycle"                                                                                                       
## [323] "Attenuation phase"                                                                                                          
## [324] "Intra-Golgi traffic"                                                                                                        
## [325] "Antigen processing: Ubiquitination & Proteasome degradation"                                                                
## [326] "PERK regulates gene expression"                                                                                             
## [327] "RHOC GTPase cycle"                                                                                                          
## [328] "Nephrin family interactions"                                                                                                
## [329] "RIP-mediated NFkB activation via ZBP1"                                                                                      
## [330] "RUNX3 regulates p14-ARF"                                                                                                    
## [331] "SCF(Skp2)-mediated degradation of p27/p21"                                                                                  
## [332] "FCGR3A-mediated phagocytosis"                                                                                               
## [333] "Leishmania phagocytosis"                                                                                                    
## [334] "Parasite infection"                                                                                                         
## [335] "Activated NOTCH1 Transmits Signal to the Nucleus"                                                                           
## [336] "RHOA GTPase cycle"                                                                                                          
## [337] "Suppression of phagosomal maturation"                                                                                       
## [338] "RHOJ GTPase cycle"                                                                                                          
## [339] "RHOB GTPase cycle"                                                                                                          
## [340] "Insulin receptor signalling cascade"                                                                                        
## [341] "Cellular response to heat stress"                                                                                           
## [342] "ATF6 (ATF6-alpha) activates chaperone genes"                                                                                
## [343] "ERK/MAPK targets"                                                                                                           
## [344] "FOXO-mediated transcription"                                                                                                
## [345] "Signaling by FGFR1"                                                                                                         
## [346] "Sphingolipid metabolism"                                                                                                    
## [347] "RET signaling"                                                                                                              
## [348] "CDC42 GTPase cycle"                                                                                                         
## [349] "CD28 co-stimulation"                                                                                                        
## [350] "TRAF6 mediated NF-kB activation"                                                                                            
## [351] "Signaling by FGFR"                                                                                                          
## [352] "ATF6 (ATF6-alpha) activates chaperones"                                                                                     
## [353] "Incretin synthesis, secretion, and inactivation"                                                                            
## [354] "TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition)"                                                  
## [355] "NF-kB is activated and signals survival"                                                                                    
## [356] "Asymmetric localization of PCP proteins"                                                                                    
## [357] "MET activates RAS signaling"                                                                                                
## [358] "ZBP1(DAI) mediated induction of type I IFNs"                                                                                
## [359] "Negative regulation of FGFR3 signaling"                                                                                     
## [360] "Signaling by MET"                                                                                                           
## [361] "Negative regulation of MET activity"                                                                                        
## [362] "FLT3 Signaling"                                                                                                             
## [363] "Death Receptor Signalling"                                                                                                  
## [364] "CD28 dependent PI3K/Akt signaling"                                                                                          
## [365] "LDL clearance"                                                                                                              
## [366] "p75 NTR receptor-mediated signalling"                                                                                       
## [367] "ERKs are inactivated"                                                                                                       
## [368] "Constitutive Signaling by AKT1 E17K in Cancer"                                                                              
## [369] "Signaling by ALK fusions and activated point mutants"                                                                       
## [370] "Signaling by ALK in cancer"                                                                                                 
## [371] "HSF1-dependent transactivation"                                                                                             
## [372] "Semaphorin interactions"                                                                                                    
## [373] "Constitutive Signaling by EGFRvIII"                                                                                         
## [374] "Signaling by EGFRvIII in Cancer"                                                                                            
## [375] "Regulation of PTEN gene transcription"                                                                                      
## [376] "Spry regulation of FGF signaling"
sep_up_only_df <- msep$enrichment_result[which(msep$enrichment_result$set %in% sep),]
sep_up_only_df <- subset(sep_up_only_df,s.dist>0)
head(sep_up_only_df,10) %>% kbl(caption = "Upregulated sets identified in sep-DE only") %>% kable_paper("hover", full_width = F)
Upregulated sets identified in sep-DE only
set setSize pANOVA s.dist p.adjustANOVA
278 Diseases of DNA repair 34 0.0e+00 0.5465570 9.00e-07
484 Homologous DNA Pairing and Strand Exchange 42 1.0e-07 0.4773143 2.00e-06
658 Mitotic Prometaphase 181 1.0e-07 0.2285727 2.60e-06
1001 Resolution of D-loop Structures through Holliday Junction Intermediates 32 1.0e-07 0.5396093 2.70e-06
98 Biological oxidations 155 1.0e-07 0.2452573 2.90e-06
1000 Resolution of D-Loop Structures 33 5.0e-07 0.5056426 9.00e-06
485 Homology Directed Repair 101 5.0e-07 0.2893515 9.20e-06
243 Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function 24 1.2e-06 0.5734199 1.89e-05
244 Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function 24 1.2e-06 0.5734199 1.89e-05
245 Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function 24 1.2e-06 0.5734199 1.89e-05
sep_dn_only_df <- msep$enrichment_result[which(msep$enrichment_result$set %in% sep),]
sep_dn_only_df <- subset(sep_dn_only_df,s.dist<0)
head(sep_dn_only_df,10) %>% kbl(caption = "Downregulated sets identified in sep-DE only") %>% kable_paper("hover", full_width = F)
Downregulated sets identified in sep-DE only
set setSize pANOVA s.dist p.adjustANOVA
509 Infectious disease 637 0 -0.2062708 0
1038 SRP-dependent cotranslational protein targeting to membrane 109 0 -0.4728714 0
87 Axon guidance 424 0 -0.2324048 0
154 Cellular response to starvation 149 0 -0.3633278 0
708 Nervous system development 444 0 -0.2086060 0
573 L13a-mediated translational silencing of Ceruloplasmin expression 108 0 -0.4039887 0
416 GTP hydrolysis and joining of the 60S ribosomal subunit 109 0 -0.3994515 0
268 Developmental Biology 677 0 -0.1634970 0
1283 Translation 270 0 -0.2529451 0
128 Cap-dependent Translation Initiation 116 0 -0.3799346 0
# in comb but not sep
comb <- setdiff(mc_up,ms)
comb
##  [1] "Regulation of cholesterol biosynthesis by SREBP (SREBF)"                                                                    
##  [2] "Metabolism"                                                                                                                 
##  [3] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
##  [4] "Post-translational protein phosphorylation"                                                                                 
##  [5] "Metabolism of lipids"                                                                                                       
##  [6] "Platelet degranulation"                                                                                                     
##  [7] "Hemostasis"                                                                                                                 
##  [8] "Response to elevated platelet cytosolic Ca2+"                                                                               
##  [9] "Collagen degradation"                                                                                                       
## [10] "Apoptotic execution phase"                                                                                                  
## [11] "Plasma lipoprotein remodeling"                                                                                              
## [12] "Metabolism of steroids"                                                                                                     
## [13] "Platelet activation, signaling and aggregation"                                                                             
## [14] "Biosynthesis of the N-glycan precursor (dolichol lipid-linked oligosaccharide, LLO) and transfer to a nascent protein"      
## [15] "RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function"                                      
## [16] "Synthesis of substrates in N-glycan biosythesis"                                                                            
## [17] "NOTCH4 Intracellular Domain Regulates Transcription"                                                                        
## [18] "RHO GTPases Activate Formins"
comb_only_df <- mcom$enrichment_result[which(mcom$enrichment_result$set %in% comb),]
comb_only_df %>% kbl(caption = "Dysregulated sets identified in all-DE only") %>% kable_paper("hover", full_width = F)
Dysregulated sets identified in all-DE only
set setSize pANOVA s.dist p.adjustANOVA
977 Regulation of cholesterol biosynthesis by SREBP (SREBF) 54 0.0000001 0.4287052 0.0000116
617 Metabolism 1696 0.0000001 0.0783250 0.0000229
954 Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs) 99 0.0000006 0.2913057 0.0000700
815 Post-translational protein phosphorylation 91 0.0000011 0.2955832 0.0001182
624 Metabolism of lipids 592 0.0000078 0.1082935 0.0006700
804 Platelet degranulation 103 0.0000112 0.2507088 0.0008881
480 Hemostasis 409 0.0000239 0.1223159 0.0015383
1009 Response to elevated platelet cytosolic Ca2+ 106 0.0000366 0.2322898 0.0020992
183 Collagen degradation 20 0.0000985 0.5029904 0.0048222
70 Apoptotic execution phase 39 0.0001387 0.3526906 0.0061620
800 Plasma lipoprotein remodeling 29 0.0001744 0.4027569 0.0072764
632 Metabolism of steroids 130 0.0002402 0.1867763 0.0097262
802 Platelet activation, signaling and aggregation 189 0.0002514 0.1547287 0.0098898
99 Biosynthesis of the N-glycan precursor (dolichol lipid-linked oligosaccharide, LLO) and transfer to a nascent protein 64 0.0003036 0.2612266 0.0116139
922 RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function 34 0.0004080 0.3503649 0.0140469
1196 Synthesis of substrates in N-glycan biosythesis 49 0.0006007 0.2834542 0.0196942
686 NOTCH4 Intracellular Domain Regulates Transcription 15 0.0007771 0.5012077 0.0236923
864 RHO GTPases Activate Formins 116 0.0010686 0.1760745 0.0313078
writeLines(text=sep,con="sep_sets.txt")
writeLines(text=sep_up_only_df$set,con="sepup_sets.txt")
writeLines(text=sep_dn_only_df$set,con="sepdn_sets.txt")
writeLines(text=comb,con="comb_sets.txt")

# intersection
intersect(mc_up,ms)
##  [1] "Unfolded Protein Response (UPR)"                                                     
##  [2] "Asparagine N-linked glycosylation"                                                   
##  [3] "IRE1alpha activates chaperones"                                                      
##  [4] "Innate Immune System"                                                                
##  [5] "Cholesterol biosynthesis"                                                            
##  [6] "XBP1(S) activates chaperone genes"                                                   
##  [7] "Signal Transduction"                                                                 
##  [8] "Immune System"                                                                       
##  [9] "Complement cascade"                                                                  
## [10] "Post-translational protein modification"                                             
## [11] "Initial triggering of complement"                                                    
## [12] "Activation of gene expression by SREBF (SREBP)"                                      
## [13] "Transport of small molecules"                                                        
## [14] "Regulation of Complement cascade"                                                    
## [15] "N-glycan trimming in the ER and Calnexin/Calreticulin cycle"                         
## [16] "Neutrophil degranulation"                                                            
## [17] "Transferrin endocytosis and recycling"                                               
## [18] "DNA strand elongation"                                                               
## [19] "Cellular responses to stimuli"                                                       
## [20] "Signaling by Rho GTPases, Miro GTPases and RHOBTB3"                                  
## [21] "Cellular responses to stress"                                                        
## [22] "Signaling by Rho GTPases"                                                            
## [23] "Metabolism of proteins"                                                              
## [24] "Calnexin/calreticulin cycle"                                                         
## [25] "Gene and protein expression by JAK-STAT signaling after Interleukin-12 stimulation"  
## [26] "Cell Cycle, Mitotic"                                                                 
## [27] "Interleukin-12 signaling"                                                            
## [28] "Signaling by Interleukins"                                                           
## [29] "Cell Cycle"                                                                          
## [30] "Amplification  of signal from unattached  kinetochores via a MAD2  inhibitory signal"
## [31] "Amplification of signal from the kinetochores"                                       
## [32] "Disease"                                                                             
## [33] "EML4 and NUDC in mitotic spindle formation"                                          
## [34] "Insertion of tail-anchored proteins into the endoplasmic reticulum membrane"         
## [35] "Interleukin-12 family signaling"                                                     
## [36] "Fatty acid metabolism"                                                               
## [37] "Iron uptake and transport"

If we consider both strategies to be valid, then we can define the significant sets as dysregulated. We can calculate the percent sentitivity of both approaches.

all <- unique(c(ms_up,ms_dn,mc_up))

message("Sensitivity: separate only")
## Sensitivity: separate only
(length(ms_up)+length(ms_dn))/length(all)
## [1] 0.9582367
message("Sensitivity: combined only")
## Sensitivity: combined only
length(mc_up)/length(all)
## [1] 0.1276102

Now lets looks at the gene sets and see whether they were classified as (not)correlated

sep_up_only_df$set
##  [1] "Diseases of DNA repair"                                                                                                     
##  [2] "Homologous DNA Pairing and Strand Exchange"                                                                                 
##  [3] "Mitotic Prometaphase"                                                                                                       
##  [4] "Resolution of D-loop Structures through Holliday Junction Intermediates"                                                    
##  [5] "Biological oxidations"                                                                                                      
##  [6] "Resolution of D-Loop Structures"                                                                                            
##  [7] "Homology Directed Repair"                                                                                                   
##  [8] "Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function"                                         
##  [9] "Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function"                                         
## [10] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function"                    
## [11] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA2/RAD51/RAD51C binding function"       
## [12] "Diseases of DNA Double-Strand Break Repair"                                                                                 
## [13] "Presynaptic phase of homologous DNA pairing and strand exchange"                                                            
## [14] "HDR through Homologous Recombination (HRR)"                                                                                 
## [15] "Chromosome Maintenance"                                                                                                     
## [16] "Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)"                                        
## [17] "HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA)"                                                
## [18] "Unwinding of DNA"                                                                                                           
## [19] "Activation of ATR in response to replication stress"                                                                        
## [20] "Phase II - Conjugation of compounds"                                                                                        
## [21] "DNA Double-Strand Break Repair"                                                                                             
## [22] "Cilium Assembly"                                                                                                            
## [23] "Deposition of new CENPA-containing nucleosomes at the centromere"                                                           
## [24] "Nucleosome assembly"                                                                                                        
## [25] "DNA Repair"                                                                                                                 
## [26] "HDR through Single Strand Annealing (SSA)"                                                                                  
## [27] "Telomere C-strand (Lagging Strand) Synthesis"                                                                               
## [28] "Cell Cycle Checkpoints"                                                                                                     
## [29] "MyD88 deficiency (TLR2/4)"                                                                                                  
## [30] "Lagging Strand Synthesis"                                                                                                   
## [31] "Mitotic Spindle Checkpoint"                                                                                                 
## [32] "IRAK4 deficiency (TLR2/4)"                                                                                                  
## [33] "Activation of the pre-replicative complex"                                                                                  
## [34] "Base Excision Repair"                                                                                                       
## [35] "Processive synthesis on the lagging strand"                                                                                 
## [36] "Anchoring of the basal body to the plasma membrane"                                                                         
## [37] "AURKA Activation by TPX2"                                                                                                   
## [38] "Glutathione conjugation"                                                                                                    
## [39] "Extension of Telomeres"                                                                                                     
## [40] "Resolution of Sister Chromatid Cohesion"                                                                                    
## [41] "Regulation of TLR by endogenous ligand"                                                                                     
## [42] "Leading Strand Synthesis"                                                                                                   
## [43] "Polymerase switching"                                                                                                       
## [44] "Collagen chain trimerization"                                                                                               
## [45] "Formation of Fibrin Clot (Clotting Cascade)"                                                                                
## [46] "Removal of the Flap Intermediate"                                                                                           
## [47] "Recruitment of NuMA to mitotic centrosomes"                                                                                 
## [48] "Polymerase switching on the C-strand of the telomere"                                                                       
## [49] "Resolution of Abasic Sites (AP sites)"                                                                                      
## [50] "Loss of Nlp from mitotic centrosomes"                                                                                       
## [51] "Loss of proteins required for interphase microtubule organization from the centrosome"                                      
## [52] "Meiotic recombination"                                                                                                      
## [53] "Centrosome maturation"                                                                                                      
## [54] "Recruitment of mitotic centrosome proteins and complexes"                                                                   
## [55] "Fanconi Anemia Pathway"                                                                                                     
## [56] "Telomere Maintenance"                                                                                                       
## [57] "G2/M DNA damage checkpoint"                                                                                                 
## [58] "Intraflagellar transport"                                                                                                   
## [59] "Base-Excision Repair, AP Site Formation"                                                                                    
## [60] "Mismatch repair (MMR) directed by MSH2:MSH6 (MutSalpha)"                                                                    
## [61] "Mismatch repair (MMR) directed by MSH2:MSH3 (MutSbeta)"                                                                     
## [62] "Peroxisomal protein import"                                                                                                 
## [63] "Cleavage of the damaged pyrimidine"                                                                                         
## [64] "Depyrimidination"                                                                                                           
## [65] "Recognition and association of DNA glycosylase with site containing an affected pyrimidine"                                 
## [66] "Mismatch Repair"                                                                                                            
## [67] "Phase I - Functionalization of compounds"                                                                                   
## [68] "Processive synthesis on the C-strand of the telomere"                                                                       
## [69] "Processing of DNA double-strand break ends"                                                                                 
## [70] "Organelle biogenesis and maintenance"                                                                                       
## [71] "Post-translational modification: synthesis of GPI-anchored proteins"                                                        
## [72] "Inactivation of APC/C via direct inhibition of the APC/C complex"                                                           
## [73] "Inhibition of the proteolytic activity of APC/C required for the onset of anaphase by mitotic spindle checkpoint components"
## [74] "Defective pyroptosis"                                                                                                       
## [75] "Carboxyterminal post-translational modifications of tubulin"                                                                
## [76] "Ethanol oxidation"                                                                                                          
## [77] "Integrin cell surface interactions"                                                                                         
## [78] "M Phase"                                                                                                                    
## [79] "Regulation of PLK1 Activity at G2/M Transition"                                                                             
## [80] "Peroxisomal lipid metabolism"                                                                                               
## [81] "Glyoxylate metabolism and glycine degradation"                                                                              
## [82] "Common Pathway of Fibrin Clot Formation"                                                                                    
## [83] "Metabolism of vitamins and cofactors"                                                                                       
## [84] "Post-chaperonin tubulin folding pathway"
cs <- read.table("cor.tsv",header=TRUE,sep="\t")
poscor <- rownames(subset(cs,pvalue<0.05&stat.t>0))
nocor <- rownames(subset(cs,pvalue>0.05))
negcor <- rownames(subset(cs,pvalue<0.05&stat.t<0))

sep_up_only_pos <- length(which(sep_up_only_df$set %in% poscor))
sep_up_only_neg <- length(which(sep_up_only_df$set %in% negcor))
sep_up_only_no <- length(which(sep_up_only_df$set %in% nocor))

sep_dn_only_pos <- length(which(sep_dn_only_df$set %in% poscor))
sep_dn_only_neg <- length(which(sep_dn_only_df$set %in% negcor))
sep_dn_only_no <- length(which(sep_dn_only_df$set %in% nocor))

comb_only_pos <- length(which(comb %in% poscor))
comb_only_neg <- length(which(comb %in% negcor))
comb_only_no <- length(which(comb %in% nocor))

sep_up_pie <- c("pos" = sep_up_only_pos , "no" = sep_up_only_no , "neg" = sep_up_only_neg )
lbls <- paste(names(sep_up_pie), " (",sep_up_pie,")",sep="")
pie(sep_up_pie, labels=lbls )

sep_dn_pie <- c("pos" = sep_dn_only_pos , "no" = sep_dn_only_no , "neg" = sep_dn_only_neg )
lbls <- paste(names(sep_dn_pie), " (",sep_dn_pie,")",sep="")
pie(sep_dn_pie, labels=lbls)

comb_pie <- c("pos" = comb_only_pos , "no" = comb_only_no , "neg" = comb_only_neg )
lbls <- paste(names(comb_pie), " (",comb_pie,")",sep="")
pie(comb_pie, labels=lbls)

Now use heatmap to visualise gene set regulation

comb_only_df
##                                                                                                                              set
## 977                                                                      Regulation of cholesterol biosynthesis by SREBP (SREBF)
## 617                                                                                                                   Metabolism
## 954  Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)
## 815                                                                                   Post-translational protein phosphorylation
## 624                                                                                                         Metabolism of lipids
## 804                                                                                                       Platelet degranulation
## 480                                                                                                                   Hemostasis
## 1009                                                                                Response to elevated platelet cytosolic Ca2+
## 183                                                                                                         Collagen degradation
## 70                                                                                                     Apoptotic execution phase
## 800                                                                                                Plasma lipoprotein remodeling
## 632                                                                                                       Metabolism of steroids
## 802                                                                               Platelet activation, signaling and aggregation
## 99         Biosynthesis of the N-glycan precursor (dolichol lipid-linked oligosaccharide, LLO) and transfer to a nascent protein
## 922                                        RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function
## 1196                                                                             Synthesis of substrates in N-glycan biosythesis
## 686                                                                          NOTCH4 Intracellular Domain Regulates Transcription
## 864                                                                                                 RHO GTPases Activate Formins
##      setSize       pANOVA     s.dist p.adjustANOVA
## 977       54 5.074137e-08 0.42870517  1.164514e-05
## 617     1696 1.497471e-07 0.07832496  2.291130e-05
## 954       99 5.589986e-07 0.29130574  6.997646e-05
## 815       91 1.115473e-06 0.29558324  1.181543e-04
## 624      592 7.785284e-06 0.10829349  6.700210e-04
## 804      103 1.120578e-05 0.25070881  8.880649e-04
## 480      409 2.391931e-05 0.12231586  1.538300e-03
## 1009     106 3.658656e-05 0.23228982  2.099154e-03
## 183       20 9.854523e-05 0.50299042  4.822212e-03
## 70        39 1.387231e-04 0.35269059  6.161993e-03
## 800       29 1.743803e-04 0.40275691  7.276414e-03
## 632      130 2.401539e-04 0.18677626  9.726233e-03
## 802      189 2.513735e-04 0.15472871  9.889753e-03
## 99        64 3.036302e-04 0.26122659  1.161385e-02
## 922       34 4.080429e-04 0.35036493  1.404688e-02
## 1196      49 6.006933e-04 0.28345423  1.969416e-02
## 686       15 7.770812e-04 0.50120770  2.369227e-02
## 864      116 1.068604e-03 0.17607446  3.130781e-02
lapply(1:nrow(comb_only_df),function(i) {
  myset <- comb_only_df$set[i]
  genes <- genesets[[which(names(genesets) == myset)]]
  mymx <- rpm2[which(rownames(rpm2) %in% genes),]
  mymx2 <-  mymx / rowMeans(mymx)
  mymx2 <- mymx2[which(!is.na(mymx2[,1])),]
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  heatmap.2(as.matrix(mymx2),main=myset,trace="none", col=colfunc(25), scale="row",margin=c(6,12),Colv="none",dendrogram="row",cexCol=0.8,cexRow=0.5)
  make_volcano2(de,myset,genes)
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL

FCS overlap between sep and combined shown as volcano

msep_res <- msep$enrichment_result
msep_sig <- subset(msep_res,p.adjustANOVA<0.05)
mc_up_res <- msep_res[which(msep_res$set %in% mc_up),]

plot(msep_res$s.dist,-log10(msep_res$pANOVA),
  pch=19,cex=0.5,col="darkgray",
  xlab="s distance (sep)", ylab="-log10(p) (sep)")

fdrline <- msep_res[tail(which(msep_res$p.adjustANOVA<0.05),1),"pANOVA"]

abline(h=-log10(fdrline),lty=2,lwd=2,col="black")

#points(msep_sig$s.dist,-log10(msep_sig$pANOVA),
#  pch=19,cex=0.5,col="red")

points(mc_up_res$s.dist,-log10(mc_up_res$pANOVA),
  cex=0.7,col="blue")

mtext("blue: comb FDR<0.05")

mcom_res <- mcom$enrichment_result

mcom_sig <- subset(mcom_res,p.adjustANOVA<0.05 & s.dist > 0)

ms_res <- mcom_res[which(mcom_res$set %in% ms),]

plot(mcom_res$s.dist,-log10(mcom_res$pANOVA),
  pch=19,cex=0.5,col="darkgray",
  xlab="s distance (com)", ylab="-log10(p) (com)")

fdrline <- mcom_res[tail(which(mcom_res$p.adjustANOVA<0.05),1),"pANOVA"]

abline(h=-log10(fdrline),lty=2,lwd=2,col="black")

#points(mcom_sig$s.dist,-log10(mcom_sig$pANOVA),
#  pch=19,cex=0.5,col="blue")

points(ms_res$s.dist,-log10(ms_res$pANOVA),
  pch=1,cex=0.7,col="red")

mtext("red: sep FDR<0.05")

ORA with clusterprofiler

Clusterprofiler uses a hypergeometric test. Firstly I will conduct the analysis separately for up and down regulated genes and with the correct backgound (as intended by the developers).

genesets2 <- read.gmt("ReactomePathways.gmt")

de_up <- rownames(subset(de, padj<0.05 & log2FoldChange > 0))
de_up <- unique(gt[which(rownames(gt) %in% de_up),1])

de_dn <- rownames(subset(de, padj<0.05 & log2FoldChange < 0))
de_dn <- unique(gt[which(rownames(gt) %in% de_dn),1])

de_bg <- rownames(de)
de_bg <- unique(gt[which(rownames(gt) %in% de_bg),1])

o_up_res <- as.data.frame(enricher(gene = de_up, universe = de_bg,  maxGSSize = 5000, TERM2GENE = genesets2, 
  pAdjustMethod="fdr", pvalueCutoff = 1, qvalueCutoff = 1 ))
o_up <- rownames(subset(o_up_res, p.adjust < 0.05))
       
o_dn_res <- as.data.frame(enricher(gene = de_dn, universe = de_bg,  maxGSSize = 5000, TERM2GENE = genesets2, 
  pAdjustMethod="fdr", pvalueCutoff = 1, qvalueCutoff = 1 ))
o_dn <- rownames(subset(o_dn_res, p.adjust < 0.05))

o_com_res <- as.data.frame(enricher(gene = union(de_up,de_dn), universe = de_bg,  maxGSSize = 5000, TERM2GENE = genesets2, 
  pAdjustMethod="fdr",  pvalueCutoff = 1, qvalueCutoff = 1  ))
o_com <- rownames(subset(o_com_res, p.adjust < 0.05))

length(o_up)
## [1] 47
length(o_dn)
## [1] 33
length(o_up) + length(o_dn)
## [1] 80
length(o_com)
## [1] 23
( length(o_up) + length(o_dn) ) / length(o_com)
## [1] 3.478261
all <- unique(c(o_up,o_dn,o_com))

message("Sensitivity: separate only")
## Sensitivity: separate only
(length(o_up)+length(o_dn))/length(all)
## [1] 0.9411765
message("Sensitivity: combined only")
## Sensitivity: combined only
length(o_com)/length(all)
## [1] 0.2705882

Euler diagram of the significant pathways found with each approach.

l2 <- list("sep up"=o_up,"sep dn"=o_dn,"comb"=o_com)

plot(euler(l2),quantities = TRUE, edges = "gray", main="ORA: combined vs separated")

List gene sets which are specific to each approach.

o_sep <- c(o_up,o_dn)

# in sep but not comb
setdiff(o_sep,o_com)
##  [1] "DNA strand elongation"                                                                                                
##  [2] "Unwinding of DNA"                                                                                                     
##  [3] "Mitotic Spindle Checkpoint"                                                                                           
##  [4] "Biological oxidations"                                                                                                
##  [5] "Metabolism"                                                                                                           
##  [6] "Cell Cycle Checkpoints"                                                                                               
##  [7] "Diseases of DNA repair"                                                                                               
##  [8] "Cell Cycle, Mitotic"                                                                                                  
##  [9] "Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)"                                  
## [10] "EML4 and NUDC in mitotic spindle formation"                                                                           
## [11] "Amplification  of signal from unattached  kinetochores via a MAD2  inhibitory signal"                                 
## [12] "Amplification of signal from the kinetochores"                                                                        
## [13] "Cell Cycle"                                                                                                           
## [14] "Resolution of D-loop Structures through Holliday Junction Intermediates"                                              
## [15] "Homologous DNA Pairing and Strand Exchange"                                                                           
## [16] "Resolution of Sister Chromatid Cohesion"                                                                              
## [17] "Formation of Fibrin Clot (Clotting Cascade)"                                                                          
## [18] "Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function"                                   
## [19] "Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function"                                   
## [20] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function"              
## [21] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA2/RAD51/RAD51C binding function" 
## [22] "Diseases of DNA Double-Strand Break Repair"                                                                           
## [23] "Resolution of D-Loop Structures"                                                                                      
## [24] "Extracellular matrix organization"                                                                                    
## [25] "Phase II - Conjugation of compounds"                                                                                  
## [26] "MyD88 deficiency (TLR2/4)"                                                                                            
## [27] "DNA Double-Strand Break Repair"                                                                                       
## [28] "Processive synthesis on the lagging strand"                                                                           
## [29] "HDR through Homologous Recombination (HRR)"                                                                           
## [30] "Presynaptic phase of homologous DNA pairing and strand exchange"                                                      
## [31] "RHO GTPases Activate Formins"                                                                                         
## [32] "Common Pathway of Fibrin Clot Formation"                                                                              
## [33] "IRAK4 deficiency (TLR2/4)"                                                                                            
## [34] "Metabolism of lipids"                                                                                                 
## [35] "Homology Directed Repair"                                                                                             
## [36] "DNA Replication"                                                                                                      
## [37] "Synthesis of DNA"                                                                                                     
## [38] "Activation of the pre-replicative complex"                                                                            
## [39] "Calnexin/calreticulin cycle"                                                                                          
## [40] "SARS-CoV-2 Infection"                                                                                                 
## [41] "Insulin receptor recycling"                                                                                           
## [42] "Neutrophil degranulation"                                                                                             
## [43] "ROS and RNS production in phagocytes"                                                                                 
## [44] "ER Quality Control Compartment (ERQC)"                                                                                
## [45] "ABC transporter disorders"                                                                                            
## [46] "Amino acid transport across the plasma membrane"                                                                      
## [47] "Cellular responses to stimuli"                                                                                        
## [48] "Signal Transduction"                                                                                                  
## [49] "RHOQ GTPase cycle"                                                                                                    
## [50] "Ion channel transport"                                                                                                
## [51] "ABC-family proteins mediated transport"                                                                               
## [52] "Antigen Presentation: Folding, assembly and peptide loading of class I MHC"                                           
## [53] "Biosynthesis of the N-glycan precursor (dolichol lipid-linked oligosaccharide, LLO) and transfer to a nascent protein"
## [54] "ATF6 (ATF6-alpha) activates chaperones"                                                                               
## [55] "Cellular responses to stress"                                                                                         
## [56] "Amino acids regulate mTORC1"                                                                                          
## [57] "Defective CFTR causes cystic fibrosis"                                                                                
## [58] "Transport to the Golgi and subsequent modification"                                                                   
## [59] "Transport of inorganic cations/anions and amino acids/oligopeptides"                                                  
## [60] "ATF6 (ATF6-alpha) activates chaperone genes"                                                                          
## [61] "Hedgehog ligand biogenesis"                                                                                           
## [62] "Signaling by Insulin receptor"
# in comb but not sep
setdiff(o_com,o_sep)
## [1] "Platelet degranulation"                                                                                                     
## [2] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
## [3] "Response to elevated platelet cytosolic Ca2+"                                                                               
## [4] "Post-translational protein phosphorylation"                                                                                 
## [5] "Hemostasis"
# intersection
intersect(mc_up,ms)
##  [1] "Unfolded Protein Response (UPR)"                                                     
##  [2] "Asparagine N-linked glycosylation"                                                   
##  [3] "IRE1alpha activates chaperones"                                                      
##  [4] "Innate Immune System"                                                                
##  [5] "Cholesterol biosynthesis"                                                            
##  [6] "XBP1(S) activates chaperone genes"                                                   
##  [7] "Signal Transduction"                                                                 
##  [8] "Immune System"                                                                       
##  [9] "Complement cascade"                                                                  
## [10] "Post-translational protein modification"                                             
## [11] "Initial triggering of complement"                                                    
## [12] "Activation of gene expression by SREBF (SREBP)"                                      
## [13] "Transport of small molecules"                                                        
## [14] "Regulation of Complement cascade"                                                    
## [15] "N-glycan trimming in the ER and Calnexin/Calreticulin cycle"                         
## [16] "Neutrophil degranulation"                                                            
## [17] "Transferrin endocytosis and recycling"                                               
## [18] "DNA strand elongation"                                                               
## [19] "Cellular responses to stimuli"                                                       
## [20] "Signaling by Rho GTPases, Miro GTPases and RHOBTB3"                                  
## [21] "Cellular responses to stress"                                                        
## [22] "Signaling by Rho GTPases"                                                            
## [23] "Metabolism of proteins"                                                              
## [24] "Calnexin/calreticulin cycle"                                                         
## [25] "Gene and protein expression by JAK-STAT signaling after Interleukin-12 stimulation"  
## [26] "Cell Cycle, Mitotic"                                                                 
## [27] "Interleukin-12 signaling"                                                            
## [28] "Signaling by Interleukins"                                                           
## [29] "Cell Cycle"                                                                          
## [30] "Amplification  of signal from unattached  kinetochores via a MAD2  inhibitory signal"
## [31] "Amplification of signal from the kinetochores"                                       
## [32] "Disease"                                                                             
## [33] "EML4 and NUDC in mitotic spindle formation"                                          
## [34] "Insertion of tail-anchored proteins into the endoplasmic reticulum membrane"         
## [35] "Interleukin-12 family signaling"                                                     
## [36] "Fatty acid metabolism"                                                               
## [37] "Iron uptake and transport"

Overlap between sep and combined shown as volcano

o_up_res$er <- sapply(o_up_res$GeneRatio , function(x) eval(parse(text=x))) / 
  sapply(o_up_res$BgRatio , function(x) eval(parse(text=x)))
o_up_res <- subset(o_up_res,er>1)

o_dn_res$er <- sapply(o_dn_res$GeneRatio , function(x) eval(parse(text=x))) / 
  sapply(o_dn_res$BgRatio , function(x) eval(parse(text=x))) 
o_dn_res <- subset(o_dn_res,er>1)
o_dn_res$er <- o_dn_res$er  * -1
o_sep_res <- rbind(o_up_res,o_dn_res)
o_sep_res <- o_sep_res[order(o_sep_res$pvalue),]
o_sep_sig <- subset(o_sep_res, p.adjust < 0.05)

o_sep_com <-  o_sep_res[which(o_sep_res$ID %in% o_com),]
o_sep_com <- o_sep_com[which(!duplicated(o_sep_com$ID)),]

plot(o_sep_res$er,-log10(o_sep_res$pvalue),
  pch=19,cex=0.5,col="darkgray",
  xlab="enrichment ratio (sep)", ylab="-log10(p) (sep)")

fdrline <- o_sep_res[tail(which(o_sep_res$p.adjust<0.05),1),"pvalue"]

abline(h=-log10(fdrline),lty=2,lwd=2,col="black")

#points(o_sep_sig$er,-log10(o_sep_sig$pvalue),
#  pch=19,cex=0.5,col="red")

points(o_sep_com$er,-log10(o_sep_com$pvalue),
  ,cex=0.7,col="blue")

mtext("blue: comb FDR<0.05")

o_com_res$er <- sapply(o_com_res$GeneRatio , function(x) eval(parse(text=x))) /
  sapply(o_com_res$BgRatio , function(x) eval(parse(text=x)))

o_com_sep <- o_com_res[which(o_com_res$ID %in% o_sep_sig$ID),]

plot(o_com_res$er,-log10(o_com_res$pvalue),
  pch=19,cex=0.5,col="darkgray",
  xlab="enrichment ratio (com)", ylab="-log10(p) (com)")

fdrline <- o_com_res[tail(which(o_com_res$p.adjust<0.05),1),"pvalue"]
abline(h=-log10(fdrline),lty=2,lwd=2,col="black")

points(o_com_sep$er,-log10(o_com_sep$pvalue),
  ,cex=0.7,col="red")

mtext("red: sep FDR<0.05")

Euler diagrams comparing FCS and ORA methods

par(cex.main=0.5)

par(mar=c(2,2,2,2))

l3 <- list("ORA up"=o_up,"ORA dn"=o_dn,"ORA comb"=o_com,
  "FCS up"=ms_up,"FCS dn"=ms_dn,"FCS comb"=mc_up)

plot(euler(l3),quantities = TRUE, edges = "gray", main="FCS compared to ORA")

Save data

dat <- list(  "FCS_up"=ms_up,
  "FCS_dn"=ms_dn,
  "FCS_com"=mc_up,
  "ORA_up"= o_up,
  "ORA_dn"=o_dn,
  "ORA_com"=o_com)

str(dat)
## List of 6
##  $ FCS_up : chr [1:96] "Cholesterol biosynthesis" "DNA strand elongation" "Diseases of DNA repair" "Homologous DNA Pairing and Strand Exchange" ...
##  $ FCS_dn : chr [1:317] "Cellular responses to stimuli" "Cellular responses to stress" "Infectious disease" "SRP-dependent cotranslational protein targeting to membrane" ...
##  $ FCS_com: chr [1:55] "Unfolded Protein Response (UPR)" "Asparagine N-linked glycosylation" "IRE1alpha activates chaperones" "Innate Immune System" ...
##  $ ORA_up : chr [1:47] "Cholesterol biosynthesis" "Regulation of Complement cascade" "Complement cascade" "Fatty acid metabolism" ...
##  $ ORA_dn : chr [1:33] "Asparagine N-linked glycosylation" "IRE1alpha activates chaperones" "Unfolded Protein Response (UPR)" "XBP1(S) activates chaperone genes" ...
##  $ ORA_com: chr [1:23] "IRE1alpha activates chaperones" "Unfolded Protein Response (UPR)" "XBP1(S) activates chaperone genes" "Regulation of cholesterol biosynthesis by SREBP (SREBF)" ...
saveRDS(dat,file = "ex1dat.rds")

Conclusion

For mitch, it would appear that performing direction informed (DI) analysis clearly yields more differentially regulated pathways (413) as compared to the direction agnostic (DA) method (55).

That being said, there were 18 pathways identified only in the DA method that appeared to be related to the physiology of the model. These gene sets are likely to contain a mix of genes affected by the stimulus in different ways - for example a mix of up and downregulated genes. Are these really real? Not sure.

This pattern was consistent with ORA, where 80 sets were identified with separate analysis and only 23 with the combined analysis.

When comparing ORA to FCS, we found that FCS identified many more sets than ORA. In fact all gene sets that were identified by ORA were also identified by FCS, except for 3 that were specific to the ORA up set.

Let’s look at those now.

myfcs <- c(ms_up, mc_up)

setdiff(o_up,myfcs)
## [1] "Extracellular matrix organization" "DNA Replication"                  
## [3] "Synthesis of DNA"

Session information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] gplots_3.1.1                eulerr_6.1.1               
##  [3] kableExtra_1.3.4            mitch_1.4.1                
##  [5] clusterProfiler_4.0.5       DESeq2_1.32.0              
##  [7] SummarizedExperiment_1.22.0 Biobase_2.52.0             
##  [9] MatrixGenerics_1.4.3        matrixStats_0.61.0         
## [11] GenomicRanges_1.44.0        GenomeInfoDb_1.28.4        
## [13] IRanges_2.26.0              S4Vectors_0.30.2           
## [15] BiocGenerics_0.38.0         getDEE2_1.2.0              
## 
## loaded via a namespace (and not attached):
##   [1] shadowtext_0.1.1       fastmatch_1.1-3        systemfonts_1.0.3     
##   [4] plyr_1.8.6             igraph_1.2.11          lazyeval_0.2.2        
##   [7] polylabelr_0.2.0       splines_4.1.2          BiocParallel_1.26.2   
##  [10] ggplot2_3.3.5          digest_0.6.29          yulab.utils_0.0.4     
##  [13] htmltools_0.5.2        GOSemSim_2.18.1        viridis_0.6.2         
##  [16] GO.db_3.13.0           fansi_1.0.0            magrittr_2.0.1        
##  [19] memoise_2.0.1          Biostrings_2.60.2      annotate_1.70.0       
##  [22] graphlayouts_0.8.0     svglite_2.0.0          enrichplot_1.12.3     
##  [25] colorspace_2.0-2       rvest_1.0.2            blob_1.2.2            
##  [28] ggrepel_0.9.1          xfun_0.29              dplyr_1.0.7           
##  [31] crayon_1.4.2           RCurl_1.98-1.5         jsonlite_1.7.2        
##  [34] scatterpie_0.1.7       genefilter_1.74.1      survival_3.2-13       
##  [37] ape_5.6-1              glue_1.6.0             polyclip_1.10-0       
##  [40] gtable_0.3.0           zlibbioc_1.38.0        XVector_0.32.0        
##  [43] webshot_0.5.2          htm2txt_2.1.1          DelayedArray_0.18.0   
##  [46] scales_1.1.1           DOSE_3.18.3            DBI_1.1.2             
##  [49] GGally_2.1.2           Rcpp_1.0.7             viridisLite_0.4.0     
##  [52] xtable_1.8-4           gridGraphics_0.5-1     tidytree_0.3.7        
##  [55] bit_4.0.4              htmlwidgets_1.5.4      httr_1.4.2            
##  [58] fgsea_1.18.0           RColorBrewer_1.1-2     ellipsis_0.3.2        
##  [61] pkgconfig_2.0.3        reshape_0.8.8          XML_3.99-0.8          
##  [64] farver_2.1.0           sass_0.4.0             locfit_1.5-9.4        
##  [67] utf8_1.2.2             ggplotify_0.1.0        tidyselect_1.1.1      
##  [70] rlang_0.4.12           reshape2_1.4.4         later_1.3.0           
##  [73] AnnotationDbi_1.54.1   munsell_0.5.0          tools_4.1.2           
##  [76] cachem_1.0.6           downloader_0.4         generics_0.1.1        
##  [79] RSQLite_2.2.9          evaluate_0.14          stringr_1.4.0         
##  [82] fastmap_1.1.0          yaml_2.2.1             ggtree_3.0.4          
##  [85] knitr_1.37             bit64_4.0.5            tidygraph_1.2.0       
##  [88] caTools_1.18.2         purrr_0.3.4            KEGGREST_1.32.0       
##  [91] ggraph_2.0.5           nlme_3.1-155           mime_0.12             
##  [94] aplot_0.1.2            xml2_1.3.3             DO.db_2.9             
##  [97] rstudioapi_0.13        compiler_4.1.2         beeswarm_0.4.0        
## [100] png_0.1-7              treeio_1.16.2          tibble_3.1.6          
## [103] tweenr_1.0.2           geneplotter_1.70.0     bslib_0.3.1           
## [106] stringi_1.7.6          highr_0.9              lattice_0.20-45       
## [109] Matrix_1.4-0           vctrs_0.3.8            pillar_1.6.4          
## [112] lifecycle_1.0.1        jquerylib_0.1.4        data.table_1.14.2     
## [115] cowplot_1.1.1          bitops_1.0-7           httpuv_1.6.5          
## [118] patchwork_1.1.1        qvalue_2.24.0          R6_2.5.1              
## [121] promises_1.2.0.1       KernSmooth_2.23-20     echarts4r_0.4.3       
## [124] gridExtra_2.3          gtools_3.9.2           MASS_7.3-55           
## [127] assertthat_0.2.1       GenomeInfoDbData_1.2.6 grid_4.1.2            
## [130] ggfun_0.0.4            tidyr_1.1.4            rmarkdown_2.11        
## [133] ggforce_0.3.3          shiny_1.7.1