Example gene set analysis: The case of LHON causing mutations

Source: https://github.com/markziemann/combined_enrichment

Intro

Here we are performing an analysis of some gene expression data to demonstrate the difference between ORA and FCS methods and to highlight the differences caused by improper background gene set use.

The dataset being used is SRP247621 and we are comparing the fibroblast cells from LHON patients (case) versus healthy controls.

Data are obtained from http://dee2.io/

suppressPackageStartupMessages({
library("getDEE2") 
library("DESeq2")
library("clusterProfiler")
library("mitch")
library("kableExtra")
library("eulerr")
})

Get expression data

I’m using some RNA-seq data looking at the difference in fibroblast gene expression between control and LHON patients.

name="SRP247621"
mdat<-getDEE2Metadata("hsapiens")
samplesheet <- mdat[grep("SRP247621",mdat$SRP_accession),]
samplesheet<-samplesheet[order(samplesheet$SRR_accession),]
samplesheet$trt<-as.factor(c(1,1,1,2,2,2,0,0,0)) # exclude carriers for simplicity
samplesheet <- samplesheet[which(samplesheet$trt!=2),]
s1 <- samplesheet

s1 %>% kbl(caption = "sample sheet") %>% kable_paper("hover", full_width = F)

sample sheet
	SRR_accession	QC_summary	SRX_accession	SRS_accession	SRP_accession	Sample_name	GEO_series	trt
204421	SRR11040359	PASS	SRX7692166	SRS6118270	SRP247621	GSM4300731	GSE144914	1
204422	SRR11040360	WARN(3,4,6)	SRX7692167	SRS6118272	SRP247621	GSM4300732	GSE144914	1
204423	SRR11040361	PASS	SRX7692168	SRS6118271	SRP247621	GSM4300733	GSE144914	1
204427	SRR11040365	PASS	SRX7692172	SRS6118276	SRP247621	GSM4300737	GSE144914	0
204428	SRR11040366	PASS	SRX7692173	SRS6118277	SRP247621	GSM4300738	GSE144914	0
204429	SRR11040367	PASS	SRX7692174	SRS6118278	SRP247621	GSM4300739	GSE144914	0

w<-getDEE2("hsapiens",samplesheet$SRR_accession,metadata=mdat,legacy = TRUE)

## For more information about DEE2 QC metrics, visit
##     https://github.com/markziemann/dee2/blob/master/qc/qc_metrics.md

x<-Tx2Gene(w)
x<-x$Tx2Gene

# save the genetable for later
gt<-w$GeneInfo[,1,drop=FALSE]
gt$accession<-rownames(gt)

# counts 
x1<-x[,which(colnames(x) %in% samplesheet$SRR_accession)]

Here show the number of genes in the annotation set, and those detected above the detection threshold.

# filter out lowly expressed genes
x1<-x1[which(rowSums(x1)/ncol(x1)>=(10)),]
nrow(x)

## [1] 39297

nrow(x1)

## [1] 14288

Now multidimensional scaling (MDS) plot to show the correlation between the datasets. If the control and case datasets are clustered separately, then it is likely that there will be many differentially expressed genes with FDR<0.05.

plot(cmdscale(dist(t(x1))), xlab="Coordinate 1", ylab="Coordinate 2", pch=19, col=s1$trt, main="MDS")

Differential expression

Now run DESeq2 for control vs case.

y <- DESeqDataSetFromMatrix(countData = round(x1), colData = s1, design = ~ trt)

## converting counts to integer mode

## factor levels were dropped which had no samples

y <- DESeq(y)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

de <- results(y)
de<-as.data.frame(de[order(de$pvalue),])
rownames(de)<-sapply(strsplit(rownames(de),"\\."),"[[",1)
head(de) %>% kbl() %>% kable_paper("hover", full_width = F)

	baseMean	log2FoldChange	lfcSE	stat	padj
ENSG00000233948	156.47249	-2.955957	0.3146608	-9.394106	0e+00
ENSG00000103326	241.05789	1.543670	0.2180326	7.079999	0e+00
ENSG00000268861	72.30512	-6.854266	1.0078846	-6.800645	0e+00
ENSG00000175221	419.46036	1.246384	0.1836166	6.787967	0e+00
ENSG00000273542	17.71562	21.242888	3.1581883	6.726289	0e+00
ENSG00000168140	1121.18373	1.113196	0.1771604	6.283553	7e-07

Now let’s have a look at some of the charts showing differential expression. In particular, an MA plot and volcano plot.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=0.7)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 0.7)
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=0.5,pch=19,col="red")
}

maplot(de,name)

make_volcano(de,name)

Gene sets from Reactome

In order to perform gene set analysis, we need some gene sets.

if (! file.exists("ReactomePathways.gmt")) {
  download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", 
    destfile="ReactomePathways.gmt.zip")
  unzip("ReactomePathways.gmt.zip")
}
genesets<-gmt_import("ReactomePathways.gmt")

FCS with Mitch

Mitch uses rank-ANOVA statistics for enrichment detection.

Here I’m using the standard approach

m <- mitch_import(de,DEtype = "DEseq2", geneTable = gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 14288

## Note: no. genes in output = 13355

## Note: estimated proportion of input genes in output = 0.935

msep <- mitch_calc(m,genesets = genesets)

## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.

ms_up <- subset(msep$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0)[,1]
ms_dn <- subset(msep$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0)[,1]
message(paste("Number of up-regulated pathways:",length(ms_up) ))

## Number of up-regulated pathways: 69

message(paste("Number of down-regulated pathways:",length(ms_dn) ))

## Number of down-regulated pathways: 156

head(msep$enrichment_result,10)  #%>% kbl() %>% kable_paper("hover", full_width = F)

##                                              set setSize       pANOVA
## 77             Asparagine N-linked glycosylation     251 5.574217e-11
## 242        Defective CFTR causes cystic fibrosis      57 1.306378e-09
## 315            ER to Golgi Anterograde Transport     124 3.370379e-09
## 1305        Vif-mediated degradation of APOBEC3G      52 5.178549e-09
## 2                      ABC transporter disorders      65 5.233657e-09
## 696      Negative regulation of NOTCH4 signaling      53 7.892847e-09
## 932                      Regulation of Apoptosis      51 1.019099e-08
## 1312             Vpu mediated degradation of CD4      49 1.379079e-08
## 478         Hh mutants abrogate ligand secretion      54 1.866448e-08
## 1293 Ubiquitin-dependent degradation of Cyclin D      50 2.355232e-08
##          s.dist p.adjustANOVA
## 77   -0.2409732  7.536341e-08
## 242  -0.4646418  8.831112e-07
## 315  -0.3078072  1.415181e-06
## 1305 -0.4683385  1.415181e-06
## 2    -0.4189732  1.415181e-06
## 696  -0.4583167  1.778522e-06
## 932  -0.4636878  1.968316e-06
## 1312 -0.4687681  2.330644e-06
## 478  -0.4425343  2.803820e-06
## 1293 -0.4565351  3.184274e-06

#mitch_report(msep,outfile="mitch_separate.html",overwrite=TRUE)

Here I’m using the combined approach.

mcom <- mitch_calc(abs(m),genesets = genesets)

## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.

mc_up <- subset(mcom$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0)[,1]
mc_dn <- subset(mcom$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0)[,1]
message(paste("Number of up-regulated pathways:",length(mc_up) ))

## Number of up-regulated pathways: 34

message(paste("Number of down-regulated pathways:",length(mc_dn) ))

## Number of down-regulated pathways: 1

head(mcom$enrichment_result,10)  #%>% kbl() %>% kable_paper("hover", full_width = F)

##                                                                                                                      set
## 990  Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins.
## 989                                                                                       Respiratory electron transport
## 831                                                                             Protein-protein interactions at synapses
## 1350                                                                                tRNA processing in the mitochondrion
## 1185                                                                          TCF dependent signaling in response to WNT
## 190                                                                                                 Complex I biogenesis
## 1224                                                      The citric acid (TCA) cycle and respiratory electron transport
## 1235                                                                                 Toll Like Receptor 4 (TLR4) Cascade
## 108                                                                                       C-type lectin receptors (CLRs)
## 1138                                                                                                    Signaling by WNT
##      setSize       pANOVA    s.dist p.adjustANOVA
## 990      107 4.058993e-06 0.2581705   0.005487758
## 989      103 1.551183e-05 0.2467200   0.010485995
## 831       54 2.557534e-05 0.3313340   0.011525951
## 1350      17 8.624254e-05 0.5500428   0.029149977
## 1185     151 1.781581e-04 0.1770766   0.040270892
## 190       57 2.472321e-04 0.2808337   0.040270892
## 1224     153 2.756356e-04 0.1707238   0.040270892
## 1235     108 3.764216e-04 0.1983356   0.040270892
## 108      109 3.909377e-04 0.1968785   0.040270892
## 1138     225 3.916582e-04 0.1376167   0.040270892

#mitch_report(mcom,outfile="mitch_combined.html",overwrite=TRUE)

Let’s look at the significant ones based on the combined analysis. There weren’t many gene sets classe d as significant. Let’s see how many have a direction agnostic enrichment score which is larger in magnitude than the direction informed enrichment score. There are only 11 such sets which would benefit from such combined analysis.

Euler diagram of the significant pathways found with each approach.

l0 <- list("sep up"=ms_up,"sep dn"=ms_dn,"comb up"=mc_up,"comb dn"=mc_dn)
par(cex.main=0.5)
plot(euler(l0),quantities = TRUE, edges = "gray", main="FCS: combined vs separated")

length(ms_up)

## [1] 69

length(ms_dn)

## [1] 156

length(ms_up)+length(ms_dn)

## [1] 225

length(mc_up)

## [1] 34

length(mc_dn)

## [1] 1

length(mc_up)+length(mc_dn)

## [1] 35

( length(ms_up)+length(ms_dn) ) / ( length(mc_up)+length(mc_dn) )

## [1] 6.428571

List gene sets which are specific to each approach.

ms <- c(ms_up,ms_dn)

# in sep but not comb
setdiff(ms,mc_up)

##   [1] "Signal Transduction"                                                                                     
##   [2] "Developmental Biology"                                                                                   
##   [3] "Extracellular matrix organization"                                                                       
##   [4] "Heparan sulfate/heparin (HS-GAG) metabolism"                                                             
##   [5] "A tetrasaccharide linker sequence is required for GAG synthesis"                                         
##   [6] "Chondroitin sulfate/dermatan sulfate metabolism"                                                         
##   [7] "RORA activates gene expression"                                                                          
##   [8] "Mitotic Prometaphase"                                                                                    
##   [9] "Assembly of collagen fibrils and other multimeric structures"                                            
##  [10] "CDC42 GTPase cycle"                                                                                      
##  [11] "Defective B3GALT6 causes EDSP2 and SEMDJL1"                                                              
##  [12] "Chromatin modifying enzymes"                                                                             
##  [13] "Chromatin organization"                                                                                  
##  [14] "Defective B4GALT7 causes EDS, progeroid type"                                                            
##  [15] "PKMTs methylate histone lysines"                                                                         
##  [16] "SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion"                                       
##  [17] "Defective B3GAT3 causes JDSSDHD"                                                                         
##  [18] "Plasma lipoprotein remodeling"                                                                           
##  [19] "Pre-NOTCH Transcription and Translation"                                                                 
##  [20] "Centrosome maturation"                                                                                   
##  [21] "Recruitment of mitotic centrosome proteins and complexes"                                                
##  [22] "HS-GAG degradation"                                                                                      
##  [23] "RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function"                   
##  [24] "Collagen biosynthesis and modifying enzymes"                                                             
##  [25] "Collagen formation"                                                                                      
##  [26] "Signaling by Rho GTPases"                                                                                
##  [27] "Collagen chain trimerization"                                                                            
##  [28] "NCAM1 interactions"                                                                                      
##  [29] "Recruitment of NuMA to mitotic centrosomes"                                                              
##  [30] "Kinesins"                                                                                                
##  [31] "Heme signaling"                                                                                          
##  [32] "FOXO-mediated transcription of cell death genes"                                                         
##  [33] "RHOA GTPase cycle"                                                                                       
##  [34] "RHOB GTPase cycle"                                                                                       
##  [35] "Signaling by Rho GTPases, Miro GTPases and RHOBTB3"                                                      
##  [36] "Nervous system development"                                                                              
##  [37] "Transcriptional regulation of white adipocyte differentiation"                                           
##  [38] "NCAM signaling for neurite out-growth"                                                                   
##  [39] "Activation of SMO"                                                                                       
##  [40] "ECM proteoglycans"                                                                                       
##  [41] "Axon guidance"                                                                                           
##  [42] "Unwinding of DNA"                                                                                        
##  [43] "Resolution of D-loop Structures through Holliday Junction Intermediates"                                 
##  [44] "Regulation of TP53 Activity through Acetylation"                                                         
##  [45] "Resolution of D-Loop Structures"                                                                         
##  [46] "Neurexins and neuroligins"                                                                               
##  [47] "Ephrin signaling"                                                                                        
##  [48] "Plasma lipoprotein assembly, remodeling, and clearance"                                                  
##  [49] "Semaphorin interactions"                                                                                 
##  [50] "Glycosaminoglycan metabolism"                                                                            
##  [51] "RHO GTPases Activate Formins"                                                                            
##  [52] "SUMO E3 ligases SUMOylate target proteins"                                                               
##  [53] "RAC1 GTPase cycle"                                                                                       
##  [54] "RHO GTPase cycle"                                                                                        
##  [55] "AURKA Activation by TPX2"                                                                                
##  [56] "Signaling by PDGF"                                                                                       
##  [57] "Signaling by Receptor Tyrosine Kinases"                                                                  
##  [58] "Activation of HOX genes during differentiation"                                                          
##  [59] "Activation of anterior HOX genes in hindbrain development during early embryogenesis"                    
##  [60] "RHOC GTPase cycle"                                                                                       
##  [61] "Pre-NOTCH Expression and Processing"                                                                     
##  [62] "Neuronal System"                                                                                         
##  [63] "Sema3A PAK dependent Axon repulsion"                                                                     
##  [64] "Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell"                                
##  [65] "RHO GTPase Effectors"                                                                                    
##  [66] "Nuclear Receptor transcription pathway"                                                                  
##  [67] "Formation of the beta-catenin:TCF transactivating complex"                                               
##  [68] "Asparagine N-linked glycosylation"                                                                       
##  [69] "Defective CFTR causes cystic fibrosis"                                                                   
##  [70] "Vif-mediated degradation of APOBEC3G"                                                                    
##  [71] "ABC transporter disorders"                                                                               
##  [72] "Negative regulation of NOTCH4 signaling"                                                                 
##  [73] "Regulation of Apoptosis"                                                                                 
##  [74] "Vpu mediated degradation of CD4"                                                                         
##  [75] "Hh mutants abrogate ligand secretion"                                                                    
##  [76] "Ubiquitin-dependent degradation of Cyclin D"                                                             
##  [77] "Hh mutants are degraded by ERAD"                                                                         
##  [78] "Autodegradation of the E3 ubiquitin ligase COP1"                                                         
##  [79] "SCF(Skp2)-mediated degradation of p27/p21"                                                               
##  [80] "Stabilization of p53"                                                                                    
##  [81] "Activation of NF-kappaB in B cells"                                                                      
##  [82] "Regulation of RUNX3 expression and activity"                                                             
##  [83] "Autodegradation of Cdh1 by Cdh1:APC/C"                                                                   
##  [84] "Hedgehog ligand biogenesis"                                                                              
##  [85] "ER-Phagosome pathway"                                                                                    
##  [86] "FCERI mediated NF-kB activation"                                                                         
##  [87] "Ubiquitin Mediated Degradation of Phosphorylated Cdc25A"                                                 
##  [88] "p53-Independent DNA Damage Response"                                                                     
##  [89] "p53-Independent G1/S DNA damage checkpoint"                                                              
##  [90] "Degradation of GLI1 by the proteasome"                                                                   
##  [91] "NIK-->noncanonical NF-kB signaling"                                                                      
##  [92] "Regulation of activated PAK-2p34 by proteasome mediated degradation"                                     
##  [93] "APC/C:Cdc20 mediated degradation of Securin"                                                             
##  [94] "Degradation of DVL"                                                                                      
##  [95] "FBXL7 down-regulates AURKA during mitotic entry and in early mitosis"                                    
##  [96] "SCF-beta-TrCP mediated degradation of Emi1"                                                              
##  [97] "Regulation of ornithine decarboxylase (ODC)"                                                             
##  [98] "Degradation of AXIN"                                                                                     
##  [99] "Metabolism of proteins"                                                                                  
## [100] "Downstream signaling events of B Cell Receptor (BCR)"                                                    
## [101] "Degradation of GLI2 by the proteasome"                                                                   
## [102] "Antigen processing-Cross presentation"                                                                   
## [103] "Downstream TCR signaling"                                                                                
## [104] "GLI3 is processed to GLI3R by the proteasome"                                                            
## [105] "Transport to the Golgi and subsequent modification"                                                      
## [106] "Dectin-1 mediated noncanonical NF-kB signaling"                                                          
## [107] "ROS sensing by NFE2L2"                                                                                   
## [108] "Metabolism of polyamines"                                                                                
## [109] "p53-Dependent G1 DNA Damage Response"                                                                    
## [110] "p53-Dependent G1/S DNA damage checkpoint"                                                                
## [111] "Cdc20:Phospho-APC/C mediated degradation of Cyclin A"                                                    
## [112] "CDT1 association with the CDC6:ORC:origin complex"                                                       
## [113] "Oxygen-dependent proline hydroxylation of Hypoxia-inducible Factor Alpha"                                
## [114] "CDK-mediated phosphorylation and removal of Cdc6"                                                        
## [115] "Class I MHC mediated antigen processing & presentation"                                                  
## [116] "Metabolism"                                                                                              
## [117] "APC:Cdc20 mediated degradation of cell cycle proteins prior to satisfation of the cell cycle checkpoint" 
## [118] "RUNX1 regulates transcription of genes involved in differentiation of HSCs"                              
## [119] "G1/S DNA Damage Checkpoints"                                                                             
## [120] "Cross-presentation of soluble exogenous antigens (endosomes)"                                            
## [121] "Regulation of RUNX2 expression and activity"                                                             
## [122] "Regulation of HMOX1 expression and activity"                                                             
## [123] "APC/C:Cdc20 mediated degradation of mitotic proteins"                                                    
## [124] "ABC-family proteins mediated transport"                                                                  
## [125] "The role of GTSE1 in G2/M progression after G2 checkpoint"                                               
## [126] "APC/C:Cdh1 mediated degradation of Cdc20 and other APC/C:Cdh1 targeted proteins in late mitosis/early G1"
## [127] "Regulation of APC/C activators between G1/S and early anaphase"                                          
## [128] "AUF1 (hnRNP D0) binds and destabilizes mRNA"                                                             
## [129] "Neddylation"                                                                                             
## [130] "Cyclin E associated events during G1/S transition"                                                       
## [131] "Activation of APC/C and APC/C:Cdc20 mediated degradation of mitotic proteins"                            
## [132] "Antigen processing: Ubiquitination & Proteasome degradation"                                             
## [133] "Disorders of transmembrane transporters"                                                                 
## [134] "Peroxisomal protein import"                                                                              
## [135] "Apoptosis"                                                                                               
## [136] "Regulation of RAS by GAPs"                                                                               
## [137] "TCR signaling"                                                                                           
## [138] "Regulation of PTEN stability and activity"                                                               
## [139] "CLEC7A (Dectin-1) signaling"                                                                             
## [140] "COPII-mediated vesicle transport"                                                                        
## [141] "Interleukin-1 signaling"                                                                                 
## [142] "Protein localization"                                                                                    
## [143] "Signaling by NOTCH4"                                                                                     
## [144] "APC/C-mediated degradation of cell cycle proteins"                                                       
## [145] "Regulation of mitotic cell cycle"                                                                        
## [146] "TNFR2 non-canonical NF-kB pathway"                                                                       
## [147] "Programmed Cell Death"                                                                                   
## [148] "Host Interactions of HIV factors"                                                                        
## [149] "Asymmetric localization of PCP proteins"                                                                 
## [150] "Cellular response to hypoxia"                                                                            
## [151] "Cyclin A:Cdk2-associated events at S phase entry"                                                        
## [152] "Signaling by the B Cell Receptor (BCR)"                                                                  
## [153] "HIV Infection"                                                                                           
## [154] "Orc1 removal from chromatin"                                                                             
## [155] "Cargo concentration in the ER"                                                                           
## [156] "Fc epsilon receptor (FCERI) signaling"                                                                   
## [157] "Switching of origins to a post-replicative state"                                                        
## [158] "Formation of Incision Complex in GG-NER"                                                                 
## [159] "Iron uptake and transport"                                                                               
## [160] "Interleukin-1 family signaling"                                                                          
## [161] "Translation"                                                                                             
## [162] "Phase II - Conjugation of compounds"                                                                     
## [163] "PINK1-PRKN Mediated Mitophagy"                                                                           
## [164] "Assembly of the pre-replicative complex"                                                                 
## [165] "Macroautophagy"                                                                                          
## [166] "Post-translational protein modification"                                                                 
## [167] "Ub-specific processing proteases"                                                                        
## [168] "Regulation of mRNA stability by proteins that bind AU-rich elements"                                     
## [169] "DNA Replication Pre-Initiation"                                                                          
## [170] "Cytoprotection by HMOX1"                                                                                 
## [171] "Membrane Trafficking"                                                                                    
## [172] "Innate Immune System"                                                                                    
## [173] "Metabolism of non-coding RNA"                                                                            
## [174] "snRNP Assembly"                                                                                          
## [175] "Immune System"                                                                                           
## [176] "Autophagy"                                                                                               
## [177] "G1/S Transition"                                                                                         
## [178] "Synthesis of DNA"                                                                                        
## [179] "Metabolism of amino acids and derivatives"                                                               
## [180] "UCH proteinases"                                                                                         
## [181] "Neutrophil degranulation"                                                                                
## [182] "Glutamate and glutamine metabolism"                                                                      
## [183] "ER Quality Control Compartment (ERQC)"                                                                   
## [184] "Adaptive Immune System"                                                                                  
## [185] "Global Genome Nucleotide Excision Repair (GG-NER)"                                                       
## [186] "Cytochrome c-mediated apoptotic response"                                                                
## [187] "Selective autophagy"                                                                                     
## [188] "COPI-independent Golgi-to-ER retrograde traffic"                                                         
## [189] "MAPK6/MAPK4 signaling"                                                                                   
## [190] "Vesicle-mediated transport"                                                                              
## [191] "Dual Incision in GG-NER"                                                                                 
## [192] "Hedgehog 'off' state"                                                                                    
## [193] "Defects in vitamin and cofactor metabolism"                                                              
## [194] "Transport of small molecules"                                                                            
## [195] "DNA Replication"                                                                                         
## [196] "Pexophagy"                                                                                               
## [197] "MAP3K8 (TPL2)-dependent MAPK1/3 activation"                                                              
## [198] "Mitotic G1 phase and G1/S transition"                                                                    
## [199] "Formation of apoptosome"                                                                                 
## [200] "Regulation of the apoptosome activity"                                                                   
## [201] "MyD88-independent TLR4 cascade"                                                                          
## [202] "TRIF(TICAM1)-mediated TLR4 signaling"                                                                    
## [203] "Synthesis of PIPs at the early endosome membrane"                                                        
## [204] "HSP90 chaperone cycle for steroid hormone receptors (SHR) in the presence of ligand"                     
## [205] "Hedgehog 'on' state"                                                                                     
## [206] "NRIF signals cell death from the nucleus"                                                                
## [207] "PCP/CE pathway"

# in comb but not sep
setdiff(mc_up,ms)

##  [1] "TCF dependent signaling in response to WNT"                                  
##  [2] "Complex I biogenesis"                                                        
##  [3] "Toll Like Receptor 4 (TLR4) Cascade"                                         
##  [4] "Signaling by WNT"                                                            
##  [5] "Transcriptional regulation by RUNX1"                                         
##  [6] "MyD88 dependent cascade initiated on endosome"                               
##  [7] "TRAF6 mediated induction of NFkB and MAP kinases upon TLR7/8 or 9 activation"
##  [8] "Toll Like Receptor 7/8 (TLR7/8) Cascade"                                     
##  [9] "Toll Like Receptor 9 (TLR9) Cascade"                                         
## [10] "MyD88:MAL(TIRAP) cascade initiated on plasma membrane"                       
## [11] "Toll Like Receptor 2 (TLR2) Cascade"                                         
## [12] "Toll Like Receptor TLR1:TLR2 Cascade"                                        
## [13] "Toll Like Receptor TLR6:TLR2 Cascade"                                        
## [14] "Cellular responses to stress"                                                
## [15] "Toll-like Receptor Cascades"                                                 
## [16] "Cellular responses to stimuli"

# intersection
intersect(mc_up,ms)

##  [1] "Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins."
##  [2] "Respiratory electron transport"                                                                                     
##  [3] "Protein-protein interactions at synapses"                                                                           
##  [4] "tRNA processing in the mitochondrion"                                                                               
##  [5] "The citric acid (TCA) cycle and respiratory electron transport"                                                     
##  [6] "C-type lectin receptors (CLRs)"                                                                                     
##  [7] "Mitochondrial translation termination"                                                                              
##  [8] "IKK complex recruitment mediated by RIP1"                                                                           
##  [9] "ER to Golgi Anterograde Transport"                                                                                  
## [10] "Mitochondrial translation initiation"                                                                               
## [11] "Formation of TC-NER Pre-Incision Complex"                                                                           
## [12] "COPI-mediated anterograde transport"                                                                                
## [13] "Cellular response to chemical stress"                                                                               
## [14] "Mitochondrial translation"                                                                                          
## [15] "TICAM1, RIP1-mediated IKK complex recruitment"                                                                      
## [16] "Transcriptional regulation by RUNX2"                                                                                
## [17] "Mitochondrial translation elongation"                                                                               
## [18] "Degradation of beta-catenin by the destruction complex"

If we consider both strategies to be valid, then we can define the significant sets as dysregulated. We can calculate the percent sentitivity of both approaches.

all <- unique(c(ms_up,ms_dn,mc_up))

message("Sensitivity: separate only")

## Sensitivity: separate only

(length(ms_up)+length(ms_dn))/length(all)

## [1] 0.93361

message("Sensitivity: combined only")

## Sensitivity: combined only

length(mc_up)/length(all)

## [1] 0.1410788

ORA with clusterprofiler

Clusterprofiler uses a hypergeometric test. Firstly I will conduct the analysis separately for up and down regulated genes and with the correct backgound (as intended by the developers).

genesets2 <- read.gmt("ReactomePathways.gmt")

de_up <- rownames(subset(de, padj<0.05 & log2FoldChange > 0))
de_up <- unique(gt[which(rownames(gt) %in% de_up),1])

de_dn <- rownames(subset(de, padj<0.05 & log2FoldChange < 0))
de_dn <- unique(gt[which(rownames(gt) %in% de_dn),1])

de_bg <- rownames(de)
de_bg <- unique(gt[which(rownames(gt) %in% de_bg),1])

o_up <- as.data.frame(enricher(gene = de_up, universe = de_bg,  maxGSSize = 5000, TERM2GENE = genesets2, pAdjustMethod="fdr"))
o_up <- rownames(subset(o_up, p.adjust < 0.05))
       
o_dn <- as.data.frame(enricher(gene = de_dn, universe = de_bg,  maxGSSize = 5000, TERM2GENE = genesets2, pAdjustMethod="fdr"))
o_dn <- rownames(subset(o_dn, p.adjust < 0.05))

o_com <- as.data.frame(enricher(gene = union(de_up,de_dn), universe = de_bg,  maxGSSize = 5000, TERM2GENE = genesets2, pAdjustMethod="fdr"))
o_com <- rownames(subset(o_com, p.adjust < 0.05))

length(o_up)

## [1] 4

length(o_dn)

## [1] 1

length(o_up) + length(o_dn)

## [1] 5

length(o_com)

## [1] 3

( length(o_up) + length(o_dn) ) / length(o_com)

## [1] 1.666667

all <- unique(c(o_up,o_dn,o_com))

message("Sensitivity: separate only")

## Sensitivity: separate only

(length(o_up)+length(o_dn))/length(all)

## [1] 0.8333333

message("Sensitivity: combined only")

## Sensitivity: combined only

length(o_com)/length(all)

## [1] 0.5

Euler diagram of the significant pathways found with each approach.

l2 <- list("sep up"=o_up,"sep dn"=o_dn,"comb"=o_com)

plot(euler(l2),quantities = TRUE, edges = "gray", main="ORA: combined vs separated")

List gene sets which are specific to each approach.

o_sep <- c(o_up,o_dn)

# in sep but not comb
setdiff(o_sep,o_com)

## [1] "Assembly of collagen fibrils and other multimeric structures"
## [2] "Collagen formation"                                          
## [3] "TRP channels"

# in comb but not sep
setdiff(o_com,o_sep)

## [1] "Synaptic adhesion-like molecules"

# intersection
intersect(mc_up,ms)

##  [1] "Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins."
##  [2] "Respiratory electron transport"                                                                                     
##  [3] "Protein-protein interactions at synapses"                                                                           
##  [4] "tRNA processing in the mitochondrion"                                                                               
##  [5] "The citric acid (TCA) cycle and respiratory electron transport"                                                     
##  [6] "C-type lectin receptors (CLRs)"                                                                                     
##  [7] "Mitochondrial translation termination"                                                                              
##  [8] "IKK complex recruitment mediated by RIP1"                                                                           
##  [9] "ER to Golgi Anterograde Transport"                                                                                  
## [10] "Mitochondrial translation initiation"                                                                               
## [11] "Formation of TC-NER Pre-Incision Complex"                                                                           
## [12] "COPI-mediated anterograde transport"                                                                                
## [13] "Cellular response to chemical stress"                                                                               
## [14] "Mitochondrial translation"                                                                                          
## [15] "TICAM1, RIP1-mediated IKK complex recruitment"                                                                      
## [16] "Transcriptional regulation by RUNX2"                                                                                
## [17] "Mitochondrial translation elongation"                                                                               
## [18] "Degradation of beta-catenin by the destruction complex"

Euler diagrams comparing FCS and ORA methods

par(cex.main=0.5)

par(mar=c(2,2,2,2))

l3 <- list("ORA up"=o_up,"ORA dn"=o_dn,"ORA comb"=o_com,
  "FCS up"=ms_up,"FCS dn"=ms_dn,"FCS comb"=mc_up)

plot(euler(l3),quantities = TRUE, edges = "gray", main="FCS compared to ORA")

Save data

dat <- list(  "FCS_up"=ms_up,
  "FCS_dn"=ms_dn,
  "FCS_com"=mc_up,
  "ORA_up"= o_up,
  "ORA_dn"=o_dn,
  "ORA_com"=o_com)

str(dat)

## List of 6
##  $ FCS_up : chr [1:69] "Signal Transduction" "Developmental Biology" "Extracellular matrix organization" "Heparan sulfate/heparin (HS-GAG) metabolism" ...
##  $ FCS_dn : chr [1:156] "Asparagine N-linked glycosylation" "Defective CFTR causes cystic fibrosis" "ER to Golgi Anterograde Transport" "Vif-mediated degradation of APOBEC3G" ...
##  $ FCS_com: chr [1:34] "Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins." "Respiratory electron transport" "Protein-protein interactions at synapses" "tRNA processing in the mitochondrion" ...
##  $ ORA_up : chr [1:4] "Extracellular matrix organization" "Collagen chain trimerization" "Assembly of collagen fibrils and other multimeric structures" "Collagen formation"
##  $ ORA_dn : chr "TRP channels"
##  $ ORA_com: chr [1:3] "Extracellular matrix organization" "Synaptic adhesion-like molecules" "Collagen chain trimerization"

saveRDS(dat,file = "ex5dat.rds")

Conclusion

For mitch, it would appear that performing direction informed (DI) analysis clearly yields more differentially regulated pathways (413) as compared to the direction agnostic (DA) method (55).

That being said, there were 18 pathways identified only in the DA method that appeared to be related to the physiology of the model. These gene sets are likely to contain a mix of genes affected by the stimulus in different ways - for example a mix of up and downregulated genes. Are these really real? Not sure.

This pattern was consistent with ORA, where 80 sets were identified with separate analysis and only 23 with the combined analysis.

When comparing ORA to FCS, we found that FCS identified many more sets than ORA. In fact all gene sets that were identified by ORA were also identified by FCS, except for 3 that were specific to the ORA up set.

Let’s look at those now.

myfcs <- c(ms_up, mc_up)

setdiff(o_up,myfcs)

## character(0)

Session information

sessionInfo()

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] eulerr_6.1.1                mitch_1.4.1                
##  [3] clusterProfiler_4.0.5       DESeq2_1.32.0              
##  [5] SummarizedExperiment_1.22.0 Biobase_2.52.0             
##  [7] MatrixGenerics_1.4.3        matrixStats_0.61.0         
##  [9] GenomicRanges_1.44.0        GenomeInfoDb_1.28.4        
## [11] IRanges_2.26.0              S4Vectors_0.30.2           
## [13] BiocGenerics_0.38.0         getDEE2_1.2.0              
## [15] beeswarm_0.4.0              kableExtra_1.3.4           
## 
## loaded via a namespace (and not attached):
##   [1] shadowtext_0.1.1       fastmatch_1.1-3        systemfonts_1.0.3     
##   [4] plyr_1.8.6             igraph_1.2.11          lazyeval_0.2.2        
##   [7] polylabelr_0.2.0       splines_4.1.2          BiocParallel_1.26.2   
##  [10] ggplot2_3.3.5          digest_0.6.29          yulab.utils_0.0.4     
##  [13] htmltools_0.5.2        GOSemSim_2.18.1        viridis_0.6.2         
##  [16] GO.db_3.13.0           fansi_1.0.0            magrittr_2.0.1        
##  [19] memoise_2.0.1          Biostrings_2.60.2      annotate_1.70.0       
##  [22] graphlayouts_0.8.0     svglite_2.0.0          enrichplot_1.12.3     
##  [25] colorspace_2.0-2       blob_1.2.2             rvest_1.0.2           
##  [28] ggrepel_0.9.1          xfun_0.29              dplyr_1.0.7           
##  [31] crayon_1.4.2           RCurl_1.98-1.5         jsonlite_1.7.2        
##  [34] scatterpie_0.1.7       genefilter_1.74.1      survival_3.2-13       
##  [37] ape_5.6-1              glue_1.6.0             polyclip_1.10-0       
##  [40] gtable_0.3.0           zlibbioc_1.38.0        XVector_0.32.0        
##  [43] webshot_0.5.2          htm2txt_2.1.1          DelayedArray_0.18.0   
##  [46] scales_1.1.1           DOSE_3.18.3            DBI_1.1.2             
##  [49] GGally_2.1.2           Rcpp_1.0.7             viridisLite_0.4.0     
##  [52] xtable_1.8-4           gridGraphics_0.5-1     tidytree_0.3.7        
##  [55] bit_4.0.4              htmlwidgets_1.5.4      httr_1.4.2            
##  [58] fgsea_1.18.0           gplots_3.1.1           RColorBrewer_1.1-2    
##  [61] ellipsis_0.3.2         reshape_0.8.8          pkgconfig_2.0.3       
##  [64] XML_3.99-0.8           farver_2.1.0           sass_0.4.0            
##  [67] locfit_1.5-9.4         utf8_1.2.2             later_1.3.0           
##  [70] ggplotify_0.1.0        tidyselect_1.1.1       rlang_0.4.12          
##  [73] reshape2_1.4.4         AnnotationDbi_1.54.1   munsell_0.5.0         
##  [76] tools_4.1.2            cachem_1.0.6           downloader_0.4        
##  [79] generics_0.1.1         RSQLite_2.2.9          evaluate_0.14         
##  [82] stringr_1.4.0          fastmap_1.1.0          yaml_2.2.1            
##  [85] ggtree_3.0.4           knitr_1.37             bit64_4.0.5           
##  [88] tidygraph_1.2.0        caTools_1.18.2         purrr_0.3.4           
##  [91] KEGGREST_1.32.0        ggraph_2.0.5           nlme_3.1-153          
##  [94] mime_0.12              aplot_0.1.2            DO.db_2.9             
##  [97] xml2_1.3.3             compiler_4.1.2         rstudioapi_0.13       
## [100] png_0.1-7              treeio_1.16.2          tibble_3.1.6          
## [103] tweenr_1.0.2           geneplotter_1.70.0     bslib_0.3.1           
## [106] stringi_1.7.6          highr_0.9              lattice_0.20-45       
## [109] Matrix_1.4-0           vctrs_0.3.8            pillar_1.6.4          
## [112] lifecycle_1.0.1        jquerylib_0.1.4        data.table_1.14.2     
## [115] cowplot_1.1.1          bitops_1.0-7           httpuv_1.6.5          
## [118] patchwork_1.1.1        qvalue_2.24.0          R6_2.5.1              
## [121] promises_1.2.0.1       KernSmooth_2.23-20     echarts4r_0.4.3       
## [124] gridExtra_2.3          gtools_3.9.2           MASS_7.3-54           
## [127] assertthat_0.2.1       GenomeInfoDbData_1.2.6 grid_4.1.2            
## [130] ggfun_0.0.4            tidyr_1.1.4            rmarkdown_2.11        
## [133] ggforce_0.3.3          shiny_1.7.1