Introduction

Yap KD profile

Packages

suppressPackageStartupMessages({
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
})

Import profiling data

Here is the Yap knock-down data wee generated previously.

x <- read.table("paired-edgeR.xls",header=TRUE,row.names=1)
head(x)
##                                 logFC    logCPM       LR       PValue
## ENSMUSG00000031880_Rrad      3.213046 3.8147728 97.23076 6.170060e-23
## ENSMUSG00000097418_Mir155hg  2.073579 5.4823122 67.78697 1.821492e-16
## ENSMUSG00000068614_Actc1    -1.191931 8.1779897 59.15924 1.454148e-14
## ENSMUSG00000023043_Krt18     2.668002 2.1851742 59.08167 1.512616e-14
## ENSMUSG00000053110_Yap1     -1.128658 5.6493272 58.21689 2.347550e-14
## ENSMUSG00000057400_Ces1c    -6.104755 0.7602132 38.15317 6.540352e-10
##                              adj.p.value Dispersion totreads  predLFC1
## ENSMUSG00000031880_Rrad     8.141394e-19 0.05215232     1041  3.177282
## ENSMUSG00000097418_Mir155hg 1.201729e-12 0.03796428     3341  2.068754
## ENSMUSG00000068614_Actc1    4.989743e-11 0.01638376    21540 -1.191579
## ENSMUSG00000023043_Krt18    4.989743e-11 0.03579011      324  2.572206
## ENSMUSG00000053110_Yap1     6.195184e-11 0.01355845     3716 -1.126844
## ENSMUSG00000057400_Ces1c    1.438332e-06 0.11883120      113 -4.319255
##                              predLFC3  predLFC5    Y_YAP1    Y_YAP2    Y_YAP3
## ENSMUSG00000031880_Rrad      3.099560  3.027000  4.414797  4.131310  5.188433
## ENSMUSG00000097418_Mir155hg  2.057827  2.047042  6.214675  6.702181  5.303606
## ENSMUSG00000068614_Actc1    -1.190775 -1.189973  6.965110  7.992973  7.331905
## ENSMUSG00000023043_Krt18     2.379678  2.218070  3.514757 -1.997335  3.493719
## ENSMUSG00000053110_Yap1     -1.122720 -1.118627  4.999306  4.578748  5.303606
## ENSMUSG00000057400_Ces1c    -2.846752 -2.233624 -5.633764 -3.367146 -5.633764
##                                   C_C1       C_C2      C_C3 Y_YAP1.1 Y_YAP2.1
## ENSMUSG00000031880_Rrad      2.1810119  0.9584748 0.8019469      254      228
## ENSMUSG00000097418_Mir155hg  3.5479142  4.2223398 4.2523728      885     1356
## ENSMUSG00000068614_Actc1     8.5485078  8.9090382 8.4132372     1489     3318
## ENSMUSG00000023043_Krt18     0.9170423 -3.2687604 0.6593102      136        3
## ENSMUSG00000053110_Yap1      6.1546449  6.0295060 6.0954641      381      311
## ENSMUSG00000057400_Ces1c    -5.6337641 -0.5373202 3.0967067        0        1
##                             Y_YAP3.1 C_C1.1 C_C2.1 C_C3.1
## ENSMUSG00000031880_Rrad          457     58     23     21
## ENSMUSG00000097418_Mir155hg      495    150    223    232
## ENSMUSG00000068614_Actc1        2020   4810   5749   4154
## ENSMUSG00000023043_Krt18         141     24      1     19
## ENSMUSG00000053110_Yap1          495    915    781    833
## ENSMUSG00000057400_Ces1c           0      0      8    104
sig <- subset(x,adj.p.value<0.05)
plot(x$logCPM, x$logFC, col="gray", xlab="logCPM",ylab="log2 fold change")
points(sig$logCPM, sig$logFC, col="red")

Gene sets

We will perform analysis using Reactome gene sets as well as Cistrome transcription factor targets (promoter and enhancer).

download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
unzip("ReactomePathways.gmt.zip")
reactome <- gmt_import("ReactomePathways.gmt")

tft_tss <- gmt_import("MouseTfPeaks.gmt")

tft_enh <- gmt_import("MouseTfPeaks_enh.gmt")

Lets look at the make up of the gene set library. Reactome has more smaller sets while TFT targets has mostly 1000 genes.

length(reactome)
## [1] 2400
length(tft_tss)
## [1] 9054
length(tft_enh)
## [1] 9055
summary(unname(unlist(lapply(reactome,length))))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    5.00   15.00   49.03   43.00 2825.00
summary(unname(unlist(lapply(tft_tss,length))))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   187.0  1000.0   691.8  1000.0  1000.0
summary(unname(unlist(lapply(tft_enh,length))))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1    1000    1000     916    1000    1000

Mitch

Here we will run the mitch package with reactome gene sets (human). Reactome gives a broad brush analysis of major biochemical and signaling pathways.

rownames(x) <- sapply(strsplit(rownames(x),"_"),"[[",1)

m2h <- read.table("mouse2human.txt.sort")
m2h[,1]=NULL

# import
m <- mitch_import(x=x,DEtype="edger",geneTable=m2h)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 13195
## Note: no. genes in output = 11694
## Note: estimated proportion of input genes in output = 0.886
# calc
capture.output(
    res <- mitch_calc(x=m,genesets=reactome,priority="effect")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(res$enrichment_result,20)
##                                                                 set setSize
## 488                                Initial triggering of complement      12
## 993                   Signal regulatory protein family interactions      13
## 1236                                                    Xenobiotics      14
## 163                             Chemokine receptors bind chemokines      16
## 823                           RIP-mediated NFkB activation via ZBP1      17
## 757                                     Plasma lipoprotein assembly      15
## 519                      Interleukin-4 and Interleukin-13 signaling      73
## 202                                               Cristae formation      10
## 150                    Cell recruitment (pro-inflammatory response)      17
## 803                 Purinergic signaling in leishmaniasis infection      17
## 41                          Activation of Matrix Metalloproteinases      15
## 44           Activation of PPARGC1A (PGC-1alpha) by phosphorylation      10
## 1140                                         The NLRP3 inflammasome      13
## 510                                        Interleukin-10 signaling      19
## 267              Diseases associated with the TLR signaling cascade      19
## 268                                       Diseases of Immune System      19
## 357  Formation of Senescence-Associated Heterochromatin Foci (SAHF)      10
## 1238                    ZBP1(DAI) mediated induction of type I IFNs      20
## 237                    Defective B4GALT7 causes EDS, progeroid type      15
## 518                                        Interleukin-37 signaling      17
##            pANOVA     s.dist p.adjustANOVA
## 488  1.087822e-05  0.7333077  9.239236e-04
## 993  1.702453e-05  0.6888504  1.355578e-03
## 1236 1.941340e-05 -0.6593200  1.454863e-03
## 163  2.477303e-04  0.5292056  9.912677e-03
## 823  1.627449e-04  0.5282934  7.149551e-03
## 757  7.080126e-04 -0.5050775  2.031764e-02
## 519  1.791109e-13  0.4986167  1.140937e-10
## 202  7.291862e-03 -0.4900719  9.676908e-02
## 150  5.077529e-04  0.4871064  1.617193e-02
## 803  5.077529e-04  0.4871064  1.617193e-02
## 41   2.188838e-03  0.4569056  4.316931e-02
## 44   1.367372e-02 -0.4503595  1.417995e-01
## 1140 6.061536e-03  0.4396818  8.303652e-02
## 510  9.137808e-04  0.4394906  2.476929e-02
## 267  1.136361e-03  0.4313535  2.680970e-02
## 268  1.136361e-03  0.4313535  2.680970e-02
## 357  1.846133e-02  0.4303663  1.603145e-01
## 1238 1.003077e-03  0.4250043  2.583913e-02
## 237  4.798947e-03  0.4206582  7.113707e-02
## 518  2.915456e-03  0.4170642  5.088070e-02
# report
unlink("yaprna_reactome.html")
capture.output(
    mitch_report(res, "yaprna_reactome.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpukgUOB/./yaprna_reactome.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_kd/mitch/mitch.knit.md
## 
## Output created: /tmp/RtmpukgUOB/mitch_report.html

Here I'm going to run TFBS enrichment analysis based on TSS binding.

# set up gene table
x <- read.table("paired-edgeR.xls",header=TRUE,row.names=1)
gt <-  data.frame(rownames(x))
gt$gname <-  sapply(strsplit(gt[,1],"_"),"[[",2)

# import
m <- mitch_import(x=x,DEtype="edger",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 13195
## Note: no. genes in output = 13193
## Note: estimated proportion of input genes in output = 1
# calc
capture.output(
    res <- mitch_calc(x=m,genesets=tft_tss,priority="effect",minsetsize=50)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(res$enrichment_result,20)
##                                        set setSize       pANOVA     s.dist
## 6786    STAT4_None_NaturalKillerCell_86782      80 2.866500e-05  0.2708228
## 3655             BCL6_None_Hepatocyte_5856     143 6.646365e-08 -0.2620422
## 4099     RNF2_None_EmbryonicStemCell_68313      51 1.741390e-03  0.2536039
## 838             PER2_None_Epithelium_37826      53 1.977680e-03 -0.2458086
## 920            STAT1_None_Macrophage_38373      68 8.846188e-04  0.2333625
## 763             RELA_None_Macrophage_37417     619 1.675613e-20  0.2202755
## 6339           EP300_None_Macrophage_83658     529 5.817833e-17  0.2141554
## 3570    NEUROD1_R1_EmbryonicStemCell_58027      89 8.828038e-04  0.2041799
## 2522        FOSB_None_CorticalNeuron_49586      57 8.226172e-03  0.2025183
## 6543           STAT6_None_Macrophage_85093     575 2.056471e-16  0.2021030
## 6452             JUNB_MIN_Macrophage_84498     486 3.547982e-14  0.2019823
## 1379                    AR_None_None_41602      96 6.419736e-04  0.2018369
## 6052 POU5F1_None_EmbryonicFibroblast_81810      58 8.387325e-03  0.2002730
## 6211                  IRF4_None_Treg_82830      50 1.516701e-02  0.1986487
## 6996              FOS_MIN_Macrophage_88308     615 9.635147e-17  0.1978791
## 6865           EP300_None_Macrophage_87483     602 5.980800e-16  0.1946338
## 4528            RELA_None_Macrophage_70691     617 3.038181e-16  0.1943152
## 783             RELA_None_Macrophage_37420     623 3.304342e-16  0.1931841
## 6337           EP300_None_Macrophage_83657     540 3.190790e-14  0.1923736
## 2710          NLRC5_None_TLymphocyte_51237      60 1.010146e-02  0.1921622
##      p.adjustANOVA
## 6786  5.502256e-04
## 3655  3.332478e-06
## 4099  1.321245e-02
## 838   1.444483e-02
## 920   7.735021e-03
## 763   1.201414e-16
## 6339  1.390462e-13
## 3570  7.728576e-03
## 2522  4.369011e-02
## 6543  2.948979e-13
## 6452  1.211382e-11
## 1379  6.080516e-03
## 6052  4.422218e-02
## 6211  6.817470e-02
## 6996  1.727100e-13
## 6865  5.360292e-13
## 4528  3.384591e-13
## 783   3.384591e-13
## 6337  1.211060e-11
## 2710  5.089774e-02
unlink("yaprna_tss.html")
capture.output(
    mitch_report(res, "yaprna_tss.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpukgUOB/./yaprna_tss.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_kd/mitch/mitch.knit.md
## 
## Output created: /tmp/RtmpukgUOB/mitch_report.html

Here I'm going to run TFBS enrichment analysis based on enhancer binding.

# calc
capture.output(
    res <- mitch_calc(x=m,genesets=tft_enh,priority="effect",minsetsize=50)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(res$enrichment_result,20)
##                                                        set setSize       pANOVA
## 8405                                 MECP2_None_None_87515      53 1.432209e-02
## 8352 CXXC1_Mouseembryonicstemcells_EmbryonicStemCell_87252      52 1.928949e-02
## 8035                                   SPEN_MIN_None_85440      50 3.307592e-02
## 7522 CXXC1_Mouseembryonicstemcells_EmbryonicStemCell_82995      58 5.420931e-02
## 5304                                   RELA_None_Th1_69834     552 2.131003e-08
## 8349 CXXC1_Mouseembryonicstemcells_EmbryonicStemCell_87249      50 8.985524e-02
## 8327                     POLR2A_Cone_M_Y-257.03_iPSC_87099      82 3.688276e-02
## 8404                                 MECP2_None_None_87514      72 5.399709e-02
## 8441                           STAT3_None_Hepatocyte_87718      91 3.480773e-02
## 241                              BMI1_None_Astrocyte_32783      80 6.215504e-02
## 312                   SRSF2_None_EmbryonicFibroblast_33050     152 1.377207e-02
## 8403                           EP300_None_Macrophage_87481     547 4.744125e-06
## 3953                           CREB1_None_Macrophage_55168      81 7.732811e-02
## 8350 CXXC1_Mouseembryonicstemcells_EmbryonicStemCell_87251      73 9.646052e-02
## 8285                    MED12_None_EmbryonicStemCell_86791     527 1.172950e-05
## 4916                     RNF2_None_EmbryonicStemCell_68315      94 6.033290e-02
## 5487                            SPI1_None_Macrophage_70701     578 5.198124e-06
## 1793                          RUNX3_None_TLymphocyte_41959      69 1.085194e-01
## 8449                           STAT3_None_Hepatocyte_87722      55 1.556160e-01
## 2413                              RAG2_None_Thymocyte_4505     540 1.416120e-05
##          s.dist p.adjustANOVA
## 8405 -0.1946124  0.1687192223
## 8352 -0.1877096  0.1903545261
## 8035 -0.1743407  0.2383253953
## 7522 -0.1462715  0.2876132447
## 5304  0.1405402  0.0001875709
## 8349 -0.1387598  0.3553426900
## 8327  0.1334831  0.2492971036
## 8404 -0.1314686  0.2870062923
## 8441  0.1281828  0.2431783599
## 241   0.1207676  0.3029283788
## 312   0.1160157  0.1649564083
## 8403  0.1153391  0.0129170112
## 3953  0.1136684  0.3337972107
## 8350 -0.1126420  0.3670335024
## 8285  0.1124596  0.0129170112
## 4916  0.1122580  0.2986059804
## 5487  0.1118710  0.0129170112
## 1793 -0.1118451  0.3870992035
## 8449  0.1107820  0.4505696104
## 2413  0.1101103  0.0129170112
unlink("yaprna_enh.html")
capture.output(
    mitch_report(res, "yaprna_enh.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpukgUOB/./yaprna_enh.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_kd/mitch/mitch.knit.md
## 
## Output created: /tmp/RtmpukgUOB/mitch_report.html