Introduction

Yap KD profile

Packages

suppressPackageStartupMessages({
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("MASS")
    library("mitch")
})

Import profiling data

Here is the Yap knock-down data we generated previously.

x<-read.table("edgeR_rna_LacZvsYap.xls",header=T,row.names=1)

head(x)
##                                   logFC    logCPM       LR       PValue
## ENSMUSG00000090556_Olfr753-ps1 2.902399 0.2753907 278.4418 1.641213e-62
## ENSMUSG00000020641_Rsad2       2.898768 0.4462312 266.5600 6.377743e-60
## ENSMUSG00000028874_Fgr         3.031279 1.3941254 258.6872 3.316488e-58
## ENSMUSG00000040264_Gbp2b       3.519186 1.4699838 256.2288 1.139138e-57
## ENSMUSG00000038037_Socs1       2.801552 0.7219252 256.6370 9.281046e-58
## ENSMUSG00000074896_Ifit3       3.026735 0.9071686 256.2250 1.141291e-57
##                                 adj.p.value       disp    Lacz.1    Lacz.2
## ENSMUSG00000090556_Olfr753-ps1 2.697498e-58 0.01898323  3.277764  2.513037
## ENSMUSG00000020641_Rsad2       5.241229e-56 0.01945147  5.861156  4.493704
## ENSMUSG00000028874_Fgr         1.816993e-54 0.02157487 22.126610 16.964305
## ENSMUSG00000040264_Gbp2b       3.126377e-54 0.03213525 12.758172  9.781594
## ENSMUSG00000038037_Socs1       3.126377e-54 0.01776111 12.075605  9.258276
## ENSMUSG00000074896_Ifit3       3.126377e-54 0.02256274 11.321296  8.679952
##                                   Lacz.3    Yap.1    Yap.2    Yap.3 sampleLacz
## ENSMUSG00000090556_Olfr753-ps1  4.158072 183.3558 126.7408 159.7393  -14.87752
## ENSMUSG00000020641_Rsad2        7.435284 205.4915 142.0417 179.0239  -14.77293
## ENSMUSG00000028874_Fgr         28.069145 388.7369 268.7062 338.6670  -14.27751
## ENSMUSG00000040264_Gbp2b       16.184629 423.2850 292.5868 368.7652  -14.53526
## ENSMUSG00000038037_Socs1       15.318746 243.7032 168.4547 212.3138  -14.55147
## ENSMUSG00000074896_Ifit3       14.361852 280.6797 194.0140 244.5277  -14.57815
##                                sampleYap
## ENSMUSG00000090556_Olfr753-ps1 -12.86573
## ENSMUSG00000020641_Rsad2       -12.76366
## ENSMUSG00000028874_Fgr         -12.17639
## ENSMUSG00000040264_Gbp2b       -12.09595
## ENSMUSG00000038037_Socs1       -12.60959
## ENSMUSG00000074896_Ifit3       -12.48018
gt<-data.frame(rownames(x),stringsAsFactors=FALSE)

gt$gene<-sapply(strsplit(gt[,1],"_"),"[[",2)

# gene table
head(gt)
##                      rownames.x.        gene
## 1 ENSMUSG00000090556_Olfr753-ps1 Olfr753-ps1
## 2       ENSMUSG00000020641_Rsad2       Rsad2
## 3         ENSMUSG00000028874_Fgr         Fgr
## 4       ENSMUSG00000040264_Gbp2b       Gbp2b
## 5       ENSMUSG00000038037_Socs1       Socs1
## 6       ENSMUSG00000074896_Ifit3       Ifit3

Gene sets

We will perform analysis using Reactome gene sets as well as Cistrome transcription factor targets (promoter and enhancer).

download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
unzip("ReactomePathways.gmt.zip")
reactome <- gmt_import("ReactomePathways.gmt")
tft_tss <- gmt_import("MouseTfPeaks.gmt")
tft_enh <- gmt_import("MouseTfPeaks_enh.gmt")
atac_gset<-gmt_import("atac.gmt")

Enrichment analysis with mouse gene sets

ATAC gene sets

m<-mitch_import(x,DEtype="edgeR",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16436
## Note: no. genes in output = 16428
## Note: estimated proportion of input genes in output = 1
capture.output(
res<-mitch_calc(m,atac_gset)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(res$enrichment_result,20)
##                                  set setSize       pANOVA     s.dist
## 2                 ATAC enhancer down     141 4.098161e-10 -0.3050516
## 3                        ATAC tss up      94 2.334283e-09  0.3565217
## 1                   ATAC enhancer up      88 7.359889e-08  0.3320185
## 4                      ATAC tss down     112 9.714728e-07 -0.2680093
## 5 ATAC enhancer down with TEAD motif      28 4.840224e-02 -0.2155488
## 6   ATAC enhancer up with TEAD motif      14 9.554903e-02  0.2573152
##   p.adjustANOVA
## 2  2.458896e-09
## 3  7.002850e-09
## 1  1.471978e-07
## 4  1.457209e-06
## 5  5.808269e-02
## 6  9.554903e-02
capture.output(
    mitch_plots(res,outfile="atac_mitch.pdf")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

unlink("atac_mitch.html")
capture.output(
    mitch_report(res,outfile="atac_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpT467pk/./atac_mitch.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_enhancer_analysis/rna/mitch.knit.md
## 
## Output created: /tmp/RtmpT467pk/mitch_report.html

TSS based TFBS

capture.output(
    res<-mitch_calc(m,tft_tss)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(res$enrichment_result,20)
##                                  set setSize       pANOVA     s.dist
## 3651      STAT1_None_Macrophage_5379     696 7.126142e-52  0.3377381
## 8594      NCOR2_None_Macrophage_9637     684 2.890613e-51  0.3384984
## 8279          POLR2A_None_None_87698     841 9.998539e-47 -0.2924893
## 3655      STAT1_None_Macrophage_5383     704 1.070880e-46  0.3181840
## 3642      STAT1_None_Macrophage_5377     713 2.235769e-46  0.3151398
## 3137      IRF8_None_Macrophage_51059     694 4.651191e-46  0.3180984
## 5912          POLR2A_None_None_72694     882 7.439971e-46 -0.2832180
## 7258     HDAC3_None_Macrophage_82102     681 1.932959e-44  0.3151040
## 3333 SIRT1_None_SatelliteCells_51941     844 1.997002e-44 -0.2844750
## 3649      STAT1_None_Macrophage_5385     723 5.959922e-44  0.3044745
## 7146      IRF8_None_Macrophage_81563     682 6.095241e-44  0.3130486
## 3842  SPI1_X18.1.1_MyeloidCell_54713     720 9.424369e-44  0.3043630
## 5911          POLR2A_None_None_72693     872 1.021300e-42 -0.2744891
## 5913          POLR2A_None_None_72696     882 3.036724e-42 -0.2714439
## 344    PPARG_10T1_2_Fibroblast_32988     854 3.325198e-42 -0.2754764
## 3635      STAT1_None_Macrophage_5375     691 4.290068e-42  0.3042440
## 8289          POLR2A_None_None_87700     831 6.256433e-42 -0.2781137
## 5906          POLR2A_None_None_72695     875 8.837851e-42 -0.2709098
## 3658      STAT1_None_Macrophage_5381     693 3.260407e-41  0.3005081
## 7162     EP300_None_Macrophage_81589     645 9.577788e-41  0.3091760
##      p.adjustANOVA
## 3651  6.142022e-48
## 8594  1.245710e-47
## 8279  2.307480e-43
## 3655  2.307480e-43
## 3642  3.854019e-43
## 3137  6.681435e-43
## 5912  9.160729e-43
## 7258  1.912462e-41
## 3333  1.912462e-41
## 3649  4.775898e-41
## 7146  4.775898e-41
## 3842  6.769053e-41
## 5911  6.771220e-40
## 5913  1.869537e-39
## 344   1.910659e-39
## 3635  2.311006e-39
## 8289  3.172012e-39
## 5906  4.231858e-39
## 3658  1.479024e-38
## 7162  4.127548e-38
capture.output(
    mitch_plots(res,outfile="tft_tss_mitch.pdf")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

unlink("tft_tss_mitch.html")
capture.output(
    mitch_report(res,outfile="tft_tss_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpT467pk/./tft_tss_mitch.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_enhancer_analysis/rna/mitch.knit.md
## 
## Output created: /tmp/RtmpT467pk/mitch_report.html

Enhancer based TFBS

capture.output(
    res<-mitch_calc(m,tft_enh)
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.
head(res$enrichment_result,20)
##                                         set setSize       pANOVA    s.dist
## 2871      POLR2A_Swiss-3T3_Fibroblast_48405     594 1.168779e-21 0.2303909
## 4602          POLR2A_S194_BLymphocyte_59366     630 2.930701e-21 0.2217329
## 4656      MAPK8_None_EmbryonicStemCell_6011     616 5.891514e-21 0.2224092
## 4659   MAPK8_None_NeuralProgenitorCell_6021     617 1.080809e-20 0.2207229
## 2877      POLR2A_Swiss-3T3_Fibroblast_48404     594 7.821003e-20 0.2196920
## 5589     POLR2A_KH2_EmbryonicStemCell_70887     619 9.173194e-20 0.2149720
## 1208            STAT1_None_Macrophage_38421     610 1.462626e-19 0.2152843
## 5953 SMARCA4_None_EmbryonicFibroblast_72190     580 1.511793e-19 0.2204849
## 3290            STAT1_None_Macrophage_51073     598 2.096373e-19 0.2164070
## 3291            STAT1_None_Macrophage_51089     608 4.134470e-19 0.2129094
## 2465          SMARCA4_None_TLymphocyte_4548     644 6.617810e-19 0.2059022
## 2258              WDR5_C2C12_Myoblast_44373     593 7.148617e-19 0.2140213
## 2709    POLR2A_None_EmbryonicStemCell_47348     605 1.160660e-18 0.2106787
## 5195            TCF3_None_BLymphocyte_68985     603 1.446415e-18 0.2104252
## 2705    POLR2A_None_EmbryonicStemCell_47346     601 1.948574e-18 0.2099600
## 3311            STAT1_None_Macrophage_51075     630 2.383399e-18 0.2047273
## 1403            NR1D1_None_Macrophage_39665     620 2.580051e-18 0.2060953
## 3076          FOS_None_CorticalNeuron_49581     610 2.804810e-18 0.2074876
## 4676                 MAPK8_None_Neuron_6036     632 3.005125e-18 0.2038038
## 3426            STAT1_None_Macrophage_51921     579 3.146521e-18 0.2124442
##      p.adjustANOVA
## 2871  1.056694e-17
## 4602  1.324823e-17
## 4656  1.775506e-17
## 4659  2.442898e-17
## 2877  1.382247e-16
## 5589  1.382247e-16
## 1208  1.708516e-16
## 5953  1.708516e-16
## 3290  2.105923e-16
## 3291  3.737974e-16
## 2465  5.385887e-16
## 2258  5.385887e-16
## 2709  8.071945e-16
## 5195  9.340742e-16
## 2705  1.174470e-15
## 3311  1.346769e-15
## 1403  1.372132e-15
## 3076  1.408794e-15
## 4676  1.422385e-15
## 3426  1.422385e-15
capture.output(
    mitch_plots(res,outfile="tft_enh_mitch.pdf")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

unlink("tft_enh_mitch.html")
capture.output(
    mitch_report(res,outfile="tft_enh_mitch.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpT467pk/./tft_enh_mitch.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_enhancer_analysis/rna/mitch.knit.md
## 
## Output created: /tmp/RtmpT467pk/mitch_report.html

Reactome gene sets (human)

x<-read.table("edgeR_rna_LacZvsYap.xls",header=T,row.names=1)
rownames(x) <- sapply(strsplit(rownames(x),"_"),"[[",1)
m2h <- read.table("mouse2human.txt.sort")
m2h[,1]=NULL

m <- mitch_import(x=x,DEtype="edger",geneTable=m2h)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16436
## Note: no. genes in output = 13056
## Note: estimated proportion of input genes in output = 0.794
capture.output(
    res <- mitch_calc(x=m,genesets=reactome,priority="effect")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
head(res$enrichment_result,20)
##                                                                           set
## 353                                                           FCGR activation
## 756                                                            PD-1 signaling
## 503  Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell
## 538                                                  Interleukin-10 signaling
## 798                                Phosphorylation of CD3 and TCR zeta chains
## 533                                           Interferon alpha/beta signaling
## 1040                            Signal regulatory protein family interactions
## 435                                  Generation of second messenger molecules
## 749                                               Other interleukin signaling
## 544                                                   Interleukin-2 signaling
## 376                                              Folding of actin by CCT/TriC
## 218                                          Crosslinking of collagen fibrils
## 99                                  BBSome-mediated cargo-targeting to cilium
## 250                                                           Dectin-2 family
## 184                                             Citric acid cycle (TCA cycle)
## 757                                                       PECAM1 interactions
## 927                                              Regulation of IFNA signaling
## 89               Assembly of collagen fibrils and other multimeric structures
## 509                                                             Inflammasomes
## 195                                              Collagen chain trimerization
##      setSize       pANOVA     s.dist p.adjustANOVA
## 353       11 3.453606e-07  0.8872644  1.091997e-05
## 756       13 8.375517e-07  0.7889584  2.471708e-05
## 503       49 2.044241e-20  0.7640078  9.049173e-18
## 538       26 1.386850e-10  0.7268404  8.007550e-09
## 798       12 1.654699e-05  0.7179802  3.433500e-04
## 533       42 2.846722e-15  0.7039214  4.200496e-13
## 1040      13 2.736896e-05  0.6718016  5.192283e-04
## 435       24 4.347035e-08  0.6456543  1.603573e-06
## 749       18 3.588792e-06  0.6308270  9.531833e-05
## 544       10 7.002171e-04  0.6189943  7.749070e-03
## 376       10 8.943730e-04 -0.6066380  9.426407e-03
## 218       10 1.981105e-03  0.5648781  1.730860e-02
## 99        20 1.376215e-05 -0.5615565  2.947764e-04
## 250       10 2.139086e-03  0.5607083  1.850414e-02
## 184       21 9.188208e-06 -0.5591064  2.103783e-04
## 757       11 1.377017e-03  0.5571274  1.344616e-02
## 927       12 9.329772e-04  0.5518565  9.679638e-03
## 89        49 7.731280e-11  0.5372099  4.933650e-09
## 509       19 6.241228e-05  0.5305507  1.062609e-03
## 195       40 8.325879e-09  0.5264943  3.455240e-07
capture.output(
    mitch_plots(res,outfile="yaprna_mitch.pdf")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

# report
unlink("yaprna_reactome.html")
capture.output(
    mitch_report(res, "yaprna_reactome.html")
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpT467pk/./yaprna_reactome.RData ".
## 
## 
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/yap_enhancer_analysis/rna/mitch.knit.md
## 
## Output created: /tmp/RtmpT467pk/mitch_report.html