Yap KD profile
Reactome
TFBS TSS
TFBS ENH
suppressPackageStartupMessages({
library("tidyverse")
library("reshape2")
library("DESeq2")
library("gplots")
library("fgsea")
library("MASS")
library("mitch")
})
Here is the method we used to generate these gene sets. After ATAC-seq we had a list of peaks with reduced accessibility. Homer analysis was performed to identify the top 25 motifs in this peak set. Homer also outputs the locations of these motifs in the peaks. I mapped these motifs to promoters as well as enhancers with known target genes. The output was a set of genes where the regulatory element carries a binding site.
enh <- gmt_import("enhancer_motifs.gmt")
pro <- gmt_import("promoter_motifs.gmt")
Lets look at the make up of the gene set library. Reactome has more smaller sets while TFT targets has mostly 1000 genes.
length(enh)
## [1] 26
length(pro)
## [1] 26
summary(unname(unlist(lapply(enh,length))))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 11.5 54.5 111.9 114.8 835.0
summary(unname(unlist(lapply(pro,length))))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 2.000 5.769 6.750 35.000
Here is the Yap knock-down data we generated previously (7 day dataset).
x <- read.table("paired-edgeR.xls",header=TRUE,row.names=1)
head(x)
## logFC logCPM LR PValue
## ENSMUSG00000031880_Rrad 3.213046 3.8147728 97.23076 6.170060e-23
## ENSMUSG00000097418_Mir155hg 2.073579 5.4823122 67.78697 1.821492e-16
## ENSMUSG00000068614_Actc1 -1.191931 8.1779897 59.15924 1.454148e-14
## ENSMUSG00000023043_Krt18 2.668002 2.1851742 59.08167 1.512616e-14
## ENSMUSG00000053110_Yap1 -1.128658 5.6493272 58.21689 2.347550e-14
## ENSMUSG00000057400_Ces1c -6.104755 0.7602132 38.15317 6.540352e-10
## adj.p.value Dispersion totreads predLFC1
## ENSMUSG00000031880_Rrad 8.141394e-19 0.05215232 1041 3.177282
## ENSMUSG00000097418_Mir155hg 1.201729e-12 0.03796428 3341 2.068754
## ENSMUSG00000068614_Actc1 4.989743e-11 0.01638376 21540 -1.191579
## ENSMUSG00000023043_Krt18 4.989743e-11 0.03579011 324 2.572206
## ENSMUSG00000053110_Yap1 6.195184e-11 0.01355845 3716 -1.126844
## ENSMUSG00000057400_Ces1c 1.438332e-06 0.11883120 113 -4.319255
## predLFC3 predLFC5 Y_YAP1 Y_YAP2 Y_YAP3
## ENSMUSG00000031880_Rrad 3.099560 3.027000 4.414797 4.131310 5.188433
## ENSMUSG00000097418_Mir155hg 2.057827 2.047042 6.214675 6.702181 5.303606
## ENSMUSG00000068614_Actc1 -1.190775 -1.189973 6.965110 7.992973 7.331905
## ENSMUSG00000023043_Krt18 2.379678 2.218070 3.514757 -1.997335 3.493719
## ENSMUSG00000053110_Yap1 -1.122720 -1.118627 4.999306 4.578748 5.303606
## ENSMUSG00000057400_Ces1c -2.846752 -2.233624 -5.633764 -3.367146 -5.633764
## C_C1 C_C2 C_C3 Y_YAP1.1 Y_YAP2.1
## ENSMUSG00000031880_Rrad 2.1810119 0.9584748 0.8019469 254 228
## ENSMUSG00000097418_Mir155hg 3.5479142 4.2223398 4.2523728 885 1356
## ENSMUSG00000068614_Actc1 8.5485078 8.9090382 8.4132372 1489 3318
## ENSMUSG00000023043_Krt18 0.9170423 -3.2687604 0.6593102 136 3
## ENSMUSG00000053110_Yap1 6.1546449 6.0295060 6.0954641 381 311
## ENSMUSG00000057400_Ces1c -5.6337641 -0.5373202 3.0967067 0 1
## Y_YAP3.1 C_C1.1 C_C2.1 C_C3.1
## ENSMUSG00000031880_Rrad 457 58 23 21
## ENSMUSG00000097418_Mir155hg 495 150 223 232
## ENSMUSG00000068614_Actc1 2020 4810 5749 4154
## ENSMUSG00000023043_Krt18 141 24 1 19
## ENSMUSG00000053110_Yap1 495 915 781 833
## ENSMUSG00000057400_Ces1c 0 0 8 104
sig <- subset(x,adj.p.value<0.05)
plot(x$logCPM, x$logFC, col="gray", xlab="logCPM",ylab="log2 fold change")
points(sig$logCPM, sig$logFC, col="red")
Here we will run the mitch package.
gt <- as.data.frame(rownames(x))
gt$gn <- sapply(strsplit(rownames(x),"_"),"[[",2)
m <- mitch_import(x=x,DEtype="edger",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 13195
## Note: no. genes in output = 13193
## Note: estimated proportion of input genes in output = 1
capture.output(
res <- mitch_calc(x=m,genesets=pro,priority="effect")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res$enrichment_result,20)
## set setSize pANOVA s.dist p.adjustANOVA
## 1 motif2.motif.bed_PGR(NR) 12 0.2196094 0.20470121 0.4392189
## 2 motif7.motif.bed_NF1-halfsite(CTF) 15 0.6347389 0.07086558 0.6347389
capture.output(
res <- mitch_calc(x=m,genesets=enh, priority="effect")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res$enrichment_result,20)
## set setSize pANOVA s.dist
## 9 motif21.motif.bed_PR(NR) 20 0.04048406 -0.264700524
## 14 motif5.motif.bed_Six1(Homeobox) 15 0.37263892 0.132989326
## 6 motif19.motif.bed_TGA1(bZIP) 61 0.07601755 -0.131467121
## 1 motif10.motif.bed_Mef2a(MADS) 47 0.12986664 0.127789053
## 2 motif11.motif.bed_Atf1(bZIP) 18 0.61922390 0.067678684
## 5 motif17.motif.bed_TEAD(TEA) 21 0.59188061 0.067603719
## 7 motif1.motif.bed_Six2(Homeobox) 80 0.33855160 0.061969038
## 13 motif4.motif.bed_GRE(NR),IR3 26 0.58561790 -0.061797851
## 17 motif8.motif.bed_TEAD4(TEA) 26 0.58561790 -0.061797851
## 15 motif6.motif.bed_ARE(NR) 63 0.45630445 -0.054321256
## 11 motif2.motif.bed_PGR(NR) 171 0.29884386 -0.046172873
## 3 motif12.motif.bed_TEAD2(TEA) 60 0.57494900 -0.041897002
## 10 motif23.motif.bed_bZIP50(bZIP) 65 0.64769129 0.032808325
## 12 motif3.motif.bed_GRE(NR),IR3 27 0.80531766 -0.027416297
## 16 motif7.motif.bed_NF1-halfsite(CTF) 511 0.30105725 0.026942049
## 4 motif14.motif.bed_Six4(Homeobox) 113 0.62396194 0.026743525
## 8 motif20.motif.bed_Atf2(bZIP) 325 0.88959168 0.004502044
## p.adjustANOVA
## 9 0.6461492
## 14 0.7340501
## 6 0.6461492
## 1 0.7340501
## 2 0.7340501
## 5 0.7340501
## 7 0.7340501
## 13 0.7340501
## 17 0.7340501
## 15 0.7340501
## 11 0.7340501
## 3 0.7340501
## 10 0.7340501
## 12 0.8556500
## 16 0.7340501
## 4 0.7340501
## 8 0.8895917
Here is the Yap knock-down data we generated previously (28 day dataset).
x <- read.table("edgeR_rna_LacZvsYap.xls",header=TRUE,row.names=1)
head(x)
## logFC logCPM LR PValue
## ENSMUSG00000090556_Olfr753-ps1 2.902399 0.2753907 278.4418 1.641213e-62
## ENSMUSG00000020641_Rsad2 2.898768 0.4462312 266.5600 6.377743e-60
## ENSMUSG00000028874_Fgr 3.031279 1.3941254 258.6872 3.316488e-58
## ENSMUSG00000040264_Gbp2b 3.519186 1.4699838 256.2288 1.139138e-57
## ENSMUSG00000038037_Socs1 2.801552 0.7219252 256.6370 9.281046e-58
## ENSMUSG00000074896_Ifit3 3.026735 0.9071686 256.2250 1.141291e-57
## adj.p.value disp Lacz.1 Lacz.2
## ENSMUSG00000090556_Olfr753-ps1 2.697498e-58 0.01898323 3.277764 2.513037
## ENSMUSG00000020641_Rsad2 5.241229e-56 0.01945147 5.861156 4.493704
## ENSMUSG00000028874_Fgr 1.816993e-54 0.02157487 22.126610 16.964305
## ENSMUSG00000040264_Gbp2b 3.126377e-54 0.03213525 12.758172 9.781594
## ENSMUSG00000038037_Socs1 3.126377e-54 0.01776111 12.075605 9.258276
## ENSMUSG00000074896_Ifit3 3.126377e-54 0.02256274 11.321296 8.679952
## Lacz.3 Yap.1 Yap.2 Yap.3 sampleLacz
## ENSMUSG00000090556_Olfr753-ps1 4.158072 183.3558 126.7408 159.7393 -14.87752
## ENSMUSG00000020641_Rsad2 7.435284 205.4915 142.0417 179.0239 -14.77293
## ENSMUSG00000028874_Fgr 28.069145 388.7369 268.7062 338.6670 -14.27751
## ENSMUSG00000040264_Gbp2b 16.184629 423.2850 292.5868 368.7652 -14.53526
## ENSMUSG00000038037_Socs1 15.318746 243.7032 168.4547 212.3138 -14.55147
## ENSMUSG00000074896_Ifit3 14.361852 280.6797 194.0140 244.5277 -14.57815
## sampleYap
## ENSMUSG00000090556_Olfr753-ps1 -12.86573
## ENSMUSG00000020641_Rsad2 -12.76366
## ENSMUSG00000028874_Fgr -12.17639
## ENSMUSG00000040264_Gbp2b -12.09595
## ENSMUSG00000038037_Socs1 -12.60959
## ENSMUSG00000074896_Ifit3 -12.48018
sig <- subset(x,adj.p.value<0.05)
plot(x$logCPM, x$logFC, col="gray", xlab="logCPM",ylab="log2 fold change")
points(sig$logCPM, sig$logFC, col="red")
Here we will run the mitch package.
gt <- as.data.frame(rownames(x))
gt$gn <- sapply(strsplit(rownames(x),"_"),"[[",2)
m <- mitch_import(x=x,DEtype="edger",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
## it to a list for you.
## Note: Mean no. genes in input = 16436
## Note: no. genes in output = 16428
## Note: estimated proportion of input genes in output = 1
capture.output(
res <- mitch_calc(x=m,genesets=pro,priority="effect")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res$enrichment_result,20)
## set setSize pANOVA s.dist
## 1 motif12.motif.bed_TEAD2(TEA) 12 0.018315723 -0.39333577
## 3 motif7.motif.bed_NF1-halfsite(CTF) 24 0.008878294 -0.30859851
## 2 motif2.motif.bed_PGR(NR) 15 0.653280281 -0.06699974
## p.adjustANOVA
## 1 0.02747358
## 3 0.02663488
## 2 0.65328028
unlink("yap28d_pro.html")
capture.output(
mitch_report(res, "yap28d_pro.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpObyPEv/./yap28d_pro.RData ".
##
##
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/homer_analysis/mz_2020/mitch/mitch.knit.md
##
## Output created: /tmp/RtmpObyPEv/mitch_report.html
capture.output(
res <- mitch_calc(x=m,genesets=enh, priority="effect")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Note: Enrichments with large effect sizes may not be
## statistically significant.
head(res$enrichment_result,20)
## set setSize pANOVA s.dist
## 9 motif21.motif.bed_PR(NR) 21 0.0080758144 -0.333931220
## 3 motif12.motif.bed_TEAD2(TEA) 68 0.0003226246 -0.252292176
## 13 motif4.motif.bed_GRE(NR),IR3 32 0.0610724885 -0.191350024
## 17 motif8.motif.bed_TEAD4(TEA) 32 0.0610724885 -0.191350024
## 5 motif17.motif.bed_TEAD(TEA) 21 0.1715652736 0.172368356
## 14 motif5.motif.bed_Six1(Homeobox) 21 0.2033339881 0.160381603
## 12 motif3.motif.bed_GRE(NR),IR3 34 0.1748999242 -0.134471722
## 7 motif1.motif.bed_Six2(Homeobox) 92 0.0736020785 0.107995678
## 4 motif14.motif.bed_Six4(Homeobox) 114 0.1083407443 -0.087132137
## 2 motif11.motif.bed_Atf1(bZIP) 22 0.4870277054 0.085617235
## 1 motif10.motif.bed_Mef2a(MADS) 51 0.3613695973 0.073911643
## 6 motif19.motif.bed_TGA1(bZIP) 66 0.4375630634 -0.055285158
## 10 motif23.motif.bed_bZIP50(bZIP) 70 0.4425514878 -0.053106387
## 11 motif2.motif.bed_PGR(NR) 186 0.3319571594 0.041308012
## 8 motif20.motif.bed_Atf2(bZIP) 337 0.3081332119 -0.032388306
## 15 motif6.motif.bed_ARE(NR) 73 0.7784406606 0.019055795
## 16 motif7.motif.bed_NF1-halfsite(CTF) 537 0.9257723473 -0.002360236
## p.adjustANOVA
## 9 0.068644423
## 3 0.005484618
## 13 0.250247067
## 17 0.250247067
## 5 0.371662339
## 14 0.384075311
## 12 0.371662339
## 7 0.250247067
## 4 0.306965442
## 2 0.551964733
## 1 0.511940263
## 6 0.537383949
## 10 0.537383949
## 11 0.511940263
## 8 0.511940263
## 15 0.827093202
## 16 0.925772347
unlink("yap28d_enh.html")
capture.output(
mitch_report(res, "yap28d_enh.html")
, file = "/dev/null", append = FALSE,
type = c("output", "message"), split = FALSE)
## Dataset saved as " /tmp/RtmpObyPEv/./yap28d_enh.RData ".
##
##
## processing file: mitch.Rmd
## Contour plot does not apply to unidimensional analysis.
## output file: /mnt/bfx6/bfx/kevinwatt/homer_analysis/mz_2020/mitch/mitch.knit.md
##
## Output created: /tmp/RtmpObyPEv/mitch_report.html