library("edgeR")
library("DESeq2")
library("eulerr")
x <- read.table("mouseATAC_peaks_cov2.bed.saf.pe.mx",row.names=1,header=TRUE)
colnames(x) <- sapply(strsplit(colnames(x),"\\.A"),"[[",1)
xx <- x[which(rowMeans(x)>10),]
ss <- as.data.frame(colnames(xx))
ss$trt <- as.integer(grepl("Yap",ss[,1]))
rownames(ss) <- ss[,1]
ss[,1]=NULL
design <- model.matrix(~ss$trt)
rownames(design) <- rownames(ss)
z <- DGEList(counts=xx)
z <- calcNormFactors(z)
z <- estimateDisp(z, design,robust=TRUE,prior.df=1)
fit <- glmFit(z, design)
lrt<-glmLRT(fit)
dge<-as.data.frame(topTags(lrt,n=Inf))
dge$dispersion<-lrt$dispersion
dge<-merge(dge,lrt$fitted.values,by='row.names')
rownames(dge)=dge$Row.names
dge$Row.names=NULL
dge<-dge[order(dge$PValue),]
head(dge)
## logFC logCPM LR PValue
## PK39606_9:72863611-72864135 -0.9990238 5.403241 124.26043 7.388163e-29
## PK27939_4:41664347-41666008 -0.8936960 5.630466 75.95619 2.900284e-18
## PK16790_15:55230805-55231912 -0.9610760 4.410686 72.75256 1.469672e-17
## PK29551_4:140927904-140929048 -0.7547172 5.256924 60.82050 6.252332e-15
## PK31831_5:132653673-132654424 -0.9556798 4.128865 59.10887 1.491849e-14
## PK3571_1:154065455-154066705 -0.7934496 5.548893 55.86682 7.755048e-14
## FDR dispersion Lacz.1 Lacz.2
## PK39606_9:72863611-72864135 2.658113e-24 0.063211460 210.6868 135.95119
## PK27939_4:41664347-41666008 5.217322e-14 0.015293360 238.6822 154.01593
## PK16790_15:55230805-55231912 1.762529e-13 0.009139116 104.9374 67.71357
## PK29551_4:140927904-140929048 5.623660e-11 0.170248436 176.4899 113.88470
## PK31831_5:132653673-132654424 1.073475e-10 0.012274419 86.6584 55.91860
## PK3571_1:154065455-154066705 4.650185e-10 0.020670980 220.2282 142.10803
## Lacz.3 Yap.1 Yap.2 Yap.3
## PK39606_9:72863611-72864135 392.1378 200.62781 207.78752 174.56741
## PK27939_4:41664347-41666008 444.2438 244.52442 253.25064 212.76210
## PK16790_15:55230805-55231912 195.3131 102.55419 106.21399 89.23299
## PK29551_4:140927904-140929048 328.4892 199.08994 206.19477 173.22930
## PK31831_5:132653673-132654424 161.2917 84.99487 88.02804 73.95453
## PK3571_1:154065455-154066705 409.8966 241.85830 250.48938 210.44230
dge_edger <- dge
sig <- subset(dge_edger,FDR<0.05)
dge_edger_up <- rownames(subset(sig,logFC>0))
dge_edger_dn <- rownames(subset(sig,logFC<0))
length(dge_edger_up)
## [1] 297
length(dge_edger_dn)
## [1] 589
design <- model.matrix(~ss$trt)
rownames(design) <- rownames(ss)
z <- DGEList(counts=xx)
z <- calcNormFactors(z)
z <- estimateDisp(z, design,robust=TRUE,prior.df=1)
fit <- glmQLFit(z, design)
lrt <- glmQLFTest(fit)
dge<-as.data.frame(topTags(lrt,n=Inf))
dge$dispersion<-lrt$dispersion
dge<-merge(dge,lrt$fitted.values,by='row.names')
rownames(dge)=dge$Row.names
dge$Row.names=NULL
dge<-dge[order(dge$PValue),]
head(dge)
## logFC logCPM F PValue
## PK41420_X:100443287-100443571 -1.343802 6.368508 94.55504 2.016344e-12
## PK509_1:24611235-24616833 -1.399796 6.419712 85.59495 8.706031e-12
## PK21632_18:71423149-71423596 -1.371021 7.071626 72.89145 8.359661e-11
## PK36857_8:22350562-22351133 -1.157867 6.035898 71.88163 1.011420e-10
## PK7350_10:122866576-122866810 -1.217188 6.390173 71.23243 1.144245e-10
## PK37878_8:94099404-94099784 -1.263650 6.581057 70.35900 1.352407e-10
## FDR dispersion Lacz.1 Lacz.2
## PK41420_X:100443287-100443571 7.254402e-08 0.04880198 440.0001 283.9216
## PK509_1:24611235-24616833 1.566128e-07 0.04873792 460.6099 297.2206
## PK21632_18:71423149-71423596 8.109481e-07 0.04624077 716.6225 462.4195
## PK36857_8:22350562-22351133 8.109481e-07 0.04530695 336.1733 216.9246
## PK7350_10:122866576-122866810 8.109481e-07 0.04578038 434.5877 280.4291
## PK37878_8:94099404-94099784 8.109481e-07 0.04616163 500.3624 322.8720
## Lacz.3 Yap.1 Yap.2 Yap.3
## PK41420_X:100443287-100443571 818.9438 329.9613 341.7365 287.1013
## PK509_1:24611235-24616833 857.3035 332.2650 344.1224 289.1057
## PK21632_18:71423149-71423596 1333.8033 527.4162 546.2379 458.9079
## PK36857_8:22350562-22351133 625.6977 286.7744 297.0084 249.5241
## PK7350_10:122866576-122866810 808.8700 355.8104 368.5080 309.5927
## PK37878_8:94099404-94099784 931.2923 396.6872 410.8436 345.1598
dge_edgerql <- dge
sig <- subset(dge_edgerql,FDR<0.05)
dge_edgerql_up <- rownames(subset(sig,logFC>0))
dge_edgerql_dn <- rownames(subset(sig,logFC<0))
length(dge_edgerql_up)
## [1] 15
length(dge_edgerql_dn)
## [1] 204
dds <- DESeqDataSetFromMatrix(countData = xx , colData = ss, design = ~ trt )
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
head(dge)
## baseMean log2FoldChange lfcSE stat
## PK41420_X:100443287-100443571 427.3872 -1.346014 0.1595261 -8.437579
## PK33480_6:91826809-91827415 693.6957 -1.077427 0.1350267 -7.979363
## PK34012_6:128828803-128829375 1991.4646 -1.242463 0.1579345 -7.866947
## PK36091_7:116452147-116452748 909.5136 -1.190549 0.1525483 -7.804408
## PK11189_12:40893251-40894105 1052.7539 -1.148181 0.1477635 -7.770398
## PK17897_16:10832447-10832803 877.0216 -1.227448 0.1603871 -7.653029
## pvalue padj Lacz.1 Lacz.2
## PK41420_X:100443287-100443571 3.239786e-17 5.328476e-13 9.52174 9.578793
## PK33480_6:91826809-91827415 1.470899e-15 1.209594e-11 10.06862 10.138880
## PK34012_6:128828803-128829375 3.634010e-15 1.992285e-11 11.67192 11.541197
## PK36091_7:116452147-116452748 5.978164e-15 2.458072e-11 10.53043 10.451598
## PK11189_12:40893251-40894105 7.823988e-15 2.573623e-11 10.74049 10.683742
## PK17897_16:10832447-10832803 1.962989e-14 5.380879e-11 10.55893 10.359512
## Lacz.3 Yap.1 Yap.2 Yap.3
## PK41420_X:100443287-100443571 9.574391 8.259974 8.615183 8.733535
## PK33480_6:91826809-91827415 10.038921 9.016384 9.126365 9.403446
## PK34012_6:128828803-128829375 11.341701 9.953281 10.456535 10.629467
## PK36091_7:116452147-116452748 10.396047 9.083131 9.510534 9.663835
## PK11189_12:40893251-40894105 10.483736 9.341947 9.634867 9.879878
## PK17897_16:10832447-10832803 10.340573 8.999707 9.451396 9.611707
dge_deseq2 <- dge
sig <- subset(dge,padj<0.05)
dge_deseq2_up <- rownames(subset(sig,log2FoldChange>0))
dge_deseq2_dn <- rownames(subset(sig,log2FoldChange<0))
length(dge_deseq2_up)
## [1] 3
length(dge_deseq2_dn)
## [1] 113
v1 <- list("edgeR up"=dge_edger_up, "edgeR dn"=dge_edger_dn,
"DESeq2 up"=dge_deseq2_up,"DESeq2 dn"=dge_deseq2_dn,
"edgeR_QL up"=dge_edgerql_up, "edgeR_QL dn"=dge_edgerql_dn)
plot(euler(v1),quantities = TRUE)
Let's look at the Idh2 peak.
# edgeR
t(dge_edger[grep("7:80121026-80121603",rownames(dge_edger)),])
## PK35487_7:80121026-80121603
## logFC -0.59346075
## logCPM 3.86300164
## LR 8.28403282
## PValue 0.00399952
## FDR 0.10526316
## dispersion 0.04085917
## Lacz.1 64.05216724
## Lacz.2 41.33133786
## Lacz.3 119.21616897
## Yap.1 80.77175152
## Yap.2 83.65421344
## Yap.3 70.27996525
# DESeq2
t(dge_deseq2[grep("7:80121026-80121603",rownames(dge_deseq2)),])
## PK35487_7:80121026-80121603
## baseMean 74.07000999
## log2FoldChange -0.59786219
## lfcSE 0.23716547
## stat -2.52086521
## pvalue 0.01170667
## padj NA
## Lacz.1 7.70579721
## Lacz.2 7.73493182
## Lacz.3 7.73116945
## Yap.1 7.60769593
## Yap.2 7.35148484
## Yap.3 7.39611721
Now the clear peak for comparison.
# edgeR
t(dge_edger[grep("9:72863611-72864135",rownames(dge_edger)),])
## PK39606_9:72863611-72864135
## logFC -9.990238e-01
## logCPM 5.403241e+00
## LR 1.242604e+02
## PValue 7.388163e-29
## FDR 2.658113e-24
## dispersion 6.321146e-02
## Lacz.1 2.106868e+02
## Lacz.2 1.359512e+02
## Lacz.3 3.921378e+02
## Yap.1 2.006278e+02
## Yap.2 2.077875e+02
## Yap.3 1.745674e+02
# DESeq2
t(dge_deseq2[grep("9:72863611-72864135",rownames(dge_deseq2)),])
## PK39606_9:72863611-72864135
## baseMean 2.183262e+02
## log2FoldChange -9.903488e-01
## lfcSE 1.503444e-01
## stat -6.587199e+00
## pvalue 4.482004e-11
## padj 5.265394e-08
## Lacz.1 8.662340e+00
## Lacz.2 8.736919e+00
## Lacz.3 8.795317e+00
## Yap.1 8.080352e+00
## Yap.2 8.120300e+00
## Yap.3 8.103670e+00