Source: https://github.com/markziemann/fran_mbd/blob/master/main_report_meth.Rmd

Introduction

In this report, I will take you through an analysis of the MBD-seq that was previosly generated. I will perform an analysis that begins with DESeq2 for differential methylation followed by pathway enrichment analysis with mitch.

suppressPackageStartupMessages({
    # to be run in R4
    library("tidyverse")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("fgsea")
    library("mitch")
})

Import profiling data

This analysis uses the 3kbp area arount the TSS.

hm <- read.table("heart_meth_3kbpTSS.txt",header=TRUE,row.names=1)
sm <-read.table("spleen_meth_3kbpTSS.txt",header=TRUE,row.names=1)
b <- data.frame(sm,hm)

MDS plots

plot(cmdscale(dist(t(b))), bty='l', xlab="Coordinate 1", ylab="Coordinate 2", 
 type = "p", pch=19, cex.axis=1.3, cex.lab=1.3 , col="gray") 
text(cmdscale(dist(t(b))), labels=colnames(b),cex=1.3) 
mtext("spleen and heart samples")

plot(cmdscale(dist(t(hm))), bty='l', xlab="Coordinate 1", ylab="Coordinate 2",
 type = "p", pch=19, cex.axis=1.3, cex.lab=1.3 , col="gray")
text(cmdscale(dist(t(hm))), labels=colnames(hm),cex=1.3)
mtext("heart samples")

plot(cmdscale(dist(t(sm))), bty='l', xlab="Coordinate 1", ylab="Coordinate 2",
 type = "p", pch=19, cex.axis=1.3, cex.lab=1.3 , col="gray")
text(cmdscale(dist(t(sm))), labels=colnames(sm),cex=1.3)
mtext("spleen samples")

Curate the samplesheet

ss_sm <- as.data.frame(colnames(sm))
colnames(ss_sm) <- "samplename"
ss_sm$group  <- sapply(strsplit(as.character(ss_sm$samplename) , "_"),"[[",2)
ss_sm$group <- gsub("[0-9]+","",ss_sm$group)
rownames(ss_sm) <- ss_sm$samplename
ss_sm$samplename=NULL

ss_hm <- as.data.frame(colnames(hm))
colnames(ss_hm) <- "samplename"
ss_hm$group  <- sapply(strsplit(as.character(ss_hm$samplename) , "_"),"[[",2)
ss_hm$group <- gsub("[0-9]+","",ss_hm$group)
rownames(ss_hm) <- ss_hm$samplename
ss_hm$samplename=NULL

Differential analysis

We will be running some comparisons between groups:

  1. HEART ctrl vs acetate

  2. HEART ctrl vs hifib

  3. SPLEEN ctrl vs acetate

  4. SPLEEN ctrl vs hifibre

HEART ctrl vs acetate

x <- hm[,grep("hifi" , colnames(hm) ,invert=TRUE)]
x<-x[which(rowSums(x)/ncol(x)>=(10)),]
xs <- ss_hm[which( rownames(ss_hm) %in% colnames(x) ),1,drop=FALSE]
xs$trt <- factor(as.numeric(grepl("acetate",xs$group)))
dds <- DESeqDataSetFromMatrix(countData = x , colData = xs, design = ~ trt )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
head(dge)
##                              baseMean log2FoldChange     lfcSE      stat
## ENSMUSG00000065881_Gm22866  119.44050     -1.1822235 0.1953514 -6.051780
## ENSMUSG00000037410_Tbc1d2b  341.09596     -0.6874842 0.1693469 -4.059621
## ENSMUSG00000095041_ensembl 2011.26913     -0.4232428 0.1175254 -3.601287
## ENSMUSG00000097088_Gm26615   88.60375     -0.6820023 0.2078954 -3.280507
## ENSMUSG00000084652_Gm22717   77.77103     -0.8097174 0.2606830 -3.106138
## ENSMUSG00000001504_Irx2      43.95295      0.8353355 0.2776611  3.008472
##                                  pvalue         padj H11_acetate1 H12_acetate2
## ENSMUSG00000065881_Gm22866 1.432537e-09 4.590422e-05     7.541961     7.497128
## ENSMUSG00000037410_Tbc1d2b 4.915234e-05 7.875188e-01     8.739803     8.358340
## ENSMUSG00000095041_ensembl 3.166462e-04 9.998971e-01    10.982013    10.796732
## ENSMUSG00000097088_Gm26615 1.036206e-03 9.998971e-01     7.513994     7.357383
## ENSMUSG00000084652_Gm22717 1.895484e-03 9.998971e-01     7.165050     7.523048
## ENSMUSG00000001504_Irx2    2.625651e-03 9.998971e-01     7.362324     7.252666
##                            H17_acetate3  H1_ctrl1  H2_ctrl2  H3_ctrl3  H4_ctrl4
## ENSMUSG00000065881_Gm22866     7.348881  7.942201  8.135014  8.139167  8.128256
## ENSMUSG00000037410_Tbc1d2b     8.511046  8.800931  9.064895  9.173260  9.129819
## ENSMUSG00000095041_ensembl    10.679232 11.221358 11.141312 11.380333 11.123503
## ENSMUSG00000097088_Gm26615     7.422928  7.843455  7.702842  7.646114  7.835305
## ENSMUSG00000084652_Gm22717     7.182478  7.493675  7.709468  7.761882  7.764490
## ENSMUSG00000001504_Irx2        7.438707  6.945925  7.025067  6.997513  7.174816
write.table(dge,file="heart_ctrl_vs_acetate_meth.tsv",quote=F,sep="\t")
#some plots
sig<-subset(dge,padj<0.05)
SIG=nrow(sig)
DN=nrow(subset(sig,log2FoldChange<0))
UP=nrow(subset(sig,log2FoldChange>0))
HEADER=paste("ctrl vs acetate:", SIG , "DGEs,", UP ,"up,", DN, "down")
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
 xlab="log2 base mean",
 ,ylab="log2 fold change" ,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.2)

top<-head(sig,20)
plot(dge$log2FoldChange, -log2(dge$pvalue)+1E-307 ,cex=0.6, cex.lab=1.3,cex.axis=1.2,
 xlim=c(-3,3),xlab="log2 fold change", ylab="-log2 p-value" ,pch=19,col="#838383")
points(sig$log2FoldChange, -log2(sig$pvalue)+1E-307, cex=0.6,pch=19,col="red")  
mtext((HEADER),cex=1.2)