Source: https://github.com/markziemann/phytophthora-lupin
Vew the reports: http://ziemann-lab.net/public/barry/stats.html
Phytophthora infected Lupinus angustifolius root.
Here is the reference information
Lupinus angustifolius from Ensembl Lupinus_angustifolius.LupAngTanjil_v1.0.49.gtf.gz Lupinus_angustifolius.LupAngTanjil_v1.0.dna_sm.toplevel.fa.gz
Phytophthora cinnamomi genomic information obtained from a new assembly found on genbank https://ftp.ncbi.nlm.nih.gov/genomes/genbank/protozoa/Phytophthora_cinnamomi/latest_assembly_versions/GCA_018691715.1_ASM1869171v1/
fasta files were combined before indexing gtf files were combined before indexing index created by STAR
Based on the below genome annotation data, we classified genes with the names containing “Tanjil” and “ENSRNA” as belonging to Lupin and genes containing “IUM83” belonging to lupin
(base) ziemannm@bio-g-2:~/barry/ref3$ grep -c Tanjil gtf combined.gtf:497300 GCA_018691715.1_ASM1869171v1_genomic.gtf:0 Lupinus_angustifolius.LupAngTanjil_v1.0.49.gtf:497300 (base) ziemannm@bio-g-2:~/barry/ref3$ grep -c IUM83 gtf combined.gtf:191926 GCA_018691715.1_ASM1869171v1_genomic.gtf:191926 Lupinus_angustifolius.LupAngTanjil_v1.0.49.gtf:0 (base) ziemannm@bio-g-2:~/barry/ref3$ grep -c ENS *gtf combined.gtf:6291 GCA_018691715.1_ASM1869171v1_genomic.gtf:3 Lupinus_angustifolius.LupAngTanjil_v1.0.49.gtf:6288
Read trimming with Skewer v0.2.2 followed by STAR v2.7.7a. STAR gene level counts (reverse strand) were imported into R. Lupin and phytophthora counts were separated. DESeq2 was run to quantify differences between treatment groups.
#conda activate R40
library("DESeq2")
library("mitch")
library("reshape2")
library("Biostrings")
library("gplots")
Load counts into R
tmp <- read.table("3col.tsv.gz")
y<-as.matrix(acast(tmp, V2~V1, value.var="V3"))
colnames(y) <- sapply(strsplit(colnames(y),"_"),"[[",1)
head(y)
## BS01 BS02 BS03 BS04 BS05 BS06 BS07 BS08 BS09 BS10 BS11 BS12
## ENSRNA049758817 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 0 0 0 0 0 0 0 1
## ENSRNA049758848 0 0 0 1 0 0 0 0 0 0 0 0
## ENSRNA049758851 3 1 1 1 2 1 0 0 2 2 2 3
## ENSRNA049758853 0 0 0 0 0 0 0 0 0 0 0 0
## BS13 BS14 BS15 BS16 BS17 BS18 BS19 BS20 BS21 BS22 BS23 BS24
## ENSRNA049758817 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 1 0 0 0 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758851 0 0 0 0 0 2 0 0 1 0 1 0
## ENSRNA049758853 0 0 0 0 0 0 0 0 0 0 0 0
Load samplesheet and change the colnames.
ss <- read.table("samplesheet.tsv")
ss
## V1 V2
## 1 BS01 LaH-0_1
## 2 BS02 LaH-0_2
## 3 BS03 LaH-0_3
## 4 BS04 LaH-18_1
## 5 BS05 LaH-18_2
## 6 BS06 LaH-18_3
## 7 BS07 LaH-30_1
## 8 BS08 LaH-30_2
## 9 BS09 LaH-30_3
## 10 BS10 LaH-48_1
## 11 BS11 LaH-48_2
## 12 BS12 LaH-48_3
## 13 BS13 PcHY-0_1
## 14 BS14 PcHY-0_2
## 15 BS15 PcHY-0_3
## 16 BS16 LaP-18_1
## 17 BS17 LaP-18_2
## 18 BS18 LaP-18_3
## 19 BS19 LaP-30_1
## 20 BS20 LaP-30_2
## 21 BS21 LaP-30_3
## 22 BS22 LaP-48_1
## 23 BS23 LaP-48_2
## 24 BS24 LaP-48_3
ss$V1 == colnames(y)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
colnames(y) <- ss$V2
QC analysis of counts and split by species.
# make sure all genes are accounted for
dim(y)
## [1] 55151 24
length(grep("ENS",rownames(y)))
## [1] 2096
length(grep("Tanjil",rownames(y)))
## [1] 33074
length(grep("IUM83",rownames(y)))
## [1] 19981
length(grep("ENS",rownames(y))) + length(grep("Tanjil",rownames(y))) + length(grep("IUM83",rownames(y)))
## [1] 55151
par(mar=c(5,10,4,2))
pc <- y[grep("IUM83",rownames(y)),]
dim(pc)
## [1] 19981 24
la <- y[grep("IUM83",rownames(y),invert=TRUE),]
dim(la)
## [1] 35170 24
barplot(colSums(y),horiz=TRUE,las=1,xlab="total number of assigned reads")
grid()
barplot(colSums(pc),horiz=TRUE,las=1,xlab="number of PC assigned reads")
grid()
barplot(colSums(la),horiz=TRUE,las=1,xlab="number of LA assigned reads")
grid()
barplot(log10(colSums(pc)),horiz=TRUE,las=1,xlab="log10 number of PC assigned reads")
grid()
barplot(log10(colSums(la)),horiz=TRUE,las=1,xlab="log10 number of LA assigned reads")
grid()
barplot( colSums(pc)/colSums(y), horiz=TRUE,las=1,
xlim=c(0,1),xlab="proportion of PC/total reads" )
grid()
# revert to defaults
par(mar=c(5.1,4.1,4.1,2.1))
mds <- cmdscale(dist(t(y)))
plot(mds*1.05, xlab="Coordinate 1", ylab="Coordinate 2", type = "p",pch=19,col="lightblue",bty="n")
text(mds, labels=colnames(y),cex=0.9)
mtext("overall MDS plot")
mds <- cmdscale(dist(t(pc)))
plot(mds*1.05, xlab="Coordinate 1", ylab="Coordinate 2", type = "p",pch=19,col="lightblue",bty="n")
text(mds, labels=colnames(pc), cex=0.9)
mtext("Pc MDS plot")
mds <- cmdscale(dist(t(la)))
plot(mds*1.05, xlab="Coordinate 1", ylab="Coordinate 2", type = "p",pch=19,col="lightblue",bty="n")
text(mds, labels=colnames(la), cex=0.9)
mtext("La MDS plot")
The goal here is to identify genes whose expression is stable over a range of samples to use as qPCR controls.
pcx <- pc[,grep("P",colnames(pc))]
pcx <- pcx[which(rowMeans(pcx)>10),]
pcy <- pcx/colSums(pcx) * 1000000
pcy_means <- rowMeans(pcy)
pcy_cv <- apply(pcy,1,sd) / rowMeans(pcy) *100
pcy_logmeans <- log10(rowMeans(pcy))
hist(pcy_cv,xlab="%CoV")
hist(pcy_logmeans,xlab="log10 CPM")
plot(pcy_logmeans,pcy_cv,xlab="baseline expression (log10 CPM)",ylab="coefficient of variation (%)")
grid()
abline(h=110, v=2.5,col="red")
pcydf <- data.frame(pcy_logmeans,pcy_cv)
pcydf_subset <- subset(pcydf,pcy_logmeans>2.5 & pcy_cv<110)
pcy[which(rownames(pcy) %in% rownames(pcydf_subset)),]
## PcHY-0_1 PcHY-0_2 PcHY-0_3 LaP-18_1 LaP-18_2 LaP-18_3 LaP-30_1
## 5733.50661 1069.57641 5918.16104 19.22755 24.77862 21.14243 5073.28464
## LaP-30_2 LaP-30_3 LaP-48_1 LaP-48_2 LaP-48_3
## 1700.64969 3222.79360 427.66288 671.30376 2133.56555
pcydf_subset
## pcy_logmeans pcy_cv
## IUM83_06738 3.336053 104.8562
P cinnamomi. These will be called Pc1 to Pc6
PcHY vs LaP18. “DE early”
PcHY vs LaP30. “DE mid”
PcHY vs LaP48. “DE late”
LaPc18 vs LaPc30 “Change from early to mid”
LaPc30 vs LaPc48 “Change from mid to late”
LaPc18 vs LaPc48 “Difference between early and late”
ss$grp <- sapply(strsplit(ss$V2,"_"),"[[",1)
head(pc)
## LaH-0_1 LaH-0_2 LaH-0_3 LaH-18_1 LaH-18_2 LaH-18_3 LaH-30_1
## IUM83_00001 0 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0 0
## IUM83_00006 0 0 0 0 0 0 0
## LaH-30_2 LaH-30_3 LaH-48_1 LaH-48_2 LaH-48_3 PcHY-0_1 PcHY-0_2
## IUM83_00001 0 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0 0
## IUM83_00006 0 0 0 0 0 1 0
## PcHY-0_3 LaP-18_1 LaP-18_2 LaP-18_3 LaP-30_1 LaP-30_2 LaP-30_3
## IUM83_00001 0 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0 0
## IUM83_00006 0 0 0 0 0 0 0
## LaP-48_1 LaP-48_2 LaP-48_3
## IUM83_00001 0 0 0
## IUM83_00002 0 0 0
## IUM83_00003 0 0 0
## IUM83_00004 0 0 0
## IUM83_00005 0 0 0
## IUM83_00006 0 1 2
sspc1 <- subset(ss,grp=="PcHY-0" | grp=="LaP-18")
sspc1$trt <- as.numeric(sspc1$grp=="LaP-18")
pc1 <- pc[,which(colnames(pc) %in% sspc1$V2)]
head(pc1)
## PcHY-0_1 PcHY-0_2 PcHY-0_3 LaP-18_1 LaP-18_2 LaP-18_3
## IUM83_00001 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0
## IUM83_00006 1 0 0 0 0 0
head(sspc1)
## V1 V2 grp trt
## 13 BS13 PcHY-0_1 PcHY-0 0
## 14 BS14 PcHY-0_2 PcHY-0 0
## 15 BS15 PcHY-0_3 PcHY-0 0
## 16 BS16 LaP-18_1 LaP-18 1
## 17 BS17 LaP-18_2 LaP-18 1
## 18 BS18 LaP-18_3 LaP-18 1
sspc2 <- subset(ss,grp=="PcHY-0" | grp=="LaP-30")
sspc2$trt <- as.numeric(sspc2$grp=="LaP-30")
pc2 <- pc[,which(colnames(pc) %in% sspc2$V2)]
head(pc2)
## PcHY-0_1 PcHY-0_2 PcHY-0_3 LaP-30_1 LaP-30_2 LaP-30_3
## IUM83_00001 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0
## IUM83_00006 1 0 0 0 0 0
head(sspc2)
## V1 V2 grp trt
## 13 BS13 PcHY-0_1 PcHY-0 0
## 14 BS14 PcHY-0_2 PcHY-0 0
## 15 BS15 PcHY-0_3 PcHY-0 0
## 19 BS19 LaP-30_1 LaP-30 1
## 20 BS20 LaP-30_2 LaP-30 1
## 21 BS21 LaP-30_3 LaP-30 1
sspc3 <- subset(ss,grp=="PcHY-0" | grp=="LaP-48")
sspc3$trt <- as.numeric(sspc3$grp=="LaP-48")
pc3 <- pc[,which(colnames(pc) %in% sspc3$V2)]
head(pc3)
## PcHY-0_1 PcHY-0_2 PcHY-0_3 LaP-48_1 LaP-48_2 LaP-48_3
## IUM83_00001 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0
## IUM83_00006 1 0 0 0 1 2
head(sspc3)
## V1 V2 grp trt
## 13 BS13 PcHY-0_1 PcHY-0 0
## 14 BS14 PcHY-0_2 PcHY-0 0
## 15 BS15 PcHY-0_3 PcHY-0 0
## 22 BS22 LaP-48_1 LaP-48 1
## 23 BS23 LaP-48_2 LaP-48 1
## 24 BS24 LaP-48_3 LaP-48 1
sspc4 <- subset(ss,grp=="LaP-18" | grp=="LaP-30")
sspc4$trt <- as.numeric(sspc4$grp=="LaP-30")
pc4 <- pc[,which(colnames(pc) %in% sspc4$V2)]
head(pc4)
## LaP-18_1 LaP-18_2 LaP-18_3 LaP-30_1 LaP-30_2 LaP-30_3
## IUM83_00001 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0
## IUM83_00006 0 0 0 0 0 0
head(sspc4)
## V1 V2 grp trt
## 16 BS16 LaP-18_1 LaP-18 0
## 17 BS17 LaP-18_2 LaP-18 0
## 18 BS18 LaP-18_3 LaP-18 0
## 19 BS19 LaP-30_1 LaP-30 1
## 20 BS20 LaP-30_2 LaP-30 1
## 21 BS21 LaP-30_3 LaP-30 1
sspc5 <- subset(ss,grp=="LaP-30" | grp=="LaP-48")
sspc5$trt <- as.numeric(sspc5$grp=="LaP-48")
pc5 <- pc[,which(colnames(pc) %in% sspc5$V2)]
head(pc5)
## LaP-30_1 LaP-30_2 LaP-30_3 LaP-48_1 LaP-48_2 LaP-48_3
## IUM83_00001 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0
## IUM83_00006 0 0 0 0 1 2
head(sspc5)
## V1 V2 grp trt
## 19 BS19 LaP-30_1 LaP-30 0
## 20 BS20 LaP-30_2 LaP-30 0
## 21 BS21 LaP-30_3 LaP-30 0
## 22 BS22 LaP-48_1 LaP-48 1
## 23 BS23 LaP-48_2 LaP-48 1
## 24 BS24 LaP-48_3 LaP-48 1
sspc6 <- subset(ss,grp=="LaP-18" | grp=="LaP-48")
sspc6$trt <- as.numeric(sspc6$grp=="LaP-48")
pc6 <- pc[,which(colnames(pc) %in% sspc6$V2)]
head(pc6)
## LaP-18_1 LaP-18_2 LaP-18_3 LaP-48_1 LaP-48_2 LaP-48_3
## IUM83_00001 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0
## IUM83_00006 0 0 0 0 1 2
head(sspc6)
## V1 V2 grp trt
## 16 BS16 LaP-18_1 LaP-18 0
## 17 BS17 LaP-18_2 LaP-18 0
## 18 BS18 LaP-18_3 LaP-18 0
## 22 BS22 LaP-48_1 LaP-48 1
## 23 BS23 LaP-48_2 LaP-48 1
## 24 BS24 LaP-48_3 LaP-48 1
Lupin. These will be called La1 to La5
LaH-0 vs LaP18 “DE early”
LaH-0 vs LaP30 “DE mid”
LaH-0 vs LaP48 “DE late”
LaPc18 vs LaPc30 “Change from early to mid”
LaPc30 vs LaPc48 “Change from mid to late”
LaPc18 vs LaPc48 “Difference between early and late”
head(la)
## LaH-0_1 LaH-0_2 LaH-0_3 LaH-18_1 LaH-18_2 LaH-18_3 LaH-30_1
## ENSRNA049758817 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 0 0 0
## ENSRNA049758848 0 0 0 1 0 0 0
## ENSRNA049758851 3 1 1 1 2 1 0
## ENSRNA049758853 0 0 0 0 0 0 0
## LaH-30_2 LaH-30_3 LaH-48_1 LaH-48_2 LaH-48_3 PcHY-0_1 PcHY-0_2
## ENSRNA049758817 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 1 0 0
## ENSRNA049758848 0 0 0 0 0 0 0
## ENSRNA049758851 0 2 2 2 3 0 0
## ENSRNA049758853 0 0 0 0 0 0 0
## PcHY-0_3 LaP-18_1 LaP-18_2 LaP-18_3 LaP-30_1 LaP-30_2 LaP-30_3
## ENSRNA049758817 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 1 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0 0
## ENSRNA049758851 0 0 0 2 0 0 1
## ENSRNA049758853 0 0 0 0 0 0 0
## LaP-48_1 LaP-48_2 LaP-48_3
## ENSRNA049758817 0 0 0
## ENSRNA049758844 0 0 0
## ENSRNA049758846 0 0 0
## ENSRNA049758848 0 0 0
## ENSRNA049758851 0 1 0
## ENSRNA049758853 0 0 0
ssla1 <- subset(ss,grp=="LaH-0" | grp=="LaP-18")
ssla1$trt <- as.numeric(ssla1$grp=="LaP-18")
la1 <- la[,which(colnames(pc) %in% ssla1$V2)]
head(la1)
## LaH-0_1 LaH-0_2 LaH-0_3 LaP-18_1 LaP-18_2 LaP-18_3
## ENSRNA049758817 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 1 0
## ENSRNA049758848 0 0 0 0 0 0
## ENSRNA049758851 3 1 1 0 0 2
## ENSRNA049758853 0 0 0 0 0 0
head(ssla1)
## V1 V2 grp trt
## 1 BS01 LaH-0_1 LaH-0 0
## 2 BS02 LaH-0_2 LaH-0 0
## 3 BS03 LaH-0_3 LaH-0 0
## 16 BS16 LaP-18_1 LaP-18 1
## 17 BS17 LaP-18_2 LaP-18 1
## 18 BS18 LaP-18_3 LaP-18 1
ssla2 <- subset(ss,grp=="LaH-0" | grp=="LaP-30")
ssla2$trt <- as.numeric(ssla2$grp=="LaP-30")
la2 <- la[,which(colnames(pc) %in% ssla2$V2)]
head(la2)
## LaH-0_1 LaH-0_2 LaH-0_3 LaP-30_1 LaP-30_2 LaP-30_3
## ENSRNA049758817 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0
## ENSRNA049758851 3 1 1 0 0 1
## ENSRNA049758853 0 0 0 0 0 0
head(ssla2)
## V1 V2 grp trt
## 1 BS01 LaH-0_1 LaH-0 0
## 2 BS02 LaH-0_2 LaH-0 0
## 3 BS03 LaH-0_3 LaH-0 0
## 19 BS19 LaP-30_1 LaP-30 1
## 20 BS20 LaP-30_2 LaP-30 1
## 21 BS21 LaP-30_3 LaP-30 1
ssla3 <- subset(ss,grp=="LaH-0" | grp=="LaP-48")
ssla3$trt <- as.numeric(ssla3$grp=="LaP-48")
la3 <- la[,which(colnames(pc) %in% ssla3$V2)]
head(la3)
## LaH-0_1 LaH-0_2 LaH-0_3 LaP-48_1 LaP-48_2 LaP-48_3
## ENSRNA049758817 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0
## ENSRNA049758851 3 1 1 0 1 0
## ENSRNA049758853 0 0 0 0 0 0
head(ssla3)
## V1 V2 grp trt
## 1 BS01 LaH-0_1 LaH-0 0
## 2 BS02 LaH-0_2 LaH-0 0
## 3 BS03 LaH-0_3 LaH-0 0
## 22 BS22 LaP-48_1 LaP-48 1
## 23 BS23 LaP-48_2 LaP-48 1
## 24 BS24 LaP-48_3 LaP-48 1
ssla4 <- subset(ss,grp=="LaP-18" | grp=="LaP-30")
ssla4$trt <- as.numeric(ssla4$grp=="LaP-30")
la4 <- la[,which(colnames(pc) %in% ssla4$V2)]
head(la4)
## LaP-18_1 LaP-18_2 LaP-18_3 LaP-30_1 LaP-30_2 LaP-30_3
## ENSRNA049758817 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0
## ENSRNA049758846 0 1 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0
## ENSRNA049758851 0 0 2 0 0 1
## ENSRNA049758853 0 0 0 0 0 0
head(ssla4)
## V1 V2 grp trt
## 16 BS16 LaP-18_1 LaP-18 0
## 17 BS17 LaP-18_2 LaP-18 0
## 18 BS18 LaP-18_3 LaP-18 0
## 19 BS19 LaP-30_1 LaP-30 1
## 20 BS20 LaP-30_2 LaP-30 1
## 21 BS21 LaP-30_3 LaP-30 1
ssla5 <- subset(ss,grp=="LaH-30" | grp=="LaP-48")
ssla5$trt <- as.numeric(ssla5$grp=="LaP-48")
la5 <- la[,which(colnames(pc) %in% ssla4$V2)]
head(la5)
## LaP-18_1 LaP-18_2 LaP-18_3 LaP-30_1 LaP-30_2 LaP-30_3
## ENSRNA049758817 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0
## ENSRNA049758846 0 1 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0
## ENSRNA049758851 0 0 2 0 0 1
## ENSRNA049758853 0 0 0 0 0 0
head(ssla5)
## V1 V2 grp trt
## 7 BS07 LaH-30_1 LaH-30 0
## 8 BS08 LaH-30_2 LaH-30 0
## 9 BS09 LaH-30_3 LaH-30 0
## 22 BS22 LaP-48_1 LaP-48 1
## 23 BS23 LaP-48_2 LaP-48 1
## 24 BS24 LaP-48_3 LaP-48 1
ssla6 <- subset(ss,grp=="LaH-18" | grp=="LaP-48")
ssla6$trt <- as.numeric(ssla6$grp=="LaP-48")
la6 <- la[,which(colnames(pc) %in% ssla6$V2)]
head(la6)
## LaH-18_1 LaH-18_2 LaH-18_3 LaP-48_1 LaP-48_2 LaP-48_3
## ENSRNA049758817 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 0 0
## ENSRNA049758848 1 0 0 0 0 0
## ENSRNA049758851 1 2 1 0 1 0
## ENSRNA049758853 0 0 0 0 0 0
head(ssla6)
## V1 V2 grp trt
## 4 BS04 LaH-18_1 LaH-18 0
## 5 BS05 LaH-18_2 LaH-18 0
## 6 BS06 LaH-18_3 LaH-18 0
## 22 BS22 LaP-48_1 LaP-48 1
## 23 BS23 LaP-48_2 LaP-48 1
## 24 BS24 LaP-48_3 LaP-48 1
Here is a function for differential expression.
run_de <- function(ss,xx){
y <- round(xx)
y <- y[rowMeans(y)>10,]
# MDS
mds <- cmdscale(dist(t(y)))
XMAX=max(mds[,1])*1.1
XMIN=min(mds[,1])*1.1
plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
type = "n" , xlim=c(XMIN,XMAX),main="MDS plot",bty="n")
text(mds, labels=colnames(y) )
# DE
dds <- DESeqDataSetFromMatrix(countData=y, colData = ss, design = ~ trt)
dds <- DESeq(dds)
de <- DESeq2::results(dds)
de <- de[order(de$pvalue),]
up <- rownames(subset(de, log2FoldChange>0 & padj<0.05 ))
dn <- rownames(subset(de, log2FoldChange<0 & padj<0.05 ))
str(up)
str(dn)
# MA plot
sig <-subset(de, padj < 0.05 )
GENESUP <- length(up)
GENESDN <- length(dn)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
xlab="log2 basemean", ylab="log2 foldchange",
pch=19, cex=0.5, col="dark gray",
main="smear plot")
points(log2(sig$baseMean),sig$log2FoldChange,
pch=19, cex=0.5, col="red")
mtext(SUBHEADER)
# heatmap
yn <- y/colSums(y)*1000000
yf <- yn[which(rownames(yn) %in% rownames(de)[1:50]),]
mycols <- gsub("0","yellow",ss$trt)
mycols <- gsub("1","orange",mycols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2( as.matrix(yf), col=colfunc(25),scale="row",
ColSideColors =mycols ,trace="none",
margin = c(10,10), cexRow=0.6, cexCol=0.8 , main="Top 50 genes by p-val")
mtext("yellow=ctrl, orange=trt")
return(de)
}
Here execute the analysis.
pc1de <- run_de(sspc1,pc1)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:3136] "IUM83_01122" "IUM83_03179" "IUM83_07136" "IUM83_07793" ...
## chr [1:5629] "IUM83_04620" "IUM83_16160" "IUM83_01966" "IUM83_01434" ...
head(as.data.frame(pc1de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## IUM83_01122 4210.5812 8.580222 0.16548705 51.84830 0.000000e+00
## IUM83_03179 1473.4189 10.535319 0.27745188 37.97170 0.000000e+00
## IUM83_07136 1169.0198 5.441863 0.09944102 54.72453 0.000000e+00
## IUM83_07793 1210.0379 8.281645 0.18404194 44.99869 0.000000e+00
## IUM83_08479 1066.8770 4.327653 0.11481370 37.69282 0.000000e+00
## IUM83_09638 2561.7692 11.340236 0.23454277 48.35040 0.000000e+00
## IUM83_17710 7709.0796 11.068748 0.22484765 49.22777 0.000000e+00
## IUM83_05885 727.7067 8.790729 0.24167443 36.37426 1.086793e-289
## IUM83_11668 498.3178 4.576079 0.12733863 35.93630 8.284929e-283
## IUM83_11197 316.9235 7.575188 0.21283946 35.59109 1.924317e-277
## IUM83_00096 542.0876 4.584821 0.13169651 34.81354 1.517776e-265
## IUM83_08286 486.5145 8.448189 0.24352890 34.69070 1.087971e-263
## IUM83_17269 5016.9258 9.597839 0.27723921 34.61934 1.292822e-262
## IUM83_09629 1194.5548 8.339565 0.24120753 34.57423 6.164974e-262
## IUM83_06713 879.7268 9.913781 0.28840949 34.37397 6.175151e-259
## IUM83_05707 278.7101 7.571483 0.22635619 33.44942 2.624233e-245
## IUM83_08175 1472.7874 3.182423 0.09517409 33.43792 3.856886e-245
## IUM83_01958 2003.5926 12.079341 0.37615238 32.11289 2.913199e-226
## IUM83_04620 5663.4633 -4.019301 0.12523856 -32.09316 5.492670e-226
## IUM83_03959 340.3035 7.727625 0.24095392 32.07097 1.120151e-225
## padj
## IUM83_01122 0.000000e+00
## IUM83_03179 0.000000e+00
## IUM83_07136 0.000000e+00
## IUM83_07793 0.000000e+00
## IUM83_08479 0.000000e+00
## IUM83_09638 0.000000e+00
## IUM83_17710 0.000000e+00
## IUM83_05885 1.849858e-286
## IUM83_11668 1.253510e-279
## IUM83_11197 2.620342e-274
## IUM83_00096 1.878868e-262
## IUM83_08286 1.234575e-260
## IUM83_17269 1.354181e-259
## IUM83_09629 5.996318e-259
## IUM83_06713 5.605802e-256
## IUM83_05707 2.233386e-242
## IUM83_08175 3.089366e-242
## IUM83_01958 2.203835e-223
## IUM83_04620 3.936510e-223
## IUM83_03959 7.626551e-223
write.table(pc1de,file="pc1de.tsv",quote=FALSE,sep="\t")
pc2de <- run_de(sspc2,pc2)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:4446] "IUM83_01122" "IUM83_02861" "IUM83_04848" "IUM83_07133" ...
## chr [1:5810] "IUM83_00547" "IUM83_03530" "IUM83_18636" "IUM83_02933" ...
head(as.data.frame(pc2de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## IUM83_01122 7235.0810 8.019328 0.1940760 41.32057 0.000000e+00
## IUM83_02861 1223.5433 5.632765 0.1366587 41.21776 0.000000e+00
## IUM83_04848 890.8418 7.428702 0.1914021 38.81201 0.000000e+00
## IUM83_07133 11847.5842 9.606021 0.2452247 39.17231 0.000000e+00
## IUM83_07136 7549.2813 6.810755 0.1524387 44.67865 0.000000e+00
## IUM83_09638 5793.4746 11.178375 0.2824958 39.57006 0.000000e+00
## IUM83_12298 2154.0483 8.204638 0.2141706 38.30890 0.000000e+00
## IUM83_14881 9609.9262 12.084472 0.3163842 38.19556 0.000000e+00
## IUM83_18141 974.1256 4.882919 0.1285064 37.99747 0.000000e+00
## IUM83_06950 6639.1308 6.487868 0.1731896 37.46106 3.967696e-307
## IUM83_18581 1131.4185 5.031393 0.1379533 36.47171 3.115993e-291
## IUM83_09714 1889.6688 6.987938 0.2063924 33.85754 2.810996e-251
## IUM83_06856 1972.8682 8.595594 0.2593064 33.14841 5.970509e-241
## IUM83_07793 3006.0565 8.258433 0.2547470 32.41817 1.522226e-230
## IUM83_14821 1979.4517 10.909206 0.3493220 31.22966 4.217576e-214
## IUM83_17048 445.1894 6.601399 0.2125269 31.06148 7.984541e-212
## IUM83_09629 2188.7958 7.873364 0.2548086 30.89912 1.227230e-209
## IUM83_17269 9577.5134 9.186824 0.2994394 30.68008 1.049685e-206
## IUM83_02136 575.4116 6.686969 0.2201628 30.37285 1.254718e-202
## IUM83_02472 1828.3976 3.743060 0.1245734 30.04701 2.388594e-198
## padj
## IUM83_01122 0.000000e+00
## IUM83_02861 0.000000e+00
## IUM83_04848 0.000000e+00
## IUM83_07133 0.000000e+00
## IUM83_07136 0.000000e+00
## IUM83_09638 0.000000e+00
## IUM83_12298 0.000000e+00
## IUM83_14881 0.000000e+00
## IUM83_18141 0.000000e+00
## IUM83_06950 5.522636e-304
## IUM83_18581 3.942865e-288
## IUM83_09714 3.260521e-248
## IUM83_06856 6.392578e-238
## IUM83_07793 1.513419e-227
## IUM83_14821 3.913629e-211
## IUM83_17048 6.946051e-209
## IUM83_09629 1.004812e-206
## IUM83_17269 8.116978e-204
## IUM83_02136 9.191797e-200
## IUM83_02472 1.662342e-195
write.table(pc2de,file="pc2de.tsv",quote=FALSE,sep="\t")
pc3de <- run_de(sspc3,pc3)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:4396] "IUM83_01168" "IUM83_04438" "IUM83_07133" "IUM83_07136" ...
## chr [1:4229] "IUM83_12944" "IUM83_02435" "IUM83_00547" "IUM83_07943" ...
head(as.data.frame(pc3de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## IUM83_01168 26595.6211 10.779845 0.2516355 42.83912 0.000000e+00
## IUM83_04438 4910.6880 8.384545 0.2111023 39.71792 0.000000e+00
## IUM83_07133 11675.8984 8.857139 0.1991909 44.46557 0.000000e+00
## IUM83_07136 12704.7400 6.832579 0.1453489 47.00813 0.000000e+00
## IUM83_08120 17465.4205 12.019145 0.3127729 38.42771 0.000000e+00
## IUM83_14714 2749.6052 7.938367 0.1977747 40.13843 0.000000e+00
## IUM83_14881 13602.8794 11.853423 0.2804689 42.26287 0.000000e+00
## IUM83_03677 14061.7672 11.724852 0.3144994 37.28100 3.335045e-304
## IUM83_09638 5166.9409 10.284121 0.2789602 36.86591 1.626323e-297
## IUM83_13388 19966.4992 6.253938 0.1703599 36.71015 5.030181e-295
## IUM83_17374 2847.7329 5.705378 0.1566495 36.42130 1.959237e-290
## IUM83_09281 1897.7237 8.664962 0.2572806 33.67903 1.172312e-248
## IUM83_03901 1159.9475 3.613264 0.1098924 32.88003 4.241041e-237
## IUM83_16097 946.2560 6.406059 0.1965262 32.59646 4.603383e-233
## IUM83_14688 595.5363 6.404594 0.2032897 31.50477 7.473749e-218
## IUM83_16592 992.9030 6.241454 0.1992827 31.31960 2.524807e-215
## IUM83_04045 2859.2681 4.089208 0.1336976 30.58549 1.908783e-205
## IUM83_02144 1531.9858 8.814803 0.2944430 29.93721 6.455690e-197
## IUM83_07793 2029.2710 6.957828 0.2330567 29.85465 7.639170e-196
## IUM83_06338 1200.5014 7.601838 0.2559530 29.70013 7.649825e-194
## padj
## IUM83_01168 0.000000e+00
## IUM83_04438 0.000000e+00
## IUM83_07133 0.000000e+00
## IUM83_07136 0.000000e+00
## IUM83_08120 0.000000e+00
## IUM83_14714 0.000000e+00
## IUM83_14881 0.000000e+00
## IUM83_03677 5.875516e-301
## IUM83_09638 2.546822e-294
## IUM83_13388 7.089538e-292
## IUM83_17374 2.510317e-287
## IUM83_09281 1.376881e-245
## IUM83_03901 4.597940e-234
## IUM83_16097 4.634292e-230
## IUM83_14688 7.022335e-215
## IUM83_16592 2.224039e-212
## IUM83_04045 1.582494e-202
## IUM83_02144 5.054805e-194
## IUM83_07793 5.666656e-193
## IUM83_06338 5.390832e-191
write.table(pc3de,file="pc3de.tsv",quote=FALSE,sep="\t")
pc4de <- run_de(sspc4,pc4)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:1302] "IUM83_14881" "IUM83_18967" "IUM83_05483" "IUM83_02472" ...
## chr [1:1058] "IUM83_05896" "IUM83_09627" "IUM83_02399" "IUM83_05003" ...
head(as.data.frame(pc4de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## IUM83_14881 1032.70698 3.497833 0.2150541 16.264898 1.751574e-59
## IUM83_18967 305.59168 3.126049 0.2591336 12.063467 1.647004e-33
## IUM83_05483 380.25945 2.180499 0.1811812 12.034904 2.329038e-33
## IUM83_02472 177.29834 4.039189 0.3412478 11.836529 2.526975e-32
## IUM83_06118 448.99599 3.202126 0.2895173 11.060225 1.956066e-28
## IUM83_04980 477.07940 3.703995 0.3371572 10.985958 4.464768e-28
## IUM83_01125 170.48246 4.808415 0.4421747 10.874470 1.525404e-27
## IUM83_06986 228.23889 5.170809 0.4819390 10.729179 7.425272e-27
## IUM83_14885 160.49172 4.669531 0.4388062 10.641443 1.911451e-26
## IUM83_01164 1230.34677 1.606912 0.1522933 10.551428 5.003068e-26
## IUM83_05896 440.21172 -1.507009 0.1436848 -10.488292 9.778014e-26
## IUM83_03677 383.42878 2.996023 0.3040228 9.854599 6.547729e-23
## IUM83_00594 159.03278 3.823932 0.3882536 9.849058 6.918934e-23
## IUM83_14714 167.00309 5.541560 0.5662950 9.785642 1.297687e-22
## IUM83_09229 176.77497 3.488835 0.3606119 9.674764 3.859804e-22
## IUM83_17870 186.16865 3.417547 0.3710014 9.211682 3.210530e-20
## IUM83_01141 495.22366 2.758553 0.3011541 9.159939 5.192678e-20
## IUM83_02522 158.49264 3.409524 0.3770207 9.043333 1.519657e-19
## IUM83_09627 90.70647 -2.400034 0.2682898 -8.945679 3.696703e-19
## IUM83_02399 100.09166 -2.133110 0.2420581 -8.812387 1.225086e-18
## padj
## IUM83_14881 1.651384e-55
## IUM83_18967 7.319390e-30
## IUM83_05483 7.319390e-30
## IUM83_02472 5.956081e-29
## IUM83_06118 3.688357e-25
## IUM83_04980 7.015639e-25
## IUM83_01125 2.054501e-24
## IUM83_06986 8.750683e-24
## IUM83_14885 2.002351e-23
## IUM83_01164 4.716892e-23
## IUM83_05896 8.380647e-23
## IUM83_03677 5.017824e-20
## IUM83_00594 5.017824e-20
## IUM83_14714 8.738993e-20
## IUM83_09229 2.426016e-19
## IUM83_17870 1.891805e-17
## IUM83_01141 2.879798e-17
## IUM83_02522 7.959626e-17
## IUM83_09627 1.834343e-16
## IUM83_02399 5.775055e-16
write.table(pc4de,file="pc4de.tsv",quote=FALSE,sep="\t")
pc5de <- run_de(sspc5,pc5)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:3070] "IUM83_02591" "IUM83_09989" "IUM83_12098" "IUM83_10775" ...
## chr [1:2671] "IUM83_02923" "IUM83_12298" "IUM83_19239" "IUM83_16483" ...
head(as.data.frame(pc5de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## IUM83_02923 428.8610 -3.952064 0.2078462 -19.01437 1.296895e-80
## IUM83_12298 1104.1015 -3.117178 0.2127954 -14.64871 1.373048e-48
## IUM83_19239 2122.1636 -2.934087 0.2048902 -14.32029 1.634400e-46
## IUM83_16483 1530.3503 -3.501404 0.2522163 -13.88255 8.082090e-44
## IUM83_02591 417.8171 3.468904 0.2534136 13.68871 1.186027e-42
## IUM83_09989 12942.3256 4.836334 0.3734421 12.95069 2.328438e-38
## IUM83_12098 315.3544 5.198795 0.4058562 12.80945 1.451630e-37
## IUM83_10775 325.5029 3.736130 0.3038525 12.29586 9.533101e-35
## IUM83_13909 1161.0556 2.631056 0.2165579 12.14943 5.776380e-34
## IUM83_08149 1133.8722 4.412170 0.3686990 11.96686 5.299435e-33
## IUM83_04127 227.2871 4.600187 0.3890590 11.82388 2.937922e-32
## IUM83_08629 481.5565 -2.273043 0.1935318 -11.74506 7.486846e-32
## IUM83_12624 306.0436 3.103358 0.2654812 11.68956 1.441343e-31
## IUM83_01122 3724.8799 -3.052227 0.2726287 -11.19554 4.287633e-29
## IUM83_00813 194.0046 3.797088 0.3419953 11.10275 1.216421e-28
## IUM83_07714 305.2219 2.359421 0.2133943 11.05662 2.036223e-28
## IUM83_07279 937.1336 4.194469 0.3823316 10.97076 5.282465e-28
## IUM83_12617 160.0210 5.024564 0.4605009 10.91108 1.020324e-27
## IUM83_18541 469.0208 2.568360 0.2370293 10.83562 2.333717e-27
## IUM83_04460 1897.8039 -2.141299 0.1977379 -10.82897 2.509558e-27
## padj
## IUM83_02923 1.572096e-76
## IUM83_12298 8.322043e-45
## IUM83_19239 6.604066e-43
## IUM83_16483 2.449277e-40
## IUM83_02591 2.875404e-39
## IUM83_09989 4.704221e-35
## IUM83_12098 2.513808e-34
## IUM83_10775 1.444503e-31
## IUM83_13909 7.780142e-31
## IUM83_08149 6.423975e-30
## IUM83_04127 3.237590e-29
## IUM83_08629 7.562963e-29
## IUM83_12624 1.343996e-28
## IUM83_01122 3.712478e-26
## IUM83_00813 9.830306e-26
## IUM83_07714 1.542693e-25
## IUM83_07279 3.766709e-25
## IUM83_12617 6.871314e-25
## IUM83_18541 1.488912e-24
## IUM83_04460 1.521043e-24
write.table(pc5de,file="pc5de.tsv",quote=FALSE,sep="\t")
la1de <- run_de(ssla1,la1)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:6517] "TanjilG_25668" "TanjilG_13054" "TanjilG_31530" ...
## chr [1:7035] "TanjilG_21738" "TanjilG_30005" "TanjilG_03097" ...
head(as.data.frame(la1de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## TanjilG_21738 6084.1400 -5.652533 0.13932033 -40.57220 0.000000e+00
## TanjilG_25668 3933.4972 3.767635 0.10574325 35.63003 4.804090e-278
## TanjilG_30005 1200.9191 -6.397760 0.19248261 -33.23812 3.031153e-242
## TanjilG_03097 1621.5620 -5.792443 0.17643668 -32.83015 2.186895e-236
## TanjilG_08363 1845.6019 -3.806619 0.12165854 -31.28937 6.509543e-215
## TanjilG_03248 21390.5900 -7.396273 0.24434489 -30.26981 2.863358e-201
## TanjilG_10617 1120.1158 -5.145384 0.17660603 -29.13482 1.301040e-186
## TanjilG_26762 2768.7370 -8.161863 0.28742427 -28.39657 2.229426e-177
## TanjilG_13054 9628.9179 5.093071 0.17972694 28.33783 1.182407e-176
## TanjilG_06910 1323.2163 -3.372838 0.12117061 -27.83545 1.616033e-170
## TanjilG_25621 4335.5609 -2.136993 0.07878027 -27.12599 4.861579e-162
## TanjilG_31530 8652.1362 4.561893 0.16840246 27.08923 1.318711e-161
## TanjilG_01633 1052.7180 -5.487512 0.20438165 -26.84934 8.586732e-159
## TanjilG_00341 867.8308 5.108745 0.19028595 26.84773 8.966020e-159
## TanjilG_30454 1151.2831 5.869112 0.21960382 26.72591 2.353576e-157
## TanjilG_23160 2955.6248 -4.705190 0.17617061 -26.70814 3.785916e-157
## TanjilG_06104 576.4564 -5.507389 0.21112921 -26.08540 5.339623e-150
## TanjilG_14202 2118.7073 -5.539773 0.21265074 -26.05104 1.309293e-149
## TanjilG_27770 2787.6047 -3.449587 0.13383062 -25.77577 1.657923e-146
## TanjilG_31035 12138.2929 -4.078766 0.15881322 -25.68279 1.820186e-145
## padj
## TanjilG_21738 0.000000e+00
## TanjilG_25668 6.118729e-274
## TanjilG_30005 2.573752e-238
## TanjilG_03097 1.392669e-232
## TanjilG_08363 3.316352e-211
## TanjilG_03248 1.215639e-197
## TanjilG_10617 4.734483e-183
## TanjilG_26762 7.098772e-174
## TanjilG_13054 3.346607e-173
## TanjilG_06910 4.116522e-167
## TanjilG_25621 1.125809e-158
## TanjilG_31530 2.799293e-158
## TanjilG_01633 1.631367e-155
## TanjilG_00341 1.631367e-155
## TanjilG_30454 3.996843e-154
## TanjilG_23160 6.027415e-154
## TanjilG_06104 8.000953e-147
## TanjilG_14202 1.852868e-146
## TanjilG_27770 2.222751e-143
## TanjilG_31035 2.318280e-142
write.table(la1de,file="la1de.tsv",quote=FALSE,sep="\t")
la2de <- run_de(ssla2,la2)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:7798] "TanjilG_31530" "TanjilG_32339" "TanjilG_22068" ...
## chr [1:7818] "TanjilG_18821" "TanjilG_00128" "TanjilG_09966" ...
head(as.data.frame(la2de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## TanjilG_31530 18832.596 5.970443 0.1184584 50.40118 0.000000e+00
## TanjilG_32339 3137.963 5.036922 0.1504717 33.47422 1.143492e-245
## TanjilG_22068 3672.811 8.102585 0.2528447 32.04570 2.519979e-225
## TanjilG_13054 24326.340 6.705579 0.2100138 31.92923 1.049489e-223
## TanjilG_13015 4361.878 4.665651 0.1476112 31.60771 2.893084e-219
## TanjilG_17787 1569.765 7.148923 0.2268092 31.51955 4.689117e-218
## TanjilG_12890 2251.967 6.526898 0.2113552 30.88117 2.137887e-209
## TanjilG_28363 7500.812 6.952173 0.2329029 29.85009 8.755160e-196
## TanjilG_00065 2936.097 3.472813 0.1191328 29.15078 8.166336e-187
## TanjilG_21470 4248.817 4.295686 0.1492821 28.77563 4.330081e-182
## TanjilG_18821 2292.248 -4.777542 0.1663621 -28.71773 2.291792e-181
## TanjilG_17822 15372.085 7.411297 0.2608743 28.40946 1.545207e-177
## TanjilG_00128 2405.507 -7.393965 0.2605994 -28.37291 4.366486e-177
## TanjilG_14085 4546.806 4.318809 0.1542369 28.00114 1.573583e-172
## TanjilG_09966 6826.922 -4.020693 0.1443088 -27.86173 7.766456e-171
## TanjilG_17902 26140.268 3.882193 0.1401819 27.69397 8.253205e-169
## TanjilG_07217 6522.869 3.879360 0.1407747 27.55723 3.624782e-167
## TanjilG_01075 1773.081 5.692145 0.2067665 27.52934 7.823235e-167
## TanjilG_20161 1733.641 7.952990 0.2897321 27.44946 7.051364e-166
## TanjilG_06910 1053.572 -4.714313 0.1736479 -27.14869 2.623766e-162
## padj
## TanjilG_31530 0.000000e+00
## TanjilG_32339 1.447146e-241
## TanjilG_22068 2.126106e-221
## TanjilG_13054 6.640901e-220
## TanjilG_13015 1.464537e-215
## TanjilG_17787 1.978104e-214
## TanjilG_12890 7.730294e-206
## TanjilG_28363 2.770023e-192
## TanjilG_00065 2.296646e-183
## TanjilG_21470 1.095987e-178
## TanjilG_18821 5.273414e-178
## TanjilG_17822 3.259227e-174
## TanjilG_00128 8.501547e-174
## TanjilG_14085 2.844925e-169
## TanjilG_09966 1.310512e-167
## TanjilG_17902 1.305605e-165
## TanjilG_07217 5.396874e-164
## TanjilG_01075 1.100077e-163
## TanjilG_20161 9.393531e-163
## TanjilG_06910 3.320507e-159
write.table(la2de,file="la2de.tsv",quote=FALSE,sep="\t")
la3de <- run_de(ssla3,la3)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:9142] "TanjilG_14085" "TanjilG_22068" "TanjilG_32339" ...
## chr [1:9822] "TanjilG_05765" "TanjilG_03248" "TanjilG_13344" ...
head(as.data.frame(la3de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## TanjilG_14085 10545.602 6.012970 0.1505238 39.94698 0.000000e+00
## TanjilG_22068 11970.307 10.242473 0.2683612 38.16674 0.000000e+00
## TanjilG_32339 5098.775 6.192089 0.1656661 37.37693 9.263895e-306
## TanjilG_04385 12641.106 7.233638 0.1951156 37.07360 7.485495e-301
## TanjilG_15363 17076.249 10.933807 0.2950063 37.06296 1.110509e-300
## TanjilG_05765 5216.664 -5.403319 0.1457949 -37.06109 1.190348e-300
## TanjilG_10317 2384.184 6.416640 0.1753643 36.59035 4.073202e-293
## TanjilG_13823 19788.341 5.507493 0.1549678 35.53960 1.202843e-276
## TanjilG_13054 34530.081 7.650202 0.2153747 35.52044 2.377470e-276
## TanjilG_11778 3029.272 8.226735 0.2357189 34.90062 7.275945e-267
## TanjilG_07217 7117.597 4.467583 0.1312137 34.04815 4.323636e-254
## TanjilG_17902 37227.830 4.870142 0.1437748 33.87341 1.641567e-251
## TanjilG_00940 2665.770 8.374366 0.2475366 33.83082 6.950055e-251
## TanjilG_31530 40303.515 7.513981 0.2227453 33.73352 1.865205e-249
## TanjilG_25612 11146.268 6.413779 0.1913527 33.51809 2.627098e-246
## TanjilG_03248 13281.497 -10.355936 0.3128453 -33.10242 2.743195e-240
## TanjilG_28363 13187.031 8.203602 0.2489327 32.95510 3.575806e-238
## TanjilG_13842 6945.655 4.757204 0.1454342 32.71036 1.112748e-234
## TanjilG_32922 6553.516 5.109672 0.1563541 32.68012 2.992753e-234
## TanjilG_13015 7088.361 5.828878 0.1786170 32.63339 1.378709e-233
## padj
## TanjilG_14085 0.000000e+00
## TanjilG_22068 0.000000e+00
## TanjilG_32339 7.739984e-302
## TanjilG_04385 4.690598e-297
## TanjilG_15363 4.972678e-297
## TanjilG_05765 4.972678e-297
## TanjilG_10317 1.458497e-289
## TanjilG_13823 3.768657e-273
## TanjilG_13054 6.621253e-273
## TanjilG_11778 1.823716e-263
## TanjilG_07217 9.851994e-251
## TanjilG_17902 3.428823e-248
## TanjilG_00940 1.340024e-247
## TanjilG_31530 3.339383e-246
## TanjilG_25612 4.389881e-243
## TanjilG_03248 4.297386e-237
## TanjilG_28363 5.272211e-235
## TanjilG_13842 1.549502e-231
## TanjilG_32922 3.948071e-231
## TanjilG_13015 1.727868e-230
write.table(la3de,file="la3de.tsv",quote=FALSE,sep="\t")
la4de <- run_de(ssla4,la4)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:6149] "TanjilG_24871" "TanjilG_32339" "TanjilG_21471" ...
## chr [1:5358] "TanjilG_18821" "TanjilG_29255" "TanjilG_28251" ...
head(as.data.frame(la4de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## TanjilG_18821 958.6126 -3.368825 0.13898367 -24.23900 8.637181e-130
## TanjilG_24871 1605.7976 1.872209 0.08259049 22.66858 9.150402e-114
## TanjilG_29255 2061.9072 -3.454959 0.15263837 -22.63493 1.963620e-113
## TanjilG_32339 3883.5178 2.217830 0.10550744 21.02060 4.250364e-98
## TanjilG_21471 308.1544 2.863669 0.15227636 18.80574 6.777162e-79
## TanjilG_28744 3799.6734 1.635891 0.08740344 18.71655 3.628904e-78
## TanjilG_25070 1107.8139 4.176151 0.22321842 18.70881 4.196455e-78
## TanjilG_13779 346.0804 5.484526 0.30649436 17.89438 1.304470e-71
## TanjilG_05531 1692.1152 3.534794 0.20147398 17.54467 6.533229e-69
## TanjilG_28251 3274.8185 -1.853010 0.10705301 -17.30927 4.004298e-67
## TanjilG_31962 2778.2436 2.515711 0.14708902 17.10332 1.401760e-65
## TanjilG_02661 3432.6499 3.347301 0.19958870 16.77099 3.978011e-63
## TanjilG_01449 888.6088 3.454707 0.21214784 16.28443 1.273091e-59
## TanjilG_06979 6173.2676 -1.258842 0.07789539 -16.16068 9.550316e-59
## TanjilG_09968 3110.7875 -1.244091 0.07745535 -16.06204 4.707808e-58
## TanjilG_03391 7387.6291 1.938783 0.12171868 15.92839 4.025929e-57
## TanjilG_06485 1715.9094 4.857996 0.30931283 15.70577 1.380907e-55
## TanjilG_05088 1458.3840 -4.076374 0.26004080 -15.67590 2.210700e-55
## TanjilG_21567 909.0501 -2.522264 0.16297137 -15.47673 4.981707e-54
## TanjilG_07959 968.6645 1.628447 0.10611114 15.34661 3.731661e-53
## padj
## TanjilG_18821 2.156963e-125
## TanjilG_24871 1.142565e-109
## TanjilG_29255 1.634583e-109
## TanjilG_32339 2.653608e-94
## TanjilG_21471 3.384921e-75
## TanjilG_28744 1.497115e-74
## TanjilG_25070 1.497115e-74
## TanjilG_13779 4.072066e-68
## TanjilG_05531 1.812826e-65
## TanjilG_28251 9.999933e-64
## TanjilG_31962 3.182378e-62
## TanjilG_02661 8.278572e-60
## TanjilG_01449 2.445607e-56
## TanjilG_06979 1.703572e-55
## TanjilG_09968 7.837873e-55
## TanjilG_03391 6.283720e-54
## TanjilG_06485 2.028552e-52
## TanjilG_05088 3.067100e-52
## TanjilG_21567 6.547798e-51
## TanjilG_07959 4.659538e-50
write.table(la4de,file="la4de.tsv",quote=FALSE,sep="\t")
la5de <- run_de(ssla5,la5)
## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## chr [1:6149] "TanjilG_24871" "TanjilG_32339" "TanjilG_21471" ...
## chr [1:5358] "TanjilG_18821" "TanjilG_29255" "TanjilG_28251" ...
head(as.data.frame(la5de),20)
## baseMean log2FoldChange lfcSE stat pvalue
## TanjilG_18821 958.6126 -3.368825 0.13898367 -24.23900 8.637181e-130
## TanjilG_24871 1605.7976 1.872209 0.08259049 22.66858 9.150402e-114
## TanjilG_29255 2061.9072 -3.454959 0.15263837 -22.63493 1.963620e-113
## TanjilG_32339 3883.5178 2.217830 0.10550744 21.02060 4.250364e-98
## TanjilG_21471 308.1544 2.863669 0.15227636 18.80574 6.777162e-79
## TanjilG_28744 3799.6734 1.635891 0.08740344 18.71655 3.628904e-78
## TanjilG_25070 1107.8139 4.176151 0.22321842 18.70881 4.196455e-78
## TanjilG_13779 346.0804 5.484526 0.30649436 17.89438 1.304470e-71
## TanjilG_05531 1692.1152 3.534794 0.20147398 17.54467 6.533229e-69
## TanjilG_28251 3274.8185 -1.853010 0.10705301 -17.30927 4.004298e-67
## TanjilG_31962 2778.2436 2.515711 0.14708902 17.10332 1.401760e-65
## TanjilG_02661 3432.6499 3.347301 0.19958870 16.77099 3.978011e-63
## TanjilG_01449 888.6088 3.454707 0.21214784 16.28443 1.273091e-59
## TanjilG_06979 6173.2676 -1.258842 0.07789539 -16.16068 9.550316e-59
## TanjilG_09968 3110.7875 -1.244091 0.07745535 -16.06204 4.707808e-58
## TanjilG_03391 7387.6291 1.938783 0.12171868 15.92839 4.025929e-57
## TanjilG_06485 1715.9094 4.857996 0.30931283 15.70577 1.380907e-55
## TanjilG_05088 1458.3840 -4.076374 0.26004080 -15.67590 2.210700e-55
## TanjilG_21567 909.0501 -2.522264 0.16297137 -15.47673 4.981707e-54
## TanjilG_07959 968.6645 1.628447 0.10611114 15.34661 3.731661e-53
## padj
## TanjilG_18821 2.156963e-125
## TanjilG_24871 1.142565e-109
## TanjilG_29255 1.634583e-109
## TanjilG_32339 2.653608e-94
## TanjilG_21471 3.384921e-75
## TanjilG_28744 1.497115e-74
## TanjilG_25070 1.497115e-74
## TanjilG_13779 4.072066e-68
## TanjilG_05531 1.812826e-65
## TanjilG_28251 9.999933e-64
## TanjilG_31962 3.182378e-62
## TanjilG_02661 8.278572e-60
## TanjilG_01449 2.445607e-56
## TanjilG_06979 1.703572e-55
## TanjilG_09968 7.837873e-55
## TanjilG_03391 6.283720e-54
## TanjilG_06485 2.028552e-52
## TanjilG_05088 3.067100e-52
## TanjilG_21567 6.547798e-51
## TanjilG_07959 4.659538e-50
write.table(la5de,file="la5de.tsv",quote=FALSE,sep="\t")
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /ceph-g/opt/miniconda3/envs/R40/lib/libopenblasp-r0.3.9.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gplots_3.1.0 Biostrings_2.56.0
## [3] XVector_0.28.0 reshape2_1.4.4
## [5] mitch_1.0.10 DESeq2_1.28.1
## [7] SummarizedExperiment_1.18.2 DelayedArray_0.14.1
## [9] matrixStats_0.57.0 Biobase_2.48.0
## [11] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [13] IRanges_2.22.2 S4Vectors_0.26.1
## [15] BiocGenerics_0.34.0
##
## loaded via a namespace (and not attached):
## [1] bit64_4.0.5 splines_4.0.1 gtools_3.8.2
## [4] shiny_1.4.0.2 blob_1.2.1 GenomeInfoDbData_1.2.3
## [7] yaml_2.2.1 pillar_1.4.4 RSQLite_2.2.1
## [10] lattice_0.20-41 glue_1.4.1 digest_0.6.25
## [13] RColorBrewer_1.1-2 promises_1.1.1 colorspace_1.4-1
## [16] htmltools_0.5.0 httpuv_1.5.4 Matrix_1.2-18
## [19] plyr_1.8.6 XML_3.99-0.5 pkgconfig_2.0.3
## [22] genefilter_1.70.0 zlibbioc_1.34.0 purrr_0.3.4
## [25] xtable_1.8-4 scales_1.1.1 later_1.1.0.1
## [28] BiocParallel_1.22.0 tibble_3.0.1 annotate_1.66.0
## [31] echarts4r_0.3.3 generics_0.0.2 ggplot2_3.3.2
## [34] ellipsis_0.3.1 survival_3.2-3 magrittr_1.5
## [37] crayon_1.3.4 mime_0.9 memoise_1.1.0
## [40] evaluate_0.14 GGally_2.0.0 MASS_7.3-51.6
## [43] beeswarm_0.2.3 tools_4.0.1 lifecycle_0.2.0
## [46] stringr_1.4.0 munsell_0.5.0 locfit_1.5-9.4
## [49] AnnotationDbi_1.50.3 compiler_4.0.1 caTools_1.18.0
## [52] rlang_0.4.6 grid_4.0.1 RCurl_1.98-1.2
## [55] htmlwidgets_1.5.1 bitops_1.0-6 rmarkdown_2.3
## [58] gtable_0.3.0 DBI_1.1.0 reshape_0.8.8
## [61] R6_2.4.1 gridExtra_2.3 knitr_1.28
## [64] dplyr_1.0.0 fastmap_1.0.1 bit_4.0.4
## [67] KernSmooth_2.23-17 stringi_1.4.6 Rcpp_1.0.4.6
## [70] vctrs_0.3.1 geneplotter_1.66.0 tidyselect_1.1.0
## [73] xfun_0.15