Tohidul: baseline 3: Priming gene expression

Background
Load data
Sample sheet
MDS
Functions
Split data into contrast groups
DE
Pathway analysis
Mitch reports
Make PDFs
Session information
References

Source codes: https://github.com/markziemann/tohidul_rnaseq

Background

Here we have n=3 control (H2O; “H”) and n=3 seaweed based fertiliser treatments:

S80 ANDP
S94 DP
S93 AN

Reads underwent quality trimming using Skewer (Jiang et al, 2014). I mapped the reads to the Arabidopsis transcriptome (TAIR10/Ensembl47) using Kallisto (Bray et al, 2016). Expression counts were loaded into R and then DE analysis was performed with DESeq2 (Love et al, 2014). Enrichment analysis was performed using Plant Reactome genesets with the Mitch package (Kaspi & Ziemann 2020).

suppressPackageStartupMessages({
  library("tidyverse")
  library("reshape2")
  library("gplots")
  library("DESeq2")
  library("mitch")
  library("kableExtra")
  library("eulerr")
  library("UpSetR")
  library("vioplot")
  library("clusterProfiler")
})

Load data

Here we load the data in from the aligner. Take note that the columns are arranged in order.

tmp <- read.table("3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3"))
x$gene <- sapply(strsplit(rownames(x),"\\."),"[[",1)
xx <- aggregate(. ~ gene, x, sum)
rownames(xx) <- xx$gene
xx$gene = NULL
xx <- xx[,order(colnames(xx))]

Sample sheet

The sample sheet is read in and put in order as well.

ss <- read.table("samplesheet.tsv",header=TRUE)
ss <- ss[order(ss$Run),]
ss$label <- gsub("-0hpi","",ss$SampleName)
# check that names are in order
colnames(xx) == ss$Run

##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

# change data header
colnames(xx) <- ss$label
kbl(ss) %>% kable_styling()

	Run	SampleName	Treatment	label
6	SRR11404271	S80-0hpi_R3	Seaweed_16280	S80_R3
5	SRR11404272	S80-0hpi_R2	Seaweed_16280	S80_R2
4	SRR11404273	S80-0hpi_R1	Seaweed_16280	S80_R1
12	SRR11404287	S94-0hpi_R3	Seaweed_15294	S94_R3
11	SRR11404288	S94-0hpi_R2	Seaweed_15294	S94_R2
10	SRR11404289	S94-0hpi_R1	Seaweed_15294	S94_R1
3	SRR11404301	H-0hpi_R3	Water	H_R3
9	SRR11404304	S93-0hpi_R3	Seaweed_15293	S93_R3
8	SRR11404305	S93-0hpi_R2	Seaweed_15293	S93_R2
7	SRR11404306	S93-0hpi_R1	Seaweed_15293	S93_R1
2	SRR11404312	H-0hpi_R2	Water	H_R2
1	SRR11404313	H-0hpi_R1	Water	H_R1

MDS

MDS is just like PCA. The more similar (correlated) the data sets are the closer they will appear on the scatterplot.

cols <- as.numeric(factor(ss$Treatment))
plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2", 
  col=cols, cex=4 , main="MDS plot")
text(cmdscale(dist(t(xx))), labels=ss$label )

#colfunc <- colorRampPalette(c("white", "yellow", "orange" ,  "red", "darkred"))
colfunc <- colorRampPalette(c("white","blue"))

heatmap.2(cor(xx,method="pearson"),col=colfunc(25),trace="none",
  scale="none",margin=c(10,10),main="Pearson correlation")

heatmap.2(cor(xx,method="spearman"),col=colfunc(25),trace="none",
  scale="none",margin=c(10,10),main="Spearman correlation")

Functions

run_de <- function(ss,xx){
xx <- xx[which(rowMeans(xx)>10),]
y <- round(xx)
# MDS
mds <- cmdscale(dist(t(y)))
XMAX=max(mds[,1])*1.1
XMIN=min(mds[,1])*1.1
plot( mds , xlab="Coordinate 1", ylab="Coordinate 2",
  type = "n" , xlim=c(XMIN,XMAX),main="MDS plot",bty="n")
text(mds, labels=colnames(y) )
# DE
dds <- DESeqDataSetFromMatrix(countData=y, colData = ss, design = ~ trt)
dds <- DESeq(dds)
de <- DESeq2::results(dds)
de <- de[order(de$pvalue),]
up <- rownames(subset(de, log2FoldChange>0 & padj<0.05 ))
dn <- rownames(subset(de, log2FoldChange<0 & padj<0.05 ))
str(up)
str(dn)

# MA plot
sig <-subset(de, padj < 0.05 )
GENESUP <- length(up)
GENESDN <- length(dn)
SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down")
ns <-subset(de, padj > 0.05 )
plot(log2(de$baseMean),de$log2FoldChange,
     xlab="log2 basemean", ylab="log2(fold change)",
     pch=19, cex=1, col="dark gray",
     ylim = c(-6,6),
     main="smear plot")
points(log2(sig$baseMean),sig$log2FoldChange,
      pch=19, cex=1, col="red")
mtext(SUBHEADER)

# Volcano
NLAB=12
plot(de$log2FoldChange, -log10(de$pvalue) ,
    xlab="-log2(fold change)", ylab="-log10(p-value)",
    xlim = c(-6,6),
    pch=19, cex=1, col="dark gray",
    main="smear plot")
    grid()
mtext(SUBHEADER)
points(sig$log2FoldChange, -log10(sig$pvalue) ,
     pch=19, cex=1, col="red")
text(head(sig$log2FoldChange,NLAB), head(-log10(sig$pvalue),NLAB)+0.5 , 
    labels=head(rownames(sig),NLAB) , cex=1 )

# organellar genomes
chl <- de[grep("ATCG",rownames(de)),]
mit <- de[grep("ATMG",rownames(de)),]
NLAB=12 
plot(de$log2FoldChange, -log10(de$pvalue) ,
    xlab="-log2(fold change)", ylab="-log10(p-value)",
    xlim = c(-6,6),
    pch=19, cex=1, col="dark gray",
    main="smear plot: organellar genomes")
    grid()
    abline(h=tail(-log10(sig$pvalue),1),lty=2,lwd=2)
mtext("Grey: nuclear, Red: plastid, Blue: mito")
points(chl$log2FoldChange, -log10(chl$pvalue) ,
     pch=19, cex=1, col="red")
points(mit$log2FoldChange, -log10(mit$pvalue) ,
     pch=19, cex=1, col="blue")

# summary plot
chl_up <- nrow(subset(chl,padj<0.05 & log2FoldChange>1))
chl_dn <- nrow(subset(chl,padj<0.05 & log2FoldChange<1))
mit_up <- nrow(subset(mit,padj<0.05 & log2FoldChange>1))
mit_dn <- nrow(subset(mit,padj<0.05 & log2FoldChange<1))
sig_up <- length(up)
sig_dn <- length(dn)
n_up <- sig_up - ( chl_up + mit_up )
n_dn <- sig_dn - ( chl_dn + mit_dn )

# barplot
par(mar=c(5,10,2,2))
mycounts <- c("nuclear up"=sig_up,
    "chl up"=chl_up,
    "mit up"=mit_up,
    "nuclear dn"=sig_dn,
    "chl dn"=chl_dn,
    "mit dn"=mit_dn)
mycounts
barplot(mycounts,
    horiz=TRUE,
    las=1,
    main="number of DE genes")
par(mar=c(5.1,4.1,4.1,2.1))

# heatmap
yn <- y/colSums(y)*1000000
yf <- yn[which(rownames(yn) %in% rownames(de)[1:50]),]
mycols <- gsub("0","yellow",ss$trt)
mycols <- gsub("1","orange",mycols)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(  as.matrix(yf), col=colfunc(25),scale="row",
    ColSideColors =mycols ,trace="none",
    margin = c(10,10), cexRow=0.6, cexCol=0.8 , main="Top 50 genes by p-val")
mtext("yellow=ctrl, orange=trt")
return(de)
}

mitch_barplot <- function(res){
  sig <- res$enrichment_result
  sig <- head( sig[order(sig$pANOVA),] ,30)
  sig <- sig[order(sig$s.dist),]
  par(mar=c(3,25,1,1)); barplot(sig$s.dist,horiz=TRUE,las=2,cex.names = 0.6,cex.axis = 0.6,
    names.arg=sig$set,main="Enrichment score") ;grid()
}

Split data into contrast groups

ss80 <- subset(ss,Treatment=="Seaweed_16280"|Treatment=="Water")
ss80$trt <- as.numeric(grepl("Seaweed",ss80$Treatment))
xx80 <- xx[,which(colnames(xx) %in% ss80$label)]

DE

Here, were using DESeq2 to perform differential expression analysis to understand gene expression changes caused by the different formulations. Enrichment analysis is performed at the end using mitch with all 4 timepoints.

de80 <- run_de(ss80,xx80)

## converting counts to integer mode

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

##  chr [1:729] "AT1G65970" "AT4G06665" "AT3G11510" "AT5G64100" "AT1G24996" ...
##  chr [1:730] "AT2G47450" "AT1G55673" "AT2G22510" "AT2G32870" "AT2G18328" ...

de80_top <- head(as.data.frame(de80),40)
kbl(de80_top[order(-de80_top$log2FoldChange),]) %>% kable_styling()

	baseMean	log2FoldChange	lfcSE	stat
AT4G06665	132.02756	10.6703428	1.2214435	8.735846
AT2G08750	37.84520	8.8747367	1.2919905	6.869042
AT1G24996	323.82079	4.3818892	0.5618073	7.799630
AT3G20810	1371.83363	3.1462473	0.4775527	6.588273
AT5G06760	1048.96459	2.9414243	0.4414667	6.662845
AT4G33905	330.48540	2.1883867	0.3352305	6.528005
AT1G65970	319.83244	1.8879852	0.2028938	9.305287
AT3G48510	71.68292	1.7527497	0.2646986	6.621681
AT1G19250	177.80497	1.3027098	0.2005740	6.494908
AT3G14440	696.62229	1.1073515	0.1674480	6.613105
AT5G03345	831.78940	0.9323214	0.1398791	6.665195
AT5G64100	3886.71544	0.9061570	0.1091747	8.300067
AT4G12600	1344.31545	0.8738919	0.1317509	6.632912
AT3G11510	6114.57364	0.8224084	0.0964760	8.524483
AT5G42090	3764.73635	0.7636518	0.1042944	7.322079
AT1G14980	1578.18877	0.7608785	0.1126658	6.753414
AT3G22230	3405.70403	0.7473244	0.0963363	7.757453
AT3G09735	1220.96912	0.7320855	0.0981377	7.459776
AT2G44310	1560.18430	0.6872796	0.0950144	7.233422
AT3G18740	3328.73251	0.6726190	0.1004285	6.697488
AT4G09320	6585.58229	0.6659963	0.0894218	7.447803
AT4G20150	3164.43679	0.6605573	0.0950525	6.949394
AT5G02870	7649.88412	0.6494672	0.0860326	7.549084
AT1G55330	2376.62233	0.6279042	0.0917265	6.845395
AT3G53430	5234.77439	0.6126416	0.0824943	7.426474
AT5G57660	1896.01283	-0.7450316	0.1135806	-6.559499
AT2G32870	1214.28389	-0.8573352	0.1021669	-8.391513
AT3G47470	34652.01704	-0.8809525	0.1335732	-6.595277
AT3G15353	23919.22730	-0.9793861	0.1511211	-6.480802
AT2G18328	689.16212	-1.0454080	0.1455697	-7.181493
AT4G39720	211.17876	-1.0949131	0.1686647	-6.491655
AT2G47450	3594.05175	-1.1612794	0.1044117	-11.122115
AT5G48490	1040.34652	-1.2382333	0.1779417	-6.958647
AT5G54270	14956.45719	-1.2741526	0.1886138	-6.755352
AT1G64370	5582.52780	-1.2768304	0.1829606	-6.978719
AT4G38080	3041.08385	-1.4147394	0.1978268	-7.151404
AT2G22510	1187.84821	-1.5712553	0.1672951	-9.392116
AT3G02400	210.64282	-2.0573297	0.3175210	-6.479350
AT1G15830	157.00814	-2.1186271	0.3203694	-6.613077
AT1G55673	735.08530	-12.7838910	1.2309372	-10.385494

write.table(de80,file="de80.tsv",quote=FALSE,sep="\t")

de80_up <- rownames(subset(de80,padj<0.05&log2FoldChange>0))
de80_dn <- rownames(subset(de80,padj<0.05&log2FoldChange<0))

Pathway analysis

Mapman pathways last modified in 2012 and used in the previous RNA-seq analysis.

genesets <- gmt_import("../baseline/ref/Ath_AGI_LOCUS_TAIR10_Aug2012.txt.gmt")

gt <- read.table("../baseline/ref/Arabidopsis_thaliana.TAIR10.46.geneaccession2symbol.tsv",
    fill=TRUE) 

m <- mitch_import(x=as.data.frame(de80),DEtype="deseq2",geneTable=gt)

## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.

## Note: Mean no. genes in input = 21761

## Note: no. genes in output = 21471

## Note: estimated proportion of input genes in output = 0.987

# significance
res_sig <- mitch_calc(x=m,genesets=genesets,priority="significance")

## Note: When prioritising by significance (ie: small
##             p-values), large effect sizes might be missed.

top <- head(subset(res_sig$enrichment_result,p.adjustANOVA<0.05),50)
kbl(head(top,30)) %>% kable_styling()

	set	setSize	pANOVA	s.dist	p.adjustANOVA
91	NOT_ASSIGNED_NO_ONTOLOGY_PENTATRICOPEPTIDE_(PPR)_REPEAT-CONTAINING_PROTEIN	418	0.0000000	0.1877951	0.0000000
114	PROTEIN_DEGRADATION_UBIQUITIN_PROTEASOM	60	0.0000000	0.4827129	0.0000000
8	CELL_ORGANISATION	345	0.0000000	-0.1881141	0.0000001
243	STRESS_BIOTIC_PR-PROTEINS	172	0.0000000	-0.2651555	0.0000001
25	DNA_SYNTHESIS/CHROMATIN_STRUCTURE	204	0.0000000	-0.2396256	0.0000002
89	NOT_ASSIGNED_NO_ONTOLOGY_GLYCINE_RICH_PROTEINS	55	0.0000000	-0.4532720	0.0000003
92	NOT_ASSIGNED_NO_ONTOLOGY_PROLINE_RICH_FAMILY	39	0.0000000	-0.5225783	0.0000006
112	PROTEIN_DEGRADATION_UBIQUITIN_E3_SCF_FBOX	253	0.0000000	0.2050586	0.0000006
13	CELL_WALL_CELL_WALL_PROTEINS_HRGP	14	0.0000002	-0.8049854	0.0000054
187	RNA_REGULATION_OF_TRANSCRIPTION_MYB-RELATED_TRANSCRIPTION_FACTOR_FAMILY	42	0.0000003	-0.4550153	0.0000089
197	RNA_REGULATION_OF_TRANSCRIPTION_TRIHELIX,_TRIPLE-HELIX_TRANSCRIPTION_FACTOR_FAMILY	29	0.0000009	-0.5265238	0.0000222
186	RNA_REGULATION_OF_TRANSCRIPTION_MYB_DOMAIN_TRANSCRIPTION_FACTOR_FAMILY	121	0.0000019	-0.2507256	0.0000421
202	RNA_RNA_BINDING	164	0.0000022	-0.2140846	0.0000458
179	RNA_REGULATION_OF_TRANSCRIPTION_HB,HOMEOBOX_TRANSCRIPTION_FACTOR_FAMILY	67	0.0000025	-0.3323929	0.0000482
199	RNA_REGULATION_OF_TRANSCRIPTION_WRKY_DOMAIN_TRANSCRIPTION_FACTOR_FAMILY	62	0.0000050	-0.3352826	0.0000881
215	SIGNALLING_LIGHT	89	0.0000090	-0.2722420	0.0001501
173	RNA_REGULATION_OF_TRANSCRIPTION_CCAAT_BOX_BINDING_FACTOR_FAMILY,_HAP2	10	0.0000113	-0.8018452	0.0001767
20	DEVELOPMENT_LATE_EMBRYOGENESIS_ABUNDANT	18	0.0000186	0.5826691	0.0002765
88	NOT_ASSIGNED_NO_ONTOLOGY_FORMIN_HOMOLOGY_2_DOMAIN-CONTAINING_PROTEIN	16	0.0000421	-0.5913190	0.0005912
99	PROTEIN_ASSEMBLY_AND_COFACTOR_LIGATION	23	0.0000446	0.4916157	0.0005960
154	REDOX_THIOREDOXIN_PDIL	13	0.0000516	0.6483291	0.0006559
14	CELL_WALL_CELL_WALL_PROTEINS_LRR	17	0.0000540	-0.5654999	0.0006559
126	PROTEIN_SYNTHESIS_INITIATION	77	0.0000566	0.2653602	0.0006574
90	NOT_ASSIGNED_NO_ONTOLOGY_HYDROXYPROLINE_RICH_PROTEINS	53	0.0000647	-0.3171790	0.0006936
222	SIGNALLING_RECEPTOR_KINASES_DUF_26	40	0.0000670	-0.3642131	0.0006936
175	RNA_REGULATION_OF_TRANSCRIPTION_G2-LIKE_TRANSCRIPTION_FACTOR_FAMILY,_GARP	38	0.0000675	-0.3734919	0.0006936
125	PROTEIN_SYNTHESIS_ELONGATION	29	0.0000788	0.4235001	0.0007794
174	RNA_REGULATION_OF_TRANSCRIPTION_CHROMATIN_REMODELING_FACTORS	35	0.0000935	-0.3815477	0.0008912
171	RNA_REGULATION_OF_TRANSCRIPTION_C2H2_ZINC_FINGER_FAMILY	100	0.0001248	-0.2219952	0.0011488
18	CELL_WALL_MODIFICATION	55	0.0001333	0.2977587	0.0011863

mitch_barplot(res_sig)

# effect size
res_eff <- mitch_calc(x=m,genesets=genesets,priority="effect")

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

top <- head(subset(res_eff$enrichment_result,p.adjustANOVA<0.05),50)
kbl(head(top,30)) %>% kable_styling()

	set	setSize	pANOVA	s.dist	p.adjustANOVA
13	CELL_WALL_CELL_WALL_PROTEINS_HRGP	14	0.0000002	-0.8049854	0.0000054
173	RNA_REGULATION_OF_TRANSCRIPTION_CCAAT_BOX_BINDING_FACTOR_FAMILY,_HAP2	10	0.0000113	-0.8018452	0.0001767
154	REDOX_THIOREDOXIN_PDIL	13	0.0000516	0.6483291	0.0006559
88	NOT_ASSIGNED_NO_ONTOLOGY_FORMIN_HOMOLOGY_2_DOMAIN-CONTAINING_PROTEIN	16	0.0000421	-0.5913190	0.0005912
253	TRANSPORT_MAJOR_INTRINSIC_PROTEINS_PIP	13	0.0002226	0.5912875	0.0017483
20	DEVELOPMENT_LATE_EMBRYOGENESIS_ABUNDANT	18	0.0000186	0.5826691	0.0002765
14	CELL_WALL_CELL_WALL_PROTEINS_LRR	17	0.0000540	-0.5654999	0.0006559
161	RNA_REGULATION_OF_TRANSCRIPTION_ARF,_AUXIN_RESPONSE_FACTOR_FAMILY	16	0.0001897	-0.5388662	0.0015347
189	RNA_REGULATION_OF_TRANSCRIPTION_NUCLEOSOME/CHROMATIN_ASSEMBLY_FACTOR_GROUP	13	0.0008377	-0.5349556	0.0053377
197	RNA_REGULATION_OF_TRANSCRIPTION_TRIHELIX,_TRIPLE-HELIX_TRANSCRIPTION_FACTOR_FAMILY	29	0.0000009	-0.5265238	0.0000222
145	PS_LIGHTREACTION_PHOTOSYSTEM_II_LHC-II	14	0.0007003	-0.5231393	0.0046742
92	NOT_ASSIGNED_NO_ONTOLOGY_PROLINE_RICH_FAMILY	39	0.0000000	-0.5225783	0.0000006
244	STRESS_BIOTIC_PR-PROTEINS_PLANT_DEFENSINS	15	0.0005973	0.5118755	0.0041850
139	PROTEIN_TARGETING_SECRETORY_PATHWAY_ER	14	0.0010249	0.5068010	0.0062194
193	RNA_REGULATION_OF_TRANSCRIPTION_PWWP_DOMAIN_PROTEIN	11	0.0036743	-0.5057782	0.0188663
209	SECONDARY_METABOLISM_SIMPLE_PHENOLS	16	0.0006058	-0.4950827	0.0041850
99	PROTEIN_ASSEMBLY_AND_COFACTOR_LIGATION	23	0.0000446	0.4916157	0.0005960
235	SIGNALLING_RECEPTOR_KINASES_WHEAT_LRK10_LIKE	12	0.0032494	-0.4906333	0.0173518
114	PROTEIN_DEGRADATION_UBIQUITIN_PROTEASOM	60	0.0000000	0.4827129	0.0000000
38	HORMONE_METABOLISM_GIBBERELIN_INDUCED-REGULATED-RESPONSIVE-ACTIVATED	12	0.0038992	0.4811501	0.0196430
232	SIGNALLING_RECEPTOR_KINASES_PROLINE_EXTENSIN_LIKE	15	0.0013746	-0.4771564	0.0078087
110	PROTEIN_DEGRADATION_UBIQUITIN_E3_BTB/POZ_CULLIN3_BTB/POZ	10	0.0106845	0.4661852	0.0413444
133	PROTEIN_SYNTHESIS_RIBOSOME_BIOGENESIS_PRE-RRNA_PROCESSING_AND_MODIFICATIONS_SNORNAS	16	0.0012692	0.4653228	0.0073670
211	SIGNALLING_14-3-3_PROTEINS	11	0.0077265	0.4638143	0.0324841
187	RNA_REGULATION_OF_TRANSCRIPTION_MYB-RELATED_TRANSCRIPTION_FACTOR_FAMILY	42	0.0000003	-0.4550153	0.0000089
89	NOT_ASSIGNED_NO_ONTOLOGY_GLYCINE_RICH_PROTEINS	55	0.0000000	-0.4532720	0.0000003
125	PROTEIN_SYNTHESIS_ELONGATION	29	0.0000788	0.4235001	0.0007794
144	PS_LIGHTREACTION_NADH_DH	12	0.0130143	0.4140066	0.0476001
26	DNA_SYNTHESIS/CHROMATIN_STRUCTURE_HISTONE_CORE_H2A	14	0.0090832	0.4026858	0.0367458
136	PROTEIN_TARGETING_MITOCHONDRIA	32	0.0001536	0.3865648	0.0012818

mitch_barplot(res_eff)

Bubble plot with top 50, 30 and 20 gene sets.

My_Theme = theme(
  axis.title.x = element_text(size = 16),
  axis.text.y = element_text(size = 8),
  axis.text.x = element_text(size = 8),
  axis.title.y = element_text(size = 16))

top <- head(res_eff$enrichment_result,50)
top <- top[order(top$s.dist),]
top$set <- factor(top$set, levels = top$set[order(top$s.dist)])
ggplot(top, aes(s.dist, set, size = setSize)) + geom_point(aes(colour=-log10(top$p.adjustANOVA))) + My_Theme

top <- head(res_eff$enrichment_result,30)
top <- top[order(top$s.dist),]
top$set <- factor(top$set, levels = top$set[order(top$s.dist)])
ggplot(top, aes(s.dist, set, size = setSize)) + geom_point(aes(colour=-log10(top$p.adjustANOVA))) + My_Theme

top <- head(res_eff$enrichment_result,20)
top <- top[order(top$s.dist),]
top$set <- factor(top$set, levels = top$set[order(top$s.dist)])
ggplot(top, aes(s.dist, set, size = setSize)) + geom_point(aes(colour=-log10(top$p.adjustANOVA))) + My_Theme

Heatmaps of a few top ranked gene sets by effect size.

mysets <- res_eff$input_genesets[which(names(res_eff$input_genesets) %in% head(res_eff$enrichment_result$set,10))]

xxx <- xx[,c(grep("S80",colnames(xx)),grep("H",colnames(xx)))]
xxx <- xxx / colSums(xxx) * 1e6

colfunc <- colorRampPalette(c("blue", "white", "red"))

plots <- lapply(1:length(mysets), function(x) {
  at <- gt[which(gt$V2 %in% mysets[[x]]),1]
  my_genes  <- xxx[which(rownames(xxx) %in% at),]
  if(nrow(my_genes)>2) {
    heatmap.2(as.matrix(my_genes),trace="none",scale="row",
      margin=c(8,10),main=names(mysets[x]),
      col=colfunc(25))
  }
})

Heatmaps of a few top ranked gene sets by small p-values.

mysets <- res_eff$input_genesets[which(names(res_eff$input_genesets) %in% head(res_sig$enrichment_result$set,10))]

xxx <- xx[,c(grep("S80",colnames(xx)),grep("H",colnames(xx)))]
xxx <- xxx / colSums(xxx) * 1e6

colfunc <- colorRampPalette(c("blue", "white", "red"))

plots <- lapply(1:length(mysets), function(x) {
  at <- gt[which(gt$V2 %in% mysets[[x]]),1]
  my_genes  <- xxx[which(rownames(xxx) %in% at),]
  if(nrow(my_genes)>2) {
    heatmap.2(as.matrix(my_genes),trace="none",scale="row",
      margin=c(8,10),main=names(mysets[x]),
      col=colfunc(25))
  }
})

Gene sets of interest

mysets <- c("TRANSPORT_CALCIUM",
  "REDOX_THIOREDOXIN_PDIL",
  "CELL_WALL_CELLULOSE_SYNTHESIS",
  "PROTEIN_TARGETING_CHLOROPLAST",
  "MITOCHONDRIAL_ELECTRON_TRANSPORT_/_ATP_SYNTHESIS_CYTOCHROME_C")

mysets <- genesets[which(names(genesets) %in% mysets)]
 
myres <- mitch_calc(x=m,genesets=mysets,priority="effect")

## Note: Enrichments with large effect sizes may not be
##             statistically significant.

par(mar=c(5,20,5,2))
vioplot(myres$detailed_sets,horizontal=TRUE,las=2,side="right",cex.axis=0.7)
grid()

Heat map of the gene sets of interest. Values plotted are the CPM after library size correction.

xxx <- xx[,c(grep("S80",colnames(xx)),grep("H",colnames(xx)))]
xxx <- xxx / colSums(xxx) * 1e6

colfunc <- colorRampPalette(c("blue", "white", "red"))

plots <- lapply(1:length(mysets), function(x) {
  at <- gt[which(gt$V2 %in% mysets[[x]]),1]
  my_genes  <- xxx[which(rownames(xxx) %in% at),]
  if(nrow(my_genes)>2) {
    heatmap.2(as.matrix(my_genes),trace="none",scale="row",
      margin=c(8,10),main=names(mysets[x]),
      col=colfunc(25))
  }
})

Mitch reports

unlink("mapman_report_de80_sig.html")
capture.output(
    mitch_report(res_sig,outfile=paste("mapman_report_de80_sig.html"))
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

## Dataset saved as " /tmp/Rtmp1TDkcW/mapman_report_de80_sig.rds ".

## 
## 
## processing file: mitch.Rmd

## Contour plot does not apply to unidimensional analysis.

## output file: /mnt/bfx4/tohidul/baseline3/mitch.knit.md

## 
## Output created: /tmp/Rtmp1TDkcW/mitch_report.html

unlink("mapman_report_de80_eff.html")
capture.output(
    mitch_report(res_eff,outfile=paste("mapman_report_de80_eff.html"))
    , file = "/dev/null", append = FALSE,
    type = c("output", "message"), split = FALSE)

## Dataset saved as " /tmp/Rtmp1TDkcW/mapman_report_de80_eff.rds ".

## 
## 
## processing file: mitch.Rmd

## Contour plot does not apply to unidimensional analysis.

## output file: /mnt/bfx4/tohidul/baseline3/mitch.knit.md

## 
## Output created: /tmp/Rtmp1TDkcW/mitch_report.html

Make PDFs

pdf("de80.pdf")
de80 <- run_de(ss80,xx80)

## converting counts to integer mode

##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

##  chr [1:729] "AT1G65970" "AT4G06665" "AT3G11510" "AT5G64100" "AT1G24996" ...
##  chr [1:730] "AT2G47450" "AT1G55673" "AT2G22510" "AT2G32870" "AT2G18328" ...

mitch_barplot(res_eff)
par(mar=c(5,20,5,2))
vioplot(myres$detailed_sets,horizontal=TRUE,las=2,side="right",cex.axis=0.7)
dev.off()

## X11cairo 
##        2

Session information

So you know what version of R and packages was used.

sessionInfo()

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pkgload_1.2.0               GGally_2.1.1               
##  [3] beeswarm_0.3.1              gtools_3.8.2               
##  [5] echarts4r_0.4.0             forcats_0.5.1              
##  [7] stringr_1.4.0               dplyr_1.0.5                
##  [9] purrr_0.3.4                 readr_1.4.0                
## [11] tidyr_1.1.3                 tibble_3.1.0               
## [13] ggplot2_3.3.3               tidyverse_1.3.0            
## [15] clusterProfiler_3.18.1      vioplot_0.3.5              
## [17] zoo_1.8-9                   sm_2.2-5.6                 
## [19] UpSetR_1.4.0                eulerr_6.1.0               
## [21] kableExtra_1.3.4            mitch_1.2.2                
## [23] DESeq2_1.30.0               SummarizedExperiment_1.20.0
## [25] Biobase_2.50.0              MatrixGenerics_1.2.0       
## [27] matrixStats_0.58.0          GenomicRanges_1.42.0       
## [29] GenomeInfoDb_1.26.0         IRanges_2.24.0             
## [31] S4Vectors_0.28.0            BiocGenerics_0.36.0        
## [33] gplots_3.1.1                reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.2.1        shadowtext_0.0.7      
##   [4] fastmatch_1.1-0        systemfonts_1.0.1      plyr_1.8.6            
##   [7] igraph_1.2.6           splines_4.0.3          BiocParallel_1.24.1   
##  [10] digest_0.6.27          htmltools_0.5.1.1      GOSemSim_2.16.1       
##  [13] viridis_0.6.0          GO.db_3.12.1           fansi_0.4.2           
##  [16] magrittr_2.0.1         memoise_2.0.0          annotate_1.68.0       
##  [19] graphlayouts_0.7.1     modelr_0.1.8           svglite_2.0.0         
##  [22] enrichplot_1.10.2      colorspace_2.0-0       blob_1.2.1            
##  [25] rvest_1.0.0            ggrepel_0.9.1          haven_2.3.1           
##  [28] xfun_0.22              tcltk_4.0.3            crayon_1.4.1          
##  [31] RCurl_1.98-1.3         jsonlite_1.7.2         scatterpie_0.1.5      
##  [34] genefilter_1.72.0      survival_3.2-10        glue_1.4.2            
##  [37] polyclip_1.10-0        gtable_0.3.0           zlibbioc_1.36.0       
##  [40] XVector_0.30.0         webshot_0.5.2          DelayedArray_0.16.0   
##  [43] scales_1.1.1           DOSE_3.16.0            DBI_1.1.1             
##  [46] Rcpp_1.0.6             viridisLite_0.4.0      xtable_1.8-4          
##  [49] bit_4.0.4              htmlwidgets_1.5.3      httr_1.4.2            
##  [52] fgsea_1.16.0           RColorBrewer_1.1-2     ellipsis_0.3.1        
##  [55] pkgconfig_2.0.3        reshape_0.8.8          XML_3.99-0.6          
##  [58] farver_2.1.0           dbplyr_2.1.0           sass_0.3.1            
##  [61] locfit_1.5-9.4         utf8_1.2.1             labeling_0.4.2        
##  [64] tidyselect_1.1.0       rlang_0.4.10           later_1.1.0.1         
##  [67] AnnotationDbi_1.52.0   cellranger_1.1.0       munsell_0.5.0         
##  [70] tools_4.0.3            cachem_1.0.4           cli_2.3.1             
##  [73] downloader_0.4         generics_0.1.0         RSQLite_2.2.4         
##  [76] broom_0.7.5            evaluate_0.14          fastmap_1.1.0         
##  [79] yaml_2.2.1             fs_1.5.0               knitr_1.31            
##  [82] bit64_4.0.5            tidygraph_1.2.0        caTools_1.18.1        
##  [85] ggraph_2.0.5           mime_0.10              DO.db_2.9             
##  [88] xml2_1.3.2             compiler_4.0.3         rstudioapi_0.13       
##  [91] testthat_3.0.2         reprex_1.0.0           tweenr_1.0.2          
##  [94] geneplotter_1.68.0     bslib_0.2.4            stringi_1.5.3         
##  [97] ps_1.6.0               highr_0.8              desc_1.3.0            
## [100] lattice_0.20-41        Matrix_1.3-2           vctrs_0.3.6           
## [103] pillar_1.5.1           lifecycle_1.0.0        BiocManager_1.30.10   
## [106] jquerylib_0.1.3        data.table_1.14.0      cowplot_1.1.1         
## [109] bitops_1.0-6           httpuv_1.5.5           qvalue_2.22.0         
## [112] R6_2.5.0               promises_1.2.0.1       KernSmooth_2.23-18    
## [115] gridExtra_2.3          MASS_7.3-53.1          assertthat_0.2.1      
## [118] rprojroot_2.0.2        withr_2.4.1            GenomeInfoDbData_1.2.4
## [121] hms_1.0.0              grid_4.0.3             rmarkdown_2.7         
## [124] rvcheck_0.1.8          ggforce_0.3.3          lubridate_1.7.10      
## [127] shiny_1.6.0

References

Bray NL, Pimentel H, Melsted P, Pachter L. Near-optimal probabilistic RNA-seq quantification [published correction appears in Nat Biotechnol. 2016 Aug 9;34(8):888]. Nat Biotechnol. 2016;34(5):525-527. doi:10.1038/nbt.3519

Jiang H, Lei R, Ding SW, Zhu S. Skewer: a fast and accurate adapter trimmer for next-generation sequencing paired-end reads. BMC Bioinformatics. 2014;15:182. Published 2014 Jun 12. doi:10.1186/1471-2105-15-182

Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. doi:10.1186/s13059-014-0550-8

Kaspi A, Ziemann M. mitch: multi-contrast pathway enrichment for multi-omics and single-cell profiling data. BMC Genomics. 2020;21(1):447. Published 2020 Jun 29. doi:10.1186/s12864-020-06856-9