Source codes: https://github.com/markziemann/dftd_rnaseq

suppressPackageStartupMessages({
    library("reshape2")
    library("gplots")
    library("DESeq2")
    library("mitch")
    library("limma")
    library("kableExtra")
    library("dplyr")
})

Background

The goal of our study is to compare the expression of genes related to metabolism of DFT1 cell lines to DFT2 cell lines. Our main goal is comparing genes expressed in the following pathways: Glycolysis, Pentose Phosphate Pathway, Fatty Acid Metabolism, Glutaminolysis and Oxidative Phosphorylation. A secondary goal would be to compare the expression of genes related to the production of Reactive Oxygen Species (ROS) and protection against ROS (genes involved in DNA repair, production of antioxydants etc). Finally, we might also be interested in looking into genes linked to cholesterol metabolism as this has been shown to be important for DFT1 but I need to do some additional reading to know what exactly we’re after. See sample sheet below (edited from Yin Peng), there are 3 DFT1 and 3 DFT2 cell lines, and 3 replicates of each cell line:

ss <- read.table("../ss.tsv",sep="\t",fill=TRUE,header=TRUE)
ss$DFT <- as.factor(ss$DFT)
ss$clone <- sapply(strsplit(ss$ClientID,"_"),"[[",1)

ss %>%
  kbl(caption="Sample sheet for all samples") %>%
  kable_paper("hover", full_width = F)
Sample sheet for all samples
C ClientID DFT Replicate Index1sequence Index2sequence X.ReadsIdentified.PF. TotalReads TotalBases.Gbases. clone
6180766 4906_1-1 DFT1 1 TTGTTGCA GACGTCGT 2.4465 103481165 15.5 4906
6180767 C5065_1-1 DFT1 1 CCACACTT CGTCATAC 2.8116 118924031 17.8 C5065
6180768 1426_1-1 DFT1 1 CCCGTTTG TTGCTGTA 2.3264 98401219 14.8 1426
6180769 RV_1-1 DFT2 1 ATGCTCCC CCCTGCTG 2.2811 96485136 14.5 RV
6180770 SN_1-1 DFT2 1 GCTCAATA CATTTATC 2.3947 101290147 15.2 SN
6180771 TD549_1-1 DFT2 1 GTAGTTCG CTTGATGC 2.1732 91921221 13.8 TD549
6180772 4906_2-1 DFT1 2 CGAGAACC ATGTATCG 2.2322 94416781 14.2 4906
6180773 C5065_2-1 DFT1 2 GCCATGTA ATAGGGTT 2.4505 103650355 15.5 C5065
6180774 1426_2-1 DFT1 2 TTTCTCTA CTCGACGT 2.4778 104805081 15.7 1426
6180775 RV_2-1 DFT2 2 CCAGCGAT TCTAGTCA 2.9882 126393794 19.0 RV
6180776 SN_2-1 DFT2 2 TGGGAGTG CCCGTCTA 2.4459 103455786 15.5 SN
6180777 TD549_2-1 DFT2 2 CCCTCGTA TAGAAGAG 2.5437 107592495 16.1 TD549
6180778 4906_3-1 DFT1 3 CGATATGG GGGACGTA 2.2711 96062159 14.4 4906
6180779 C5065_3-1 DFT1 3 TTGTGCCC TTTCCATC 2.2763 96282107 14.4 C5065
6180780 1426_3-1 DFT1 3 TGTCCTCT CGACGAAC 2.4952 105541059 15.8 1426
6180781 RV_3-1 DFT2 3 GTATAGTC TTGGTCTC 2.3325 98659234 14.8 RV
6180782 SN_3-1 DFT2 3 TTTGGGAT GTTCACGT 2.6750 113146174 17.0 SN
6180783 TD549_3-1 DFT2 3 CACCAAGC AAAGGGAA 2.1610 91405190 13.7 TD549
DEA5_4NEG DEA4_6NEG NA CGGAGAGG CGAGCGTC 0.0002 8460 0.0 DEA4

We are mainly interested in comparing all DFT1samples against all DFT2 samples. However, we did notice that some cell lines within a same DFT produced different results in the metabolism experiments so we might need to compare all 6 cell lines against one another and not be able to simply do a DFT1 vs DFT2 contrast, if that makes sense.

Regarding the reference transcriptome, we will use Ensembl v109.

Functions

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange,
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=1, col="dark gray",
       main=contrast_name, cex.main=0.7)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=1, col="red")
  mtext(SUBHEADER,cex = 0.7)
  grid()
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="red")
}

Load data

Here we load the data in from the aligner.

tmp <- read.table("../fastq/3col.tsv.gz")
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3",fun.aggregate=sum))
dim(x)
## [1] 43512    19

Load gene names.

gn <- read.table("../ref/Sarcophilus_harrisii.mSarHar1.11.cdna+ncrna.genenames.tsv",fill=TRUE)

gn <- gn[order(gn$V1),]

dim(gn)
## [1] 43512     3

Load homology map

hm <- read.table("../ref/mart_export_ensembl109_2023-07-14.txt",sep="\t",header=TRUE)

Now need to collapse transcript data to genes.

x$gene <- paste(gn$V2,gn$V3)

y <- aggregate(. ~ gene,x,sum)

rownames(y) <- y$gene
y$gene = NULL

dim(y)
## [1] 23829    19

Quality control

Samples with <1M reads should be omitted. Will also round values to integers.

cs <- colSums(y)
cs <- cs[order(cs)]

par(mar=c(5,10,5,2))
barplot(cs,,main="All samples",horiz=TRUE,las=1)
abline(v=1e7,col="red",lty=2)

y <- round(y)

MDS

This will help us to visualise the sources of variability in the overall dataset.

Plot MDS and then remove the negative control and run Plot MDS again.

Also fix the sample names.

par(mar = c(5.1, 4.1, 4.1, 2.1) )

plotMDS(y)

y <- y[,colnames(y) != "DEA5-4NEG"]

plotMDS(y)

ss <- ss[ss$ClientID != "DEA4_6NEG",]

colnames(y) <- sapply(strsplit(ss$ClientID,"-"),"[[",1)

cs <- colSums(y)
cs <- cs[order(cs)]

par(mar=c(5,10,5,2))
barplot(cs,,main="All samples",horiz=TRUE,las=1)

par(mar = c(5.1, 4.1, 4.1, 2.1) )

plotMDS(y)

cols <- ss$DFT
cols <- gsub("DFT1","pink",cols)
cols <- gsub("DFT2","lightblue",cols)
mymds <- plotMDS(y,plot=FALSE)

# fix the xlims
XMIN=min(mymds$x)
XMAX=max(mymds$x)
XMID=(XMAX+XMIN)/2
XMIN <- XMID + (XMIN-XMID)*1.1
XMAX <- XMID+(XMAX-XMID)*1.1
plotMDS(mymds,pch=19,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mymds,labels=colnames(y))
mtext("pink=DFT1,blue=DFT2")

DESeq2

Run a simple differential analysis comparing DFT1 vs DFT2 ignoring the clone type as a source of variation.

# split data will be necessary for the smaller comparisons (but not this one)
y1 <- y
ss1 <- ss

y1 <- y1[which(rowMeans(y1)>10),]
dim(y1)
## [1] 16009    18
dds <- DESeqDataSetFromMatrix(countData = y1 , colData = ss1, design = ~ DFT )
## converting counts to integer mode
## factor levels were dropped which had no samples
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 4 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z<- results(res)
vsd <- vst(dds)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge1 <- dge
sig <- subset(dge,padj<0.05)
sig1_up <- rownames(subset(sig,log2FoldChange>0))
sig1_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig1_up)
## [1] 5871
length(sig1_dn)
## [1] 6006
maplot(dge1,"DFT1 vs DFT2")

make_volcano(dge1,"DFT1 vs DFT2")

sig[1:50,1:6] %>%
  kbl(caption="Comparison of DFT1 vs DFT2") %>%
  kable_paper("hover", full_width = F)
Comparison of DFT1 vs DFT2
baseMean log2FoldChange lfcSE stat pvalue padj
ENSSHAG00000000893.2 NA 761.6994 -4.745935 0.1042514 -45.52393 0 0
ENSSHAG00000001230.2 NA 9964.7623 -7.268224 0.1727670 -42.06952 0 0
ENSSHAG00000001308.2 NA 1591.3721 -4.481411 0.1093947 -40.96554 0 0
ENSSHAG00000001552.2 TDRD5 1386.5142 -5.550630 0.0945061 -58.73301 0 0
ENSSHAG00000001728.2 ABRAXAS2 2447.7801 -2.611974 0.0580287 -45.01174 0 0
ENSSHAG00000001754.2 NA 573.3750 -4.129922 0.1018094 -40.56522 0 0
ENSSHAG00000002134.2 NA 31866.6922 2.222360 0.0386947 57.43325 0 0
ENSSHAG00000002808.2 MICAL1 9230.0290 -2.032272 0.0528958 -38.42032 0 0
ENSSHAG00000003434.2 NA 5640.3455 -9.630769 0.2085777 -46.17354 0 0
ENSSHAG00000003458.2 JMJD1C 2343.7511 -2.896812 0.0747449 -38.75599 0 0
ENSSHAG00000003737.2 NA 9677.3586 11.783418 0.2132465 55.25727 0 0
ENSSHAG00000004947.2 THBS2 24172.5467 13.496286 0.3231374 41.76640 0 0
ENSSHAG00000005182.2 SERPINE1 7064.0510 11.869506 0.2994322 39.64005 0 0
ENSSHAG00000005550.2 MEIOC 1485.7532 -7.289256 0.1694244 -43.02364 0 0
ENSSHAG00000007070.2 SMC1B 3980.3698 -10.063501 0.1851324 -54.35841 0 0
ENSSHAG00000007582.2 NA 2902.0019 -7.095020 0.0846261 -83.83964 0 0
ENSSHAG00000007784.2 NA 5517.6882 -7.314052 0.1015625 -72.01526 0 0
ENSSHAG00000007965.2 NA 5215.8477 -1.747971 0.0370471 -47.18243 0 0
ENSSHAG00000008051.2 TEX14 1537.4858 -3.531985 0.0617160 -57.22965 0 0
ENSSHAG00000009584.2 PLEKHG1 1861.7962 9.379637 0.2481401 37.79977 0 0
ENSSHAG00000010367.2 ECM1 12497.0681 9.411970 0.1164250 80.84150 0 0
ENSSHAG00000010645.2 COL16A1 6094.7776 6.056254 0.0950462 63.71906 0 0
ENSSHAG00000010824.2 MPDZ 4232.6012 -5.438458 0.0941097 -57.78850 0 0
ENSSHAG00000010902.2 SMARCD3 2489.7010 -2.649718 0.0440929 -60.09400 0 0
ENSSHAG00000011135.2 NA 5620.2000 4.064532 0.0895867 45.36980 0 0
ENSSHAG00000011480.2 ADGRB2 1933.7632 -7.544502 0.1814507 -41.57880 0 0
ENSSHAG00000011750.2 NA 3080.7121 3.088431 0.0735615 41.98432 0 0
ENSSHAG00000011809.2 NA 5508.1945 3.140454 0.0593806 52.88691 0 0
ENSSHAG00000012182.2 NA 1038.5611 7.742252 0.2036373 38.01982 0 0
ENSSHAG00000013722.2 NA 524.0949 -4.688777 0.0900165 -52.08798 0 0
ENSSHAG00000014390.2 IFRD1 2454.6507 -1.386811 0.0345922 -40.09028 0 0
ENSSHAG00000014430.2 NA 1157.2248 -1.742533 0.0399895 -43.57472 0 0
ENSSHAG00000015236.2 FKBP6 598.2945 -5.562106 0.1042482 -53.35446 0 0
ENSSHAG00000016212.2 SLC25A14 887.2862 -6.791421 0.1667459 -40.72915 0 0
ENSSHAG00000016252.2 LOXL2 8715.4741 8.867010 0.1933921 45.84991 0 0
ENSSHAG00000016335.2 NA 21006.6606 2.668634 0.0567139 47.05436 0 0
ENSSHAG00000017128.2 F10 515.2000 6.578090 0.1519688 43.28578 0 0
ENSSHAG00000017996.2 TNFRSF21 1876.2975 -7.424893 0.1564041 -47.47248 0 0
ENSSHAG00000018060.2 HR 3216.8474 -7.036189 0.1791811 -39.26859 0 0
ENSSHAG00000018469.2 PDE10A 1279.8709 -5.655401 0.1229592 -45.99414 0 0
ENSSHAG00000020521.1 CDK3 2068.9726 4.241310 0.0605836 70.00762 0 0
ENSSHAG00000021318.1 NA 2418.0580 -2.504689 0.0455484 -54.98958 0 0
ENSSHAG00000021464.1 NA 3962.2831 -8.392331 0.1785238 -47.00960 0 0
ENSSHAG00000022172.1 HAPLN2 3697.2723 -7.391660 0.1427380 -51.78480 0 0
ENSSHAG00000022666.1 NA 550.9103 6.404774 0.1677749 38.17480 0 0
ENSSHAG00000022966.1 SOX8 1304.3613 -4.733014 0.1196103 -39.57027 0 0
ENSSHAG00000023142.1 NA 20998.8159 -2.483109 0.0319815 -77.64204 0 0
ENSSHAG00000023249.1 NA 974.9689 5.489140 0.1204114 45.58656 0 0
ENSSHAG00000023680.1 EGR3 7293.3439 -4.014795 0.1044284 -38.44541 0 0
ENSSHAG00000024021.1 NA 1284.6234 5.713744 0.1294418 44.14140 0 0
write.table(dge,file="dge1.tsv",sep="\t")

# heatmap
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
  col=colfunc(25),ColSideColors=cols,mar=c(5,14),
  cexRow=0.8,cexCol=0.7)

Combine the three replicates

Sum replicates.

x4906 <- rowSums(y[,ss$clone=="4906"])
xC5065 <- rowSums(y[,ss$clone=="C5065"])
x1426 <- rowSums(y[,ss$clone=="1426"])
xRV <- rowSums(y[,ss$clone=="RV"])
xSN <- rowSums(y[,ss$clone=="SN"])
xTD549 <- rowSums(y[,ss$clone=="TD549"])

y2 <- data.frame(x4906,xC5065,x1426,xRV,xSN,xTD549)
ss2 <- as.data.frame(colnames(y2))
colnames(ss2) <- "clone"
ss2$DFT <- factor(c("DFT1","DFT1","DFT1","DFT2","DFT2","DFT2"))

y2 <- y2[which(rowMeans(y2)>10),]
dim(y2)
## [1] 17468     6
dds <- DESeqDataSetFromMatrix(countData = y2 , colData = ss2, design = ~ DFT )
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge2 <- dge
sig <- subset(dge,padj<0.05)
sig2_up <- rownames(subset(sig,log2FoldChange>0))
sig2_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig2_up)
## [1] 4200
length(sig2_dn)
## [1] 4412
maplot(dge2,"DFT1 vs DFT2 aggregated")

make_volcano(dge2,"DFT1 vs DFT2 aggregated")

sig[1:50,1:6] %>%
  kbl(caption="Comparison of DFT1 vs DFT2") %>%
  kable_paper("hover", full_width = F)
Comparison of DFT1 vs DFT2
baseMean log2FoldChange lfcSE stat pvalue padj
ENSSHAG00000007582.2 NA 8720.2072 -7.090091 0.1734257 -40.88258 0 0
ENSSHAG00000024692.1 NA 11176.1973 -8.580770 0.2275391 -37.71118 0 0
ENSSHAG00000007784.2 NA 16519.5514 -7.300242 0.1954949 -37.34236 0 0
ENSSHAG00000010367.2 ECM1 37884.8561 9.429385 0.2567216 36.73000 0 0
ENSSHAG00000026705.1 LOXL3 35605.2404 3.028419 0.0834311 36.29846 0 0
ENSSHAG00000003737.2 NA 29315.0122 11.793401 0.3388989 34.79917 0 0
ENSSHAG00000032086.1 NA 4270.9538 -9.087633 0.2656490 -34.20918 0 0
ENSSHAG00000020521.1 CDK3 6254.9578 4.247335 0.1329351 31.95045 0 0
ENSSHAG00000010645.2 COL16A1 18470.4887 6.069424 0.1909073 31.79252 0 0
ENSSHAG00000015236.2 FKBP6 1799.3194 -5.552135 0.1749135 -31.74217 0 0
ENSSHAG00000031815.1 RAB33A 4153.4227 -8.057672 0.2543513 -31.67930 0 0
ENSSHAG00000031607.1 NA 2388.4986 6.299003 0.2049613 30.73264 0 0
ENSSHAG00000003434.2 NA 16944.8869 -9.624643 0.3183782 -30.23022 0 0
ENSSHAG00000013199.2 PDGFRA 101648.6230 15.673386 0.5200441 30.13857 0 0
ENSSHAG00000024117.1 NA 6007.3116 7.935832 0.2646438 29.98684 0 0
ENSSHAG00000031627.1 PRX 228602.7417 -10.824218 0.3631920 -29.80302 0 0
ENSSHAG00000007782.2 CA8 2062.4938 8.726159 0.2990458 29.18000 0 0
ENSSHAG00000018310.2 SLIT2 44695.3018 -14.414172 0.4983908 -28.92142 0 0
ENSSHAG00000021464.1 NA 11908.1052 -8.384005 0.2916414 -28.74765 0 0
ENSSHAG00000010824.2 MPDZ 12727.1471 -5.429957 0.1920191 -28.27821 0 0
ENSSHAG00000000643.2 STK31 4182.6920 -9.537041 0.3373093 -28.27387 0 0
ENSSHAG00000023142.1 NA 63163.2147 -2.474963 0.0903962 -27.37905 0 0
ENSSHAG00000025566.1 ABLIM1 7772.7034 5.814237 0.2129981 27.29713 0 0
ENSSHAG00000021789.1 NA 1860.1298 8.577176 0.3143469 27.28571 0 0
ENSSHAG00000024021.1 NA 3895.6967 5.733097 0.2107928 27.19778 0 0
ENSSHAG00000030292.1 NA 1933.0281 -5.410220 0.1994706 -27.12289 0 0
ENSSHAG00000031074.1 NEXMIF 4117.0924 -8.404272 0.3123135 -26.90973 0 0
ENSSHAG00000008051.2 TEX14 4625.9527 -3.525105 0.1326009 -26.58431 0 0
ENSSHAG00000004947.2 THBS2 73193.5959 13.509919 0.5132476 26.32242 0 0
ENSSHAG00000022172.1 HAPLN2 11123.1806 -7.382997 0.2822149 -26.16090 0 0
ENSSHAG00000026637.1 NA 2213.7312 -5.071669 0.1942619 -26.10738 0 0
ENSSHAG00000005380.2 NA 3108.7669 10.228790 0.3940820 25.95599 0 0
ENSSHAG00000018469.2 PDE10A 3854.3231 -5.647349 0.2179279 -25.91384 0 0
ENSSHAG00000011809.2 NA 16636.2524 3.149445 0.1232734 25.54845 0 0
ENSSHAG00000029638.1 NA 18764.0415 -6.179495 0.2425972 -25.47224 0 0
ENSSHAG00000016252.2 LOXL2 26417.8025 8.878268 0.3521803 25.20944 0 0
ENSSHAG00000001552.2 TDRD5 4168.0657 -5.549233 0.2251434 -24.64755 0 0
ENSSHAG00000006728.2 NEFM 7555.0072 -10.855938 0.4479166 -24.23652 0 0
ENSSHAG00000013722.2 NA 1575.4562 -4.678030 0.1936782 -24.15363 0 0
ENSSHAG00000014886.2 CYP2R1 2488.5667 -7.465866 0.3091695 -24.14813 0 0
ENSSHAG00000031108.1 NA 964.2578 -7.703956 0.3207488 -24.01866 0 0
ENSSHAG00000005492.2 NA 1659.5069 -8.999424 0.3748897 -24.00553 0 0
ENSSHAG00000027513.1 NA 1802.2901 -5.271118 0.2200576 -23.95335 0 0
ENSSHAG00000000893.2 NA 2287.3973 -4.736996 0.1987138 -23.83828 0 0
ENSSHAG00000005182.2 SERPINE1 21367.7733 11.866757 0.5041184 23.53962 0 0
ENSSHAG00000010102.2 UNC5A 1472.0133 -8.985720 0.3860205 -23.27783 0 0
ENSSHAG00000031142.1 TPPP 2664.1676 -5.954920 0.2565656 -23.21012 0 0
ENSSHAG00000022666.1 NA 1669.6189 6.414065 0.2781309 23.06132 0 0
ENSSHAG00000010515.2 NA 2972.5658 9.167129 0.3999684 22.91964 0 0
ENSSHAG00000010902.2 SMARCD3 7489.5510 -2.642803 0.1163671 -22.71092 0 0
write.table(dge,file="dge2.tsv",sep="\t")

# heatmap
mx <- sig[,7:ncol(sig)]
mx <- head(mx,30)
colfunc <- colorRampPalette(c("blue", "white", "red"))
heatmap.2(as.matrix(mx),trace="none",scale="row",
  col=colfunc(25),ColSideColors=cols[1:6],mar=c(8,14),
  cexRow=0.7,cexCol=1)

Enrichment analysis

Need to get human homologs of these genes. These were obtained from Ensembl v109 biomart (website).

rownames(dge2) <- sapply(strsplit(rownames(dge2),"\\."),"[[",1)

hm2 <- hm[hm$Tasmanian.devil.gene.stable.ID != "",]

gt <- hm2[,2:3]
length(unique(gt$Tasmanian.devil.gene.stable.ID))
## [1] 16979
length(unique(gt$Gene.name))
## [1] 16646

Now run mitch for DGE2. There will be a report generated that has more details on the enrichment analysis. The pathways are sourced from Reactome 7th July 2023.

genesets <- gmt_import("../ref/ReactomePathways_2023-07-14.gmt")

m2 <- mitch_import(dge2, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 17468
## Note: no. genes in output = 13505
## Note: estimated proportion of input genes in output = 0.773
head(m2)
##                   x
##        -35.37246663
## A2M    -13.13626596
## A4GALT  -0.07278225
## AAAS    -4.33508400
## AACS     2.13961007
## AADAC   -2.90578338
res2 <- mitch_calc(m2, genesets, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
if ( !file.exists("mitch2.html") ) {
  mitch_report(res2, "mitch2.html")
}

top <- subset(res2$enrichment_result,p.adjustANOVA<0.05)

topup <- head(subset(top,s.dist>0),20)

topup %>%
  kbl(caption="Top 20 upregulated pathways") %>%
  kable_paper("hover", full_width = F)
Top 20 upregulated pathways
set setSize pANOVA s.dist p.adjustANOVA
687 Metallothioneins bind metals 10 0.0000122 0.7985180 0.0013450
1075 Response to metal ions 13 0.0000083 0.7140414 0.0009936
229 Crosslinking of collagen fibrils 16 0.0001190 0.5556657 0.0071547
42 Activation of Matrix Metalloproteinases 22 0.0000130 0.5368882 0.0013450
207 Collagen degradation 47 0.0000000 0.5178538 0.0000004
157 Caspase activation via Death Receptors in the presence of ligand 12 0.0019600 0.5162245 0.0468695
456 GRB2:SOS provides linkage to MAPK signaling for Integrins 12 0.0019968 0.5153042 0.0468695
206 Collagen chain trimerization 30 0.0000013 0.5097934 0.0001768
185 Cholesterol biosynthesis 24 0.0000363 0.4870311 0.0027570
205 Collagen biosynthesis and modifying enzymes 50 0.0000000 0.4808651 0.0000010
171 Cell-extracellular matrix interactions 18 0.0004420 0.4784031 0.0211221
572 Integrin cell surface interactions 63 0.0000000 0.4333917 0.0000008
337 ECM proteoglycans 61 0.0000000 0.4176109 0.0000031
1147 Signal transduction by L1 21 0.0016204 0.3974128 0.0436118
208 Collagen formation 68 0.0000001 0.3788135 0.0000097
573 Integrin signaling 23 0.0016795 0.3785079 0.0436118
1247 Syndecan interactions 24 0.0014070 0.3766227 0.0436118
93 Assembly of collagen fibrils and other multimeric structures 47 0.0000246 0.3557735 0.0022208
183 Chemokine receptors bind chemokines 27 0.0015670 0.3516513 0.0436118
287 Degradation of the extracellular matrix 107 0.0000000 0.3430412 0.0000004
topdn <- head(subset(top,s.dist<0),20)

topdn %>%
  kbl(caption="Top 20 downregulated pathways") %>%
  kable_paper("hover", full_width = F)
Top 20 downregulated pathways
set setSize pANOVA s.dist p.adjustANOVA
286 Degradation of cysteine and homocysteine 11 0.0003126 -0.6276460 0.0161108
1243 Sulfur amino acid metabolism 23 0.0016925 -0.3782370 0.0436118
1359 Transmission across Chemical Synapses 195 0.0009446 -0.1376906 0.0334999
top2 <- rbind(head(topup,10),head(topdn,10))
top2 <- top2[order(-top2$s.dist),]
par(mar=c(5,15,5,2))

barplot(rev(abs(top2$s.dist)),horiz=TRUE,las=1,names.arg=rev(top2$set),
  main="Top Pathways",cex.names=0.6,xlab="Enrichment Score",
  col=c(rep("blue",nrow(head(topdn,10))),rep("red",nrow(head(topup,10)))),
  xlim=c(0,0.8))

Session information

For reproducibility.

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] pkgload_1.3.2               GGally_2.1.2               
##  [3] ggplot2_3.4.0               beeswarm_0.4.0             
##  [5] gtools_3.9.4                tibble_3.1.8               
##  [7] echarts4r_0.4.4             dplyr_1.0.10               
##  [9] kableExtra_1.3.4            limma_3.54.0               
## [11] mitch_1.10.0                DESeq2_1.38.2              
## [13] SummarizedExperiment_1.28.0 Biobase_2.58.0             
## [15] MatrixGenerics_1.10.0       matrixStats_0.63.0         
## [17] GenomicRanges_1.50.2        GenomeInfoDb_1.34.6        
## [19] IRanges_2.32.0              S4Vectors_0.36.1           
## [21] BiocGenerics_0.44.0         gplots_3.1.3               
## [23] reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##  [1] DBI_1.1.3              bitops_1.0-7           gridExtra_2.3         
##  [4] rlang_1.0.6            magrittr_2.0.3         compiler_4.3.1        
##  [7] RSQLite_2.2.20         systemfonts_1.0.4      png_0.1-8             
## [10] vctrs_0.5.1            rvest_1.0.3            stringr_1.5.0         
## [13] pkgconfig_2.0.3        crayon_1.5.2           fastmap_1.1.0         
## [16] XVector_0.38.0         ellipsis_0.3.2         caTools_1.18.2        
## [19] utf8_1.2.2             promises_1.2.0.1       rmarkdown_2.19        
## [22] bit_4.0.5              xfun_0.36              zlibbioc_1.44.0       
## [25] cachem_1.0.6           jsonlite_1.8.4         blob_1.2.3            
## [28] highr_0.10             later_1.3.0            DelayedArray_0.24.0   
## [31] reshape_0.8.9          BiocParallel_1.32.5    parallel_4.3.1        
## [34] R6_2.5.1               bslib_0.4.2            stringi_1.7.8         
## [37] RColorBrewer_1.1-3     jquerylib_0.1.4        Rcpp_1.0.9            
## [40] assertthat_0.2.1       knitr_1.41             httpuv_1.6.7          
## [43] Matrix_1.5-4.1         tidyselect_1.2.0       yaml_2.3.6            
## [46] rstudioapi_0.14        codetools_0.2-19       lattice_0.21-8        
## [49] plyr_1.8.8             withr_2.5.0            shiny_1.7.4           
## [52] KEGGREST_1.38.0        evaluate_0.19          xml2_1.3.3            
## [55] Biostrings_2.66.0      pillar_1.8.1           KernSmooth_2.23-20    
## [58] generics_0.1.3         RCurl_1.98-1.9         munsell_0.5.0         
## [61] scales_1.2.1           xtable_1.8-4           glue_1.6.2            
## [64] tools_4.3.1            webshot_0.5.4          annotate_1.76.0       
## [67] locfit_1.5-9.7         XML_3.99-0.13          grid_4.3.1            
## [70] AnnotationDbi_1.60.0   colorspace_2.0-3       GenomeInfoDbData_1.2.9
## [73] cli_3.6.0              fansi_1.0.3            viridisLite_0.4.1     
## [76] svglite_2.1.0          gtable_0.3.1           sass_0.4.4            
## [79] digest_0.6.31          geneplotter_1.76.0     htmlwidgets_1.6.1     
## [82] memoise_2.0.1          htmltools_0.5.4        lifecycle_1.0.3       
## [85] httr_1.4.4             mime_0.12              bit64_4.0.5           
## [88] MASS_7.3-59