Source: TBA

Introduction

Here we analyse the effect of VLCAD mutation.

Reads were mapped to the human transcriptome version 47 from GENCODE genes (gencode.v47.transcripts.fa).

Genes with mean counts > 10 are classified as detected.

Differential expression is conducted with DESeq2.

Pathway enrichment analysis is conducted with mitch.

Gene sets were obtained from the gene ontology database (q3 2023). Biological process sets were used.

suppressPackageStartupMessages({
    library("zoo")
    library("dplyr")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("MASS")
    library("mitch")
    library("eulerr")
    library("kableExtra")
    library("beeswarm")

library("network")

})

knitr::opts_chunk$set(dev = 'svg') # set output device to svg

Import read counts

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
xx$geneid = NULL
xx <- round(xx)
head(xx)
##                           Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
## ENSG00000000003.16 TSPAN6 4228 3204 2812 3362 2725 2330 2284 2711
## ENSG00000000005.6 TNMD       1    4    2    0    0    0    0    0
## ENSG00000000419.14 DPM1   2538 2211 1837 1867 2632 2677 2873 3146
## ENSG00000000457.14 SCYL3   526  440  444  607  360  385  519  605
## ENSG00000000460.17 FIRRM  1572 1325  652  744 1174 1142  674  870
## ENSG00000000938.13 FGR       0    0    1    3    0    1    0    4

Sample sheet

ss <- data.frame(colnames(xx))
ss$case <- as.numeric(grepl("Mut",ss[,1]))
rownames(ss) <- ss[,1]
ss[,1] = NULL
ss
##      case
## Ctl1    0
## Ctl2    0
## Ctl3    0
## Ctl4    0
## Mut1    1
## Mut2    1
## Mut3    1
## Mut4    1

QC analysis

Here I’ll look at a few different quality control measures.

par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads")

colSums(xx)
##     Ctl1     Ctl2     Ctl3     Ctl4     Mut1     Mut2     Mut3     Mut4 
## 59500246 47803671 50526219 52506392 50626163 51905681 59571523 68184815

We have 47M to 68M assigned reads, which is very good.

MDS plot

All sample cluster with others from the same group. Ctl and Mut samples are distinguished on dimension 2. No outliers recorded.

cols <- c(rep("tan1",4),rep("violet",4))

plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4, col=cols)

text(cmdscale(dist(t(xx))), labels=colnames(xx) )

Correlation heatmap

Correlation heatmap shows a slight batch effect with batch A consisting of replicates 1 and 2, while batch B consists of replicates 3 and 4.

heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap")

cor(xx) %>% kbl(caption = "Pearson correlation coefficients") %>% kable_paper("hover", full_width = F)
Pearson correlation coefficients
Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
Ctl1 1.0000000 0.9955678 0.9641994 0.9673286 0.9572850 0.9613203 0.9463720 0.9518116
Ctl2 0.9955678 1.0000000 0.9622280 0.9663104 0.9576128 0.9636163 0.9494607 0.9514847
Ctl3 0.9641994 0.9622280 1.0000000 0.9967999 0.9069284 0.9241674 0.9663318 0.9752521
Ctl4 0.9673286 0.9663104 0.9967999 1.0000000 0.9159501 0.9308417 0.9717279 0.9798979
Mut1 0.9572850 0.9576128 0.9069284 0.9159501 1.0000000 0.9938098 0.9502892 0.9428478
Mut2 0.9613203 0.9636163 0.9241674 0.9308417 0.9938098 1.0000000 0.9654085 0.9573851
Mut3 0.9463720 0.9494607 0.9663318 0.9717279 0.9502892 0.9654085 1.0000000 0.9971573
Mut4 0.9518116 0.9514847 0.9752521 0.9798979 0.9428478 0.9573851 0.9971573 1.0000000
cor(xx,method="spearman") %>% kbl(caption = "Spearman correlation coefficients") %>% kable_paper("hover", full_width = F)
Spearman correlation coefficients
Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
Ctl1 1.0000000 0.8799723 0.8726250 0.8731369 0.8687678 0.8722935 0.8650790 0.8656965
Ctl2 0.8799723 1.0000000 0.8702726 0.8727027 0.8678268 0.8726946 0.8606341 0.8645307
Ctl3 0.8726250 0.8702726 1.0000000 0.8822079 0.8580223 0.8633952 0.8725751 0.8759524
Ctl4 0.8731369 0.8727027 0.8822079 1.0000000 0.8599849 0.8662153 0.8741373 0.8765094
Mut1 0.8687678 0.8678268 0.8580223 0.8599849 1.0000000 0.8796945 0.8659710 0.8681118
Mut2 0.8722935 0.8726946 0.8633952 0.8662153 0.8796945 1.0000000 0.8726103 0.8744646
Mut3 0.8650790 0.8606341 0.8725751 0.8741373 0.8659710 0.8726103 1.0000000 0.8884237
Mut4 0.8656965 0.8645307 0.8759524 0.8765094 0.8681118 0.8744646 0.8884237 1.0000000

Analysis of differential gene expression

First we will look at control vs mutant with no filtering for low reads. Then we remove genes with fewer than 10 reads per sample on average and rerun DESeq2.

dds <- DESeqDataSetFromMatrix(countData = xx , colData = ss, design = ~ case )
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression changes caused by VLCAD mutation") %>% kable_paper("hover", full_width = F)
Top gene expression changes caused by VLCAD mutation
baseMean log2FoldChange lfcSE stat pvalue padj Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
ENSG00000162511.8 LAPTM5 987.8437 -3.986356 0.1755532 -22.70739 0 0 11.002220 10.885389 10.899161 10.841009 7.254068 7.564944 7.254276 7.671797
ENSG00000272975.3 MYHAS 388.3574 -4.195153 0.1956555 -21.44153 0 0 9.483432 9.555854 9.850651 9.649723 6.499000 6.621006 6.398233 6.615835
ENSG00000152128.13 TMEM163 854.4807 -3.990912 0.1867547 -21.36981 0 0 10.564008 10.434528 10.843404 10.928707 7.153614 7.242889 7.399430 7.399775
ENSG00000116574.6 RHOU 1158.7330 -2.487046 0.1190345 -20.89349 0 0 11.068257 10.953967 10.955235 10.968457 8.534667 8.751548 8.631342 8.806695
ENSG00000100311.17 PDGFB 762.4043 -4.200241 0.2158178 -19.46197 0 0 10.818451 10.346682 10.679375 10.331647 6.809802 7.232135 7.001269 7.200800
ENSG00000100678.20 SLC8A3 358.9813 -3.566660 0.1921649 -18.56042 0 0 9.529756 9.369735 9.550313 9.546351 6.479031 6.830202 6.937598 6.825018
ENSG00000148798.11 INA 449.6321 -3.871214 0.2123022 -18.23446 0 0 9.903409 10.088926 9.662117 9.569548 6.911927 6.603283 6.915641 6.770452
ENSG00000089250.20 NOS1 250.7947 -5.564540 0.3085129 -18.03665 0 0 8.786950 9.018045 9.351096 9.221984 5.899032 5.929534 5.732877 5.658486
ENSG00000054983.17 GALC 254.7479 -7.177380 0.4290953 -16.72677 0 0 8.907264 8.988819 9.232172 9.411614 5.449783 5.578676 5.324774 5.490048
ENSG00000009709.13 PAX7 127.3333 -4.443382 0.2695849 -16.48231 0 0 8.195813 8.334413 8.292938 8.153813 5.736095 5.963757 5.834389 5.790457
ENSG00000183770.7 FOXL2 142.6765 -3.360216 0.2063866 -16.28117 0 0 8.272609 8.349909 8.328173 8.337779 6.328065 6.320680 6.168496 6.148784
ENSG00000133019.12 CHRM3 282.6068 -2.720740 0.1754015 -15.51149 0 0 9.015629 8.965668 9.216256 9.224424 6.839849 7.119456 7.011578 7.101723
ENSG00000154783.12 FGD5 355.7793 -2.851012 0.1861684 -15.31416 0 0 9.277233 9.394671 9.596083 9.411614 6.897841 7.285051 7.296208 7.118768
ENSG00000132164.10 SLC6A11 106.1048 -4.364647 0.2910877 -14.99427 0 0 8.012600 7.920884 8.148147 8.040793 5.780323 5.856464 5.865360 5.620831
ENSG00000206432.4 TMEM200C 135.5014 -2.709658 0.1868406 -14.50251 0 0 8.134126 8.186723 8.246306 8.253617 6.395351 6.450698 6.512449 6.466853
ENSG00000204644.10 ZFP57 750.2561 -1.666633 0.1167461 -14.27570 0 0 10.223624 10.203270 10.143080 10.355170 8.664944 8.769525 8.634497 8.789064
ENSG00000112414.15 ADGRG6 508.4714 -2.799366 0.1985637 -14.09807 0 0 9.579926 10.266681 9.843772 9.799432 7.434690 7.564944 7.544218 7.596921
ENSG00000106236.4 NPTX2 1200.4620 -3.000108 0.2134385 -14.05608 0 0 11.058195 10.995229 11.313973 11.016301 7.845060 8.377118 8.550102 8.528409
ENSG00000162426.16 SLC45A1 112.7501 -3.759645 0.2686451 -13.99484 0 0 8.099755 7.804630 8.119620 8.243949 6.034266 5.856464 5.978500 6.022722
ENSG00000134873.10 CLDN10 285.7763 -2.704238 0.1932477 -13.99364 0 0 9.221829 9.147062 9.096704 9.020378 6.731291 7.021709 7.147845 7.200800
nrow(dge)
## [1] 78724
nrow(subset(dge,padj<0.05))
## [1] 3655
dge[grep("ACADVL",rownames(dge)),]
##                           baseMean log2FoldChange    lfcSE      stat
## ENSG00000072778.20 ACADVL 13984.35      -1.757624 0.262024 -6.707873
##                                 pvalue         padj     Ctl1    Ctl2    Ctl3
## ENSG00000072778.20 ACADVL 1.974819e-11 1.528463e-09 14.33474 13.9524 14.6743
##                               Ctl4     Mut1     Mut2     Mut3     Mut4
## ENSG00000072778.20 ACADVL 14.54686 12.13382 12.30005 12.90259 13.06871
xxf <- xx[rowMeans(xx)>=10,]
dim(xx) ; dim(xxf)
## [1] 78724     8
## [1] 19603     8
dds <- DESeqDataSetFromMatrix(countData = xxf , colData = ss, design = ~ case )
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression changes caused by VLCAD mutation") %>% kable_paper("hover", full_width = F)
Top gene expression changes caused by VLCAD mutation
baseMean log2FoldChange lfcSE stat pvalue padj Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
ENSG00000162511.8 LAPTM5 988.1810 -3.977558 0.1624486 -24.48502 0 0 11.013483 10.895831 10.910941 10.858916 7.387102 7.674627 7.392785 7.778289
ENSG00000116574.6 RHOU 1159.8102 -2.478028 0.1017890 -24.34475 0 0 11.078995 10.963816 10.966535 10.985276 8.601506 8.806630 8.702114 8.863824
ENSG00000272975.3 MYHAS 388.4941 -4.186664 0.1847116 -22.66595 0 0 9.515786 9.584977 9.875569 9.683639 6.691628 6.800557 6.603675 6.797969
ENSG00000152128.13 TMEM163 855.0084 -3.981991 0.1799082 -22.13346 0 0 10.579410 10.449616 10.855680 10.945856 7.293661 7.373581 7.528560 7.522791
ENSG00000100311.17 PDGFB 762.5890 -4.191593 0.2090203 -20.05352 0 0 10.831306 10.362846 10.693220 10.354940 6.975994 7.363579 7.157500 7.337194
ENSG00000100678.20 SLC8A3 359.1927 -3.556739 0.1795828 -19.80556 0 0 9.561124 9.402958 9.580921 9.582313 6.673445 6.992103 7.098566 6.989704
ENSG00000148798.11 INA 449.7423 -3.861696 0.2053141 -18.80873 0 0 9.927773 10.108623 9.690482 9.605039 7.070008 6.784385 7.078268 6.939574
ENSG00000089250.20 NOS1 250.8596 -5.556768 0.3047282 -18.23516 0 0 8.837995 9.060394 9.386105 9.265278 6.149451 6.175384 6.002635 5.934002
ENSG00000183770.7 FOXL2 142.7567 -3.353550 0.1913430 -17.52638 0 0 8.343122 8.415647 8.396021 8.409836 6.536307 6.527617 6.395099 6.373976
ENSG00000009709.13 PAX7 127.3592 -4.434877 0.2575833 -17.21726 0 0 8.269724 8.400802 8.362287 8.233832 6.003486 6.206073 6.093781 6.052004
ENSG00000054983.17 GALC 254.8315 -7.169298 0.4262001 -16.82143 0 0 8.954509 9.032016 9.270073 9.450447 5.747955 5.861942 5.637599 5.783747
ENSG00000133019.12 CHRM3 282.8664 -2.711812 0.1633780 -16.59839 0 0 9.059671 9.009547 9.254560 9.267658 7.003624 7.258975 7.167052 7.245191
ENSG00000204644.10 ZFP57 751.3959 -1.658220 0.1008664 -16.43976 0 0 10.243168 10.221327 10.163403 10.378174 8.727173 8.824016 8.705163 8.846753
ENSG00000154783.12 FGD5 356.0567 -2.841880 0.1756942 -16.17515 0 0 9.314336 9.427317 9.625754 9.450447 7.057023 7.412828 7.431948 7.260999
ENSG00000206432.4 TMEM200C 135.6225 -2.700161 0.1726686 -15.63782 0 0 8.210869 8.259598 8.317683 8.329221 6.597360 6.645493 6.707842 6.662130
ENSG00000132164.10 SLC6A11 106.1440 -4.356983 0.2804464 -15.53589 0 0 8.095188 8.006750 8.223958 8.126094 6.043063 6.109937 6.121624 5.900381
ENSG00000136425.14 CIB2 560.2324 -2.093824 0.1381385 -15.15743 0 0 9.964635 9.951553 10.006036 9.865507 8.086272 8.081641 8.064365 8.403856
ENSG00000134873.10 CLDN10 286.0041 -2.694147 0.1820120 -14.80202 0 0 9.260313 9.185839 9.138162 9.069007 6.903915 7.168526 7.293595 7.337194
ENSG00000112414.15 ADGRG6 508.8023 -2.790367 0.1916111 -14.56266 0 0 9.610256 10.283879 9.868809 9.830607 7.555822 7.674627 7.664559 7.707762
ENSG00000028137.19 TNFRSF1B 466.1307 -1.894338 0.1312207 -14.43627 0 0 9.762336 9.569750 9.733929 9.597504 7.935833 8.210393 8.015367 8.178466
write.table(dge,file="deseq2_mut.tsv",quote=FALSE,sep='\t')

nrow(dge)
## [1] 19603
nrow(subset(dge,padj<0.05))
## [1] 4095
dge[grep("ACADVL",rownames(dge)),]
##                           baseMean log2FoldChange     lfcSE      stat
## ENSG00000072778.20 ACADVL 14007.45      -1.748639 0.2636304 -6.632919
##                                 pvalue         padj     Ctl1     Ctl2     Ctl3
## ENSG00000072778.20 ACADVL 3.291119e-11 1.557717e-09 14.33522 13.95137 14.67416
##                               Ctl4     Mut1     Mut2     Mut3     Mut4
## ENSG00000072778.20 ACADVL 14.55264 12.14702 12.30763 12.92246 13.07882

Make some plots.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}

make_heatmap <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  values <- myss$quickdash
  f <- colorRamp(c("yellow", "orange"))
  rr <- range(values)
  svals <- (values-rr[1])/diff(rr)
  colcols <- rgb(f(svals)/255)
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(7,15), cexRow=0.9, cexCol=1.4,
    main=paste("Top ranked",n,"genes in",name) )
}

maplot(dge,"ctrl vs mut")

make_volcano(dge,"ctrl vs mut")

make_heatmap(dge,"ctrl vs mut",ss,xxf,n=30)
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
## Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
## -Inf

Now look at the expression of ACADVL gene.

acadvl <- xx[grep("ACADVL",rownames(xx)),]

acadvl
##                            Ctl1  Ctl2  Ctl3  Ctl4 Mut1 Mut2 Mut3  Mut4
## ENSG00000072778.20 ACADVL 22090 13720 23784 23679 4073 4632 8803 11032
rpm <- apply(xxf,2,function(x) { x/sum(x) * 1e6 } )

acadvl <- rpm[grep("ACADVL",rownames(rpm)),]

acadvl
##      Ctl1      Ctl2      Ctl3      Ctl4      Mut1      Mut2      Mut3      Mut4 
## 371.56241 287.22945 471.14408 451.38897  80.52195  89.30932 147.93678 161.95195
ctl <- acadvl[grep("Ctl",names(acadvl))]
mut <- acadvl[grep("Mut",names(acadvl))]
l <- list("Ctl"=ctl,"Mut"=mut)
l
## $Ctl
##     Ctl1     Ctl2     Ctl3     Ctl4 
## 371.5624 287.2294 471.1441 451.3890 
## 
## $Mut
##      Mut1      Mut2      Mut3      Mut4 
##  80.52195  89.30932 147.93678 161.95195
summary(l[["Ctl"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   287.2   350.5   411.5   395.3   456.3   471.1
summary(l[["Mut"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   80.52   87.11  118.62  119.93  151.44  161.95
decrease= ( mean(l[["Ctl"]]) -  mean(l[["Mut"]]) ) / mean(l[["Ctl"]]) *100
msg <- paste("Decrease:",signif(decrease,3),"%")

boxplot(l,col="white",ylab="reads per million (RPM)",main="ACADVL RNA expression")
beeswarm(l,add=TRUE,pch=19,cex=2,col="darkgray")
mtext(msg)

Pathway enrichment

Here I’m using the mitch package and gene pathways from Reactome and Gene Ontology to understand gene changes caused by the mutation.

Gene ontology terms obtained from GO website and converted to list format, downloaded in Feb 2024 (GO_bp_2023q4.Rds).

reactome <- gmt_import("ReactomePathways_2025-03-14.gmt")

gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)
head(gt)
##                rownames(xx)   gene
## 1 ENSG00000000003.16 TSPAN6 TSPAN6
## 2    ENSG00000000005.6 TNMD   TNMD
## 3   ENSG00000000419.14 DPM1   DPM1
## 4  ENSG00000000457.14 SCYL3  SCYL3
## 5  ENSG00000000460.17 FIRRM  FIRRM
## 6    ENSG00000000938.13 FGR    FGR
m1 <- mitch_import(dge, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 19603
## Note: no. genes in output = 19545
## Note: estimated proportion of input genes in output = 0.997
dim(m1)
## [1] 19545     1
mres_reactome <- mitch_calc(m1, reactome, priority="effect",cores=8)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
mres_reactome_top <- subset(mres_reactome$enrichment_result,p.adjustANOVA < 0.05 )

head(mres_reactome_top,20) %>%
  kbl(caption = "Top gene pathway differences caused by ALVCAD mutation") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences caused by ALVCAD mutation
set setSize pANOVA s.dist p.adjustANOVA
520 HDR through MMEJ (alt-NHEJ) 12 0.0001959 0.6208382 0.0021961
214 Complex III assembly 25 0.0000002 0.6041107 0.0000066
772 NOTCH4 Activation and Transmission of Signal to the Nucleus 11 0.0005778 -0.5992628 0.0048951
1350 TNFs bind their physiological receptors 15 0.0000674 -0.5942174 0.0009101
1355 TP53 Regulates Transcription of Death Receptors and Ligands 11 0.0006827 -0.5913605 0.0053666
318 Dissolution of Fibrin Clot 11 0.0010058 -0.5726426 0.0069405
1042 Regulated proteolysis of p75NTR 11 0.0013650 -0.5575081 0.0089339
897 Plasma lipoprotein remodeling 16 0.0001284 -0.5529085 0.0015661
360 Endosomal/Vacuolar pathway 11 0.0015211 -0.5520538 0.0097879
381 FASTK family proteins regulate processing and stability of mitochondrial RNAs 19 0.0000395 0.5446449 0.0006050
187 Chondroitin sulfate biosynthesis 18 0.0000691 -0.5416944 0.0009171
888 Phase 4 - resting membrane potential 13 0.0009082 -0.5313647 0.0065054
411 Formation of ATP by chemiosmotic coupling 20 0.0000692 0.5139001 0.0009171
229 Cristae formation 33 0.0000003 0.5123312 0.0000126
1340 TGFBR3 PTM regulation 11 0.0035785 -0.5072275 0.0177759
213 Complex I biogenesis 66 0.0000000 0.5046219 0.0000000
723 Mitochondrial RNA degradation 25 0.0000165 0.4976434 0.0003144
1379 The canonical retinoid cycle in rods (twilight vision) 12 0.0031848 -0.4916807 0.0165196
635 Killing mechanisms 11 0.0047487 -0.4916463 0.0212368
1483 WNT5:FZD7-mediated leishmania damping 11 0.0047487 -0.4916463 0.0212368
write.table(mres_reactome$enrichment_result,file="mitch_mut_reactome.tsv",quote=FALSE,sep='\t')

par(mar=c(5,27,3,3))
top <- mres_reactome$enrichment_result
top <- subset(top,p.adjustANOVA<0.05)
nrow(top)
## [1] 448
up <- head(subset(top,s.dist>0),20)
dn <- head(subset(top,s.dist<0),20)
top <- rbind(up,dn)
vec=top$s.dist
names(vec)=top$set
names(vec) <- gsub("_"," ",names(vec))
vec <- vec[order(vec)]
barplot(abs(vec),col=sign(-vec)+3,horiz=TRUE,las=1,cex.names=0.65,main="Ctl vs Mut",xlab="ES")
grid()

if ( ! file.exists("mitch_mut_reactome.html") ) {
  mitch_report(res=mres_reactome,outfile="mitch_mut_reactome.html",overwrite=TRUE)
}

Gene ontology.

gobp <- gmt_import("gobp.msigdb.v2024.1.Hs.symbols.gmt")
names(gobp) <- gsub("GOBP_","",names(gobp))
names(gobp) <- gsub("_"," ",names(gobp))

#download.file("https://ziemann-lab.net/public/tmp/go_2024-11.gmt",destfile="go_2024-11.gmt")
#gobp <- gmt_import("go_2024-11.gmt")

gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)
head(gt)
##                rownames(xx)   gene
## 1 ENSG00000000003.16 TSPAN6 TSPAN6
## 2    ENSG00000000005.6 TNMD   TNMD
## 3   ENSG00000000419.14 DPM1   DPM1
## 4  ENSG00000000457.14 SCYL3  SCYL3
## 5  ENSG00000000460.17 FIRRM  FIRRM
## 6    ENSG00000000938.13 FGR    FGR
m1 <- mitch_import(dge, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 19603
## Note: no. genes in output = 19545
## Note: estimated proportion of input genes in output = 0.997
dim(m1)
## [1] 19545     1
mres_gobp <- mitch_calc(m1, gobp, priority="effect",cores=8)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
mres_gobp_top <- subset(mres_gobp$enrichment_result,p.adjustANOVA < 0.05 )

head(mres_gobp_top,20) %>%
  kbl(caption = "Top GOBP differences caused by ALVCAD mutation") %>%
  kable_paper("hover", full_width = F)
Top GOBP differences caused by ALVCAD mutation
set setSize pANOVA s.dist p.adjustANOVA
4175 RENAL SYSTEM PROCESS INVOLVED IN REGULATION OF BLOOD VOLUME 10 0.0000176 -0.7839672 0.0001403
2054 NEGATIVE REGULATION OF NEURON PROJECTION REGENERATION 14 0.0000006 -0.7714183 0.0000072
4415 RRNA BASE METHYLATION 10 0.0000265 0.7672281 0.0001977
3994 REGULATION OF RESPIRATORY BURST 11 0.0000724 -0.6908607 0.0004638
4081 REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE BY CIRCULATORY RENIN ANGIOTENSIN 10 0.0002052 -0.6779114 0.0011291
1083 GLOMERULAR MESANGIAL CELL PROLIFERATION 12 0.0000557 -0.6718545 0.0003739
4176 RENAL SYSTEM PROCESS INVOLVED IN REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE 17 0.0000022 -0.6633492 0.0000234
1934 NEGATIVE REGULATION OF EXTRACELLULAR MATRIX ORGANIZATION 12 0.0000692 -0.6632792 0.0004497
4083 REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE BY RENIN ANGIOTENSIN 14 0.0000262 -0.6488732 0.0001962
2453 PEPTIDYL LYSINE TRIMETHYLATION 11 0.0002204 0.6432235 0.0012031
1632 MITOCHONDRIAL ELECTRON TRANSPORT UBIQUINOL TO CYTOCHROME C 13 0.0000879 0.6281290 0.0005514
204 BASE CONVERSION OR SUBSTITUTION EDITING 11 0.0003130 -0.6275399 0.0016268
4352 RESPONSE TO WATER 10 0.0005951 -0.6270386 0.0027946
4190 RESPIRATORY BURST INVOLVED IN DEFENSE RESPONSE 10 0.0006917 -0.6195546 0.0031808
2232 NEUROBLAST DIVISION 13 0.0001259 -0.6141086 0.0007506
1368 LATERAL SPROUTING FROM AN EPITHELIUM 10 0.0007871 -0.6130637 0.0035459
1057 GAMMA DELTA T CELL ACTIVATION 15 0.0000534 -0.6024236 0.0003607
3299 PYRIMIDINE NUCLEOSIDE CATABOLIC PROCESS 10 0.0009997 -0.6008907 0.0042969
130 ANTIGEN PROCESSING AND PRESENTATION OF PEPTIDE ANTIGEN VIA MHC CLASS IB 16 0.0000385 -0.5943149 0.0002723
3621 REGULATION OF GLIAL CELL MIGRATION 12 0.0003827 -0.5920835 0.0019392
write.table(mres_gobp$enrichment_result,file="mitch_mut_gobp.tsv",quote=FALSE,sep='\t')

par(mar=c(5,27,3,3))
top <- mres_gobp$enrichment_result
top <- subset(top,p.adjustANOVA<0.05)
nrow(top)
## [1] 2011
up <- head(subset(top,s.dist>0),20)
dn <- head(subset(top,s.dist<0),20)
top <- rbind(up,dn)
vec=top$s.dist
names(vec)=top$set
names(vec) <- gsub("_"," ",names(vec))
vec <- vec[order(vec)]
barplot(abs(vec),col=sign(-vec)+3,horiz=TRUE,las=1,cex.names=0.65,main="Ctl vs Mut",xlab="ES")
grid()

if ( ! file.exists("mitch_mut_gobp.html") ) {
  mitch_report(res=mres_gobp,outfile="mitch_mut_gobp.html")
}

Network diagrams

In these charts the darkness of the colour indicates the enrichment score. The size of the node indicates the number of genes in the set driving the enrichment. The width of the edges indicates the proportion of shared genes.

map2color<-function(x,pal,limits=NULL){
    if(is.null(limits)) limits=range(x)
    pal[findInterval(x,seq(limits[1],limits[2],length.out=length(pal)+1), all.inside=TRUE)]
}

gs2net <- function(gset,em,colfunc=colorRampPalette(c("blue", "white","red"))(n=100)){
  gset <- gset[order(names(gset))]
  names(gset) <- lapply(names(gset), function(x) {
    nn <- substr(x, 1, 60)
    if (nchar(nn) == 60 ) {
      nn <- paste(nn,"...",sep="")
    }
    return(nn)
  } )
  em$set <- lapply(em$set, function(x) {
    nn <- substr(x, 1, 60)
    if (nchar(nn) == 60 ) {
      nn <- paste(nn,"...",sep="")
    }
    return(nn)
  } )
  mydf <- bind_rows(lapply(gset, as.data.frame.list))
  rownames(mydf) <- names(gset)
  j <- apply(mydf,1,function(x) {
    apply(mydf,1,function(y) {
      length(intersect(x,y) ) / length(union(x,y))
    })
  })
  j[lower.tri(j)] <- NA
  j[lower.tri(j,diag=TRUE)] <- 0
  jl <- melt(j)
  jl <- jl[which(jl$Var1 != jl$Var2),]
  jl <- jl[which(jl$value != 1),]
  jl <- jl[order(-jl$value),]
  jl <- head(jl,length(gset)*2)
  jl$edgeSize = jl$value/sum(jl$value)
  nodes <- sort(union(jl[,1],jl[,2]))
  lengths <- unlist(lapply(gset,length))
  lengths <- lengths[names(lengths) %in% nodes]
  nodes <- data.frame("nodes"=nodes,"lengths"=lengths)
  nodes$vertexsize <- sqrt(nodes$lengths/sum(nodes$lengths) * 100)
  nodes$es <- em[match(nodes$nodes,em$set),"s.dist"]
  nodes$colours <- map2color(nodes$es,colfunc)
  jl2 <- apply(jl[,1:2],2,as.character)
  jlnet <- network(jl2)
  plot(jlnet, displaylabels = TRUE, label.col = "steelblue",
       edge.lwd = c(jl$edgeSize) * 100,
       arrowhead.cex = 0,
       label.cex = 0.8, vertex.border = "white",vertex.cex = nodes$vertexsize,
       vertex.col = nodes$colours, edge.col = "black")
  E1=min(nodes$es)
  E5=max(nodes$es)
  E3=mean(c(E1,E5))
  EE <- c(E1,E3,E5)
  legcols <- map2color(EE,colfunc)
  legend("topleft", legend=signif(EE,2) ,title="ES",box.lty=0,
    fill=legcols, cex=0.8)
  S1=min(nodes$vertexsize)
  FRAG=S1/10
  S5=max(nodes$vertexsize)
  S3=mean(c(S1,S5))
  SS=c(S1-FRAG,0,S5-FRAG)
  L1=min(nodes$lengths)
  L5=max(nodes$lengths)
  LL=paste(" ",c(L1,"",L5))
  legend("topright", legend=LL ,title="no. genes",box.lty=0,
    pt.cex=SS*1, cex=0.9 , pch=19,col="darkgray")
  J1=min(jl$edgeSize)
  FRAG=J1*3
  J5=max(jl$edgeSize)
  J3=mean(c(J1,J5))
  JL=c(J1,J3,J5)+FRAG
  JJ=c(min(jl$value),mean(c(min(jl$value),max(jl$value))),max(jl$value))
  legend("bottomleft", legend=signif(JJ,2) , lwd=JL*50, title="Jaccard",
    box.lty=0, cex=0.9 , lty=1, col="black")
}

Reactome up.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- reactome
up <- head(subset(mres_reactome$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0),20)
up_gs <- up[,1]
up_gs <- gs[which(names(gs) %in% up_gs)]

topgs_up <- lapply(1:length(up_gs),function(i) {
  gsname <- names(up_gs)[i]
  genes <- up_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores>2))
  return(top_genes)
})
names(topgs_up) <- names(up_gs)

gs2net(topgs_up,mres_reactome$enrichment_result,colorRampPalette(c("pink","darkred"))(n=100))

topgs_up
## $`Citric acid cycle (TCA cycle)`
##  [1] "ACAT1"  "DLD"    "IDH3B"  "MDH2"   "NFS1"   "NNT"    "OGDH"   "SDHA"  
##  [9] "SDHD"   "SUCLG1" "SUCLG2" "TRAP1" 
## 
## $`Complex I biogenesis`
##  [1] "ECSIT"    "FOXRED1"  "HSPA9"    "MT-ND1"   "MT-ND2"   "MT-ND3"  
##  [7] "NDUFA10"  "NDUFA2"   "NDUFA8"   "NDUFA9"   "NDUFAF1"  "NDUFAF2" 
## [13] "NDUFAF5"  "NDUFAF6"  "NDUFAF7"  "NDUFB3"   "NDUFB5"   "NDUFB6"  
## [19] "NDUFS4"   "NDUFS6"   "NDUFS7"   "NDUFS8"   "NDUFV1"   "NDUFV2"  
## [25] "TMEM126A" "TMEM186" 
## 
## $`Complex III assembly`
##  [1] "BCS1L"   "CYC1"    "HSPA9"   "LETM1"   "LYRM7"   "NFS1"    "UQCC2"  
##  [8] "UQCC5"   "UQCR10"  "UQCRB"   "UQCRC2"  "UQCRFS1" "UQCRH"   "UQCRQ"  
## 
## $`Cristae formation`
##  [1] "APOOL"   "ATP5F1A" "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO" 
##  [8] "CHCHD3"  "DNAJC11" "HSPA9"   "MICOS10" "MICOS13" "MT-ATP6" "MT-ATP8"
## [15] "MTX2"   
## 
## $`Cytosolic tRNA aminoacylation`
##  [1] "EPRS1" "HARS1" "IARS1" "KARS1" "LARS1" "MARS1" "NARS1" "RARS1" "WARS1"
## [10] "YARS1"
## 
## $`Defective homologous recombination repair (HRR) due to BRCA2 loss of function`
##  [1] "ATR"    "KAT5"   "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A"  "RAD9B" 
##  [9] "RBBP8"  "RPA1"   "RPA2"  
## 
## $`Diseases of DNA Double-Strand Break Repair`
##  [1] "ATR"    "KAT5"   "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A"  "RAD9B" 
##  [9] "RBBP8"  "RPA1"   "RPA2"  
## 
## $`Diseases of DNA repair`
##  [1] "ATR"    "KAT5"   "MUTYH"  "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A" 
##  [9] "RAD9B"  "RBBP8"  "RPA1"   "RPA2"  
## 
## $`FASTK family proteins regulate processing and stability of mitochondrial RNAs`
## [1] "MT-ATP6" "MT-ATP8" "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L"
## 
## $`Formation of ATP by chemiosmotic coupling`
## [1] "ATP5F1A" "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO"  "MT-ATP6"
## [8] "MT-ATP8"
## 
## $`Glyoxylate metabolism and glycine degradation`
## [1] "ALDH4A1" "DLD"     "GRHPR"   "OGDH"   
## 
## $`HDR through MMEJ (alt-NHEJ)`
## [1] "LIG3"  "NBN"   "RAD50" "RAD52" "RBBP8" "XRCC1"
## 
## $`Homologous DNA Pairing and Strand Exchange`
##  [1] "ATR"    "KAT5"   "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A"  "RAD9B" 
##  [9] "RBBP8"  "RPA1"   "RPA2"  
## 
## $`Lysine catabolism`
## [1] "AASS"    "ALDH7A1" "DHTKD1"  "DLD"     "GCDH"    "HYKK"   
## 
## $`Mitochondrial RNA degradation`
## [1] "MT-ATP6" "MT-ATP8" "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L"
## 
## $`Nucleotide biosynthesis`
## [1] "ATIC"  "CAD"   "DHODH" "GART"  "PFAS"  "PPAT" 
## 
## $`Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)`
## [1] "KAT5"   "NBN"    "RAD50"  "RAD51C" "RBBP8" 
## 
## $`Respiratory electron transport`
##  [1] "BCS1L"    "COX11"    "COX15"    "COX18"    "COX6B1"   "COX6C"   
##  [7] "COX7B"    "COX7C"    "CYC1"     "ECSIT"    "ETFDH"    "FOXRED1" 
## [13] "GOT1"     "HIGD2A"   "HSPA9"    "LETM1"    "LYRM7"    "MDH2"    
## [19] "MT-CO3"   "MT-ND1"   "MT-ND2"   "MT-ND3"   "NDUFA10"  "NDUFA2"  
## [25] "NDUFA8"   "NDUFA9"   "NDUFAF1"  "NDUFAF2"  "NDUFAF5"  "NDUFAF6" 
## [31] "NDUFAF7"  "NDUFB3"   "NDUFB5"   "NDUFB6"   "NDUFS4"   "NDUFS6"  
## [37] "NDUFS7"   "NDUFS8"   "NDUFV1"   "NDUFV2"   "NFS1"     "SCO1"    
## [43] "SDHA"     "SDHD"     "SMIM20"   "TIMM21"   "TMEM126A" "TMEM186" 
## [49] "TRAP1"    "UQCC2"    "UQCC5"    "UQCR10"   "UQCRB"    "UQCRC2"  
## [55] "UQCRFS1"  "UQCRH"    "UQCRQ"   
## 
## $`WNT5A-dependent internalization of FZD4`
## [1] "AP2B1" "CLTC"  "DVL2"  "PRKCA"
## 
## $`rRNA processing in the mitochondrion`
##  [1] "ELAC2"   "MT-ATP6" "MT-ATP8" "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3" 
##  [8] "MT-ND4L" "MT-TL2"  "MTERF4"

Reactome down.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- reactome
dn <- head(subset(mres_reactome$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0),20)
dn_gs <- dn[,1]
dn_gs <- gs[which(names(gs) %in% dn_gs)]

topgs_dn <- lapply(1:length(dn_gs),function(i) {
  gsname <- names(dn_gs)[i]
  genes <- dn_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores<2))
  return(top_genes)
})
names(topgs_dn) <- names(dn_gs)

gs2net(topgs_dn,mres_reactome$enrichment_result,colorRampPalette(c("darkblue","lightblue"))(n=100))

topgs_dn
## $`CS/DS degradation`
##  [1] "ARSB"  "BCAN"  "BGN"   "CSPG4" "CSPG5" "HEXA"  "HEXB"  "HYAL1" "HYAL3"
## [10] "IDS"   "IDUA"  "VCAN" 
## 
## $`Chondroitin sulfate biosynthesis`
##  [1] "BCAN"       "BGN"        "CHPF"       "CHPF2"      "CHST11"    
##  [6] "CHST12"     "CHST13"     "CHST15"     "CHST3"      "CHST7"     
## [11] "CHST9"      "CHSY1"      "CSGALNACT1" "CSGALNACT2" "CSPG4"     
## [16] "CSPG5"      "VCAN"      
## 
## $`Dissolution of Fibrin Clot`
##  [1] "ANXA2"    "PLAT"     "PLAU"     "PLAUR"    "S100A10"  "SERPINB2"
##  [7] "SERPINB6" "SERPINB8" "SERPINE1" "SERPINE2" "SERPINF2"
## 
## $`Endosomal/Vacuolar pathway`
##  [1] "B2M"   "CTSL"  "CTSS"  "CTSV"  "HLA-A" "HLA-B" "HLA-C" "HLA-E" "HLA-F"
## [10] "HLA-H"
## 
## $`Glycosphingolipid biosynthesis`
##  [1] "A4GALT"     "B3GALNT1"   "B3GALT4"    "B3GNT5"     "B4GALNT1"  
##  [6] "B4GALT5"    "CERK"       "FUT1"       "FUT2"       "ST3GAL2"   
## [11] "ST3GAL3"    "ST3GAL5"    "ST6GALNAC5" "ST6GALNAC6" "ST8SIA5"   
## [16] "UGCG"       "UGT8"      
## 
## $`Killing mechanisms`
##  [1] "CYBA"  "DVL1"  "DVL3"  "FZD7"  "JUN"   "MAPK8" "NOX1"  "NOXA1" "RAC1" 
## [10] "WNT5A"
## 
## $`Maturation of nucleoprotein_9683610`
##  [1] "GSK3A"  "GSK3B"  "PARP10" "PARP14" "PARP16" "PARP4"  "PARP6"  "PARP8" 
##  [9] "PARP9"  "SUMO1"  "UBE2I" 
## 
## $`NOTCH4 Activation and Transmission of Signal to the Nucleus`
##  [1] "ADAM10" "APH1A"  "APH1B"  "DLL4"   "JAG1"   "NCSTN"  "NOTCH4" "PSEN1" 
##  [9] "PSEN2"  "PSENEN" "YWHAZ" 
## 
## $`Negative regulation of TCF-dependent signaling by WNT ligand antagonists`
## [1] "DKK2"    "KREMEN1" "KREMEN2" "LRP5"    "LRP6"    "SFRP1"   "WNT4"   
## [8] "WNT5A"   "WNT9A"  
## 
## $`Phase 4 - resting membrane potential`
##  [1] "KCNJ12" "KCNJ14" "KCNJ2"  "KCNJ4"  "KCNK1"  "KCNK10" "KCNK12" "KCNK13"
##  [9] "KCNK2"  "KCNK3"  "KCNK5"  "KCNK6"  "KCNK9" 
## 
## $`Plasma lipoprotein remodeling`
##  [1] "ABCG1"   "ANGPTL4" "APOE"    "FURIN"   "LCAT"    "LIPG"    "LMF1"   
##  [8] "LMF2"    "LPL"     "MBTPS1"  "MBTPS2"  "MTTP"    "P4HB"    "PCSK5"  
## [15] "PCSK6"   "PLTP"   
## 
## $`Regulated proteolysis of p75NTR`
##  [1] "ADAM17" "APH1A"  "APH1B"  "NCSTN"  "NFKB1"  "NGFR"   "PSEN1"  "PSEN2" 
##  [9] "PSENEN" "RELA"   "TRAF6" 
## 
## $`Specification of primordial germ cells`
##  [1] "BMP4"    "CBFA2T2" "CXCR4"   "EOMES"   "NANOS3"  "PDPN"    "PRDM1"  
##  [8] "SOX17"   "TET2"    "TFAP2C" 
## 
## $`TGFBR3 PTM regulation`
##  [1] "APH1A"  "APH1B"  "MMP14"  "NCSTN"  "PSEN1"  "PSEN2"  "PSENEN" "TGFBR3"
##  [9] "TIMP1"  "TIMP2" 
## 
## $`TNFs bind their physiological receptors`
##  [1] "CD70"      "EDA"       "EDARADD"   "LTA"       "TNFRSF11B" "TNFRSF14" 
##  [7] "TNFRSF1A"  "TNFRSF1B"  "TNFRSF25"  "TNFRSF6B"  "TNFRSF9"   "TNFSF13"  
## [13] "TNFSF13B"  "TNFSF4"    "TNFSF9"   
## 
## $`TP53 Regulates Transcription of Death Receptors and Ligands`
##  [1] "FAS"       "IGFBP3"    "PPP1R13B"  "TMEM219"   "TNFRSF10A" "TNFRSF10B"
##  [7] "TNFRSF10C" "TNFRSF10D" "TP53"      "TP53BP2"   "TP73"     
## 
## $`Termination of O-glycan biosynthesis`
##  [1] "MUC1"       "MUC12"      "MUC20"      "MUC3A"      "ST3GAL1"   
##  [6] "ST3GAL2"    "ST3GAL3"    "ST3GAL4"    "ST6GAL1"    "ST6GALNAC2"
## [11] "ST6GALNAC3" "ST6GALNAC4"
## 
## $`The canonical retinoid cycle in rods (twilight vision)`
##  [1] "ABCA4"   "CYP4V2"  "HSD17B6" "MYO7A"   "RBP1"    "RBP4"    "RDH10"  
##  [8] "RDH11"   "RDH16"   "RDH5"    "RLBP1"   "STRA6"  
## 
## $`WNT ligand biogenesis and trafficking`
##  [1] "PORCN"  "SNX3"   "TMED5"  "VPS26A" "VPS29"  "VPS35"  "WLS"    "WNT10A"
##  [9] "WNT10B" "WNT11"  "WNT2B"  "WNT3"   "WNT4"   "WNT5A"  "WNT5B"  "WNT6"  
## [17] "WNT7A"  "WNT7B"  "WNT9A" 
## 
## $`WNT5:FZD7-mediated leishmania damping`
##  [1] "CYBA"  "DVL1"  "DVL3"  "FZD7"  "JUN"   "MAPK8" "NOX1"  "NOXA1" "RAC1" 
## [10] "WNT5A"

GO BP up.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- gobp
up <- head(subset(mres_gobp$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0),20)
up_gs <- up[,1]
up_gs <- gs[which(names(gs) %in% up_gs)]

topgs_up <- lapply(1:length(up_gs),function(i) {
  gsname <- names(up_gs)[i]
  genes <- up_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores>2))
  return(top_genes)
})
names(topgs_up) <- names(up_gs)

gs2net(topgs_up,mres_gobp$enrichment_result,colorRampPalette(c("pink","darkred"))(n=100))

topgs_up
## $`ATP BIOSYNTHETIC PROCESS`
##  [1] "ANTKMT"  "ATP5F1A" "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO" 
##  [8] "COX11"   "MT-ATP6" "MT-ATP8" "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L"
## [15] "NDUFA10" "NDUFA2"  "NDUFA8"  "NDUFA9"  "NDUFB3"  "NDUFB5"  "NDUFB6" 
## [22] "NDUFS4"  "NDUFS6"  "NDUFS7"  "NDUFS8"  "NDUFV1"  "NDUFV2"  "NUDT2"  
## [29] "PID1"    "SDHA"    "SDHD"    "STOML2"  "VCP"    
## 
## $`ATP SYNTHESIS COUPLED ELECTRON TRANSPORT`
##  [1] "AFG1L"   "COX6B1"  "COX6C"   "COX7B"   "COX7C"   "CYC1"    "DLD"    
##  [8] "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10" "NDUFA2" 
## [15] "NDUFA8"  "NDUFA9"  "NDUFAF1" "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4" 
## [22] "NDUFS6"  "NDUFS7"  "NDUFS8"  "NDUFV1"  "NDUFV2"  "SDHA"    "SDHD"   
## [29] "UQCR10"  "UQCRB"   "UQCRC2"  "UQCRFS1" "UQCRH"   "UQCRQ"  
## 
## $`CRISTAE FORMATION`
##  [1] "ADCK1"    "AFG3L2"   "APOOL"    "CHCHD3"   "DNAJC11"  "LETM1"   
##  [7] "MICOS10"  "MICOS13"  "OMA1"     "SLC25A46"
## 
## $`INNER MITOCHONDRIAL MEMBRANE ORGANIZATION`
##  [1] "ADCK1"    "AFG3L2"   "APOOL"    "BCS1L"    "CHCHD3"   "COX18"   
##  [7] "DNAJC11"  "HSPA9"    "LETM1"    "MAIP1"    "MICOS10"  "MICOS13" 
## [13] "MTX2"     "MTX3"     "OMA1"     "SLC25A46" "TMEM126A" "TOMM70"  
## [19] "TRMT10B" 
## 
## $`L SERINE METABOLIC PROCESS`
## [1] "PHGDH" "PSAT1" "SDSL"  "SHMT2" "SRR"  
## 
## $`MITOCHONDRIAL ELECTRON TRANSPORT NADH TO UBIQUINONE`
##  [1] "DLD"     "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10" "NDUFA2" 
##  [8] "NDUFA8"  "NDUFA9"  "NDUFAF1" "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4" 
## [15] "NDUFS6"  "NDUFS7"  "NDUFS8"  "NDUFV1"  "NDUFV2" 
## 
## $`MITOCHONDRIAL ELECTRON TRANSPORT UBIQUINOL TO CYTOCHROME C`
## [1] "CYC1"    "UQCR10"  "UQCRB"   "UQCRC2"  "UQCRFS1" "UQCRH"   "UQCRQ"  
## 
## $`NADH DEHYDROGENASE COMPLEX ASSEMBLY`
##  [1] "BCS1L"    "DMAC1"    "ECSIT"    "FOXRED1"  "MT-ND1"   "MT-ND2"  
##  [7] "NDUFA10"  "NDUFA2"   "NDUFA8"   "NDUFA9"   "NDUFAF1"  "NDUFAF2" 
## [13] "NDUFAF5"  "NDUFAF6"  "NDUFAF7"  "NDUFB3"   "NDUFB5"   "NDUFB6"  
## [19] "NDUFS4"   "NDUFS7"   "NDUFS8"   "TIMM21"   "TMEM126A" "TMEM186" 
## 
## $`NEGATIVE REGULATION OF CENTROSOME DUPLICATION`
## [1] "CDK5RAP2" "KAT2A"    "MDM1"     "NPM1"     "TRIM37"  
## 
## $`PEPTIDYL LYSINE TRIMETHYLATION`
## [1] "ANTKMT"  "ETFBKMT" "FAM86B1" "FAM86B2" "SETD2"   "SETD4"  
## 
## $`POSITIVE REGULATION OF RRNA PROCESSING`
## [1] "BUD23" "RIOK2" "UTP15" "WDR75"
## 
## $`PROTEIN DENEDDYLATION`
## [1] "COPS5"  "COPS7A" "COPS7B" "COPS8"  "GPS1"   "SENP8" 
## 
## $`PROTON MOTIVE FORCE DRIVEN ATP SYNTHESIS`
##  [1] "ANTKMT"  "ATP5F1A" "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO" 
##  [8] "MT-ATP6" "MT-ATP8" "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10"
## [15] "NDUFA2"  "NDUFA8"  "NDUFA9"  "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4" 
## [22] "NDUFS6"  "NDUFS7"  "NDUFS8"  "NDUFV1"  "NDUFV2"  "SDHA"    "SDHD"   
## [29] "STOML2" 
## 
## $`REGULATION OF CENTRIOLE REPLICATION`
## [1] "CDK5RAP2" "CENPJ"    "CEP295"   "CEP76"    "KAT2A"    "MDM1"     "NPM1"    
## [8] "TRIM37"   "VPS4B"   
## 
## $`REGULATION OF RRNA PROCESSING`
## [1] "BUD23"  "NUDT16" "RIOK2"  "USP36"  "UTP15"  "WDR75" 
## 
## $`RESPIRATORY CHAIN COMPLEX III ASSEMBLY`
## [1] "BCS1L"   "LYRM7"   "UQCC2"   "UQCC5"   "UQCRFS1"
## 
## $`RIBONUCLEOPROTEIN COMPLEX LOCALIZATION`
## [1] "NEAT1" "RIOK2" "SETD2" "THOC2" "WNK1" 
## 
## $`RRNA BASE METHYLATION`
## [1] "BUD23"   "FDXACB1" "METTL15" "METTL16" "NOP2"    "NSUN5"   "NSUN5P1"
## [8] "NSUN5P2"
## 
## $`SMALL NUCLEOLAR RIBONUCLEOPROTEIN COMPLEX ASSEMBLY`
## [1] "NOPCHAP1" "RUVBL2"   "TAF9"    
## 
## $`TRICARBOXYLIC ACID CYCLE`
##  [1] "ACO1"   "IDH3B"  "MDH2"   "NDP"    "NNT"    "OGDH"   "OGDHL"  "SDHA"  
##  [9] "SDHD"   "SUCLG1" "SUCLG2"

GO BP down.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- gobp
dn <- head(subset(mres_gobp$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0),20)
dn_gs <- dn[,1]
dn_gs <- gs[which(names(gs) %in% dn_gs)]

topgs_dn <- lapply(1:length(dn_gs),function(i) {
  gsname <- names(dn_gs)[i]
  genes <- dn_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores<2))
  return(top_genes)
})
names(topgs_dn) <- names(dn_gs)

gs2net(topgs_dn,mres_gobp$enrichment_result,colorRampPalette(c("darkblue","lightblue"))(n=100))

topgs_dn
## $`ANTIGEN PROCESSING AND PRESENTATION OF PEPTIDE ANTIGEN VIA MHC CLASS IB`
##  [1] "B2M"    "HFE"    "HLA-A"  "HLA-B"  "HLA-C"  "HLA-E"  "HLA-F"  "HLA-H" 
##  [9] "MICA"   "MICB"   "RAET1E" "RAET1G" "TAP2"   "ULBP1"  "ULBP2"  "ULBP3" 
## 
## $`BASE CONVERSION OR SUBSTITUTION EDITING`
##  [1] "ADAR"     "ADARB1"   "ADARB2"   "ADAT2"    "APOBEC3B" "APOBEC3C"
##  [7] "APOBEC3D" "APOBEC3F" "APOBEC3G" "APOBEC3H" "RBM47"   
## 
## $`GAMMA DELTA T CELL ACTIVATION`
##  [1] "ALKBH5"  "CXADR"   "EGR3"    "ITK"     "JAG2"    "LEF1"    "LILRB1" 
##  [8] "MICA"    "MICB"    "NCKAP1L" "SOX13"   "SOX4"    "STAT5B"  "SYK"    
## [15] "TCF7"   
## 
## $`GLOMERULAR MESANGIAL CELL PROLIFERATION`
##  [1] "BMP4"     "BMP7"     "CFLAR"    "EGR1"     "IL6R"     "ITGB3"   
##  [7] "PDGFA"    "PDGFB"    "PDGFD"    "PDGFRB"   "SERPINB7" "WT1"     
## 
## $`HEPATOCYTE APOPTOTIC PROCESS`
##  [1] "ADAR"   "ARF6"   "ATF2"   "BCL2L1" "BID"    "CASP6"  "CFLAR"  "DNMT3A"
##  [9] "GSN"    "IGF1R"  "KRT18"  "KRT8"   "PIK3CG" "PPARA"  "PRKAA1" "PRKAA2"
## [17] "RB1"    "STK3"   "STK4"  
## 
## $`LATERAL SPROUTING FROM AN EPITHELIUM`
## [1] "BMP4"   "BMP7"   "CELSR1" "FGF10"  "FGFR2"  "NOG"    "SHH"    "SULF1" 
## [9] "WNT5A" 
## 
## $`NEGATIVE REGULATION OF EXTRACELLULAR MATRIX ORGANIZATION`
##  [1] "ANTXR1"   "CST3"     "DPP4"     "EMILIN1"  "FAP"      "NOTCH1"  
##  [7] "PPARG"    "TGFB1"    "TGFBR3"   "TNFRSF1A" "TNFRSF1B"
## 
## $`NEGATIVE REGULATION OF MACROPHAGE DERIVED FOAM CELL DIFFERENTIATION`
##  [1] "ABCA1"  "ABCA5"  "ABCG1"  "ITGAV"  "ITGB3"  "NFKBIA" "NR1H2"  "NR1H3" 
##  [9] "PPARA"  "PPARG" 
## 
## $`NEGATIVE REGULATION OF NEURON PROJECTION REGENERATION`
##  [1] "CERS2"   "EPHA4"   "FIGNL2"  "INPP5F"  "KREMEN1" "LRIG2"   "NEO1"   
##  [8] "PTPRS"   "RGMA"    "RTCA"    "RTN4R"   "RTN4RL1" "SPP1"    "TNR"    
## 
## $`NEUROBLAST DIVISION`
##  [1] "AKNA"    "ARHGEF2" "ASPM"    "DOCK7"   "FGF13"   "FGFR1"   "FGFR2"  
##  [8] "LEF1"    "NUMB"    "NUMBL"   "RAB10"   "SOX5"    "TEAD3"  
## 
## $`PYRIMIDINE NUCLEOSIDE CATABOLIC PROCESS`
##  [1] "APOBEC3B" "APOBEC3C" "APOBEC3D" "APOBEC3F" "APOBEC3G" "APOBEC3H"
##  [7] "CDADC1"   "DCTD"     "DPYD"     "UPP1"    
## 
## $`REGULATION OF GLIAL CELL MIGRATION`
##  [1] "ATP1B2" "BMERB1" "CERS2"  "CRKL"   "CSF1"   "CX3CL1" "NF1"    "NTN1"  
##  [9] "P2RX4"  "RRAS"   "RRAS2"  "TIAM1" 
## 
## $`REGULATION OF RESPIRATORY BURST`
##  [1] "BCR"    "CAMK1D" "CLEC7A" "DUSP10" "GRN"    "INSR"   "NOXA1"  "RAC1"  
##  [9] "RAC2"   "RPS19"  "SLAMF8"
## 
## $`REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE BY CIRCULATORY RENIN ANGIOTENSIN`
##  [1] "ACE"     "ATP6AP2" "COMT"    "CTSZ"    "EDNRB"   "F2R"     "F2RL1"  
##  [8] "NDST2"   "OR51E2"  "PCSK5"  
## 
## $`REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE BY RENIN ANGIOTENSIN`
##  [1] "ACE"      "AGTR1"    "ATP6AP2"  "COMT"     "CTSZ"     "EDNRB"   
##  [7] "F2R"      "F2RL1"    "NDST2"    "NOX1"     "OR51E2"   "PCSK5"   
## [13] "RHOA"     "SERPINF2"
## 
## $`RENAL SYSTEM PROCESS INVOLVED IN REGULATION OF BLOOD VOLUME`
##  [1] "ADORA1"  "CORO2B"  "EMP2"    "F2R"     "F2RL1"   "GAS6"    "GJA5"   
##  [8] "HSD11B2" "PDGFB"   "PTPRO"  
## 
## $`RENAL SYSTEM PROCESS INVOLVED IN REGULATION OF SYSTEMIC ARTERIAL BLOOD PRESSURE`
##  [1] "ADORA1"   "AGTR1"    "COMT"     "CORO2B"   "EDNRB"    "EMP2"    
##  [7] "F2R"      "F2RL1"    "GAS6"     "GJA5"     "HSD11B2"  "OR51E2"  
## [13] "PCSK5"    "PDGFB"    "PTPRO"    "RHOA"     "SERPINF2"
## 
## $`RESPIRATORY BURST INVOLVED IN DEFENSE RESPONSE`
##  [1] "CYBC1"   "DUSP10"  "GRN"     "LIPA"    "PIK3CD"  "PIK3CG"  "PRDX2"  
##  [8] "RPS19"   "SELENOK" "SLAMF8" 
## 
## $`RESPONSE TO WATER`
##  [1] "ATP2B4"  "AVPR1A"  "COL18A1" "KRT8"    "LRP11"   "NTRK1"   "PIK3CA" 
##  [8] "PKD2"    "PLEC"    "SIPA1"  
## 
## $`SEROTONIN TRANSPORT`
##  [1] "GPM6B"   "ITGB3"   "LILRB1"  "MAOB"    "NOS1"    "SLC18A2" "SLC18A3"
##  [8] "SLC18B1" "SLC29A3" "SLC29A4" "SNCA"    "SYK"

Session information

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] network_1.19.0              beeswarm_0.4.0             
##  [3] kableExtra_1.4.0            eulerr_7.0.2               
##  [5] mitch_1.19.2                MASS_7.3-61                
##  [7] gplots_3.2.0                DESeq2_1.44.0              
##  [9] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [11] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [13] GenomicRanges_1.56.1        GenomeInfoDb_1.40.1        
## [15] IRanges_2.38.1              S4Vectors_0.42.1           
## [17] BiocGenerics_0.50.0         reshape2_1.4.4             
## [19] dplyr_1.1.4                 zoo_1.8-12                 
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1        viridisLite_0.4.2       bitops_1.0-9           
##  [4] fastmap_1.2.0           GGally_2.2.1            promises_1.3.2         
##  [7] digest_0.6.37           mime_0.12               lifecycle_1.0.4        
## [10] magrittr_2.0.3          compiler_4.4.2          rlang_1.1.4            
## [13] sass_0.4.9              tools_4.4.2             yaml_2.3.10            
## [16] knitr_1.49              S4Arrays_1.4.1          htmlwidgets_1.6.4      
## [19] DelayedArray_0.30.1     xml2_1.3.6              plyr_1.8.9             
## [22] RColorBrewer_1.1-3      abind_1.4-8             BiocParallel_1.38.0    
## [25] KernSmooth_2.23-24      purrr_1.0.2             grid_4.4.2             
## [28] caTools_1.18.3          xtable_1.8-4            colorspace_2.1-1       
## [31] ggplot2_3.5.1           scales_1.3.0            gtools_3.9.5           
## [34] cli_3.6.3               rmarkdown_2.29          crayon_1.5.3           
## [37] generics_0.1.3          rstudioapi_0.17.1       httr_1.4.7             
## [40] cachem_1.1.0            stringr_1.5.1           zlibbioc_1.50.0        
## [43] parallel_4.4.2          XVector_0.44.0          vctrs_0.6.5            
## [46] Matrix_1.7-1            jsonlite_1.8.9          echarts4r_0.4.5        
## [49] systemfonts_1.1.0       locfit_1.5-9.10         jquerylib_0.1.4        
## [52] tidyr_1.3.1             glue_1.8.0              statnet.common_4.11.0  
## [55] ggstats_0.7.0           codetools_0.2-20        stringi_1.8.4          
## [58] gtable_0.3.6            later_1.4.1             UCSC.utils_1.0.0       
## [61] munsell_0.5.1           tibble_3.2.1            pillar_1.10.0          
## [64] htmltools_0.5.8.1       GenomeInfoDbData_1.2.12 R6_2.5.1               
## [67] evaluate_1.0.1          shiny_1.10.0            lattice_0.22-6         
## [70] httpuv_1.6.15           bslib_0.8.0             Rcpp_1.0.13-1          
## [73] coda_0.19-4.1           svglite_2.1.3           gridExtra_2.3          
## [76] SparseArray_1.4.8       xfun_0.49               pkgconfig_2.0.3
save.image("dge.Rdata")