Source: TBA

Introduction

Here we analyse the effect of VLCAD mutation.

Reads were mapped to the human transcriptome version 47 from GENCODE genes (gencode.v47.transcripts.fa).

Genes with mean counts > 10 are classified as detected.

Differential expression is conducted with DESeq2.

Pathway enrichment analysis is conducted with mitch.

Gene sets were obtained from the gene ontology database (q3 2023). Biological process sets were used.

suppressPackageStartupMessages({
    library("zoo")
    library("dplyr")
    library("reshape2")
    library("DESeq2")
    library("gplots")
    library("MASS")
    library("mitch")
    library("eulerr")
    library("kableExtra")
    library("beeswarm")

library("network")

})

knitr::opts_chunk$set(dev = 'svg') # set output device to svg

Import read counts

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
accession <- sapply((strsplit(rownames(x),"\\|")),"[[",2)
symbol<-sapply((strsplit(rownames(x),"\\|")),"[[",6)
x$geneid <- paste(accession,symbol)
xx <- aggregate(. ~ geneid,x,sum)
rownames(xx) <- xx$geneid
xx$geneid = NULL
xx <- round(xx)
head(xx)
##                           Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
## ENSG00000000003.16 TSPAN6 4228 3204 2812 3362 2725 2330 2284 2711
## ENSG00000000005.6 TNMD       1    4    2    0    0    0    0    0
## ENSG00000000419.14 DPM1   2538 2211 1837 1867 2632 2677 2873 3146
## ENSG00000000457.14 SCYL3   526  440  444  607  360  385  519  605
## ENSG00000000460.17 FIRRM  1572 1325  652  744 1174 1142  674  870
## ENSG00000000938.13 FGR       0    0    1    3    0    1    0    4

Sample sheet

ss <- data.frame(colnames(xx))
ss$case <- as.numeric(grepl("Mut",ss[,1]))
rownames(ss) <- ss[,1]
ss[,1] = NULL
ss
##      case
## Ctl1    0
## Ctl2    0
## Ctl3    0
## Ctl4    0
## Mut1    1
## Mut2    1
## Mut3    1
## Mut4    1

QC analysis

Here I’ll look at a few different quality control measures.

par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads")

colSums(xx)
##     Ctl1     Ctl2     Ctl3     Ctl4     Mut1     Mut2     Mut3     Mut4 
## 59500246 47803671 50526219 52506392 50626163 51905681 59571523 68184815

We have 47M to 68M assigned reads, which is very good.

MDS plot

All sample cluster with others from the same group. Ctl and Mut samples are distinguished on dimension 2. No outliers recorded.

cols <- c(rep("tan1",4),rep("violet",4))

plot(cmdscale(dist(t(xx))), xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n",pch=19, cex=4, col=cols)

text(cmdscale(dist(t(xx))), labels=colnames(xx) )

Correlation heatmap

Correlation heatmap shows a slight batch effect with batch A consisting of replicates 1 and 2, while batch B consists of replicates 3 and 4.

heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap")

cor(xx) %>% kbl(caption = "Pearson correlation coefficients") %>% kable_paper("hover", full_width = F)
Pearson correlation coefficients
Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
Ctl1 1.0000000 0.9955678 0.9641994 0.9673286 0.9572850 0.9613203 0.9463720 0.9518116
Ctl2 0.9955678 1.0000000 0.9622280 0.9663104 0.9576128 0.9636163 0.9494607 0.9514847
Ctl3 0.9641994 0.9622280 1.0000000 0.9967999 0.9069284 0.9241674 0.9663318 0.9752521
Ctl4 0.9673286 0.9663104 0.9967999 1.0000000 0.9159501 0.9308417 0.9717279 0.9798979
Mut1 0.9572850 0.9576128 0.9069284 0.9159501 1.0000000 0.9938098 0.9502892 0.9428478
Mut2 0.9613203 0.9636163 0.9241674 0.9308417 0.9938098 1.0000000 0.9654085 0.9573851
Mut3 0.9463720 0.9494607 0.9663318 0.9717279 0.9502892 0.9654085 1.0000000 0.9971573
Mut4 0.9518116 0.9514847 0.9752521 0.9798979 0.9428478 0.9573851 0.9971573 1.0000000
cor(xx,method="spearman") %>% kbl(caption = "Spearman correlation coefficients") %>% kable_paper("hover", full_width = F)
Spearman correlation coefficients
Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
Ctl1 1.0000000 0.8799723 0.8726250 0.8731369 0.8687678 0.8722935 0.8650790 0.8656965
Ctl2 0.8799723 1.0000000 0.8702726 0.8727027 0.8678268 0.8726946 0.8606341 0.8645307
Ctl3 0.8726250 0.8702726 1.0000000 0.8822079 0.8580223 0.8633952 0.8725751 0.8759524
Ctl4 0.8731369 0.8727027 0.8822079 1.0000000 0.8599849 0.8662153 0.8741373 0.8765094
Mut1 0.8687678 0.8678268 0.8580223 0.8599849 1.0000000 0.8796945 0.8659710 0.8681118
Mut2 0.8722935 0.8726946 0.8633952 0.8662153 0.8796945 1.0000000 0.8726103 0.8744646
Mut3 0.8650790 0.8606341 0.8725751 0.8741373 0.8659710 0.8726103 1.0000000 0.8884237
Mut4 0.8656965 0.8645307 0.8759524 0.8765094 0.8681118 0.8744646 0.8884237 1.0000000

Analysis of differential gene expression

First we will look at control vs mutant with no filtering for low reads. Then we remove genes with fewer than 10 reads per sample on average and rerun DESeq2.

dds <- DESeqDataSetFromMatrix(countData = xx , colData = ss, design = ~ case )
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression changes caused by VLCAD mutation") %>% kable_paper("hover", full_width = F)
Top gene expression changes caused by VLCAD mutation
baseMean log2FoldChange lfcSE stat pvalue padj Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
ENSG00000162511.8 LAPTM5 987.8437 -3.986356 0.1755532 -22.70739 0 0 11.002220 10.885389 10.899161 10.841009 7.254068 7.564944 7.254276 7.671797
ENSG00000272975.3 MYHAS 388.3574 -4.195153 0.1956555 -21.44153 0 0 9.483432 9.555854 9.850651 9.649723 6.499000 6.621006 6.398233 6.615835
ENSG00000152128.13 TMEM163 854.4807 -3.990912 0.1867547 -21.36981 0 0 10.564008 10.434528 10.843404 10.928707 7.153614 7.242889 7.399430 7.399775
ENSG00000116574.6 RHOU 1158.7330 -2.487046 0.1190345 -20.89349 0 0 11.068257 10.953967 10.955235 10.968457 8.534667 8.751548 8.631342 8.806695
ENSG00000100311.17 PDGFB 762.4043 -4.200241 0.2158178 -19.46197 0 0 10.818451 10.346682 10.679375 10.331647 6.809802 7.232135 7.001269 7.200800
ENSG00000100678.20 SLC8A3 358.9813 -3.566660 0.1921649 -18.56042 0 0 9.529756 9.369735 9.550313 9.546351 6.479031 6.830202 6.937598 6.825018
ENSG00000148798.11 INA 449.6321 -3.871214 0.2123022 -18.23446 0 0 9.903409 10.088926 9.662117 9.569548 6.911927 6.603283 6.915641 6.770452
ENSG00000089250.20 NOS1 250.7947 -5.564540 0.3085129 -18.03665 0 0 8.786950 9.018045 9.351096 9.221984 5.899032 5.929534 5.732877 5.658486
ENSG00000054983.17 GALC 254.7479 -7.177380 0.4290953 -16.72677 0 0 8.907264 8.988819 9.232172 9.411614 5.449783 5.578676 5.324774 5.490048
ENSG00000009709.13 PAX7 127.3333 -4.443382 0.2695849 -16.48231 0 0 8.195813 8.334413 8.292938 8.153813 5.736095 5.963757 5.834389 5.790457
ENSG00000183770.7 FOXL2 142.6765 -3.360216 0.2063866 -16.28117 0 0 8.272609 8.349909 8.328173 8.337779 6.328065 6.320680 6.168496 6.148784
ENSG00000133019.12 CHRM3 282.6068 -2.720740 0.1754015 -15.51149 0 0 9.015629 8.965668 9.216256 9.224424 6.839849 7.119456 7.011578 7.101723
ENSG00000154783.12 FGD5 355.7793 -2.851012 0.1861684 -15.31416 0 0 9.277233 9.394671 9.596083 9.411614 6.897841 7.285051 7.296208 7.118768
ENSG00000132164.10 SLC6A11 106.1048 -4.364647 0.2910877 -14.99427 0 0 8.012600 7.920884 8.148147 8.040793 5.780323 5.856464 5.865360 5.620831
ENSG00000206432.4 TMEM200C 135.5014 -2.709658 0.1868406 -14.50251 0 0 8.134126 8.186723 8.246306 8.253617 6.395351 6.450698 6.512449 6.466853
ENSG00000204644.10 ZFP57 750.2561 -1.666633 0.1167461 -14.27570 0 0 10.223624 10.203270 10.143080 10.355170 8.664944 8.769525 8.634497 8.789064
ENSG00000112414.15 ADGRG6 508.4714 -2.799366 0.1985637 -14.09807 0 0 9.579926 10.266681 9.843772 9.799432 7.434690 7.564944 7.544218 7.596921
ENSG00000106236.4 NPTX2 1200.4620 -3.000108 0.2134385 -14.05608 0 0 11.058195 10.995229 11.313973 11.016301 7.845060 8.377118 8.550102 8.528409
ENSG00000162426.16 SLC45A1 112.7501 -3.759645 0.2686451 -13.99484 0 0 8.099755 7.804630 8.119620 8.243949 6.034266 5.856464 5.978500 6.022722
ENSG00000134873.10 CLDN10 285.7763 -2.704238 0.1932477 -13.99364 0 0 9.221829 9.147062 9.096704 9.020378 6.731291 7.021709 7.147845 7.200800
nrow(dge)
## [1] 78724
nrow(subset(dge,padj<0.05))
## [1] 3655
dge[grep("ACADVL",rownames(dge)),]
##                           baseMean log2FoldChange    lfcSE      stat
## ENSG00000072778.20 ACADVL 13984.35      -1.757624 0.262024 -6.707873
##                                 pvalue         padj     Ctl1    Ctl2    Ctl3
## ENSG00000072778.20 ACADVL 1.974819e-11 1.528463e-09 14.33474 13.9524 14.6743
##                               Ctl4     Mut1     Mut2     Mut3     Mut4
## ENSG00000072778.20 ACADVL 14.54686 12.13382 12.30005 12.90259 13.06871
xxf <- xx[rowMeans(xx)>=10,]
dim(xx) ; dim(xxf)
## [1] 78724     8
## [1] 19603     8
dds <- DESeqDataSetFromMatrix(countData = xxf , colData = ss, design = ~ case )
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z<- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
head(dge,20) %>% kbl(caption = "Top gene expression changes caused by VLCAD mutation") %>% kable_paper("hover", full_width = F)
Top gene expression changes caused by VLCAD mutation
baseMean log2FoldChange lfcSE stat pvalue padj Ctl1 Ctl2 Ctl3 Ctl4 Mut1 Mut2 Mut3 Mut4
ENSG00000162511.8 LAPTM5 988.1810 -3.977558 0.1624486 -24.48502 0 0 11.013483 10.895831 10.910941 10.858916 7.387102 7.674627 7.392785 7.778289
ENSG00000116574.6 RHOU 1159.8102 -2.478028 0.1017890 -24.34475 0 0 11.078995 10.963816 10.966535 10.985276 8.601506 8.806630 8.702114 8.863824
ENSG00000272975.3 MYHAS 388.4941 -4.186664 0.1847116 -22.66595 0 0 9.515786 9.584977 9.875569 9.683639 6.691628 6.800557 6.603675 6.797969
ENSG00000152128.13 TMEM163 855.0084 -3.981991 0.1799082 -22.13346 0 0 10.579410 10.449616 10.855680 10.945856 7.293661 7.373581 7.528560 7.522791
ENSG00000100311.17 PDGFB 762.5890 -4.191593 0.2090203 -20.05352 0 0 10.831306 10.362846 10.693220 10.354940 6.975994 7.363579 7.157500 7.337194
ENSG00000100678.20 SLC8A3 359.1927 -3.556739 0.1795828 -19.80556 0 0 9.561124 9.402958 9.580921 9.582313 6.673445 6.992103 7.098566 6.989704
ENSG00000148798.11 INA 449.7423 -3.861696 0.2053141 -18.80873 0 0 9.927773 10.108623 9.690482 9.605039 7.070008 6.784385 7.078268 6.939574
ENSG00000089250.20 NOS1 250.8596 -5.556768 0.3047282 -18.23516 0 0 8.837995 9.060394 9.386105 9.265278 6.149451 6.175384 6.002635 5.934002
ENSG00000183770.7 FOXL2 142.7567 -3.353550 0.1913430 -17.52638 0 0 8.343122 8.415647 8.396021 8.409836 6.536307 6.527617 6.395099 6.373976
ENSG00000009709.13 PAX7 127.3592 -4.434877 0.2575833 -17.21726 0 0 8.269724 8.400802 8.362287 8.233832 6.003486 6.206073 6.093781 6.052004
ENSG00000054983.17 GALC 254.8315 -7.169298 0.4262001 -16.82143 0 0 8.954509 9.032016 9.270073 9.450447 5.747955 5.861942 5.637599 5.783747
ENSG00000133019.12 CHRM3 282.8664 -2.711812 0.1633780 -16.59839 0 0 9.059671 9.009547 9.254560 9.267658 7.003624 7.258975 7.167052 7.245191
ENSG00000204644.10 ZFP57 751.3959 -1.658220 0.1008664 -16.43976 0 0 10.243168 10.221327 10.163403 10.378174 8.727173 8.824016 8.705163 8.846753
ENSG00000154783.12 FGD5 356.0567 -2.841880 0.1756942 -16.17515 0 0 9.314336 9.427317 9.625754 9.450447 7.057023 7.412828 7.431948 7.260999
ENSG00000206432.4 TMEM200C 135.6225 -2.700161 0.1726686 -15.63782 0 0 8.210869 8.259598 8.317683 8.329221 6.597360 6.645493 6.707842 6.662130
ENSG00000132164.10 SLC6A11 106.1440 -4.356983 0.2804464 -15.53589 0 0 8.095188 8.006750 8.223958 8.126094 6.043063 6.109937 6.121624 5.900381
ENSG00000136425.14 CIB2 560.2324 -2.093824 0.1381385 -15.15743 0 0 9.964635 9.951553 10.006036 9.865507 8.086272 8.081641 8.064365 8.403856
ENSG00000134873.10 CLDN10 286.0041 -2.694147 0.1820120 -14.80202 0 0 9.260313 9.185839 9.138162 9.069007 6.903915 7.168526 7.293595 7.337194
ENSG00000112414.15 ADGRG6 508.8023 -2.790367 0.1916111 -14.56266 0 0 9.610256 10.283879 9.868809 9.830607 7.555822 7.674627 7.664559 7.707762
ENSG00000028137.19 TNFRSF1B 466.1307 -1.894338 0.1312207 -14.43627 0 0 9.762336 9.569750 9.733929 9.597504 7.935833 8.210393 8.015367 8.178466
write.table(dge,file="deseq2_mut.tsv",quote=FALSE,sep='\t')

nrow(dge)
## [1] 19603
nrow(subset(dge,padj<0.05))
## [1] 4095
dge[grep("ACADVL",rownames(dge)),]
##                           baseMean log2FoldChange     lfcSE      stat
## ENSG00000072778.20 ACADVL 14007.45      -1.748639 0.2636304 -6.632919
##                                 pvalue         padj     Ctl1     Ctl2     Ctl3
## ENSG00000072778.20 ACADVL 3.291119e-11 1.557717e-09 14.33522 13.95137 14.67416
##                               Ctl4     Mut1     Mut2     Mut3     Mut4
## ENSG00000072778.20 ACADVL 14.55264 12.14702 12.30763 12.92246 13.07882

Make some plots.

maplot <- function(de,contrast_name) {
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange, 
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}

make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval", xlim=c(-6,6))
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}

make_heatmap <- function(de,name,myss,mx,n=30){
  colfunc <- colorRampPalette(c("blue", "white", "red"))
  values <- myss$quickdash
  f <- colorRamp(c("yellow", "orange"))
  rr <- range(values)
  svals <- (values-rr[1])/diff(rr)
  colcols <- rgb(f(svals)/255)
  mxn <- mx/rowSums(mx)*1000000
  x <- mxn[which(rownames(mxn) %in% rownames(head(de,n))),]
  heatmap.2(as.matrix(x),trace="none",col=colfunc(25),scale="row", margins = c(7,15), cexRow=0.9, cexCol=1.4,
    main=paste("Top ranked",n,"genes in",name) )
}

maplot(dge,"ctrl vs mut")

make_volcano(dge,"ctrl vs mut")

make_heatmap(dge,"ctrl vs mut",ss,xxf,n=30)
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning
## Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning
## -Inf

Now look at the expression of ACADVL gene.

acadvl <- xx[grep("ACADVL",rownames(xx)),]

acadvl
##                            Ctl1  Ctl2  Ctl3  Ctl4 Mut1 Mut2 Mut3  Mut4
## ENSG00000072778.20 ACADVL 22090 13720 23784 23679 4073 4632 8803 11032
rpm <- apply(xxf,2,function(x) { x/sum(x) * 1e6 } )

acadvl <- rpm[grep("ACADVL",rownames(rpm)),]

acadvl
##      Ctl1      Ctl2      Ctl3      Ctl4      Mut1      Mut2      Mut3      Mut4 
## 371.56241 287.22945 471.14408 451.38897  80.52195  89.30932 147.93678 161.95195
ctl <- acadvl[grep("Ctl",names(acadvl))]
mut <- acadvl[grep("Mut",names(acadvl))]
l <- list("Ctl"=ctl,"Mut"=mut)
l
## $Ctl
##     Ctl1     Ctl2     Ctl3     Ctl4 
## 371.5624 287.2294 471.1441 451.3890 
## 
## $Mut
##      Mut1      Mut2      Mut3      Mut4 
##  80.52195  89.30932 147.93678 161.95195
summary(l[["Ctl"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   287.2   350.5   411.5   395.3   456.3   471.1
summary(l[["Mut"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   80.52   87.11  118.62  119.93  151.44  161.95
decrease= ( mean(l[["Ctl"]]) -  mean(l[["Mut"]]) ) / mean(l[["Ctl"]]) *100
msg <- paste("Decrease:",signif(decrease,3),"%")

boxplot(l,col="white",ylab="reads per million (RPM)",main="ACADVL RNA expression")
beeswarm(l,add=TRUE,pch=19,cex=2,col="darkgray")
mtext(msg)

Pathway enrichment

Here I’m using the mitch package and gene pathways from Reactome and Gene Ontology to understand gene changes caused by the mutation.

Gene ontology terms obtained from GO website and converted to list format, downloaded in Feb 2024 (GO_bp_2023q4.Rds).

reactome <- gmt_import("ReactomePathways_2025-03-14.gmt")

gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)
head(gt)
##                rownames(xx)   gene
## 1 ENSG00000000003.16 TSPAN6 TSPAN6
## 2    ENSG00000000005.6 TNMD   TNMD
## 3   ENSG00000000419.14 DPM1   DPM1
## 4  ENSG00000000457.14 SCYL3  SCYL3
## 5  ENSG00000000460.17 FIRRM  FIRRM
## 6    ENSG00000000938.13 FGR    FGR
m1 <- mitch_import(dge, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 19603
## Note: no. genes in output = 19545
## Note: estimated proportion of input genes in output = 0.997
dim(m1)
## [1] 19545     1
mres_reactome <- mitch_calc(m1, reactome, priority="effect",cores=8)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
mres_reactome_top <- subset(mres_reactome$enrichment_result,p.adjustANOVA < 0.05 )

head(mres_reactome_top,20) %>%
  kbl(caption = "Top gene pathway differences caused by ALVCAD mutation") %>%
  kable_paper("hover", full_width = F)
Top gene pathway differences caused by ALVCAD mutation
set setSize pANOVA s.dist p.adjustANOVA
520 HDR through MMEJ (alt-NHEJ) 12 0.0001959 0.6208382 0.0021961
214 Complex III assembly 25 0.0000002 0.6041107 0.0000066
772 NOTCH4 Activation and Transmission of Signal to the Nucleus 11 0.0005778 -0.5992628 0.0048951
1350 TNFs bind their physiological receptors 15 0.0000674 -0.5942174 0.0009101
1355 TP53 Regulates Transcription of Death Receptors and Ligands 11 0.0006827 -0.5913605 0.0053666
318 Dissolution of Fibrin Clot 11 0.0010058 -0.5726426 0.0069405
1042 Regulated proteolysis of p75NTR 11 0.0013650 -0.5575081 0.0089339
897 Plasma lipoprotein remodeling 16 0.0001284 -0.5529085 0.0015661
360 Endosomal/Vacuolar pathway 11 0.0015211 -0.5520538 0.0097879
381 FASTK family proteins regulate processing and stability of mitochondrial RNAs 19 0.0000395 0.5446449 0.0006050
187 Chondroitin sulfate biosynthesis 18 0.0000691 -0.5416944 0.0009171
888 Phase 4 - resting membrane potential 13 0.0009082 -0.5313647 0.0065054
411 Formation of ATP by chemiosmotic coupling 20 0.0000692 0.5139001 0.0009171
229 Cristae formation 33 0.0000003 0.5123312 0.0000126
1340 TGFBR3 PTM regulation 11 0.0035785 -0.5072275 0.0177759
213 Complex I biogenesis 66 0.0000000 0.5046219 0.0000000
723 Mitochondrial RNA degradation 25 0.0000165 0.4976434 0.0003144
1379 The canonical retinoid cycle in rods (twilight vision) 12 0.0031848 -0.4916807 0.0165196
635 Killing mechanisms 11 0.0047487 -0.4916463 0.0212368
1483 WNT5:FZD7-mediated leishmania damping 11 0.0047487 -0.4916463 0.0212368
write.table(mres_reactome$enrichment_result,file="mitch_mut_reactome.tsv",quote=FALSE,sep='\t')

par(mar=c(5,27,3,3))
top <- mres_reactome$enrichment_result
top <- subset(top,p.adjustANOVA<0.05)
nrow(top)
## [1] 448
up <- head(subset(top,s.dist>0),20)
dn <- head(subset(top,s.dist<0),20)
top <- rbind(up,dn)
vec=top$s.dist
names(vec)=top$set
names(vec) <- gsub("_"," ",names(vec))
vec <- vec[order(vec)]
barplot(abs(vec),col=sign(-vec)+3,horiz=TRUE,las=1,cex.names=0.65,main="Ctl vs Mut",xlab="ES")
grid()

if ( ! file.exists("mitch_mut_reactome.html") ) {
  mitch_report(res=mres_reactome,outfile="mitch_mut_reactome.html",overwrite=TRUE)
}

Gene ontology.

#gobp <- gmt_import("gobp.msigdb.v2024.1.Hs.symbols.gmt")
#names(gobp) <- gsub("GOBP_","",names(gobp))
#names(gobp) <- gsub("_"," ",names(gobp))

#download.file("https://ziemann-lab.net/public/tmp/go_2024-11.gmt",destfile="go_2024-11.gmt")
gobp <- gmt_import("go_2024-11.gmt")

gt <- as.data.frame(rownames(xx))
gt$gene <- sapply(strsplit(gt[,1]," "),"[[",2)
head(gt)
##                rownames(xx)   gene
## 1 ENSG00000000003.16 TSPAN6 TSPAN6
## 2    ENSG00000000005.6 TNMD   TNMD
## 3   ENSG00000000419.14 DPM1   DPM1
## 4  ENSG00000000457.14 SCYL3  SCYL3
## 5  ENSG00000000460.17 FIRRM  FIRRM
## 6    ENSG00000000938.13 FGR    FGR
m1 <- mitch_import(dge, DEtype="deseq2",geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 19603
## Note: no. genes in output = 19545
## Note: estimated proportion of input genes in output = 0.997
dim(m1)
## [1] 19545     1
mres_gobp <- mitch_calc(m1, gobp, priority="effect",cores=8)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
mres_gobp_top <- subset(mres_gobp$enrichment_result,p.adjustANOVA < 0.05 )

head(mres_gobp_top,20) %>%
  kbl(caption = "Top GOBP differences caused by ALVCAD mutation") %>%
  kable_paper("hover", full_width = F)
Top GOBP differences caused by ALVCAD mutation
set setSize pANOVA s.dist p.adjustANOVA
356 GO:0003954 NADH dehydrogenase activity 10 0.0000196 0.7794727 0.0004374
2706 GO:0048681 negative regulation of axon regeneration 11 0.0000164 -0.7501233 0.0003840
2178 GO:0042246 tissue regeneration 14 0.0000021 -0.7318622 0.0000721
2461 GO:0045275 respiratory chain complex III 12 0.0000238 0.7043977 0.0005241
3284 GO:0090136 epithelial cell-cell adhesion 11 0.0001554 -0.6585162 0.0023032
273 GO:0002476 antigen processing and presentation of endogenous peptide antigen via MHC class Ib 11 0.0001760 -0.6531176 0.0025638
274 GO:0002486 antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway, TAP-independent 11 0.0001760 -0.6531176 0.0025638
3336 GO:0097242 amyloid-beta clearance 11 0.0002261 -0.6420879 0.0030926
2322 GO:0043394 proteoglycan binding 13 0.0000621 -0.6414090 0.0011719
3337 GO:0097284 hepatocyte apoptotic process 14 0.0000347 -0.6389403 0.0007259
2958 GO:0060075 regulation of resting membrane potential 11 0.0002531 -0.6370710 0.0033683
474 GO:0005159 insulin-like growth factor receptor binding 12 0.0001459 -0.6330995 0.0022158
650 GO:0006122 mitochondrial electron transport, ubiquinol to cytochrome c 13 0.0000879 0.6281290 0.0015564
2195 GO:0042407 cristae formation 16 0.0000150 0.6249040 0.0003651
275 GO:0002548 monocyte chemotaxis 12 0.0001794 -0.6245243 0.0025763
1542 GO:0030215 semaphorin receptor binding 21 0.0000017 -0.6026770 0.0000626
993 GO:0008137 NADH dehydrogenase (ubiquinone) activity 41 0.0000000 0.6019328 0.0000000
3530 GO:1902004 positive regulation of amyloid-beta formation 19 0.0000092 -0.5875243 0.0002385
820 GO:0006953 acute-phase response 15 0.0000891 -0.5842635 0.0015643
1172 GO:0010745 negative regulation of macrophage derived foam cell differentiation 10 0.0014941 -0.5799335 0.0133932
write.table(mres_gobp$enrichment_result,file="mitch_mut_gobp.tsv",quote=FALSE,sep='\t')

par(mar=c(5,27,3,3))
top <- mres_gobp$enrichment_result
top <- subset(top,p.adjustANOVA<0.05)
nrow(top)
## [1] 830
up <- head(subset(top,s.dist>0),20)
dn <- head(subset(top,s.dist<0),20)
top <- rbind(up,dn)
vec=top$s.dist
names(vec)=top$set
names(vec) <- gsub("_"," ",names(vec))
vec <- vec[order(vec)]
barplot(abs(vec),col=sign(-vec)+3,horiz=TRUE,las=1,cex.names=0.65,main="Ctl vs Mut",xlab="ES")
grid()

if ( ! file.exists("mitch_mut_gobp.html") ) {
  mitch_report(res=mres_gobp,outfile="mitch_mut_gobp.html")
}

Network diagrams

In these charts the darkness of the colour indicates the enrichment score. The size of the node indicates the number of genes in the set driving the enrichment. The width of the edges indicates the proportion of shared genes.

map2color<-function(x,pal,limits=NULL){
    if(is.null(limits)) limits=range(x)
    pal[findInterval(x,seq(limits[1],limits[2],length.out=length(pal)+1), all.inside=TRUE)]
}

gs2net <- function(gset,em,colfunc=colorRampPalette(c("blue", "white","red"))(n=100)){
  gset <- gset[order(names(gset))]
  names(gset) <- lapply(names(gset), function(x) {
    nn <- substr(x, 1, 60)
    if (nchar(nn) == 60 ) {
      nn <- paste(nn,"...",sep="")
    }
    return(nn)
  } )
  em$set <- lapply(em$set, function(x) {
    nn <- substr(x, 1, 60)
    if (nchar(nn) == 60 ) {
      nn <- paste(nn,"...",sep="")
    }
    return(nn)
  } )
  mydf <- bind_rows(lapply(gset, as.data.frame.list))
  rownames(mydf) <- names(gset)
  j <- apply(mydf,1,function(x) {
    apply(mydf,1,function(y) {
      length(intersect(x,y) ) / length(union(x,y))
    })
  })
  j[lower.tri(j)] <- NA
  j[lower.tri(j,diag=TRUE)] <- 0
  jl <- melt(j)
  jl <- jl[which(jl$Var1 != jl$Var2),]
  jl <- jl[which(jl$value != 1),]
  jl <- jl[order(-jl$value),]
  jl <- head(jl,length(gset)*2)
  jl$edgeSize = jl$value/sum(jl$value)
  nodes <- sort(union(jl[,1],jl[,2]))
  lengths <- unlist(lapply(gset,length))
  lengths <- lengths[names(lengths) %in% nodes]
  nodes <- data.frame("nodes"=nodes,"lengths"=lengths)
  nodes$vertexsize <- sqrt(nodes$lengths/sum(nodes$lengths) * 100)
  nodes$es <- em[match(nodes$nodes,em$set),"s.dist"]
  nodes$colours <- map2color(nodes$es,colfunc)
  jl2 <- apply(jl[,1:2],2,as.character)
  jlnet <- network(jl2)
  plot(jlnet, displaylabels = TRUE, label.col = "steelblue",
       edge.lwd = c(jl$edgeSize) * 100,
       arrowhead.cex = 0,
       label.cex = 0.8, vertex.border = "white",vertex.cex = nodes$vertexsize,
       vertex.col = nodes$colours, edge.col = "black")
  E1=min(nodes$es)
  E5=max(nodes$es)
  E3=mean(c(E1,E5))
  EE <- c(E1,E3,E5)
  legcols <- map2color(EE,colfunc)
  legend("topleft", legend=signif(EE,2) ,title="ES",box.lty=0,
    fill=legcols, cex=0.8)
  S1=min(nodes$vertexsize)
  FRAG=S1/10
  S5=max(nodes$vertexsize)
  S3=mean(c(S1,S5))
  SS=c(S1-FRAG,0,S5-FRAG)
  L1=min(nodes$lengths)
  L5=max(nodes$lengths)
  LL=paste(" ",c(L1,"",L5))
  legend("topright", legend=LL ,title="no. genes",box.lty=0,
    pt.cex=SS*1, cex=0.9 , pch=19,col="darkgray")
  J1=min(jl$edgeSize)
  FRAG=J1*3
  J5=max(jl$edgeSize)
  J3=mean(c(J1,J5))
  JJ=c(J1,J3,J5)
  JL=JJ+FRAG
  legend("bottomleft", legend=signif(JJ,2) , lwd=JL*50, title="Jaccard",
    box.lty=0, cex=0.9 , lty=1, col="black")
}

Reactome up.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- reactome
up <- head(subset(mres_reactome$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0),20)
up_gs <- up[,1]
up_gs <- gs[which(names(gs) %in% up_gs)]

topgs_up <- lapply(1:length(up_gs),function(i) {
  gsname <- names(up_gs)[i]
  genes <- up_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores>2))
  return(top_genes)
})
names(topgs_up) <- names(up_gs)

gs2net(topgs_up,mres_reactome$enrichment_result,colorRampPalette(c("pink","darkred"))(n=100))

topgs_up
## $`Citric acid cycle (TCA cycle)`
##  [1] "ACAT1"  "DLD"    "IDH3B"  "MDH2"   "NFS1"   "NNT"    "OGDH"   "SDHA"  
##  [9] "SDHD"   "SUCLG1" "SUCLG2" "TRAP1" 
## 
## $`Complex I biogenesis`
##  [1] "ECSIT"    "FOXRED1"  "HSPA9"    "MT-ND1"   "MT-ND2"   "MT-ND3"  
##  [7] "NDUFA10"  "NDUFA2"   "NDUFA8"   "NDUFA9"   "NDUFAF1"  "NDUFAF2" 
## [13] "NDUFAF5"  "NDUFAF6"  "NDUFAF7"  "NDUFB3"   "NDUFB5"   "NDUFB6"  
## [19] "NDUFS4"   "NDUFS6"   "NDUFS7"   "NDUFS8"   "NDUFV1"   "NDUFV2"  
## [25] "TMEM126A" "TMEM186" 
## 
## $`Complex III assembly`
##  [1] "BCS1L"   "CYC1"    "HSPA9"   "LETM1"   "LYRM7"   "NFS1"    "UQCC2"  
##  [8] "UQCC5"   "UQCR10"  "UQCRB"   "UQCRC2"  "UQCRFS1" "UQCRH"   "UQCRQ"  
## 
## $`Cristae formation`
##  [1] "APOOL"   "ATP5F1A" "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO" 
##  [8] "CHCHD3"  "DNAJC11" "HSPA9"   "MICOS10" "MICOS13" "MT-ATP6" "MT-ATP8"
## [15] "MTX2"   
## 
## $`Cytosolic tRNA aminoacylation`
##  [1] "EPRS1" "HARS1" "IARS1" "KARS1" "LARS1" "MARS1" "NARS1" "RARS1" "WARS1"
## [10] "YARS1"
## 
## $`Defective homologous recombination repair (HRR) due to BRCA2 loss of function`
##  [1] "ATR"    "KAT5"   "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A"  "RAD9B" 
##  [9] "RBBP8"  "RPA1"   "RPA2"  
## 
## $`Diseases of DNA Double-Strand Break Repair`
##  [1] "ATR"    "KAT5"   "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A"  "RAD9B" 
##  [9] "RBBP8"  "RPA1"   "RPA2"  
## 
## $`Diseases of DNA repair`
##  [1] "ATR"    "KAT5"   "MUTYH"  "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A" 
##  [9] "RAD9B"  "RBBP8"  "RPA1"   "RPA2"  
## 
## $`FASTK family proteins regulate processing and stability of mitochondrial RNAs`
## [1] "MT-ATP6" "MT-ATP8" "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L"
## 
## $`Formation of ATP by chemiosmotic coupling`
## [1] "ATP5F1A" "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO"  "MT-ATP6"
## [8] "MT-ATP8"
## 
## $`Glyoxylate metabolism and glycine degradation`
## [1] "ALDH4A1" "DLD"     "GRHPR"   "OGDH"   
## 
## $`HDR through MMEJ (alt-NHEJ)`
## [1] "LIG3"  "NBN"   "RAD50" "RAD52" "RBBP8" "XRCC1"
## 
## $`Homologous DNA Pairing and Strand Exchange`
##  [1] "ATR"    "KAT5"   "NBN"    "RAD17"  "RAD50"  "RAD51C" "RAD9A"  "RAD9B" 
##  [9] "RBBP8"  "RPA1"   "RPA2"  
## 
## $`Lysine catabolism`
## [1] "AASS"    "ALDH7A1" "DHTKD1"  "DLD"     "GCDH"    "HYKK"   
## 
## $`Mitochondrial RNA degradation`
## [1] "MT-ATP6" "MT-ATP8" "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L"
## 
## $`Nucleotide biosynthesis`
## [1] "ATIC"  "CAD"   "DHODH" "GART"  "PFAS"  "PPAT" 
## 
## $`Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)`
## [1] "KAT5"   "NBN"    "RAD50"  "RAD51C" "RBBP8" 
## 
## $`Respiratory electron transport`
##  [1] "BCS1L"    "COX11"    "COX15"    "COX18"    "COX6B1"   "COX6C"   
##  [7] "COX7B"    "COX7C"    "CYC1"     "ECSIT"    "ETFDH"    "FOXRED1" 
## [13] "GOT1"     "HIGD2A"   "HSPA9"    "LETM1"    "LYRM7"    "MDH2"    
## [19] "MT-CO3"   "MT-ND1"   "MT-ND2"   "MT-ND3"   "NDUFA10"  "NDUFA2"  
## [25] "NDUFA8"   "NDUFA9"   "NDUFAF1"  "NDUFAF2"  "NDUFAF5"  "NDUFAF6" 
## [31] "NDUFAF7"  "NDUFB3"   "NDUFB5"   "NDUFB6"   "NDUFS4"   "NDUFS6"  
## [37] "NDUFS7"   "NDUFS8"   "NDUFV1"   "NDUFV2"   "NFS1"     "SCO1"    
## [43] "SDHA"     "SDHD"     "SMIM20"   "TIMM21"   "TMEM126A" "TMEM186" 
## [49] "TRAP1"    "UQCC2"    "UQCC5"    "UQCR10"   "UQCRB"    "UQCRC2"  
## [55] "UQCRFS1"  "UQCRH"    "UQCRQ"   
## 
## $`WNT5A-dependent internalization of FZD4`
## [1] "AP2B1" "CLTC"  "DVL2"  "PRKCA"
## 
## $`rRNA processing in the mitochondrion`
##  [1] "ELAC2"   "MT-ATP6" "MT-ATP8" "MT-CO3"  "MT-ND1"  "MT-ND2"  "MT-ND3" 
##  [8] "MT-ND4L" "MT-TL2"  "MTERF4"

Reactome down.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- reactome
dn <- head(subset(mres_reactome$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0),20)
dn_gs <- dn[,1]
dn_gs <- gs[which(names(gs) %in% dn_gs)]

topgs_dn <- lapply(1:length(dn_gs),function(i) {
  gsname <- names(dn_gs)[i]
  genes <- dn_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores<2))
  return(top_genes)
})
names(topgs_dn) <- names(dn_gs)

gs2net(topgs_dn,mres_reactome$enrichment_result,colorRampPalette(c("darkblue","lightblue"))(n=100))

topgs_dn
## $`CS/DS degradation`
##  [1] "ARSB"  "BCAN"  "BGN"   "CSPG4" "CSPG5" "HEXA"  "HEXB"  "HYAL1" "HYAL3"
## [10] "IDS"   "IDUA"  "VCAN" 
## 
## $`Chondroitin sulfate biosynthesis`
##  [1] "BCAN"       "BGN"        "CHPF"       "CHPF2"      "CHST11"    
##  [6] "CHST12"     "CHST13"     "CHST15"     "CHST3"      "CHST7"     
## [11] "CHST9"      "CHSY1"      "CSGALNACT1" "CSGALNACT2" "CSPG4"     
## [16] "CSPG5"      "VCAN"      
## 
## $`Dissolution of Fibrin Clot`
##  [1] "ANXA2"    "PLAT"     "PLAU"     "PLAUR"    "S100A10"  "SERPINB2"
##  [7] "SERPINB6" "SERPINB8" "SERPINE1" "SERPINE2" "SERPINF2"
## 
## $`Endosomal/Vacuolar pathway`
##  [1] "B2M"   "CTSL"  "CTSS"  "CTSV"  "HLA-A" "HLA-B" "HLA-C" "HLA-E" "HLA-F"
## [10] "HLA-H"
## 
## $`Glycosphingolipid biosynthesis`
##  [1] "A4GALT"     "B3GALNT1"   "B3GALT4"    "B3GNT5"     "B4GALNT1"  
##  [6] "B4GALT5"    "CERK"       "FUT1"       "FUT2"       "ST3GAL2"   
## [11] "ST3GAL3"    "ST3GAL5"    "ST6GALNAC5" "ST6GALNAC6" "ST8SIA5"   
## [16] "UGCG"       "UGT8"      
## 
## $`Killing mechanisms`
##  [1] "CYBA"  "DVL1"  "DVL3"  "FZD7"  "JUN"   "MAPK8" "NOX1"  "NOXA1" "RAC1" 
## [10] "WNT5A"
## 
## $`Maturation of nucleoprotein_9683610`
##  [1] "GSK3A"  "GSK3B"  "PARP10" "PARP14" "PARP16" "PARP4"  "PARP6"  "PARP8" 
##  [9] "PARP9"  "SUMO1"  "UBE2I" 
## 
## $`NOTCH4 Activation and Transmission of Signal to the Nucleus`
##  [1] "ADAM10" "APH1A"  "APH1B"  "DLL4"   "JAG1"   "NCSTN"  "NOTCH4" "PSEN1" 
##  [9] "PSEN2"  "PSENEN" "YWHAZ" 
## 
## $`Negative regulation of TCF-dependent signaling by WNT ligand antagonists`
## [1] "DKK2"    "KREMEN1" "KREMEN2" "LRP5"    "LRP6"    "SFRP1"   "WNT4"   
## [8] "WNT5A"   "WNT9A"  
## 
## $`Phase 4 - resting membrane potential`
##  [1] "KCNJ12" "KCNJ14" "KCNJ2"  "KCNJ4"  "KCNK1"  "KCNK10" "KCNK12" "KCNK13"
##  [9] "KCNK2"  "KCNK3"  "KCNK5"  "KCNK6"  "KCNK9" 
## 
## $`Plasma lipoprotein remodeling`
##  [1] "ABCG1"   "ANGPTL4" "APOE"    "FURIN"   "LCAT"    "LIPG"    "LMF1"   
##  [8] "LMF2"    "LPL"     "MBTPS1"  "MBTPS2"  "MTTP"    "P4HB"    "PCSK5"  
## [15] "PCSK6"   "PLTP"   
## 
## $`Regulated proteolysis of p75NTR`
##  [1] "ADAM17" "APH1A"  "APH1B"  "NCSTN"  "NFKB1"  "NGFR"   "PSEN1"  "PSEN2" 
##  [9] "PSENEN" "RELA"   "TRAF6" 
## 
## $`Specification of primordial germ cells`
##  [1] "BMP4"    "CBFA2T2" "CXCR4"   "EOMES"   "NANOS3"  "PDPN"    "PRDM1"  
##  [8] "SOX17"   "TET2"    "TFAP2C" 
## 
## $`TGFBR3 PTM regulation`
##  [1] "APH1A"  "APH1B"  "MMP14"  "NCSTN"  "PSEN1"  "PSEN2"  "PSENEN" "TGFBR3"
##  [9] "TIMP1"  "TIMP2" 
## 
## $`TNFs bind their physiological receptors`
##  [1] "CD70"      "EDA"       "EDARADD"   "LTA"       "TNFRSF11B" "TNFRSF14" 
##  [7] "TNFRSF1A"  "TNFRSF1B"  "TNFRSF25"  "TNFRSF6B"  "TNFRSF9"   "TNFSF13"  
## [13] "TNFSF13B"  "TNFSF4"    "TNFSF9"   
## 
## $`TP53 Regulates Transcription of Death Receptors and Ligands`
##  [1] "FAS"       "IGFBP3"    "PPP1R13B"  "TMEM219"   "TNFRSF10A" "TNFRSF10B"
##  [7] "TNFRSF10C" "TNFRSF10D" "TP53"      "TP53BP2"   "TP73"     
## 
## $`Termination of O-glycan biosynthesis`
##  [1] "MUC1"       "MUC12"      "MUC20"      "MUC3A"      "ST3GAL1"   
##  [6] "ST3GAL2"    "ST3GAL3"    "ST3GAL4"    "ST6GAL1"    "ST6GALNAC2"
## [11] "ST6GALNAC3" "ST6GALNAC4"
## 
## $`The canonical retinoid cycle in rods (twilight vision)`
##  [1] "ABCA4"   "CYP4V2"  "HSD17B6" "MYO7A"   "RBP1"    "RBP4"    "RDH10"  
##  [8] "RDH11"   "RDH16"   "RDH5"    "RLBP1"   "STRA6"  
## 
## $`WNT ligand biogenesis and trafficking`
##  [1] "PORCN"  "SNX3"   "TMED5"  "VPS26A" "VPS29"  "VPS35"  "WLS"    "WNT10A"
##  [9] "WNT10B" "WNT11"  "WNT2B"  "WNT3"   "WNT4"   "WNT5A"  "WNT5B"  "WNT6"  
## [17] "WNT7A"  "WNT7B"  "WNT9A" 
## 
## $`WNT5:FZD7-mediated leishmania damping`
##  [1] "CYBA"  "DVL1"  "DVL3"  "FZD7"  "JUN"   "MAPK8" "NOX1"  "NOXA1" "RAC1" 
## [10] "WNT5A"

GO BP up.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- gobp
up <- head(subset(mres_gobp$enrichment_result,p.adjustANOVA<0.05 & s.dist > 0),20)
up_gs <- up[,1]
up_gs <- gs[which(names(gs) %in% up_gs)]

topgs_up <- lapply(1:length(up_gs),function(i) {
  gsname <- names(up_gs)[i]
  genes <- up_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores>2))
  return(top_genes)
})
names(topgs_up) <- names(up_gs)

gs2net(topgs_up,mres_gobp$enrichment_result,colorRampPalette(c("pink","darkred"))(n=100))

topgs_up
## $`GO:0000338 protein deneddylation`
## [1] "COPS5"  "COPS7A" "COPS7B" "COPS8"  "GPS1"   "SENP8" 
## 
## $`GO:0000974 Prp19 complex`
## [1] "CRNKL1"  "CTNNBL1" "ISY1"    "POLR2A"  "RBM22"   "XAB2"   
## 
## $`GO:0001401 SAM complex`
## [1] "APOOL"   "CHCHD3"  "DNAJC11" "HSPA9"   "MICOS10" "MICOS13" "MTX2"   
## [8] "MTX3"   
## 
## $`GO:0003954 NADH dehydrogenase activity`
## [1] "AIFM1"  "ENOX1"  "MT-ND1" "NDUFA9" "NDUFS7" "NDUFS8" "NDUFV2"
## 
## $`GO:0004033 aldo-keto reductase (NADPH) activity`
## [1] "AKR1A1" "AKR1C1" "AKR1C3" "AKR7A2" "KCNAB3"
## 
## $`GO:0006120 mitochondrial electron transport, NADH to ubiquinone`
##  [1] "DLD"     "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10" "NDUFA2" 
##  [8] "NDUFA8"  "NDUFA9"  "NDUFAF1" "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4" 
## [15] "NDUFS6"  "NDUFS7"  "NDUFS8"  "NDUFV1"  "NDUFV2" 
## 
## $`GO:0006122 mitochondrial electron transport, ubiquinol to cytochrome c`
## [1] "CYC1"    "UQCR10"  "UQCRB"   "UQCRC2"  "UQCRFS1" "UQCRH"   "UQCRQ"  
## 
## $`GO:0006418 tRNA aminoacylation for protein translation`
##  [1] "EPRS1" "HARS1" "HARS2" "IARS1" "LARS1" "LARS2" "MARS1" "MARS2" "NARS1"
## [10] "RARS1"
## 
## $`GO:0007007 inner mitochondrial membrane organization`
## [1] "APOOL"   "CHCHD3"  "DNAJC11" "HSPA9"   "LETM1"   "MICOS10" "MICOS13"
## [8] "MTX2"    "MTX3"   
## 
## $`GO:0008137 NADH dehydrogenase (ubiquinone) activity`
##  [1] "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10" "NDUFA2"  "NDUFA8" 
##  [8] "NDUFA9"  "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4"  "NDUFS6"  "NDUFS7" 
## [15] "NDUFS8"  "NDUFV1"  "NDUFV2" 
## 
## $`GO:0034551 mitochondrial respiratory chain complex III assembly`
## [1] "BCS1L"   "LYRM7"   "UQCC2"   "UQCC5"   "UQCRFS1"
## 
## $`GO:0042407 cristae formation`
## [1] "AFG3L2"   "APOOL"    "CHCHD3"   "DNAJC11"  "LETM1"    "MICOS10"  "MICOS13" 
## [8] "OMA1"     "SLC25A46"
## 
## $`GO:0042776 proton motive force-driven mitochondrial ATP synthesis`
##  [1] "ATP5F1A" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO"  "MT-ATP6" "MT-ATP8"
##  [8] "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10" "NDUFA2"  "NDUFA8" 
## [15] "NDUFA9"  "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4"  "NDUFS6"  "NDUFS7" 
## [22] "NDUFS8"  "NDUFV1"  "NDUFV2"  "SDHA"    "SDHD"    "STOML2" 
## 
## $`GO:0045252 oxoglutarate dehydrogenase complex`
## [1] "BCKDHB" "DLD"    "KAT2A"  "OGDH"   "OGDHL" 
## 
## $`GO:0045259 proton-transporting ATP synthase complex`
## [1] "ATP5F1A" "ATP5ME"  "ATP5PD"  "ATP5PF"  "ATP5PO"  "MT-ATP6" "MT-ATP8"
## 
## $`GO:0045263 proton-transporting ATP synthase complex, coupling factor F(o)`
## [1] "ATP5MC2" "ATP5ME"  "ATP5PD"  "ATP5PF"  "MT-ATP6" "MT-ATP8"
## 
## $`GO:0045271 respiratory chain complex I`
##  [1] "MT-ND1"  "MT-ND2"  "MT-ND3"  "MT-ND4L" "NDUFA10" "NDUFA2"  "NDUFA8" 
##  [8] "NDUFA9"  "NDUFB3"  "NDUFB5"  "NDUFB6"  "NDUFS4"  "NDUFS6"  "NDUFS7" 
## [15] "NDUFS8"  "NDUFV1"  "NDUFV2" 
## 
## $`GO:0045275 respiratory chain complex III`
## [1] "BCS1L"   "CYC1"    "UQCR10"  "UQCRB"   "UQCRC2"  "UQCRFS1" "UQCRH"  
## [8] "UQCRQ"  
## 
## $`GO:0098803 respiratory chain complex`
## [1] "COX15" "NNT"   "UQCRB" "UQCRH"
## 
## $`GO:0140275 MIB complex`
## [1] "APOOL"   "CHCHD3"  "DNAJC11" "HSPA9"   "MICOS10" "MICOS13" "MTX2"   
## [8] "MTX3"

GO BP down.

scores <- m1$x
names(scores) <- rownames(m1)
gs <- gobp
dn <- head(subset(mres_gobp$enrichment_result,p.adjustANOVA<0.05 & s.dist < 0),20)
dn_gs <- dn[,1]
dn_gs <- gs[which(names(gs) %in% dn_gs)]

topgs_dn <- lapply(1:length(dn_gs),function(i) {
  gsname <- names(dn_gs)[i]
  genes <- dn_gs[[i]]
  gene_scores <- scores[which(names(scores) %in% genes)]
  top_genes <- names(which(gene_scores<2))
  return(top_genes)
})
names(topgs_dn) <- names(dn_gs)

gs2net(topgs_dn,mres_gobp$enrichment_result,colorRampPalette(c("darkblue","lightblue"))(n=100))

topgs_dn
## $`GO:0002476 antigen processing and presentation of endogenous peptide antigen via MHC class Ib`
##  [1] "HLA-A"  "HLA-B"  "HLA-C"  "HLA-E"  "HLA-F"  "HLA-H"  "RAET1E" "RAET1G"
##  [9] "ULBP1"  "ULBP2"  "ULBP3" 
## 
## $`GO:0002486 antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway, TAP-independent`
##  [1] "HLA-A"  "HLA-B"  "HLA-C"  "HLA-E"  "HLA-F"  "HLA-H"  "RAET1E" "RAET1G"
##  [9] "ULBP1"  "ULBP2"  "ULBP3" 
## 
## $`GO:0002548 monocyte chemotaxis`
##  [1] "ANXA1"     "CCL2"      "CCL26"     "FLT1"      "IL6"       "IL6R"     
##  [7] "LGALS3"    "MSMP"      "PDGFB"     "PTPRO"     "RPS19"     "TNFRSF11A"
## 
## $`GO:0005159 insulin-like growth factor receptor binding`
##  [1] "ARRB1"  "CRK"    "GNAS"   "IGF2"   "INSR"   "IRS1"   "PIK3R1" "SHC1"  
##  [9] "SOCS1"  "SOCS2"  "YWHAG"  "YWHAH" 
## 
## $`GO:0006953 acute-phase response`
##  [1] "A2M"      "APOL2"    "ASS1"     "CEBPB"    "F8"       "FN1"     
##  [7] "IL6"      "IL6R"     "PLSCR1"   "SERPINA1" "SERPINA3" "SERPINF2"
## [13] "SIGIRR"   "TFR2"     "TFRC"    
## 
## $`GO:0010745 negative regulation of macrophage derived foam cell differentiation`
##  [1] "ABCA1"  "ABCA5"  "ABCG1"  "ITGAV"  "ITGB3"  "NFKBIA" "NR1H2"  "NR1H3" 
##  [9] "PPARA"  "PPARG" 
## 
## $`GO:0030215 semaphorin receptor binding`
##  [1] "PLXNB1" "SEMA3A" "SEMA3B" "SEMA3C" "SEMA3D" "SEMA3F" "SEMA3G" "SEMA4A"
##  [9] "SEMA4B" "SEMA4C" "SEMA4D" "SEMA4F" "SEMA4G" "SEMA5A" "SEMA5B" "SEMA6A"
## [17] "SEMA6B" "SEMA6C" "SEMA6D" "SEMA7A" "SH3BP1"
## 
## $`GO:0035455 response to interferon-alpha`
##  [1] "ADAR"    "BST2"    "EIF2AK2" "IFITM1"  "IFITM2"  "IFITM3"  "IFNAR2" 
##  [8] "KLHL20"  "LAMP3"   "MX2"    
## 
## $`GO:0036152 phosphatidylethanolamine acyl-chain remodeling`
##  [1] "ABHD4"   "LPCAT3"  "LPCAT4"  "LPGAT1"  "MBOAT1"  "MBOAT2"  "PLA2G4B"
##  [8] "PLA2G4C" "PLAAT3"  "PLAAT4" 
## 
## $`GO:0042246 tissue regeneration`
##  [1] "ADAM15" "APOD"   "CDKN1A" "CPQ"    "FGF10"  "GAP43"  "IGFBP1" "KLK6"  
##  [9] "MDK"    "NINJ1"  "NOTCH1" "PTPN12" "SOX2"   "TEC"   
## 
## $`GO:0043394 proteoglycan binding`
##  [1] "ADA2"   "COL2A1" "COL5A1" "COL5A3" "COMP"   "CTSB"   "CTSK"   "CTSL"  
##  [9] "CTSS"   "FN1"    "NID1"   "SLIT2"  "THBS1" 
## 
## $`GO:0046847 filopodium assembly`
##  [1] "ARHGEF4" "CD2AP"   "CDC42"   "DNM3"    "EZR"     "FGD1"    "FGD4"   
##  [8] "FGD5"    "FGD6"    "FMNL3"   "ITGB4"   "PPP1R9B" "S1PR2"   "SH3BP1" 
## [15] "SPATA13" "SRF"     "SRGAP2"  "TGFBR1" 
## 
## $`GO:0048681 negative regulation of axon regeneration`
##  [1] "CERS2"   "EPHA4"   "FIGNL2"  "INPP5F"  "KREMEN1" "LRIG2"   "NEO1"   
##  [8] "PTPRS"   "RGMA"    "RTN4R"   "RTN4RL1"
## 
## $`GO:0048711 positive regulation of astrocyte differentiation`
##  [1] "BIN1"     "BMP2"     "CLCF1"    "HES1"     "ID2"      "IL6ST"   
##  [7] "LIF"      "NOTCH1"   "SERPINE2" "SHH"     
## 
## $`GO:0060075 regulation of resting membrane potential`
##  [1] "ATP1A3" "CLCN2"  "KCNJ2"  "KCNK1"  "KCNK13" "KCNK3"  "KCNK5"  "KCNK6" 
##  [9] "KCNK9"  "NALCN"  "PSEN1" 
## 
## $`GO:0086064 cell communication by electrical coupling involved in cardiac conduction`
##  [1] "ATP1A1"  "ATP1A3"  "ATP1B1"  "ATP1B2"  "CACNA1C" "GJA1"    "GJA5"   
##  [8] "PKP2"    "PRKACA"  "RYR2"    "SLC8A1" 
## 
## $`GO:0090136 epithelial cell-cell adhesion`
##  [1] "BVES"     "CAMSAP3"  "CTNNA1"   "DSP"      "ITGB5"    "KIFC3"   
##  [7] "PKP3"     "PLEKHA7"  "SERPINB8" "SRF"      "VCL"     
## 
## $`GO:0097242 amyloid-beta clearance`
##  [1] "C3"    "CLU"   "IDE"   "IGF1R" "INSR"  "ITGB2" "LDLR"  "LRP1"  "LRP2" 
## [10] "MME"   "SYK"  
## 
## $`GO:0097284 hepatocyte apoptotic process`
##  [1] "ADAR"   "ARF6"   "ATF2"   "BCL2L1" "BID"    "CASP6"  "DNMT3A" "GSN"   
##  [9] "KRT18"  "KRT8"   "PIK3CG" "RB1"    "STK3"   "STK4"  
## 
## $`GO:1902004 positive regulation of amyloid-beta formation`
##  [1] "ABCA2"    "ABCG1"    "APOE"     "CASP3"    "CHRFAM7A" "CHRNA7"  
##  [7] "CLU"      "CSNK1E"   "EFNA1"    "EPHA4"    "GSAP"     "GSK3A"   
## [13] "IFNGR1"   "PICALM"   "RELA"     "ROCK2"    "SLC2A13"  "SP1"     
## [19] "TNF"

Session information

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] network_1.19.0              beeswarm_0.4.0             
##  [3] kableExtra_1.4.0            eulerr_7.0.2               
##  [5] mitch_1.19.2                MASS_7.3-61                
##  [7] gplots_3.2.0                DESeq2_1.44.0              
##  [9] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [11] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [13] GenomicRanges_1.56.1        GenomeInfoDb_1.40.1        
## [15] IRanges_2.38.1              S4Vectors_0.42.1           
## [17] BiocGenerics_0.50.0         reshape2_1.4.4             
## [19] dplyr_1.1.4                 zoo_1.8-12                 
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1        viridisLite_0.4.2       bitops_1.0-9           
##  [4] fastmap_1.2.0           GGally_2.2.1            promises_1.3.2         
##  [7] digest_0.6.37           mime_0.12               lifecycle_1.0.4        
## [10] magrittr_2.0.3          compiler_4.4.2          rlang_1.1.4            
## [13] sass_0.4.9              tools_4.4.2             yaml_2.3.10            
## [16] knitr_1.49              S4Arrays_1.4.1          htmlwidgets_1.6.4      
## [19] DelayedArray_0.30.1     xml2_1.3.6              plyr_1.8.9             
## [22] RColorBrewer_1.1-3      abind_1.4-8             BiocParallel_1.38.0    
## [25] KernSmooth_2.23-24      purrr_1.0.2             grid_4.4.2             
## [28] caTools_1.18.3          xtable_1.8-4            colorspace_2.1-1       
## [31] ggplot2_3.5.1           scales_1.3.0            gtools_3.9.5           
## [34] cli_3.6.3               rmarkdown_2.29          crayon_1.5.3           
## [37] generics_0.1.3          rstudioapi_0.17.1       httr_1.4.7             
## [40] cachem_1.1.0            stringr_1.5.1           zlibbioc_1.50.0        
## [43] parallel_4.4.2          XVector_0.44.0          vctrs_0.6.5            
## [46] Matrix_1.7-1            jsonlite_1.8.9          echarts4r_0.4.5        
## [49] systemfonts_1.1.0       locfit_1.5-9.10         jquerylib_0.1.4        
## [52] tidyr_1.3.1             glue_1.8.0              statnet.common_4.11.0  
## [55] ggstats_0.7.0           codetools_0.2-20        stringi_1.8.4          
## [58] gtable_0.3.6            later_1.4.1             UCSC.utils_1.0.0       
## [61] munsell_0.5.1           tibble_3.2.1            pillar_1.10.0          
## [64] htmltools_0.5.8.1       GenomeInfoDbData_1.2.12 R6_2.5.1               
## [67] evaluate_1.0.1          shiny_1.10.0            lattice_0.22-6         
## [70] httpuv_1.6.15           bslib_0.8.0             Rcpp_1.0.13-1          
## [73] coda_0.19-4.1           svglite_2.1.3           gridExtra_2.3          
## [76] SparseArray_1.4.8       xfun_0.49               pkgconfig_2.0.3
save.image("dge.Rdata")