Source: https://github.com/markziemann/mesangial_rageko

Introduction

Looking at doing comparison of control and RAGE KO Mouse mesangial cells RNA-seq and miRNA-seq.

Methods

  • RAGE KO data sequenced at Baker. 61 nt length.

  • WT data sequenced at BGI with 91 nt length.

  • Quality trimming with skewer 0.2.2 to ensure 3’ end base quality > 20.

  • GENECODE mouse transcriptome v30 was downloaded and indexed.

  • Kallisto 0.46.0 was used for mapping paired end reads to the transcriptome.

  • Count data was loaded into R, followed by DE analysis with DESeq2.

  • Enrichment analysis was performed with mitch, using REACTOME pathways.

  • OS, R and package versions are shown in the session info section at the end of this report.

suppressPackageStartupMessages({
  library("reshape2")
  library("DESeq2")
  library("gplots")
  library("mitch")
  library("dplyr")
  library("biomaRt")
  library("kableExtra")
  library("beeswarm")
})

Data

tmp <- read.table("3col.tsv.gz",header=F)
x <- as.matrix(acast(tmp, V2~V1, value.var="V3", fun.aggregate = sum))
x <- as.data.frame(x)
txinfo <-  data.frame(do.call(rbind,strsplit(rownames(x),"\\|")))
colnames(txinfo) <- c("tx","GeneID","altGeneID","altTxID",
  "txname","genename","length","biotype")
x$GeneID <- paste(txinfo$GeneID , txinfo$genename)

# write out transcript table
x2 <- x
x2$GeneID = NULL
xrpm <- apply(x2, 2, function(x){x/sum(x,na.rm=T)}) * 1000000
colnames(xrpm) <- gsub("$","_RPM",colnames(xrpm))
colnames(x2) <- gsub("$","_counts",colnames(x2))
x2 <- cbind(xrpm,x2)
write.table(x2,file="Tx_RPM_count_table_biotype.tsv")

xx <- aggregate(. ~ GeneID, x , sum)
colnames(xx) <- gsub("mRNA_","",colnames(xx))
rownames(xx) <- xx$GeneID
xx$GeneID <- NULL
xx <- round(xx)
dim(xx)
## [1] 56691    12
head(xx)
##                             rageko_ctrl_1 rageko_ctrl_2 rageko_ctrl_3
## ENSMUSG00000000001.5 Gnai3           2856          2448          2865
## ENSMUSG00000000003.16 Pbsn              0             0             0
## ENSMUSG00000000028.16 Cdc45           124            95           112
## ENSMUSG00000000031.18 H19               1             0             1
## ENSMUSG00000000037.18 Scml2             9             7            12
## ENSMUSG00000000049.12 Apoh              4             2             4
##                             rageko_TGF_1 rageko_TGF_2 rageko_TGF_3 wt_ctrl_1_R1
## ENSMUSG00000000001.5 Gnai3          2279         2047         2717          727
## ENSMUSG00000000003.16 Pbsn             0            0            0            0
## ENSMUSG00000000028.16 Cdc45          102           89          114           28
## ENSMUSG00000000031.18 H19              0            0            0         1452
## ENSMUSG00000000037.18 Scml2            7            6            9            0
## ENSMUSG00000000049.12 Apoh             0            1            0            4
##                             wt_ctrl_2_R1 wt_ctrl_3_R1 wt_TGF_1_R1 wt_TGF_2_R1
## ENSMUSG00000000001.5 Gnai3           780          621        1127        1150
## ENSMUSG00000000003.16 Pbsn             0            0           0           0
## ENSMUSG00000000028.16 Cdc45           29           31          49          52
## ENSMUSG00000000031.18 H19           1099         1065          11           6
## ENSMUSG00000000037.18 Scml2            6            1           1           3
## ENSMUSG00000000049.12 Apoh             4            5           0           0
##                             wt_TGF_3_R1
## ENSMUSG00000000001.5 Gnai3         1136
## ENSMUSG00000000003.16 Pbsn            0
## ENSMUSG00000000028.16 Cdc45          52
## ENSMUSG00000000031.18 H19             6
## ENSMUSG00000000037.18 Scml2           2
## ENSMUSG00000000049.12 Apoh            0
write.table(x=xx,file="mRNA_count_matrix.tsv",sep="\t")

Now quantify the gene biotypes. Starting with all samples.

txinfo2 <- txinfo
txinfo2$gene <- paste(txinfo2$GeneID , txinfo2$genename)
biotype <- unique(txinfo2[,c("gene","biotype")])
xxbiotype <- merge(xx,biotype,by.x=0,by.y="gene")
xxbiotype$Row.names=NULL

xxbiotype <- aggregate(. ~ biotype,xxbiotype,sum)
rownames(xxbiotype) <- xxbiotype$biotype
xxbiotype$biotype = NULL
# remove biotypes with very few reads
xxbiotype <- xxbiotype[which(rowMeans(xxbiotype)>10),]
# keep top 10 classes for chart
n=10
xxbiotype <- xxbiotype[tail(order(rowMeans(xxbiotype)),n),]
# make percent
xxbiotype2 <- apply(xxbiotype, 2, function(x){x*100/sum(x,na.rm=T)})
# create color palette:
library(RColorBrewer)
pal <- brewer.pal(n, "Paired")

par(mfrow=c(1,2))
par(mar=c(10,5,3,1))
barplot(as.matrix(xxbiotype2),col=pal,las=2)
plot.new()
legend("topleft", legend=rev(rownames(xxbiotype2)),fill=rev(pal),bg="white",cex=1)

#restore default setting
par(mar=c(5.1, 4.1, 4.1, 2.1))

Now with just the samples not treated with TGFb.

txinfo2 <- txinfo
txinfo2$gene <- paste(txinfo2$GeneID , txinfo2$genename)
biotype <- unique(txinfo2[,c("gene","biotype")])
xxbiotype <- merge(xx,biotype,by.x=0,by.y="gene")
xxbiotype$Row.names=NULL

xxbiotype <- aggregate(. ~ biotype,xxbiotype,sum)
rownames(xxbiotype) <- xxbiotype$biotype
xxbiotype$biotype = NULL
# remove TGFb samples
xxbiotype <- xxbiotype[,grep("TGF",colnames(xxbiotype),invert=TRUE)]

# remove biotypes with very few reads
xxbiotype <- xxbiotype[which(rowMeans(xxbiotype)>10),]
# keep top 10 classes for chart
n=10
xxbiotype <- xxbiotype[tail(order(rowMeans(xxbiotype)),n),]
# make percent
xxbiotype2 <- apply(xxbiotype, 2, function(x){x*100/sum(x,na.rm=T)})
# create color palette:
library(RColorBrewer)
pal <- brewer.pal(n, "Paired")

par(mfrow=c(1,2))
par(mar=c(10,5,3,1))
barplot(as.matrix(xxbiotype2),col=pal,las=2)
plot.new()
legend("topleft", legend=rev(rownames(xxbiotype2)),fill=rev(pal),bg="white",cex=1)

#restore default setting
par(mar=c(5.1, 4.1, 4.1, 2.1))

Samplesheet

#colnames(xx) <- sapply(strsplit(colnames(xx),"_"),"[[",1)
mysamples <- colnames(xx)
ko <- as.numeric(grepl("ko",mysamples))
tgf <- as.numeric(grepl("TGF",mysamples))
ss <- data.frame(mysamples,ko,tgf)
rownames(ss) <- mysamples
ss
##                   mysamples ko tgf
## rageko_ctrl_1 rageko_ctrl_1  1   0
## rageko_ctrl_2 rageko_ctrl_2  1   0
## rageko_ctrl_3 rageko_ctrl_3  1   0
## rageko_TGF_1   rageko_TGF_1  1   1
## rageko_TGF_2   rageko_TGF_2  1   1
## rageko_TGF_3   rageko_TGF_3  1   1
## wt_ctrl_1_R1   wt_ctrl_1_R1  0   0
## wt_ctrl_2_R1   wt_ctrl_2_R1  0   0
## wt_ctrl_3_R1   wt_ctrl_3_R1  0   0
## wt_TGF_1_R1     wt_TGF_1_R1  0   1
## wt_TGF_2_R1     wt_TGF_2_R1  0   1
## wt_TGF_3_R1     wt_TGF_3_R1  0   1

QC

TODO: rRNA carryover.

par(mar=c(5,8,3,1))
barplot(colSums(xx),horiz=TRUE,las=1,xlab="num reads",col=ss$cols)
grid()

MDS plot

And correlation heat map.

par(mar=c(5,5,3,3))

mds <- cmdscale(dist(t(xx)))

plot(mds, xlab="Coordinate 1", ylab="Coordinate 2",
  type = "p",bty="n", cex=4 )

text(mds, labels=rownames(mds) ,col="black")

heatmap.2(cor(xx),trace="n",main="Pearson correlation heatmap",margin=c(8,8),cexRow=0.7,cexCol=0.7)

Differential expression

Compare wt control to RAGE KO control.

maplot <- function(de,contrast_name) {
  de <- de[which(!is.na(de$padj)),]
  sig <-subset(de, padj < 0.05 )
  up <-rownames(subset(de, padj < 0.05 & log2FoldChange > 0))
  dn <-rownames(subset(de, padj < 0.05 & log2FoldChange < 0))
  GENESUP <- length(up)
  GENESDN <- length(dn)
  DET=nrow(de)
  SUBHEADER = paste(GENESUP, "up, ", GENESDN, "down", DET, "detected")
  ns <-subset(de, padj > 0.05 )
  plot(log2(de$baseMean),de$log2FoldChange,
       xlab="log2 basemean", ylab="log2 foldchange",
       pch=19, cex=0.5, col="dark gray",
       main=contrast_name, cex.main=1)
  points(log2(sig$baseMean),sig$log2FoldChange,
         pch=19, cex=0.5, col="red")
  mtext(SUBHEADER,cex = 1)
}
make_volcano <- function(de,name) {
    de <- de[which(!is.na(de$padj)),]
    de$pvalue[which(de$pvalue==0)] <- 1e-320
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$pval),cex=0.5,pch=19,col="darkgray",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$pval),cex=0.5,pch=19,col="red")
}
ss1 <- subset(ss,tgf==0)
xx1 <- xx[,which(colnames(xx) %in% rownames(ss1) )]
dim(xx1)
## [1] 56691     6
xx1 <- xx1[which(rowMeans(xx1)>10),]
dim(xx1)
## [1] 16920     6
dds <- DESeqDataSetFromMatrix(countData = xx1 , colData = ss1 , design = ~ ko )
## converting counts to integer mode
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <- cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:20,1:6] %>% kbl(caption = "Top gene expression differences") %>%
  kable_paper("hover", full_width = F)
Top gene expression differences
baseMean log2FoldChange lfcSE stat pvalue padj
ENSMUSG00000001020.9 S100a4 4192.9386 5.170496 0.1377328 37.54005 0 0
ENSMUSG00000020044.14 Timp3 10285.4824 2.798266 0.0714929 39.14049 0 0
ENSMUSG00000020256.15 Aldh1l2 3821.4378 5.927939 0.1206065 49.15108 0 0
ENSMUSG00000022037.16 Clu 4747.7435 -7.309490 0.1278150 -57.18804 0 0
ENSMUSG00000023036.15 Pcdhgc4 1683.2516 -5.431429 0.1290685 -42.08175 0 0
ENSMUSG00000027204.14 Fbn1 4626.3922 4.515813 0.0992572 45.49608 0 0
ENSMUSG00000029661.17 Col1a2 55636.8192 11.118047 0.1666730 66.70573 0 0
ENSMUSG00000029810.16 Tmem176b 2962.7398 -3.721489 0.0945140 -39.37500 0 0
ENSMUSG00000035202.9 Lars2 440443.9343 7.995812 0.1385554 57.70843 0 0
ENSMUSG00000037071.4 Scd1 6218.4972 3.718488 0.0894171 41.58586 0 0
ENSMUSG00000040152.9 Thbs1 13191.7357 4.102414 0.0771886 53.14789 0 0
ENSMUSG00000069833.14 Ahnak 36337.4655 3.850237 0.0825342 46.65023 0 0
ENSMUSG00000089774.3 Slc5a3 3102.6441 4.271342 0.1133767 37.67390 0 0
ENSMUSG00000101249.2 Gm29216 134092.2407 -4.575209 0.0892471 -51.26454 0 0
ENSMUSG00000119584.1 Rn18s-rs5 1624546.6620 8.192274 0.1498508 54.66955 0 0
ENSMUSG00000006205.14 Htra1 2118.8069 4.111353 0.1106220 37.16579 0 0
ENSMUSG00000002266.18 Zim1 4852.0107 -3.413030 0.0930200 -36.69137 0 0
ENSMUSG00000006519.12 Cyba 1888.1110 -3.138444 0.0904406 -34.70173 0 0
ENSMUSG00000023904.11 Hcfc1r1 3021.9117 -2.955816 0.0856391 -34.51480 0 0
ENSMUSG00000031980.11 Agt 922.7918 -4.844777 0.1423726 -34.02887 0 0
d1up <- rownames(subset(dge,padj <= 0.05 & log2FoldChange > 0))
d1dn <- rownames(subset(dge,padj <= 0.05 & log2FoldChange < 0))

txinfo$GeneID <- sapply(strsplit(txinfo$GeneID,"\\."),"[[",1)

g <- sapply(strsplit(rownames(dge),"\\."),"[[",1)

dge$biotype <- sapply(g, function(x) {
  paste(unique(txinfo[txinfo$GeneID==x,"biotype"]),collapse=",") }
)

write.table(dge,file="mRNA_DE_ctrl_vs_rageko.tsv",quote=FALSE,sep="\t")

maplot(dge,"Cont1: Effect of RAGE KO")

make_volcano(dge,"Cont1: Effect of RAGE KO")

#agerdat <- assay(vsd)[grep("Ager",rownames(assay(vsd))),]
#par(mar=c(5,8,3,1))
#barplot(agerdat,horiz=TRUE,las=1,xlab="Normalised Ager expression",xlim=c(0,10))

Make a heatmap of the top 30 genes.

rpm1 <- apply(xx1, 2, function(x){x/sum(x,na.rm=T)}) * 1000000

agerdat <- rpm1[grep("Ager",rownames(rpm1)),]
par(mar=c(5,8,3,1))
barplot(agerdat,horiz=TRUE,las=1,xlab="Normalised Ager expression",xlim=c(0,16))

colfunc <- colorRampPalette(c("blue", "white", "red"))

rpm2 <- rpm1[which(rownames(rpm1) %in% rownames(head(dge,30))),]

heatmap.2(as.matrix(rpm2),margin=c(8, 22),cexRow=0.85,trace="none",
    cexCol=0.9,col=colfunc(20),scale="row")

rownames(rpm2) <- sapply(strsplit(rownames(rpm2)," "),"[[",2)

heatmap.2(as.matrix(rpm2),margin=c(8, 22),cexRow=0.85,trace="none",
    cexCol=0.9,col=colfunc(20),scale="row")

Pathway analysis

First need ortholog table.

Downloaded separately from the ensembl biomart website because the R package code is timing out.

orth <- read.table("../../ref/mart_export.txt",sep="\t",header=TRUE)
orth <- orth[which(orth$Human.gene.name != ""),]
orth <- orth[,c("Gene.stable.ID","Human.gene.name")]
orth <- unique(orth)
rownames(dge) <- sapply(strsplit(rownames(dge),"\\."),"[[",1)

Run mitch.

See the other report called “mrna_analysis_mitch.html” for the detailed analysis.

#download.file("https://reactome.org/download/current/ReactomePathways.gmt.zip", destfile="ReactomePathways.gmt.zip")
#unzip("ReactomePathways.gmt.zip")
# downloaded 19th August 2022
genesets <- gmt_import("ReactomePathways.gmt")

y <- mitch_import(dge, DEtype="deseq2",geneTable=orth)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 16920
## Note: no. genes in output = 12290
## Note: estimated proportion of input genes in output = 0.726
head(y)
##                 x
## A4GALT  0.5563104
## AAAS   -9.3505565
## AACS   17.4368054
## AAGAB   0.6211840
## AAK1   10.8699941
## AAMDC  -5.4631631
res <- mitch_calc(y, genesets, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
if (! file.exists("mrna_analysis_mitch.html")) {
  mitch_report(res, "mrna_analysis_mitch.html")
}

head(res$enrichment_result,20) %>%
  kbl(row.names=FALSE,caption = "Top pathways") %>%
  kable_paper("hover", full_width = F)
Top pathways
set setSize pANOVA s.dist p.adjustANOVA
tRNA processing in the mitochondrion 18 0.0000003 -0.7014613 0.0000088
Cholesterol biosynthesis 26 0.0000000 0.6917369 0.0000000
rRNA processing in the mitochondrion 21 0.0000009 -0.6199170 0.0000254
MET activates PTK2 signaling 12 0.0006111 0.5713064 0.0081165
ALK mutants bind TKIs 10 0.0021024 0.5616612 0.0206912
Laminin interactions 18 0.0000371 0.5616037 0.0007146
Apoptosis induced DNA fragmentation 10 0.0050416 0.5121824 0.0355514
Membrane binding and targetting of GAG proteins 14 0.0012044 -0.4998371 0.0142897
Synthesis And Processing Of GAG, GAGPOL Polyproteins 14 0.0012044 -0.4998371 0.0142897
Formation of a pool of free 40S subunits 79 0.0000000 -0.4903578 0.0000000
Peptide chain elongation 67 0.0000000 -0.4901281 0.0000000
Syndecan interactions 19 0.0002684 0.4829573 0.0039479
Eukaryotic Translation Termination 71 0.0000000 -0.4766048 0.0000000
Formation of the ternary complex, and subsequently, the 43S complex 47 0.0000000 -0.4742580 0.0000008
Viral mRNA Translation 67 0.0000000 -0.4739731 0.0000000
Telomere C-strand synthesis initiation 13 0.0033769 -0.4695773 0.0281679
Mitochondrial translation initiation 88 0.0000000 -0.4695169 0.0000000
Mitochondrial translation termination 88 0.0000000 -0.4647096 0.0000000
Regulation of cholesterol biosynthesis by SREBP (SREBF) 54 0.0000000 0.4634064 0.0000002
Eukaryotic Translation Elongation 70 0.0000000 -0.4553718 0.0000000
write.table(res$enrichment_result,file="mitch_pathways.tsv",sep="\t")

MiR-214

We have a list of mir-214 targets.

mir214_targ <- readLines("miR214_targets.txt")

mir214_targ <- list(mir214_targ)

names(mir214_targ) <- "mir214_targ"

res2 <- mitch_calc(y, mir214_targ, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
res2$enrichment_result
##           set setSize       pANOVA    s.dist p.adjustANOVA
## 1 mir214_targ    3522 5.682073e-34 0.1395454  5.682073e-34
if (! file.exists("mrna_analysis_mitch_mir214.html")) {
  mitch_report(res2, "mrna_analysis_mitch_mir214.html")
}

par(mfrow=c(2,1))
MIN=min(res2$ranked_profile[,1])
MAX=max(res2$ranked_profile[,1])
beeswarm(res2$detailed_sets$mir214_targ,pch=19,
  lty="blank",cex=0.45,horiz=TRUE,ylim=c(MIN,MAX),
  main="miR-214 targets",
  xlab="position in rank (Ctrl vs RAGE KO)"
)
abline(v=0,lwd=2,lty=2,col="red")
abline(v=MIN,lwd=2,lty=2,col="black")
abline(v=MAX,lwd=2,lty=2,col="black")
grid()

par(mfrow=c(1,1))

It appears that they are upregulated in this contrast.

Reactome analysis of miR-214 targets

Here we are going to try running a 1 sample t-test.

  1. Extract the miR214 targets from the mitch rank.

  2. For each Reactome, get the set that intersects with miR-214 targets.

  3. Run 1-sample t-test to determine enrichment.

  4. Collect and format results.

  5. Present chart and enriched genes for the top 10 pathways

head(res2$ranked_profile)
##            x
## A4GALT   517
## AAAS   -5332
## AACS    6074
## AAGAB    563
## AAK1    5478
## AAMDC  -4170
MIN=min(as.vector(res2$ranked_profile))
MAX=max(as.vector(res2$ranked_profile))

str(res2$detailed_sets)
## List of 1
##  $ mir214_targ: Named num [1:3522] 517 6074 563 5478 193 ...
##   ..- attr(*, "names")= chr [1:3522] "A4GALT" "AACS" "AAGAB" "AAK1" ...
head(res2$detailed_sets[[1]])
## A4GALT   AACS  AAGAB   AAK1   AAR2  ABCA1 
##    517   6074    563   5478    193  -5914
tail(res2$detailed_sets[[1]])
## ZSCAN29  ZSWIM4  ZSWIM7  ZWILCH  ZYG11B   ZZEF1 
##   -2262   -3416   -1100    2921    4749    5677
hist(res2$detailed_sets[[1]],xlab="gene rank")

hist(res2$detailed_sets[[1]],xlab="gene rank",breaks=100)

y <- res2$detailed_sets[[1]]

names(genesets[lapply(genesets,length) >500])
##  [1] "Adaptive Immune System"                            
##  [2] "Axon guidance"                                     
##  [3] "Cell Cycle"                                        
##  [4] "Cell Cycle, Mitotic"                               
##  [5] "Cellular responses to stimuli"                     
##  [6] "Cellular responses to stress"                      
##  [7] "Cytokine Signaling in Immune system"               
##  [8] "Developmental Biology"                             
##  [9] "Disease"                                           
## [10] "GPCR downstream signalling"                        
## [11] "Gene expression (Transcription)"                   
## [12] "Generic Transcription Pathway"                     
## [13] "Hemostasis"                                        
## [14] "Immune System"                                     
## [15] "Infectious disease"                                
## [16] "Innate Immune System"                              
## [17] "Membrane Trafficking"                              
## [18] "Metabolism"                                        
## [19] "Metabolism of RNA"                                 
## [20] "Metabolism of lipids"                              
## [21] "Metabolism of proteins"                            
## [22] "Nervous system development"                        
## [23] "Post-translational protein modification"           
## [24] "RNA Polymerase II Transcription"                   
## [25] "Sensory Perception"                                
## [26] "Signal Transduction"                               
## [27] "Signaling by GPCR"                                 
## [28] "Signaling by Receptor Tyrosine Kinases"            
## [29] "Signaling by Rho GTPases"                          
## [30] "Signaling by Rho GTPases, Miro GTPases and RHOBTB3"
## [31] "Transport of small molecules"                      
## [32] "Vesicle-mediated transport"
# 1 sample t-test self contained test
# z = profile
# i = geneset index
# gsl = gene set library
# n = min number fo genes overlap=5
t1 <- function(y,i,gsl,n=5) {
  gs <- gsl[[i]]
  gsname <- names(gsl)[i]
  z <- y[names(y) %in% gs]
  len=length(z)
  if (len >= n ) {
    tres=t.test(z)
    pval=tres$p.value
    zmean=mean(z)
    zmedian=median(z)
    zcil=tres$conf.int[1]
    zciu=tres$conf.int[2]

    res = list(gsname=gsname,"p-value"=pval,"mean"=zmean,"median"=zmedian,
      "lower conf interval"=zcil,"upper conf interval"=zciu,numgenes=len)

    return(res)
  }
}

# test it
t1(y,143,genesets)
## $gsname
## [1] "Anti-inflammatory response favouring Leishmania parasite infection"
## 
## $`p-value`
## [1] 0.03424286
## 
## $mean
## [1] 1783.4
## 
## $median
## [1] 2551.5
## 
## $`lower conf interval`
## [1] 147.134
## 
## $`upper conf interval`
## [1] 3419.666
## 
## $numgenes
## [1] 20
# run it for all
res <- lapply(1:length(genesets), function(i) {
  unlist(t1(y,i,gsl=genesets,n=5))
})

# format the results
out <- t(do.call(cbind,res))
rownames(out) <- out[,1]
out <- out[,2:ncol(out)]
out <- as.matrix(out)
out2 <- apply(out,2,as.numeric)
rownames(out2) <- rownames(out)
out <- as.data.frame(out2)
out$padj <- p.adjust(out$`p-value`)
out <- out[order(out$`p-value`),]

Now we can take a look at the results

Top significant results.

Take this table with a grain of salt because small p-values don’t always mean a biologically meaningful observation. For example, some of these are very large sets of genes with only a slight change.

head(out,20) %>% kbl(caption = "Top significant pathways") %>%
  kable_paper("hover", full_width = F)
Top significant pathways
p-value mean median lower conf interval upper conf interval numgenes padj
Signal Transduction 0.00e+00 1433.0687 2107.50 1152.1919 1713.946 626 0.0000000
Post-translational protein modification 0.00e+00 1296.2787 2077.50 930.8210 1661.736 366 0.0000000
Signaling by Rho GTPases, Miro GTPases and RHOBTB3 0.00e+00 1803.0343 2874.50 1302.5172 2303.551 204 0.0000000
Signaling by Rho GTPases 0.00e+00 1733.6786 2747.50 1218.8964 2248.461 196 0.0000003
Metabolism of proteins 0.00e+00 1103.4178 1832.25 765.6431 1441.193 438 0.0000004
Metabolism 0.00e+00 1059.5813 1508.00 733.2895 1385.873 461 0.0000005
Disease 0.00e+00 1014.0908 1247.00 664.2562 1363.925 402 0.0000261
Metabolism of lipids 1.00e-07 1493.9365 2424.00 974.2207 2013.652 189 0.0000590
Hemostasis 2.00e-07 1757.4930 3143.00 1124.8551 2390.131 142 0.0002001
Post-translational protein phosphorylation 4.00e-07 3913.3636 5003.00 2783.6013 5043.126 22 0.0004710
Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs) 5.00e-07 3674.9583 4788.50 2579.0320 4770.885 24 0.0005018
RHO GTPase cycle 1.70e-06 1604.6765 2452.50 970.0825 2239.270 136 0.0019318
Immune System 2.20e-06 864.8614 1071.00 510.3120 1219.411 433 0.0024914
RHOA GTPase cycle 4.00e-06 2573.1304 3667.50 1585.4504 3560.810 46 0.0044526
Vesicle-mediated transport 5.60e-06 1208.1244 1654.00 697.6935 1718.555 197 0.0062333
G alpha (12/13) signalling events 9.70e-06 3464.9583 4885.00 2193.5628 4736.354 24 0.0107508
Metabolism of steroids 1.00e-05 2731.8056 3634.50 1656.0884 3807.523 36 0.0110658
Membrane Trafficking 1.18e-05 1169.9098 1585.50 657.0870 1682.733 194 0.0129622
Diseases of signal transduction by growth factor receptors and second messengers 1.20e-05 1457.4407 2572.00 823.4708 2091.411 135 0.0132787
Synthesis of PIPs at the early endosome membrane 1.21e-05 3996.7500 4118.50 3130.0703 4863.430 8 0.0132787

Biggest effect size (upregulated) with FDR<0.05, ranked by median.

sigup <- subset(out,padj<0.05 & median>0)
nrow(sigup)
## [1] 24
upreg <- head(sigup[order(-sigup$median),],20)

upreg %>% kbl(caption = "Top upregulated pathways") %>%
  kable_paper("hover", full_width = F)
Top upregulated pathways
p-value mean median lower conf interval upper conf interval numgenes padj
Post-translational protein phosphorylation 4.00e-07 3913.364 5003.00 2783.6013 5043.126 22 0.0004710
G alpha (12/13) signalling events 9.70e-06 3464.958 4885.00 2193.5628 4736.354 24 0.0107508
Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs) 5.00e-07 3674.958 4788.50 2579.0320 4770.885 24 0.0005018
Synthesis of PIPs at the early endosome membrane 1.21e-05 3996.750 4118.50 3130.0703 4863.430 8 0.0132787
RHOA GTPase cycle 4.00e-06 2573.130 3667.50 1585.4504 3560.810 46 0.0044526
Metabolism of steroids 1.00e-05 2731.806 3634.50 1656.0884 3807.523 36 0.0110658
Resolution of Sister Chromatid Cohesion 3.41e-05 2701.781 3606.00 1562.9539 3840.609 32 0.0374372
RHO GTPases Activate Formins 1.45e-05 2557.053 3603.00 1519.0110 3595.094 38 0.0159440
Sphingolipid metabolism 4.51e-05 2692.231 3427.50 1566.6416 3817.820 26 0.0495066
Hemostasis 2.00e-07 1757.493 3143.00 1124.8551 2390.131 142 0.0002001
Signaling by Rho GTPases, Miro GTPases and RHOBTB3 0.00e+00 1803.034 2874.50 1302.5172 2303.551 204 0.0000000
Signaling by Rho GTPases 0.00e+00 1733.679 2747.50 1218.8964 2248.461 196 0.0000003
Diseases of signal transduction by growth factor receptors and second messengers 1.20e-05 1457.441 2572.00 823.4708 2091.411 135 0.0132787
RHO GTPase cycle 1.70e-06 1604.676 2452.50 970.0825 2239.270 136 0.0019318
Metabolism of lipids 1.00e-07 1493.937 2424.00 974.2207 2013.652 189 0.0000590
RHO GTPase Effectors 1.78e-05 1766.561 2366.50 996.0825 2537.039 82 0.0195666
Signal Transduction 0.00e+00 1433.069 2107.50 1152.1919 1713.946 626 0.0000000
Post-translational protein modification 0.00e+00 1296.279 2077.50 930.8210 1661.736 366 0.0000000
Metabolism of proteins 0.00e+00 1103.418 1832.25 765.6431 1441.193 438 0.0000004
Vesicle-mediated transport 5.60e-06 1208.124 1654.00 697.6935 1718.555 197 0.0062333

Biggest effect size (downregulated) with FDR<0.05, ranked by median.

sigdn <- subset(out,padj<0.05 & median<0)
nrow(sigdn)
## [1] 0
dnreg <- head(sigdn[order(sigdn$median),],20)

dnreg %>% kbl(caption = "Top downregulated pathways") %>%
  kable_paper("hover", full_width = F)
Top downregulated pathways
p-value mean median lower conf interval upper conf interval numgenes padj

There were no downregulated sets that met the FDR threshold.

Take a look at a few top sets.

First with some histograms.

gsnames <- head(rownames(upreg),10)
for (gsname in gsnames ) {
  gs <- genesets[[gsname]]
  z <- y[names(y) %in% gs]
  hist(z,xlim=c(MIN,MAX),breaks=10,xlab="generank",main=gsname)
}

for (gsname in gsnames ) {
  gs <- genesets[[gsname]]
  print(gsname)
  z <- y[names(y) %in% gs]
  z <- z[order(-z)]
  print(z)
}
## [1] "Post-translational protein phosphorylation"
##     FBN1     CALU    PCSK9      FN1   TGOLN2    PDIA6    FSTL1     CSF1 
##     6271     6120     6119     6049     6017     5731     5713     5613 
##    QSOX1   DNAJC3     WFS1     SDC2    FUCA2 TMEM132A    MXRA8   CHRDL1 
##     5503     5352     5171     4835     4742     4284     4131     3146 
##    APLP2     VWA1    MFGE8   MGAT4A       TF   PRSS23 
##     1521     1467     1252      340     -979    -2304 
## [1] "G alpha (12/13) signalling events"
##     ROCK2     KALRN     GNA13  ARHGEF12      VAV2   ARHGEF5      FGD3       ABR 
##      5996      5986      5970      5954      5880      5839      5760      5754 
##    ADRA1D     MCF2L   ARHGEF2    AKAP13  ARHGEF10      GNB1      RHOB   ARHGEF7 
##      5728      5417      5168      5086      4684      4295      3934      3067 
## ARHGEF10L      SOS2     ITSN1  ARHGEF39     ROCK1    PLXNB1   ARHGEF9      RHOC 
##      3058      2421      1497        55     -1060     -1199     -2997     -3134 
## [1] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
##     FBN1     CALU    PCSK9      FN1   TGOLN2    PDIA6    FSTL1     CSF1 
##     6271     6120     6119     6049     6017     5731     5713     5613 
##    QSOX1   DNAJC3     WFS1     SDC2    FUCA2 TMEM132A    MXRA8   CHRDL1 
##     5503     5352     5171     4835     4742     4284     4131     3146 
##    PAPPA    APLP2     VWA1    MFGE8   MGAT4A     IGF1       TF   PRSS23 
##     2389     1521     1467     1252      340     -284     -979    -2304 
## [1] "Synthesis of PIPs at the early endosome membrane"
##  MTMR4 PI4K2B MTMR10 INPP4A PI4K2A   MTM1  MTMR2 MTMR12 
##   5488   4772   4681   4671   3566   3236   2938   2622 
## [1] "RHOA GTPase cycle"
##    IQGAP1     ROCK2     KALRN  ARHGEF12      VAV2   ARHGAP5   ARHGEF5       ABR 
##      6055      5996      5986      5954      5880      5845      5839      5754 
##     LMAN1      PLD1  ARHGAP28     MCF2L       CIT   STARD13   ARHGEF2    AKAP13 
##      5739      5542      5446      5417      5301      5197      5168      5086 
##  ARHGEF10      TEX2      TMPO      VAPB   PLEKHG3  ARHGAP31    PGRMC2     STK10 
##      4684      4424      4156      4053      3926      3748      3699      3636 
##   ARHGEF7 ARHGEF10L      YKT6  ARHGAP32      FAF2     FMNL3      PKN3  ARHGAP22 
##      3067      3058      2828      2388      2168      2020      1933      1279 
##  ARHGAP42      STOM    DIAPH1  ARHGAP26     MYO9A    DDRGK1     ROCK1       BCR 
##       983       947       835       699       553       348     -1060     -2044 
##   ARHGAP1    FAM13A     OPHN1    VANGL1  ARHGAP24     FLOT2 
##     -2449     -3281     -3289     -4215     -5273     -5662 
## [1] "Metabolism of steroids"
##   DHCR24   STARD4   HMGCS1   SEC24D      MVD    ACACA   ELOVL6     FASN 
##   6229.0   6220.0   6164.0   6108.0   5889.0   5851.0   5643.0   5556.0 
##     OSBP  LDLRAP1     TSPO     MED1      VDR     GPAM  TBL1XR1      SP1 
##   5461.0   5310.0   5183.0   5181.0   5110.0   4848.0   4666.0   4531.0 
##   STARD5 STARD3NL HSD17B12   CREBBP   SEC24C    PTGIS     NFYB   HSD3B7 
##   4307.0   3961.0   3308.0   3254.0   3087.0   2713.0   2490.0   2000.0 
##  CYP27B1    TBL1X     NFYA   OSBPL3      EBP    PIAS4    ACACB     LRP2 
##   1626.0    449.5    -10.0    -72.0   -347.0  -1018.0  -1102.0  -1900.0 
##   HSD3B2   STARD3    UBE2I     CHD9 
##  -1960.5  -2241.0  -3525.0  -4625.0 
## [1] "Resolution of Sister Chromatid Cohesion"
##    TAOK1    CENPF DYNC1LI2 PAFAH1B1   AHCTF1   DYNLL2    SEH1L     RCC2 
##     6166     6057     5894     5817     5618     5597     5112     4992 
##  RANGAP1    CKAP5  PPP2R5E    PDS5A   CLASP1    CENPA     NUF2    KIF2C 
##     4802     4709     4545     4367     4225     3936     3867     3666 
##    STAG2    AURKB     NSL1   ZWILCH    RAD21  PPP2R5C    CENPO   MAPRE1 
##     3546     3524     2968     2921     2592     2032     1584     1580 
##   CLASP2    NUP37    CENPP    CENPM   PPP2CB     BUB3  PPP2R5D  ITGB3BP 
##     1439      714      465      426    -2132    -3622    -5174    -5776 
## [1] "RHO GTPases Activate Formins"
##    ITGB1    TAOK1    CENPF DYNC1LI2 PAFAH1B1   AHCTF1   DYNLL2    SEH1L 
##     6188     6166     6057     5894     5817     5618     5597     5112 
##     RCC2  RANGAP1    CKAP5  PPP2R5E   CLASP1   DIAPH2    CENPA     RHOB 
##     4992     4802     4709     4545     4225     4086     3936     3934 
##     DVL1     NUF2    KIF2C      SRF    AURKB     NSL1   ZWILCH  PPP2R5C 
##     3917     3867     3666     3540     3524     2968     2921     2032 
##    FMNL3    CENPO   MAPRE1   CLASP2   DIAPH1    NUP37    CENPP    CENPM 
##     2020     1584     1580     1439      835      714      465      426 
##   SRGAP2   PPP2CB     RHOC     BUB3  PPP2R5D  ITGB3BP 
##     -170    -2132    -3134    -3622    -5174    -5776 
## [1] "Sphingolipid metabolism"
##    UGCG   ESYT2   SGMS2   SPNS2    OSBP   SAMD8  SPTLC2    PSAP    KDSR  ORMDL3 
##    5926    5737    5626    5463    5461    5049    4923    4808    4755    4480 
## ALDH3A2    VAPB   CERS6    HEXA     GBA    ARSG   ASAH2    NEU3   CERS5  SPTSSA 
##    4439    4053    3762    3093    2970    2014    2004    1942    1492     595 
##    GLB1   SGPP2   ACER2   SPHK1   PPM1L   ACER3 
##     153    -141    -746   -1179   -2940   -3741 
## [1] "Hemostasis"
##    ITGAV     THBD    ITGB1    DOCK5   ATP2B4  RAPGEF3     SDC1    KIF1B 
##     6242     6194     6188     6184     6181     6173     6165     6151 
##      CRK     CALU      FN1   KIF21B    PDPK1     KLC1   KIF26B    GNA13 
##     6130     6120     6049     6031     6027     5992     5990     5970 
##    ITPR3   PTPN11    GATA2     LRP8   SLC7A5     VAV2    GNAI3  SLC16A3 
##     5960     5950     5938     5911     5899     5880     5690     5609 
##    L1CAM   PIK3CB    QSOX1  PRKAR2B   ATP2A2     MAFF     CBX5   ANGPT4 
##     5601     5551     5503     5316     5300     5266     5169     5113 
##    CD109     MFN2   KIF13B   AKAP10  PRKAR1A     SDC2   NHLRC2     MAFG 
##     5092     5022     4976     4940     4931     4835     4829     4812 
##     PSAP    RCOR1    ALDOA    ITGA2   CD99L2   PDE10A    PROS1  PPP2R5E 
##     4808     4795     4679     4663     4655     4609     4550     4545 
##     GNB1   ADRA2A    DOCK9     MFN1    ITGA3    PRKG1     RHOB     DGKH 
##     4295     4279     4190     4159     4068     4013     3934     3856 
##    DAGLA     MGLL     TLN1    VTI1B    KIF2C     KRAS      VCL     DGKD 
##     3796     3777     3738     3683     3666     3661     3559     3486 
##    DOCK8      SPN  CEACAM1    ABCC4    ORAI2  SLC16A1    KIF5C     CD44 
##     3460     3433     3406     3314     3312     3301     3170     3116 
##  PRKAR2A    YWHAZ  CABLES1   PIK3R5     CD47    CHID1     KLC2    ANXA5 
##     2848     2668     2514     2334     2257     2244     2242     2155 
##    KIF1A  PPP2R5C    PLCG2     EHD1   STXBP3    APLP2 LGALS3BP     NRAS 
##     2081     2032     1993     1897     1654     1521     1502     1310 
##    PRKCQ    PICK1     EHD2    ITPK1    KIF5A     CDK2    GNA11     PLEK 
##     1171      984      834      682      415      381      317      302 
##    MAPK1   PIK3R3     IRF1   ENDOD1     IGF1   PRKACB     CD63   ANGPT2 
##      206     -118     -140     -189     -284     -446     -471     -474 
##    VPS45       TF  GUCY1A2    PLCG1    CALM1    PRKCE   PPP2CB    PTPN1 
##     -554     -979    -1063    -1236    -1598    -1925    -2132    -2246 
##   PHF21A     CD84    SH2B3   ATP1B3   KCNMB4  CABLES2     GRB2   HMG20B 
##    -2301    -2482    -2489    -2627    -2701    -2748    -2780    -3024 
##     IRF2     OLA1   MAPK14    PRKCA      LYN    KIF3A     DGKZ    PRKCD 
##    -3239    -3253    -3588    -3602    -4293    -4342    -4394    -4414 
##      CSK    P2RX6     EHD3  PPP2R5D    GNAI2      FGB    GATA6    PPIL2 
##    -4454    -4634    -4657    -5174    -5289    -5400    -5423    -5455 
##    ORAI1 SERPINA3     F11R   SLC8A1     RHOG     PPBP 
##    -5757    -5825    -5883    -5916    -5918    -5919

Now take a look at the downregulated pathways anyway.

sigdn <- subset(out,`p-value`<0.05 & median<0)
nrow(sigdn)
## [1] 20
dnreg <- head(sigdn[order(sigdn$median),],20)

dnreg %>% kbl(caption = "Top downregulated pathways") %>%
  kable_paper("hover", full_width = F)
Top downregulated pathways
p-value mean median lower conf interval upper conf interval numgenes padj
Cell recruitment (pro-inflammatory response) 0.0475782 -3637.200 -4879.0 -7211.685 -62.7146628 5 1.0000000
Purinergic signaling in leishmaniasis infection 0.0475782 -3637.200 -4879.0 -7211.685 -62.7146628 5 1.0000000
The NLRP3 inflammasome 0.0486885 -3564.200 -4514.0 -7095.294 -33.1059815 5 1.0000000
Inflammasomes 0.0499510 -2652.429 -4360.0 -5304.074 -0.7828632 7 1.0000000
Inhibition of DNA recombination at telomere 0.0008892 -4224.200 -4355.0 -5545.238 -2903.1624589 5 0.9479087
PI3K events in ERBB2 signaling 0.0011070 -4048.400 -4229.0 -5388.989 -2707.8106236 5 1.0000000
ERBB2 Regulates Cell Motility 0.0392551 -3325.400 -4229.0 -6385.030 -265.7703937 5 1.0000000
SHC1 events in ERBB2 signaling 0.0216016 -2521.200 -3396.5 -4576.991 -465.4088498 10 1.0000000
Mitochondrial translation termination 0.0043009 -2467.000 -3171.5 -4032.209 -901.7905216 16 1.0000000
Mitochondrial translation initiation 0.0094833 -2281.467 -3084.0 -3910.590 -652.3431029 15 1.0000000
Mitochondrial translation 0.0184976 -1953.222 -2823.0 -3535.311 -371.1331601 18 1.0000000
Formation of the Early Elongation Complex 0.0323851 -2417.143 -2817.0 -4551.681 -282.6043178 7 1.0000000
Formation of the HIV-1 Early Elongation Complex 0.0323851 -2417.143 -2817.0 -4551.681 -282.6043178 7 1.0000000
RNA Pol II CTD phosphorylation and interaction with CE 0.0399199 -2261.429 -2623.0 -4378.518 -144.3388890 7 1.0000000
RNA Pol II CTD phosphorylation and interaction with CE during HIV infection 0.0399199 -2261.429 -2623.0 -4378.518 -144.3388890 7 1.0000000
mRNA Capping 0.0399199 -2261.429 -2623.0 -4378.518 -144.3388890 7 1.0000000
Mitochondrial translation elongation 0.0359553 -1759.294 -2562.0 -3388.018 -130.5702501 17 1.0000000
RUNX1 interacts with co-factors whose precise effect on RUNX1 targets is not known 0.0198441 -2064.200 -2381.0 -3748.527 -379.8726291 15 1.0000000
Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins. 0.0152683 -1777.150 -2362.0 -3172.374 -381.9258719 20 1.0000000
Respiratory electron transport 0.0436868 -1604.824 -2018.0 -3158.298 -51.3486411 17 1.0000000

Now some charts for those pathways.

gsnames <- head(rownames(dnreg),20)
for (gsname in gsnames ) {
  gs <- genesets[[gsname]]
  z <- y[names(y) %in% gs]
  hist(z,xlim=c(MIN,MAX),breaks=10,xlab="generank",main=gsname)
}

for (gsname in gsnames ) {
  gs <- genesets[[gsname]]
  print(gsname)
  z <- y[names(y) %in% gs]
  z <- z[order(z)]
  print(z)
}
## [1] "Cell recruitment (pro-inflammatory response)"
##  TXNIP  SUGT1 ENTPD5   RELA  NFKB1 
##  -5282  -5139  -4879  -4360   1474 
## [1] "Purinergic signaling in leishmaniasis infection"
##  TXNIP  SUGT1 ENTPD5   RELA  NFKB1 
##  -5282  -5139  -4879  -4360   1474 
## [1] "The NLRP3 inflammasome"
## TXNIP SUGT1 PANX1  RELA NFKB1 
## -5282 -5139 -4514 -4360  1474 
## [1] "Inflammasomes"
##  TXNIP  SUGT1  PANX1   RELA BCL2L1   BCL2  NFKB1 
##  -5282  -5139  -4514  -4360  -1474    728   1474 
## [1] "Inhibition of DNA recombination at telomere"
##   TERF2  POLR2L  POLR2D TERF2IP  POLR2F 
##   -5472   -4936   -4355   -3541   -2817 
## [1] "PI3K events in ERBB2 signaling"
##  EGFR ERBB2 HBEGF  NRG2  GRB2 
## -5465 -4577 -4229 -3191 -2780 
## [1] "ERBB2 Regulates Cell Motility"
##   EGFR  ERBB2  HBEGF   NRG2 DIAPH1 
##  -5465  -4577  -4229  -3191    835 
## [1] "SHC1 events in ERBB2 signaling"
##  EGFR ERBB2 PRKCD HBEGF PRKCA  NRG2  GRB2 PRKCE  NRAS  KRAS 
## -5465 -4577 -4414 -4229 -3602 -3191 -2780 -1925  1310  3661 
## [1] "Mitochondrial translation termination"
## GADD45GIP1    MRPS18A       MRRF     MRPS25     MRPL30     MRPS35     MRPL28 
##      -5912      -5617      -5250      -4909      -3989      -3962      -3705 
##     MRPS30     MRPL46     MRPS10     MRPL17     MRPL51     MRPS23      MRPL3 
##      -3259      -3084      -2562      -2140       -837       -190        -97 
##     MRPL19      MRPS6 
##        468       5573 
## [1] "Mitochondrial translation initiation"
## GADD45GIP1    MRPS18A     MRPS25     MRPL30     MRPS35     MRPL28     MRPS30 
##      -5912      -5617      -4909      -3989      -3962      -3705      -3259 
##     MRPL46     MRPS10     MRPL17     MRPL51     MRPS23      MRPL3     MRPL19 
##      -3084      -2562      -2140       -837       -190        -97        468 
##      MRPS6 
##       5573 
## [1] "Mitochondrial translation"
## GADD45GIP1    MRPS18A       MRRF     MRPS25     MRPL30     MRPS35     MRPL28 
##      -5912      -5617      -5250      -4909      -3989      -3962      -3705 
##     MRPS30     MRPL46     MRPS10     MRPL17     MRPL51     MRPS23      MRPL3 
##      -3259      -3084      -2562      -2140       -837       -190        -97 
##     MRPL19       GFM1       TSFM      MRPS6 
##        468        635       3679       5573 
## [1] "Formation of the Early Elongation Complex"
##  POLR2L  POLR2D SUPT4H1  POLR2F  GTF2H5   ERCC2    CDK7 
##   -4936   -4355   -3123   -2817   -2623   -1037    1971 
## [1] "Formation of the HIV-1 Early Elongation Complex"
##  POLR2L  POLR2D SUPT4H1  POLR2F  GTF2H5   ERCC2    CDK7 
##   -4936   -4355   -3123   -2817   -2623   -1037    1971 
## [1] "RNA Pol II CTD phosphorylation and interaction with CE"
## POLR2L POLR2D POLR2F GTF2H5  RNGTT  ERCC2   CDK7 
##  -4936  -4355  -2817  -2623  -2033  -1037   1971 
## [1] "RNA Pol II CTD phosphorylation and interaction with CE during HIV infection"
## POLR2L POLR2D POLR2F GTF2H5  RNGTT  ERCC2   CDK7 
##  -4936  -4355  -2817  -2623  -2033  -1037   1971 
## [1] "mRNA Capping"
## POLR2L POLR2D POLR2F GTF2H5  RNGTT  ERCC2   CDK7 
##  -4936  -4355  -2817  -2623  -2033  -1037   1971 
## [1] "Mitochondrial translation elongation"
## GADD45GIP1    MRPS18A     MRPS25     MRPL30     MRPS35     MRPL28     MRPS30 
##      -5912      -5617      -4909      -3989      -3962      -3705      -3259 
##     MRPL46     MRPS10     MRPL17     MRPL51     MRPS23      MRPL3     MRPL19 
##      -3084      -2562      -2140       -837       -190        -97        468 
##       GFM1       TSFM      MRPS6 
##        635       3679       5573 
## [1] "RUNX1 interacts with co-factors whose precise effect on RUNX1 targets is not known"
##    PHC2 SMARCD1    CBX2   SCMH1 SMARCE1    CBX6   RING1    RYBP SMARCC2    YAF2 
##   -5557   -4804   -4330   -4189   -3517   -3448   -3019   -2381   -2315   -2312 
##  ARID1A    RNF2    CBX4 SMARCC1    PHC3 
##   -2021   -1904    -557    3887    5504 
## [1] "Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins."
##  MT-ATP6   NDUFB5   NDUFB9  NDUFA10   NDUFA4    COX7C   COX4I1   UQCRC1 
##    -5894    -5265    -4924    -4856    -4493    -4241    -3949    -3364 
##   NDUFA7 SLC25A27     SDHC  NDUFAF3  NDUFAF6   NDUFA2   PM20D1     SCO1 
##    -2945    -2706    -2018    -1702     -888     -569      339      903 
##     ETFA    COX14    NUBPL    TACO1 
##     2110     2479     2825     3615 
## [1] "Respiratory electron transport"
##  NDUFB5  NDUFB9 NDUFA10  NDUFA4   COX7C  COX4I1  UQCRC1  NDUFA7    SDHC NDUFAF3 
##   -5265   -4924   -4856   -4493   -4241   -3949   -3364   -2945   -2018   -1702 
## NDUFAF6  NDUFA2    SCO1    ETFA   COX14   NUBPL   TACO1 
##    -888    -569     903    2110    2479    2825    3615

SessionInfo

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3          beeswarm_0.4.0             
##  [3] kableExtra_1.3.4            biomaRt_2.56.1             
##  [5] dplyr_1.1.2                 mitch_1.12.0               
##  [7] gplots_3.1.3                DESeq2_1.40.2              
##  [9] SummarizedExperiment_1.30.2 Biobase_2.60.0             
## [11] MatrixGenerics_1.12.3       matrixStats_1.0.0          
## [13] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
## [15] IRanges_2.34.1              S4Vectors_0.38.1           
## [17] BiocGenerics_0.46.0         reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##  [1] DBI_1.1.3               bitops_1.0-7            gridExtra_2.3          
##  [4] echarts4r_0.4.5         rlang_1.1.1             magrittr_2.0.3         
##  [7] compiler_4.3.1          RSQLite_2.3.1           png_0.1-8              
## [10] systemfonts_1.0.4       vctrs_0.6.3             rvest_1.0.3            
## [13] stringr_1.5.0           pkgconfig_2.0.3         crayon_1.5.2           
## [16] fastmap_1.1.1           dbplyr_2.3.3            XVector_0.40.0         
## [19] ellipsis_0.3.2          caTools_1.18.2          utf8_1.2.3             
## [22] promises_1.2.0.1        rmarkdown_2.23          bit_4.0.5              
## [25] xfun_0.39               zlibbioc_1.46.0         cachem_1.0.8           
## [28] jsonlite_1.8.7          progress_1.2.2          blob_1.2.4             
## [31] highr_0.10              later_1.3.1             DelayedArray_0.26.7    
## [34] reshape_0.8.9           BiocParallel_1.34.2     prettyunits_1.1.1      
## [37] parallel_4.3.1          R6_2.5.1                bslib_0.5.0            
## [40] stringi_1.7.12          GGally_2.1.2            jquerylib_0.1.4        
## [43] Rcpp_1.0.11             knitr_1.43              httpuv_1.6.11          
## [46] Matrix_1.6-0            tidyselect_1.2.0        rstudioapi_0.15.0      
## [49] abind_1.4-5             yaml_2.3.7              codetools_0.2-19       
## [52] curl_5.0.1              lattice_0.21-8          tibble_3.2.1           
## [55] plyr_1.8.8              KEGGREST_1.40.0         shiny_1.7.4.1          
## [58] evaluate_0.21           BiocFileCache_2.8.0     xml2_1.3.5             
## [61] Biostrings_2.68.1       filelock_1.0.2          pillar_1.9.0           
## [64] KernSmooth_2.23-22      generics_0.1.3          RCurl_1.98-1.12        
## [67] hms_1.1.3               ggplot2_3.4.2           munsell_0.5.0          
## [70] scales_1.2.1            gtools_3.9.4            xtable_1.8-4           
## [73] glue_1.6.2              tools_4.3.1             locfit_1.5-9.8         
## [76] webshot_0.5.5           XML_3.99-0.14           grid_4.3.1             
## [79] AnnotationDbi_1.62.2    colorspace_2.1-0        GenomeInfoDbData_1.2.10
## [82] cli_3.6.1               rappdirs_0.3.3          fansi_1.0.4            
## [85] S4Arrays_1.0.5          viridisLite_0.4.2       svglite_2.1.1          
## [88] gtable_0.3.3            sass_0.4.7              digest_0.6.33          
## [91] htmlwidgets_1.6.2       memoise_2.0.1           htmltools_0.5.5        
## [94] lifecycle_1.0.3         httr_1.4.6              mime_0.12              
## [97] bit64_4.0.5             MASS_7.3-60