Source: TBA

Introduction

Most popular tools that do enrichment analysis of microRNA target genes don’t accept a background list. Let’s see what happens when we use a background list for this type of analysis. GSE188236 was used because it has both miRNA and mRNA datasets from the same samples, which means we can generate a background for the microRNA target gene pathway enrichment analysis. This dataset examines the effect of TGFB1 expression and of co-treatment with SB431542, a selective and potent inhibitor of the TGF-beta/Activin/Nodal pathway. For the sake of this analysis we will only be looking at the comparison of control and TGFB1 cells, with 5 replicates per group.

library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library("plyr")
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:S4Vectors':
## 
##     rename
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("kableExtra")
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library("readxl")
library("fgsea")
library("eulerr")

Method overview

Load count data from GEO, conduct DESeq2 analysis and make a short list of up- and downregulated genes. Select target mRNAs using a database. Discard any target mRNAs that aren’t expressed at a sufficient level to be detected as differential. Conduct enrichment analysis on these target mRNAs. Repeat the above but without the background list.

Load data

# Load the miRNA and mRNA count tables for GSE188236.
#
# If both cached .rds files exist they are read directly. Otherwise the GEO
# archive is downloaded and unpacked, the per-sample tables are joined into one
# data frame per assay, the results are cached as .rds, and the extracted
# per-sample files are moved into the "GSE188236" directory.
mir_cache <- "GSE188236_miRcounts.rds"
mrna_cache <- "GSE188236_mRNAcounts.rds"

# Both caches must be present: the cached branch reads both of them.
if (file.exists(mir_cache) && file.exists(mrna_cache)) {
  df_mir <- readRDS(mir_cache)
  df_mrna <- readRDS(mrna_cache)
} else {
  # mode = "wb": the archive is binary and must not be written in text mode.
  download.file(
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE188236&format=file",
    destfile = "GSE188236.tar",
    mode = "wb"
  )
  untar("GSE188236.tar")

  # mRNA: control and TGFB1 samples only
  files_mrna <- c("GSM5673193_CTRL_1.txt.gz","GSM5673194_CTRL_2.txt.gz","GSM5673195_CTRL_3.txt.gz",
    "GSM5673196_CTRL_4.txt.gz","GSM5673197_CTRL_5.txt.gz","GSM5673198_TGFB1_1.txt.gz",
    "GSM5673199_TGFB1_2.txt.gz","GSM5673200_TGFB1_3.txt.gz","GSM5673201_TGFB1_4.txt.gz",
    "GSM5673202_TGFB1_5.txt.gz")

  dat_mrna <- lapply(files_mrna, function(f) {
    read.table(f, header = TRUE)
  })

  # join_all() merges the per-sample tables on their shared column(s)
  df_mrna <- join_all(dat_mrna)

  # Use the gene ID column as row names, then drop it from the table
  rownames(df_mrna) <- df_mrna$Ensembl_gene_id
  df_mrna$Ensembl_gene_id <- NULL

  saveRDS(object = df_mrna, file = mrna_cache)

  # miRNA: control and TGFB1 samples only
  files_mir <- c("GSM5673208_CTRL_1.txt.gz", "GSM5673209_CTRL_2.txt.gz", "GSM5673210_CTRL_3.txt.gz",
    "GSM5673211_CTRL_4.txt.gz", "GSM5673212_CTRL_5.txt.gz", "GSM5673213_TGFB1_1.txt.gz",
    "GSM5673214_TGFB1_2.txt.gz", "GSM5673215_TGFB1_3.txt.gz", "GSM5673216_TGFB1_4.txt.gz",
    "GSM5673217_TGFB1_5.txt.gz")

  dat_mir <- lapply(files_mir, function(f) {
    read.table(f, header = TRUE)
  })

  df_mir <- join_all(dat_mir)

  rownames(df_mir) <- df_mir$Ensembl_ID
  df_mir$Ensembl_ID <- NULL

  saveRDS(object = df_mir, file = mir_cache)

  # Tidy up: move the extracted per-sample files into their own directory.
  dir.create("GSE188236", showWarnings = FALSE)

  # Anchored pattern so that only names ending in ".txt.gz" are selected.
  delfiles <- list.files(".", pattern = "\\.txt\\.gz$")

  copied <- file.copy(delfiles, "GSE188236", overwrite = TRUE)

  # Only remove files that were copied successfully.
  unlink(delfiles[copied])
}

Filtering

# miRNA: inspect the raw table, then keep rows whose mean count exceeds 9
dim(df_mir)
## [1] 602  10
mir_mean_counts <- rowMeans(df_mir)
summary(mir_mean_counts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       5      41     200   20923    1700 4214429
mir_keep <- which(mir_mean_counts > 9)
df_mir_f <- df_mir[mir_keep, ]
dim(df_mir_f)
## [1] 595  10
# Round the retained miRNA values to whole numbers
df_mir_f <- round(df_mir_f)

# mRNA: same mean-count threshold
dim(df_mrna)
## [1] 60666    10
mrna_mean_counts <- rowMeans(df_mrna)
summary(mrna_mean_counts)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0        0        1      468       17 10326474
mrna_keep <- which(mrna_mean_counts > 9)
df_mrna_f <- df_mrna[mrna_keep, ]
dim(df_mrna_f)
## [1] 17646    10
summary(rowMeans(df_mrna_f))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        9       29      120     1606      486 10326474

Load ensembl 105 data

It is not certain that Ensembl v105 is the annotation release that matches this dataset; it is a best guess. There are 60666 rows of count data and 60531 genes in the v105 annotation, which is quite close. The annotation was downloaded from the Ensembl BioMart archive on 27 November 2023.

# Gene annotation table exported from Ensembl BioMart (tab-delimited).
# quote = "" and comment.char = "" stop read.table() from treating quote
# characters or "#" inside a field as special, which would otherwise silently
# merge or truncate rows.
gt <- read.table("mart_export.txt", sep = "\t", header = TRUE,
  quote = "", comment.char = "")

Differential analysis miRNA

# Sample sheet: one row per column of the filtered miRNA count table,
# with row names equal to the sample names.
ss <- data.frame(colnames(df_mir_f))
rownames(ss) <- ss[, 1]
colnames(ss) <- "samplename"
# TRUE for samples whose name contains "TGF", FALSE otherwise
ss$trt <- factor(grepl("TGF", ss$samplename))

dds <- DESeqDataSetFromMatrix(countData = df_mir_f, colData = ss, design = ~ trt)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind = FALSE, nsub = 200)
# Results table with the variance-stabilised values appended as extra columns,
# sorted by p-value (cbind() already returns a data frame).
zz <- cbind(as.data.frame(z), assay(vsd))
dge_mir <- zz[order(zz$pvalue), ]

# head() rather than [1:20, ] so no NA rows appear if fewer than 20 remain
head(dge_mir[, 1:6], 20) %>%
  kbl(caption = "Top miRNA expression differences between ctrl and TGFB1 treated cells") %>%
  kable_paper("hover", full_width = FALSE)
Top miRNA expression differences between ctrl and TGFB1 treated cells
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000221430.1 38.70888 1.3966136 0.2675422 5.220163 0.0000002 0.0000999
ENSG00000207635.1 1374.01594 -3.0763902 0.6573548 -4.679954 0.0000029 0.0005590
ENSG00000215991.1 200.10822 -2.8669389 0.6137997 -4.670805 0.0000030 0.0005590
ENSG00000199157.1 22.05865 -2.0649738 0.5161413 -4.000792 0.0000631 0.0088225
ENSG00000207595.1 7570.74021 0.9544503 0.2425789 3.934598 0.0000833 0.0093170
ENSG00000199177.1 16601.35509 0.9993812 0.2621493 3.812259 0.0001377 0.0128293
ENSG00000202569.2 10823.70658 -0.9710050 0.2784574 -3.487087 0.0004883 0.0389953
ENSG00000207947.1 39369.13777 0.4604510 0.1362776 3.378773 0.0007281 0.0508760
ENSG00000215938.1 34.96205 -1.6676766 0.4993528 -3.339676 0.0008388 0.0520964
ENSG00000208035.1 182667.75439 1.1575924 0.3544068 3.266282 0.0010897 0.0609141
ENSG00000201943.1 42.93282 -1.1252750 0.3643687 -3.088287 0.0020131 0.0991240
ENSG00000216009.1 865.66647 -0.7038392 0.2291310 -3.071777 0.0021279 0.0991240
ENSG00000211997.1 5393.96134 0.5880439 0.1953641 3.009990 0.0026126 0.1123404
ENSG00000207584.1 3061.07930 0.3739365 0.1304552 2.866398 0.0041517 0.1657724
ENSG00000207759.1 2503.74820 0.5475869 0.1989519 2.752359 0.0059168 0.2204983
ENSG00000207548.1 24.61279 1.1562851 0.4404271 2.625373 0.0086554 0.2655144
ENSG00000207944.1 3589.42379 0.3380020 0.1302380 2.595264 0.0094518 0.2655144
ENSG00000207975.1 176.52056 0.4530077 0.1754826 2.581497 0.0098373 0.2655144
ENSG00000201143.1 41.29776 -0.9828034 0.3824975 -2.569437 0.0101864 0.2655144
ENSG00000208008.1 80986.50162 0.4585598 0.1787005 2.566080 0.0102855 0.2655144
# Up-regulated miRNAs: adjusted p-value below 0.05 and positive fold change
mir_up <- rownames(subset(dge_mir, padj < 0.05 & log2FoldChange > 0))
mir_up
## [1] "ENSG00000221430.1" "ENSG00000207595.1" "ENSG00000199177.1"
# Report IDs that are absent from the annotation instead of dropping them
# silently in the lookup below.
mir_up_unmapped <- setdiff(mir_up, gt$Gene.stable.ID.version)
if (length(mir_up_unmapped) > 0) {
  warning("Up-regulated miRNA IDs not found in annotation: ",
    paste(mir_up_unmapped, collapse = ", "), call. = FALSE)
}
mir_up <- gt[which(gt$Gene.stable.ID.version %in% mir_up), ]
mir_up$Gene.name <- tolower(mir_up$Gene.name)
mir_up
##        Gene.stable.ID Gene.stable.ID.version Gene.name
## 20177 ENSG00000199177      ENSG00000199177.1     mir31
## 20307 ENSG00000207595      ENSG00000207595.1  mir181a2
## 47185 ENSG00000221430      ENSG00000221430.1   mir1294
# Down-regulated miRNAs: adjusted p-value below 0.05 and negative fold change
mir_dn <- rownames(subset(dge_mir, padj < 0.05 & log2FoldChange < 0))
mir_dn
## [1] "ENSG00000207635.1" "ENSG00000215991.1" "ENSG00000199157.1"
## [4] "ENSG00000202569.2"
# In the recorded run only two of the four IDs above are found in the
# annotation, so make the loss explicit.
# NOTE(review): possibly a versioned-ID mismatch with the annotation release;
# consider matching on the unversioned stable ID -- confirm.
mir_dn_unmapped <- setdiff(mir_dn, gt$Gene.stable.ID.version)
if (length(mir_dn_unmapped) > 0) {
  warning("Down-regulated miRNA IDs not found in annotation: ",
    paste(mir_dn_unmapped, collapse = ", "), call. = FALSE)
}
mir_dn <- gt[which(gt$Gene.stable.ID.version %in% mir_dn), ]
mir_dn
##        Gene.stable.ID Gene.stable.ID.version Gene.name
## 33471 ENSG00000207635      ENSG00000207635.1   MIR499A
## 49205 ENSG00000215991      ENSG00000215991.1   MIR208B
mir_dn$Gene.name <- tolower(mir_dn$Gene.name)
mir_dn
##        Gene.stable.ID Gene.stable.ID.version Gene.name
## 33471 ENSG00000207635      ENSG00000207635.1   mir499a
## 49205 ENSG00000215991      ENSG00000215991.1   mir208b

Differential analysis mRNA

# Sample sheet: one row per column of the filtered mRNA count table,
# with row names equal to the sample names.
ss <- data.frame(colnames(df_mrna_f))
rownames(ss) <- ss[, 1]
colnames(ss) <- "samplename"
# TRUE for samples whose name contains "TGF", FALSE otherwise
ss$trt <- factor(grepl("TGF", ss$samplename))

dds <- DESeqDataSetFromMatrix(countData = df_mrna_f, colData = ss, design = ~ trt)
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind = FALSE, nsub = 200)
# Results table with the variance-stabilised values appended as extra columns,
# sorted by p-value (cbind() already returns a data frame).
zz <- cbind(as.data.frame(z), assay(vsd))
dge_mrna <- zz[order(zz$pvalue), ]

# head() rather than [1:20, ] so no NA rows appear if fewer than 20 remain
head(dge_mrna[, 1:6], 20) %>%
  kbl(caption = "Top mRNA gene expression differences between ctrl and TGFB1 treated cells") %>%
  kable_paper("hover", full_width = FALSE)
Top mRNA gene expression differences between ctrl and TGFB1 treated cells
baseMean log2FoldChange lfcSE stat pvalue padj
ENSG00000175899 693.18892 -3.0227028 0.3819312 -7.914259 0.00e+00 0.0000000
ENSG00000130203 23.58394 -2.5336090 0.3990153 -6.349654 0.00e+00 0.0000019
ENSG00000140022 110.23686 -1.6602078 0.2730986 -6.079153 0.00e+00 0.0000071
ENSG00000162849 296.52318 0.8694710 0.1496897 5.808489 0.00e+00 0.0000278
ENSG00000155792 48.35853 -1.3544237 0.2396370 -5.651982 0.00e+00 0.0000559
ENSG00000172005 27.16968 -2.5036791 0.4766035 -5.253170 1.00e-07 0.0004393
ENSG00000183230 28.83016 -2.9147741 0.5615835 -5.190277 2.00e-07 0.0005288
ENSG00000025039 192.64840 -2.1110174 0.4248466 -4.968893 7.00e-07 0.0013385
ENSG00000203805 19.24304 1.6474490 0.3317431 4.966038 7.00e-07 0.0013385
ENSG00000177354 168.52007 -1.6862593 0.3491360 -4.829807 1.40e-06 0.0024093
ENSG00000102678 51.26376 -2.1852803 0.4569946 -4.781852 1.70e-06 0.0027836
ENSG00000166091 16.35597 -2.5529347 0.5438487 -4.694200 2.70e-06 0.0039320
ENSG00000155011 37.71857 2.5664640 0.5656626 4.537093 5.70e-06 0.0077344
ENSG00000149256 539.39085 0.7748345 0.1732289 4.472894 7.70e-06 0.0090408
ENSG00000120820 312.10446 0.7915045 0.1770330 4.470944 7.80e-06 0.0090408
ENSG00000265727 260.91927 0.5588482 0.1258137 4.441871 8.90e-06 0.0090408
ENSG00000197852 773.84275 -0.5856260 0.1319888 -4.436937 9.10e-06 0.0090408
ENSG00000143028 157.63287 -1.8189523 0.4101872 -4.434444 9.20e-06 0.0090408
ENSG00000130766 79.01247 0.9431118 0.2132849 4.421840 9.80e-06 0.0090802
ENSG00000167470 278.70379 -0.5306694 0.1212890 -4.375247 1.21e-05 0.0106100
# Ensembl IDs of up-regulated mRNAs (adjusted p < 0.05, positive fold change)
is_up <- which(dge_mrna$padj < 0.05 & dge_mrna$log2FoldChange > 0)
mrna_up <- rownames(dge_mrna)[is_up]
mrna_up
##  [1] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" "ENSG00000149256"
##  [5] "ENSG00000120820" "ENSG00000265727" "ENSG00000130766" "ENSG00000254951"
##  [9] "ENSG00000028277" "ENSG00000286458" "ENSG00000183049" "ENSG00000125430"
## [13] "ENSG00000128965" "ENSG00000157680" "ENSG00000250384" "ENSG00000243753"
## [17] "ENSG00000142871" "ENSG00000144802" "ENSG00000276012" "ENSG00000137809"
## [21] "ENSG00000278880"
# Translate to unique gene symbols via the annotation, dropping blank symbols
up_symbols <- unique(gt$Gene.name[gt$Gene.stable.ID %in% mrna_up])
mrna_up <- up_symbols[up_symbols != ""]
mrna_up
##  [1] "RN7SL648P" "DKK2"      "CHAC1"     "ITGA11"    "TENM4"     "PLPP4"    
##  [7] "UBE2CP3"   "SESN2"     "DGKI"      "HS3ST3B1"  "CAMK1D"    "KIF26B"   
## [13] "HLA-L"     "GLT8D2"    "CCN1"      "POU2F2"    "NFKBIZ"
# Ensembl IDs of down-regulated mRNAs (adjusted p < 0.05, negative fold change)
is_dn <- which(dge_mrna$padj < 0.05 & dge_mrna$log2FoldChange < 0)
mrna_dn <- rownames(dge_mrna)[is_dn]
mrna_dn
##  [1] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" "ENSG00000155792"
##  [5] "ENSG00000172005" "ENSG00000183230" "ENSG00000025039" "ENSG00000177354"
##  [9] "ENSG00000102678" "ENSG00000166091" "ENSG00000197852" "ENSG00000143028"
## [13] "ENSG00000167470" "ENSG00000072954" "ENSG00000183520" "ENSG00000122574"
## [17] "ENSG00000077274" "ENSG00000101638" "ENSG00000168556" "ENSG00000197616"
## [21] "ENSG00000120669" "ENSG00000147255" "ENSG00000218416" "ENSG00000077009"
## [25] "ENSG00000100979" "ENSG00000170290" "ENSG00000163249" "ENSG00000228672"
## [29] "ENSG00000105088" "ENSG00000004848"
dn_symbols <- unique(gt$Gene.name[gt$Gene.stable.ID %in% mrna_dn])
mrna_dn <- dn_symbols[dn_symbols != ""]
mrna_dn
##  [1] "SOHLH2"   "IGSF1"    "ST8SIA5"  "PLTP"     "CMTM5"    "ARX"     
##  [7] "CAPN6"    "RRAGD"    "SLN"      "ING2"     "MYH6"     "FGF9"    
## [13] "DEPTOR"   "C10orf71" "UTP11"    "A2M"      "MAL"      "CTNNA3"  
## [19] "PROB1"    "STON2"    "WIPF3"    "CCNYL1"   "APOE"     "OLFM2"   
## [25] "MIDN"     "NMRK2"    "TMEM38A"  "INKA2"    "SYPL2"

Fetch target gene database

#options(timeout=1000)
#download.file("https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx",
#destfile="hsa_MTI.xlsx",)

mirtarbase <- read_excel("hsa_MTI.xlsx")

# mir up
mir_up_gn <- mir_up$Gene.name
# Turn gene names into miRTarBase-style ID prefixes with a trailing dash,
# e.g. "mir31" -> "hsa-miR-31-".
mir_up_gn2 <- gsub("$", "-", gsub("mir", "hsa-miR-", mir_up_gn))
# Drop a copy number that follows the letter suffix ("181a2" -> "181a").
# This generalises the previous hard-coded "a2" -> "a" replacement.
# NOTE(review): hyphenated copy numbers (e.g. "mir16-1") are not handled here.
mir_up_gn2 <- sub("([0-9][a-z])[0-9]+-$", "\\1-", mir_up_gn2)

mti_mirna <- mirtarbase[["miRNA"]]
mti_target <- mirtarbase[["Target Gene"]]

# For each miRNA, collect the unique target genes of every miRTarBase entry
# whose ID contains the dashed prefix, plus entries whose ID equals the prefix
# without the trailing dash.
up_targets <- lapply(mir_up_gn2, function(x) {
  # fixed = TRUE: match the ID literally rather than as a regular expression
  prefix_hits <- mti_target[grep(x, mti_mirna, fixed = TRUE)]
  exact_hits <- mti_target[mti_mirna %in% sub("-$", "", x)]
  unique(c(prefix_hits, exact_hits))
})

lapply(up_targets,length)
## [[1]]
## [1] 314
## 
## [[2]]
## [1] 679
## 
## [[3]]
## [1] 115
up_targets <- unique(unlist(up_targets))
# Purely numeric entries (e.g. "44806" in the recorded output) are not gene
# symbols; they look like names converted to dates by a spreadsheet. They are
# kept so that results are unchanged, but flagged for review.
numeric_symbols <- grep("^[0-9]+$", up_targets, value = TRUE)
if (length(numeric_symbols) > 0) {
  warning("Target list contains numeric-only entries: ",
    paste(numeric_symbols, collapse = ", "), call. = FALSE)
}
up_targets
##    [1] "RHOA"           "PPP2R2A"        "LATS2"          "FOXP3"         
##    [5] "SELE"           "CASR"           "YY1"            "RET"           
##    [9] "NUMB"           "NFAT5"          "KLF13"          "JAZF1"         
##   [13] "HOXC13"         "ETS1"           "ITGA5"          "MPRIP"         
##   [17] "MMP16"          "RDX"            "CXCL12"         "ARPC5"         
##   [21] "FZD3"           "DMD"            "TIAM1"          "ICAM1"         
##   [25] "DKK1"           "DACT3"          "WASF3"          "HIF1AN"        
##   [29] "SATB2"          "PRKCE"          "RASA1"          "STK40"         
##   [33] "MCM2"           "CDK1"           "CREG1"          "MLH1"          
##   [37] "MET"            "INHBA"          "RAN"            "ACADVL"        
##   [41] "XK"             "CIPC"           "PPP2R5C"        "MCM4"          
##   [45] "RICTOR"         "C1orf198"       "DBR1"           "PPIA"          
##   [49] "ARL6IP5"        "NUS1"           "TNKS2"          "SLC9A6"        
##   [53] "PPIL2"          "ZC3H18"         "RPS7"           "MED12"         
##   [57] "CYP27B1"        "HIST1H2BC"      "PTPRJ"          "AGO1"          
##   [61] "DNAAF5"         "HIST1H2BK"      "RPA1"           "TXNDC5"        
##   [65] "AKAP8L"         "FLNA"           "ATP2A2"         "NDFIP2"        
##   [69] "TRIB3"          "NUP188"         "RPS4Y1"         "GLI2"          
##   [73] "RPL37A"         "EXOSC5"         "DDX19A"         "UBA6"          
##   [77] "NOL9"           "DNAJC5"         "RETREG3"        "PEX19"         
##   [81] "PPP4R3B"        "TRRAP"          "RANGAP1"        "TNRC6B"        
##   [85] "NOP56"          "PCSK1N"         "ZNF275"         "C2CD5"         
##   [89] "NFATC2IP"       "HIST1H2BJ"      "SRRM2"          "GYG1"          
##   [93] "ZC3H12C"        "LRRC59"         "GHITM"          "EDC3"          
##   [97] "MYO1D"          "STOML2"         "EXOC6"          "HOXA7"         
##  [101] "SFXN1"          "ILF2"           "SRC"            "MAP4K4"        
##  [105] "RAB27A"         "TBXA2R"         "EMSY"           "SESN2"         
##  [109] "CRKL"           "SP1"            "ARID1A"         "PLEKHB2"       
##  [113] "TMEM109"        "ZNF805"         "CHMP4B"         "BACH1"         
##  [117] "SNRNP27"        "NCBP2"          "FOXC1"          "YWHAE"         
##  [121] "BAHD1"          "ABCB9"          "RHOBTB1"        "SP7"           
##  [125] "NUDT3"          "C15orf52"       "SLC16A9"        "FOXD4"         
##  [129] "FOXD4L5"        "FOXD4L4"        "FOXD4L1"        "MICA"          
##  [133] "PAPOLG"         "PARP11"         "ZDHHC6"         "RBM38"         
##  [137] "HSPA6"          "SPRED1"         "SMG1"           "SDC4"          
##  [141] "RNF111"         "PHF12"          "PDE4D"          "TSPAN1"        
##  [145] "FBXL5"          "DPM2"           "ATP5A1"         "ARF1"          
##  [149] "HOXD3"          "MZT1"           "MXRA7"          "REXO2"         
##  [153] "POLR3E"         "IL5RA"          "RPL35A"         "NF2"           
##  [157] "ZNF485"         "INTU"           "TXNIP"          "CDC42SE1"      
##  [161] "KLHL15"         "FRS2"           "DCK"            "RSRC1"         
##  [165] "ZNF587"         "ZNF641"         "SLC38A1"        "BCAS4"         
##  [169] "PARP1"          "FRK"            "KIAA0391"       "SFT2D2"        
##  [173] "FBXL7"          "AR"             "CNBP"           "SLC30A5"       
##  [177] "ZNF71"          "PPIC"           "GUF1"           "ZNF460"        
##  [181] "TOR1AIP1"       "ZNF331"         "C9orf64"        "C19orf12"      
##  [185] "ECHDC1"         "EPB41L4B"       "TRAF1"          "ZNF678"        
##  [189] "CCDC127"        "ZIK1"           "SYDE2"          "SUPT7L"        
##  [193] "PYURF"          "PUM2"           "PTPDC1"         "NUP43"         
##  [197] "LAPTM4A"        "RAB18"          "CBX3"           "FZD1"          
##  [201] "GTF2E1"         "CCNT1"          "SPRTN"          "CRK"           
##  [205] "SLC18B1"        "RPL7L1"         "PRPF38A"        "QSER1"         
##  [209] "CKAP2L"         "PAPLN"          "GABRB1"         "CDH13"         
##  [213] "SRPX2"          "BLOC1S4"        "LILRA2"         "TET3"          
##  [217] "LIPG"           "IPP"            "PPM1L"          "RPL27A"        
##  [221] "RPL12"          "AGO2"           "XPO6"           "ACBD7"         
##  [225] "JARID2"         "AKNA"           "AFF1"           "PNPT1"         
##  [229] "MAFF"           "NLGN1"          "PAX6"           "DPYSL5"        
##  [233] "FAM193A"        "MCMBP"          "E2F2"           "NECTIN4"       
##  [237] "XRCC5"          "C1QTNF9"        "DOCK1"          "SMAD4"         
##  [241] "SGPP2"          "SOX4"           "BAP1"           "IL25"          
##  [245] "SDHA"           "SPRED2"         "SPRY1"          "SPRY3"         
##  [249] "SPRY4"          "GNA13"          "FOXO3"          "STMN1"         
##  [253] "DICER1"         "SLC1A2"         "BRWD1"          "LMNB2"         
##  [257] "ANKRD52"        "AP2B1"          "CASKIN2"        "EFNB1"         
##  [261] "KLHDC10"        "KMT2B"          "MTSS1L"         "NFE2L1"        
##  [265] "NFIC"           "NPM1"           "PHLDA1"         "PRRC2B"        
##  [269] "RAB1B"          "RAB5B"          "SERTAD2"        "SFN"           
##  [273] "VPS26B"         "DNAJB4"         "ZNF614"         "FOXJ3"         
##  [277] "GIGYF1"         "MAGEA12"        "MAGEA3"         "MAGEA6"        
##  [281] "OTUD4"          "PLAGL2"         "RASA4"          "TAPBP"         
##  [285] "TMEM9"          "ZBTB39"         "NUCB1"          "C17orf99"      
##  [289] "TMEM182"        "KIAA1737"       "HEATR2"         "FAM134C"       
##  [293] "SMEK2"          "C11orf30"       "PVRL4"          "HIF1A"         
##  [297] "GAS1"           "App"            "BACE1"          "BMP2"          
##  [301] "RUNX2"          "PTEN"           "ACOX1"          "MMP3"          
##  [305] "SLC20A1"        "AGO3"           "RASA2"          "CELSR2"        
##  [309] "CGNL1"          "TINCR"          "WLS"            "C14orf101"     
##  [313] "NXF1"           "U2SURP"         "NLK"            "GATA6"         
##  [317] "CDX2"           "PLAG1"          "BCL2"           "PROX1"         
##  [321] "KAT2B"          "CDKN1B"         "ZNF763"         "DDIT4"         
##  [325] "ATM"            "HIPK2"          "BCL2L11"        "HRAS"          
##  [329] "RNF2"           "RALA"           "SIRT1"          "PRAP1"         
##  [333] "DUSP6"          "PTPN11"         "DUSP5"          "PTPN22"        
##  [337] "FOS"            "MTMR3"          "KLF6"           "MCL1"          
##  [341] "XIAP"           "GPR78"          "NANOG"          "LFNG"          
##  [345] "LRRC17"         "CHRFAM7A"       "CD46"           "RASSF6"        
##  [349] "FXYD6"          "KCTD3"          "TSHR"           "ZNF558"        
##  [353] "C8A"            "ARL6IP6"        "ZNF426"         "ATF7IP2"       
##  [357] "PRR4"           "TCF21"          "PHOX2A"         "PLPBP"         
##  [361] "HACD3"          "GSTM2"          "FSIP1"          "KBTBD3"        
##  [365] "PTPRZ1"         "WNT3A"          "TUSC1"          "LRRN3"         
##  [369] "TMEM45A"        "ARF6"           "C1orf109"       "TAF15"         
##  [373] "PLXDC2"         "NMRK2"          "WNT2"           "ATG10"         
##  [377] "PRDX3"          "ZNF652"         "RTEL1-TNFRSF6B" "GCNT1"         
##  [381] "PCDHB8"         "ENAH"           "ZNF25"          "S100A1"        
##  [385] "PLA2G4C"        "NOL4"           "SIX6"           "FKBP10"        
##  [389] "SMCHD1"         "OR11A1"         "INCENP"         "LPGAT1"        
##  [393] "CLUAP1"         "LYSMD3"         "CCDC6"          "BAG2"          
##  [397] "GPR83"          "PTGS2"          "ANKRD13C"       "RLF"           
##  [401] "FBXO28"         "ZNF350"         "TIAL1"          "RNF34"         
##  [405] "LCLAT1"         "JCAD"           "ZNF35"          "PITPNB"        
##  [409] "SCD"            "H3F3B"          "GATAD2B"        "LGALSL"        
##  [413] "TGIF2"          "MOB1A"          "SLC35B4"        "FAM160A2"      
##  [417] "NUP58"          "GPRIN3"         "H1F0"           "ARHGAP12"      
##  [421] "SPRY2"          "TGFBR3"         "TMED4"          "MAP2K1"        
##  [425] "PUM1"           "TRIM2"          "FBXO33"         "NRP1"          
##  [429] "FAM47B"         "CCNG1"          "BRMS1L"         "OTUD1"         
##  [433] "ATP6V0E1"       "WNT16"          "CST5"           "SH3BGRL"       
##  [437] "GPR137B"        "OFCC1"          "IQCG"           "NKX3-2"        
##  [441] "OTX2"           "ROPN1L"         "TMEM14A"        "TAF2"          
##  [445] "IDS"            "FRA10AC1"       "COL27A1"        "EPHA5"         
##  [449] "DCST1"          "ZNF562"         "EYA4"           "CHL1"          
##  [453] "TAAR6"          "SLCO2A1"        "HMGB2"          "HERC3"         
##  [457] "BTBD3"          "SRPK2"          "DNAJC7"         "ANKRD1"        
##  [461] "CFI"            "MRPS14"         "HEY2"           "MTMR12"        
##  [465] "ACOT12"         "PCLAF"          "USP28"          "AMMECR1"       
##  [469] "BPGM"           "DSCR8"          "UGT3A1"         "HSD17B3"       
##  [473] "GADD45G"        "FBXO34"         "KLRC4"          "MOB3B"         
##  [477] "FKBP7"          "TBX4"           "TMPRSS11A"      "SNAI2"         
##  [481] "SLC7A11"        "NUDT12"         "COPS2"          "ZNF12"         
##  [485] "PRLR"           "PLCL2"          "ZNF594"         "METAP1"        
##  [489] "HSPA13"         "NR6A1"          "YOD1"           "SLC37A3"       
##  [493] "FBXO11"         "ZNF445"         "TM9SF3"         "ATP8A1"        
##  [497] "TMEM64"         "MOB1B"          "GNAI3"          "TAB2"          
##  [501] "SRSF7"          "DDX3X"          "KRAS"           "LBR"           
##  [505] "KLHL42"         "TMEM132B"       "AFTPH"          "ZNF148"        
##  [509] "NOTCH2"         "NFYB"           "NOTCH1"         "HOOK3"         
##  [513] "SIK2"           "FAM222B"        "RPS8"           "STAG2"         
##  [517] "PFKFB2"         "ZEB2"           "MAZ"            "RPL14"         
##  [521] "KCTD2"          "UBA2"           "DDX27"          "FAT1"          
##  [525] "HDAC6"          "TMEM192"        "LAMA3"          "HUWE1"         
##  [529] "ND2"            "HNRNPAB"        "OCA2"           "AP1M1"         
##  [533] "UCHL1"          "PGD"            "ZFP36L2"        "AKAP12"        
##  [537] "PABPC1"         "GANAB"          "PHPT1"          "H2AFY"         
##  [541] "TEAD4"          "BRCA1"          "MTCL1"          "KIAA0100"      
##  [545] "PPP1R9A"        "MGAT5"          "TNIP1"          "PBX3"          
##  [549] "TIMP1"          "PGR"            "COL16A1"        "PPP3CA"        
##  [553] "ATG5"           "CD4"            "TGFBRAP1"       "TGFBR1"        
##  [557] "TNFRSF11B"      "PCDHAC1"        "PCDHAC2"        "PCDHA1"        
##  [561] "PCDHA10"        "PCDHA11"        "PCDHA12"        "PCDHA13"       
##  [565] "PCDHA2"         "PCDHA3"         "PCDHA4"         "PCDHA5"        
##  [569] "PCDHA6"         "PCDHA7"         "PCDHA8"         "PDGFRA"        
##  [573] "BMP3"           "SOX5"           "MAP3K3"         "TAB3"          
##  [577] "PDAP1"          "MAPK1IP1L"      "BMPR2"          "SMAD2"         
##  [581] "MADD"           "ACAN"           "MAP3K10"        "PCDHB6"        
##  [585] "MMP14"          "E2F5"           "RAP1B"          "C12orf29"      
##  [589] "NHLRC3"         "CCNK"           "PAPD5"          "CARM1"         
##  [593] "LDLR"           "C2orf69"        "44806"          "TNPO1"         
##  [597] "PTBP3"          "CUL5"           "PRRC2C"         "PEBP1"         
##  [601] "PNRC2"          "IPO5"           "RCOR1"          "KIF2C"         
##  [605] "CHD9"           "LRRC8D"         "ZNF136"         "KIF3B"         
##  [609] "TTPAL"          "NCOA3"          "FNDC3B"         "PRKCD"         
##  [613] "SLC25A37"       "CHCHD7"         "ZBTB33"         "GRK2"          
##  [617] "ELK4"           "ZNF268"         "ZFAND6"         "LMAN1"         
##  [621] "MRPL34"         "PDIA6"          "CHMP2B"         "DRAM1"         
##  [625] "TMEM94"         "ZNF440"         "ZNF439"         "NSD2"          
##  [629] "KIAA1551"       "PMAIP1"         "FKBP1A"         "HIGD2A"        
##  [633] "ID4"            "DAZAP2"         "ZFP36L1"        "SLC35G2"       
##  [637] "MTX3"           "ABCG2"          "PHACTR2"        "SASH1"         
##  [641] "MOSPD1"         "SRGAP1"         "UBL3"           "PHACTR4"       
##  [645] "EREG"           "TERT"           "RGS5"           "IFNG"          
##  [649] "AHR"            "STAT3"          "WIF1"           "TWIST1"        
##  [653] "MAPK1"          "CDKL2"          "DCAF4"          "RSF1"          
##  [657] "FAM96A"         "CAMK4"          "VPS4A"          "VMP1"          
##  [661] "UBN2"           "SLC25A25"       "RPRD2"          "PPP2R5E"       
##  [665] "KLHDC3"         "MIGA2"          "CBX4"           "CAPRIN2"       
##  [669] "ARL5C"          "ZFAND2B"        "TAOK1"          "FKBP1C"        
##  [673] "ZNF83"          "HSPA1B"         "ZNF669"         "ZNF788"        
##  [677] "ZNF781"         "ZNF667"         "ZNF487"         "DGS2"          
##  [681] "EPS8"           "MAN1A2"         "FKBP14"         "ZNF791"        
##  [685] "ZFP69B"         "NPM3"           "TCF23"          "HSP90B1"       
##  [689] "CTC1"           "GJB7"           "ZNF138"         "WT1"           
##  [693] "ZDHHC15"        "XPNPEP3"        "MTPAP"          "NOX5"          
##  [697] "ZNF699"         "TXLNG"          "TRPC5"          "TBL1XR1"       
##  [701] "SEC61A2"        "PHOX2B"         "PHC3"           "MKL2"          
##  [705] "HEPHL1"         "COG5"           "ATP8B1"         "ANKRD50"       
##  [709] "PLPP3"          "ZNF844"         "ZNF780B"        "NCOA7"         
##  [713] "ZNF266"         "KRBOX4"         "SCAMP2"         "TMCC1"         
##  [717] "FOXL1"          "ZNF107"         "SPIRE1"         "SMU1"          
##  [721] "RNF6"           "HMGA2"          "CSNK1A1"        "ARSJ"          
##  [725] "ARPP19"         "ZNF415"         "ZNF616"         "SUV39H2"       
##  [729] "FAM3C"          "ZNF846"         "ZNF23"          "TUBB2A"        
##  [733] "MIER3"          "GTPBP3"         "IRAK1BP1"       "SURF6"         
##  [737] "THYN1"          "FADS6"          "RABGEF1"        "PPP3CB"        
##  [741] "MYO1C"          "DMRT2"          "IL17REL"        "SLC35G3"       
##  [745] "GRAMD1B"        "UNC5B"          "REPS1"          "NOS1AP"        
##  [749] "ZC3HAV1L"       "RPS6KA3"        "AP5M1"          "TNFRSF13B"     
##  [753] "ATP1A3"         "ZNF829"         "WDR72"          "HFM1"          
##  [757] "SPTLC3"         "ARFIP2"         "SCN8A"          "SMCR8"         
##  [761] "ATXN7"          "TLDC1"          "RBM25"          "TRAFD1"        
##  [765] "ZADH2"          "WDR13"          "PRR11"          "GJD3"          
##  [769] "AP3M2"          "ZNF556"         "ZNF852"         "MUC20"         
##  [773] "PRRG4"          "TIMM10B"        "ZNF253"         "DNAJC3"        
##  [777] "TK1"            "WNK1"           "TOPBP1"         "TEF"           
##  [781] "RNMT"           "ADAM17"         "RSBN1L"         "DYNLL2"        
##  [785] "CFHR3"          "KLHL24"         "PDK3"           "ZNF419"        
##  [789] "RNF187"         "ZNF597"         "CENPO"          "FAM13A"        
##  [793] "LAIR1"          "RLIM"           "CYP1B1"         "MAPK14"        
##  [797] "VCAM1"          "PNMA8B"         "LAPTM4B"        "PGK1"          
##  [801] "MTHFD1L"        "MYLK"           "CHST4"          "ZNF490"        
##  [805] "ZNF449"         "PNKD"           "EPRS"           "TEAD1"         
##  [809] "KRTAP21-2"      "MSX2"           "EN2"            "ZNF664"        
##  [813] "ZNF121"         "ZBTB4"          "ULK1"           "TUBB"          
##  [817] "TSG101"         "TMF1"           "TMEM30A"        "TFRC"          
##  [821] "TBC1D7"         "TBC1D13"        "STX2"           "STAG1"         
##  [825] "SSX2IP"         "SRGN"           "SORT1"          "SLC7A1"        
##  [829] "SLC38A2"        "SLC19A2"        "SLC10A7"        "SIPA1L1"       
##  [833] "SHOC2"          "RP2"            "RHOG"           "RGS16"         
##  [837] "RAB2B"          "PURB"           "PGAP1"          "PER2"          
##  [841] "PBRM1"          "OSBPL3"         "NMT2"           "FAM192A"       
##  [845] "NIN"            "NCAPG"          "NAA50"          "MTUS1"         
##  [849] "MPP5"           "KMT2E"          "LPCAT1"         "LONRF1"        
##  [853] "KPNA1"          "EFCAB14"        "WASHC5"         "KDM5A"         
##  [857] "INO80D"         "IL1A"           "HIST1H3D"       "HECW2"         
##  [861] "GOT1"           "GOLGA8B"        "GOLGA1"         "GNS"           
##  [865] "GK5"            "G3BP2"          "FSD1L"          "CCNQ"          
##  [869] "EPS15"          "EED"            "DYNC1LI2"       "DDX52"         
##  [873] "CPOX"           "CPEB4"          "CLCC1"          "CCL22"         
##  [877] "CCDC88C"        "ELMSAN1"        "GSKIP"          "BLOC1S2"       
##  [881] "BAZ2A"          "ATP2B1"         "ATG2B"          "ASB1"          
##  [885] "ARRDC3"         "ARRB2"          "APOL6"          "ALDH9A1"       
##  [889] "AFF4"           "ACYP1"          "CTDSPL"         "TUSC3"         
##  [893] "MEG3"           "PHLPP2"         "GPD1L"          "ALDH1A1"       
##  [897] "CDKN1A"         "BAX"            "RASSF1"         "INPP4B"        
##  [901] "CTNNB1"         "TCF4"           "VEGFA"          "PRKN"          
##  [905] "EGR1"           "NRAS"           "RUNX1"          "CEBPA"         
##  [909] "SAMHD1"         "REPIN1"         "SLC7A5"         "ZC3H7B"        
##  [913] "ARHGDIA"        "MARK2"          "ADCY9"          "DCBLD2"        
##  [917] "RPL13A"         "FAM27E2"        "NSG2"           "OCIAD2"        
##  [921] "PSD"            "SUFU"           "CADPS2"         "PRAMEF11"      
##  [925] "PRAMEF15"       "PRAMEF26"       "PRAMEF4"        "PRAMEF9"       
##  [929] "ATXN2"          "BASP1"          "HNRNPH1"        "WDFY3"         
##  [933] "BDNF"           "HOXA11"         "ESR1"           "PROSC"         
##  [937] "PTPLAD1"        "TAF6L"          "KIAA1462"       "NUPL1"         
##  [941] "TMEM257"        "KIAA0101"       "SOGA2"          "C12ORF29"      
##  [945] "C2ORF69"        "ADRBK1"         "KIAA0195"       "WHSC1"         
##  [949] "FAM73B"         "ZNF487P"        "PPAP2B"         "PNMAL2"        
##  [953] "KIAA0196"       "FAM58A"         "OCLN"           "TFAM"          
##  [957] "SFRP4"          "EGFR"           "ZHX2"           "POSTN"         
##  [961] "CDH11"          "SERPINB1"       "RB1"            "CCND1"         
##  [965] "CARD11"         "LINC00703"      "BMP10"          "AGO4"          
##  [969] "DNMT3A"         "CBLB"           "CCAT1"          "MECP2"         
##  [973] "CELSR3"         "ITGA6"          "LGI2"           "IGF1R"         
##  [977] "SEC24A"         "MYC"            "CALU"           "EDN1"          
##  [981] "PEX11B"         "RRM2"           "SGCD"           "ZNF774"        
##  [985] "OLR1"           "NUCB2"          "RECK"           "AGTRAP"        
##  [989] "LYPLA2"         "GLO1"           "NECAB3"         "ZNRF3"         
##  [993] "ZNF385A"        "YWHAZ"          "XPOT"           "SP2"           
##  [997] "SMARCD1"        "SLC5A6"         "RNF126"         "PANK3"         
## [1001] "KMT2D"          "KLHDC8B"        "KIF3A"          "KIAA0930"      
## [1005] "KCTD21"         "IL2RB"          "GM2A"           "FAM83G"        
## [1009] "EFHD2"          "E2F3"           "CFL2"           "BSCL2"         
## [1013] "ATG9A"          "AREL1"          "ABHD2"          "ABHD14B"       
## [1017] "GNB1L"          "FAM71B"         "SNX19"          "LASP1"         
## [1021] "LAMA5"          "HSDL1"          "NAV1"           "RAB11FIP4"     
## [1025] "PLCG2"          "RBFOX2"         "ZNF644"         "DIABLO"        
## [1029] "C12orf49"       "DIRAS2"         "CTSB"           "DVL3"          
## [1033] "LEFTY1"         "NUP155"         "CBX5"           "SURF4"         
## [1037] "NCS1"           "TTLL7"          "EIF2S3"         "SNTN"          
## [1041] "RAX"            "ARL5A"          "C10orf111"      "ZNF277"        
## [1045] "PTPN3"          "IPO9"           "KCNMB1"         "ASGR2"         
## [1049] "CTDSP1"         "CXCL8"          "FGF19"          "KDM2A"         
## [1053] "MRPL44"         "MSX1"           "MYH9"           "P4HB"          
## [1057] "POLL"           "PPIB"           "SRF"            "SUSD1"         
## [1061] "TOR2A"          "TRAPPC10"       "CLDN12"         "EIF5AL1"       
## [1065] "ESCO1"          "HARBI1"         "IFITM3"         "MEAF6"         
## [1069] "NR2F6"          "ZBTB34"         "REEP3"          "SCYL3"         
## [1073] "ENO1"
length(up_targets)
## [1] 1073
# mir dn
# Rewrite each down-regulated miR name: "mir" becomes "hsa-miR-" and a
# trailing "-" is appended to the result.
mir_dn_gn <- mir_dn$Gene.name
mir_dn_gn2 <- gsub("$", "-", gsub("mir", "hsa-miR-", mir_dn_gn))

# For every rewritten name, pool the target genes from two lookups in
# mirtarbase: a pattern match on the dash-terminated name and an exact match
# on the same name with the trailing dash removed.
dn_targets <- lapply(mir_dn_gn2, function(dashed_name) {
  bare_name <- gsub("-$", "", dashed_name)
  pattern_rows <- grep(dashed_name, mirtarbase$miRNA)
  exact_rows <- which(mirtarbase$miRNA %in% bare_name)
  pattern_hits <- unique(unlist(as.vector(
    mirtarbase[pattern_rows, "Target Gene"]
  )))
  exact_hits <- unique(unlist(as.vector(
    mirtarbase[exact_rows, "Target Gene"]
  )))
  unique(c(pattern_hits, exact_hits))
})

# Number of target genes found for each miR.
lapply(dn_targets, length)
## [[1]]
## [1] 139
## 
## [[2]]
## [1] 169
# Collapse the per-miR target lists into a single de-duplicated vector and
# print it.
dn_targets <- unique(unlist(dn_targets))
dn_targets
##   [1] "SOX6"      "FOXO4"     "PDCD4"     "ETS1"      "PMAIP1"    "TXNIP"    
##   [7] "ETNK1"     "TGOLN2"    "ADO"       "NAPG"      "IGFBP4"    "MTDH"     
##  [13] "DR1"       "ABLIM1"    "IQGAP2"    "MLANA"     "METTL7A"   "EPC2"     
##  [19] "CELF1"     "STYX"      "ZNF844"    "ZNF460"    "APOBEC3F"  "VPS35"    
##  [25] "TSC22D3"   "SSRP1"     "SIKE1"     "PTGFRN"    "PAQR5"     "LIN54"    
##  [31] "LIMA1"     "LIFR"      "TYRP1"     "MYO1D"     "ZSCAN16"   "SUGT1"    
##  [37] "MAF"       "SLC25A12"  "SPTSSA"    "POM121C"   "KPNA5"     "WDR76"    
##  [43] "UQCRB"     "RAB5C"     "KLHL15"    "HIST1H3B"  "EIF4A2"    "HIST1H2AD"
##  [49] "DCAF12L2"  "C11orf54"  "MC2R"      "SRRM1"     "PROSER2"   "RPS4Y1"   
##  [55] "CD93"      "INIP"      "CAPRIN2"   "ASAP3"     "GDE1"      "PPP6C"    
##  [61] "NAT8L"     "AXIN2"     "MDM2"      "TRIM59"    "YTHDF1"    "NT5C3A"   
##  [67] "PGAM4"     "FOXO1"     "FIGN"      "FEM1B"     "GLO1"      "QRFPR"    
##  [73] "UBN2"      "UBE2V2"    "SLC7A2"    "PTMA"      "GXYLT1"    "EIF2AK4"  
##  [79] "FXYD6"     "ACOT2"     "BTLA"      "CACNA1B"   "ELK4"      "ARHGAP12" 
##  [85] "TFAM"      "MAPK1"     "BMPR1A"    "DGKG"      "DNAH9"     "BMP10"    
##  [91] "WWC2"      "DNAH8"     "UBN1"      "EEF1E1"    "TBC1D22A"  "CFAP65"   
##  [97] "MAPK10"    "NRIP1"     "PRR13"     "SRXN1"     "UTP18"     "TEF"      
## [103] "PDS5A"     "FGD4"      "RTL8C"     "PDE12"     "NLN"       "CSTF1"    
## [109] "RARS2"     "VAV3"      "MEF2C"     "CXCL8"     "DAZAP2"    "PLEKHG5"  
## [115] "RNMT"      "CDKN1A"    "MAMLD1"    "MDM4"      "ASAP1"     "BCL2L11"  
## [121] "HNRNPC"    "44626"     "TMEM178B"  "TRAF3IP1"  "WASHC2C"   "ZNF107"   
## [127] "LEP"       "PSMB9"     "MAML3"     "TTF2"      "CCDC108"   "FAM127A"  
## [133] "KCNN3"     "ALDH1L2"   "CUL1"      "KIF13A"    "LUC7L"     "NR5A1"    
## [139] "ICE2"      "FBXO28"    "PI4K2B"    "RNF11"     "SKIL"      "CBX3"     
## [145] "RACGAP1"   "TPBG"      "SP8"       "BRCC3"     "PPP2R1B"   "TUBB2A"   
## [151] "CYP1B1"    "FAM199X"   "SNAP25"    "RAPH1"     "RPLP1"     "GAS1"     
## [157] "ANP32B"    "RAP1B"     "CEP97"     "TBRG1"     "SLC28A2"   "HDHD2"    
## [163] "STXBP5L"   "SAR1B"     "RNF125"    "LRRC8A"    "JMJD1C"    "MYLIP"    
## [169] "HMGN2"     "ZFP36L1"   "PRR23A"    "KYAT3"     "LNPK"      "LRIG3"    
## [175] "KPNA3"     "DSN1"      "ARSK"      "SOCS5"     "MAP3K5"    "SINHCAF"  
## [181] "CPEB2"     "SYNCRIP"   "SON"       "BTG2"      "KIF21A"    "ATP8B4"   
## [187] "UFM1"      "SRRM4"     "BUD23"     "FOXP1"     "KIAA1210"  "UNKL"     
## [193] "ATF6"      "TPGS1"     "ACO1"      "RWDD1"     "PCDH19"    "SLC2A6"   
## [199] "SLC35G1"   "PAPSS2"    "TCN2"      "CCT4"      "PCDHB11"   "FSIP2"    
## [205] "CARNMT1"   "PKHD1"     "PTAFR"     "OSTM1"     "CLEC17A"   "RABGEF1"  
## [211] "LRIG1"     "FAM9C"     "FBLIM1"    "ZDHHC20"   "UBFD1"     "SUMO2"    
## [217] "SIN3A"     "SENP1"     "S1PR1"     "RYK"       "PPT1"      "PPM1F"    
## [223] "OLFML2A"   "HNRNPDL"   "TSC22D2"   "CAV1"      "HEY2"      "MTRF1L"   
## [229] "PINX1"     "MAK16"     "HOMEZ"     "ZSWIM9"    "KCTD20"    "CAPZA2"   
## [235] "MIA3"      "TRAPPC6B"  "IBTK"      "ADAMTS15"  "CACNA1C"   "CACNB2"   
## [241] "Qk"        "MSTN"      "QKI"       "AMBRA1"    "TUBB"      "GPATCH3"  
## [247] "RBPJ"      "SLIT3"     "SPRY4"     "ARHGEF39"  "GNG5"      "CDH2"     
## [253] "HMGN3"     "LCOR"      "RAB11FIP4" "ZNF688"    "NUDCD3"    "CCBL2"    
## [259] "KIAA1715"  "FAM60A"    "WBSCR22"   "C9orf41"   "C19orf68"  "ADGRL2"   
## [265] "STK26"
length(dn_targets)
## [1] 265

miR target Enrichment with FORA

# Load the two gene-set collections from their GMT files.
kegg <- gmtPathways("c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt")
reactome <- gmtPathways("ReactomePathways_2023-11-28.gmt")

# Distribution of gene-set sizes in each collection.
summary(lengths(reactome))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    5.00   15.00   49.36   44.00 2607.00
summary(lengths(kegg))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00    8.00   13.00   15.59   19.00   92.00
# Make a gene list - some of the Ensembl IDs don't have a symbol
# (~2000 of ~17000).
in_mrna_data <- gt$Gene.stable.ID %in% rownames(df_mrna_f)
gl <- gt[in_mrna_data, "Gene.name"]

# Make a non-redundant list of gene symbols, leaving out the empty ones.
has_symbol <- which(gl != "")
bg <- unique(gl[has_symbol])

length(gl)
## [1] 17628
length(bg)
## [1] 15265

First, try with the whole genome as the background.

# Whole-genome background: every distinct, non-empty gene symbol in gt.
all_symbols <- unique(gt$Gene.name)
wgbg <- all_symbols[which(all_symbols != "")]

# Up-miR targets that are absent from the whole-genome background.
setdiff(up_targets, wgbg)
##  [1] "HIST1H2BC" "HIST1H2BK" "HIST1H2BJ" "C15orf52"  "ATP5A1"    "KIAA0391" 
##  [7] "MTSS1L"    "KIAA1737"  "HEATR2"    "FAM134C"   "SMEK2"     "C11orf30" 
## [13] "PVRL4"     "App"       "C14orf101" "H3F3B"     "FAM160A2"  "H1F0"     
## [19] "ND2"       "H2AFY"     "PAPD5"     "44806"     "KIAA1551"  "FAM96A"   
## [25] "ZNF788"    "DGS2"      "MKL2"      "TLDC1"     "EPRS"      "FAM192A"  
## [31] "MPP5"      "HIST1H3D"  "ELMSAN1"   "FAM27E2"   "PROSC"     "PTPLAD1"  
## [37] "KIAA1462"  "NUPL1"     "TMEM257"   "KIAA0101"  "SOGA2"     "C12ORF29" 
## [43] "C2ORF69"   "ADRBK1"    "KIAA0195"  "WHSC1"     "FAM73B"    "ZNF487P"  
## [49] "PPAP2B"    "PNMAL2"    "KIAA0196"  "FAM58A"    "CCAT1"     "C12orf49" 
## [55] "C10orf111"
# Count and fraction of up-miR targets missing from the whole-genome background.
up_not_in_wgbg <- setdiff(up_targets, wgbg)
length(up_not_in_wgbg)
## [1] 55
length(up_not_in_wgbg) / length(up_targets)
## [1] 0.05125815
up_wg_res <- fora(
  pathways = reactome, genes = up_targets, universe = wgbg, minSize = 5
)
## Warning in fora(pathways = reactome, genes = up_targets, universe = wgbg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes in the background) /
#                   (pathway size / background size).
nbg <- length(wgbg)
nsel <- length(intersect(up_targets, wgbg))
up_wg_sel_frac <- up_wg_res$overlap / nsel
up_wg_bg_frac <- up_wg_res$size / nbg
up_wg_res$foldenrichment <- up_wg_sel_frac / up_wg_bg_frac
head(up_wg_res)
##                            pathway         pval         padj overlap size
## 1:             Signal Transduction 3.531590e-62 7.052585e-59     227 2588
## 2: Gene expression (Transcription) 1.549856e-56 1.547531e-53     165 1528
## 3:   Generic Transcription Pathway 2.491551e-53 1.658542e-50     144 1233
## 4: RNA Polymerase II Transcription 1.912269e-52 9.547004e-50     150 1359
## 5:                         Disease 1.091849e-42 4.360845e-40     158 1791
## 6:                   Immune System 1.435853e-35 4.778998e-33     161 2118
##                                overlapGenes foldenrichment
## 1:     ADAM17,ADCY9,AGO1,AGO2,AGO3,AGO4,...       3.416394
## 2:      AFF4,AGO1,AGO2,AGO3,AGO4,AKAP8L,...       4.205978
## 3:        AGO1,AGO2,AGO3,AGO4,AR,ARID1A,...       4.548894
## 4:          AFF4,AGO1,AGO2,AGO3,AGO4,AR,...       4.299106
## 5: ACAN,ADAM17,ADCY9,AGTRAP,AP1M1,AP2B1,...       3.436116
## 6:  ADAM17,AP1M1,AP2B1,AREL1,ARF1,ARPC5,...       2.960781
nrow(up_wg_res)
## [1] 1997
# Keep pathways with adjusted p < 0.05 and fold enrichment > 2; report how
# many there are and store their names.
up_wg_sig <- subset(up_wg_res, padj < 0.05 & foldenrichment > 2)
nrow(up_wg_sig)
## [1] 661
up_wg_res_sets <- up_wg_sig$pathway

setdiff(dn_targets,wgbg)
##  [1] "HIST1H3B"  "HIST1H2AD" "44626"     "CCDC108"   "FAM127A"   "Qk"       
##  [7] "CCBL2"     "KIAA1715"  "FAM60A"    "WBSCR22"   "C9orf41"   "C19orf68"
# Count and fraction of down-miR targets missing from the whole-genome
# background.
dn_not_in_wgbg <- setdiff(dn_targets, wgbg)
length(dn_not_in_wgbg)
## [1] 12
length(dn_not_in_wgbg) / length(dn_targets)
## [1] 0.04528302
dn_wg_res <- fora(
  pathways = reactome, genes = dn_targets, universe = wgbg, minSize = 5
)
## Warning in fora(pathways = reactome, genes = dn_targets, universe = wgbg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes in the background) /
#                   (pathway size / background size).
nbg <- length(wgbg)
nsel <- length(intersect(dn_targets, wgbg))
dn_wg_sel_frac <- dn_wg_res$overlap / nsel
dn_wg_bg_frac <- dn_wg_res$size / nbg
dn_wg_res$foldenrichment <- dn_wg_sel_frac / dn_wg_bg_frac
head(dn_wg_res)
##                            pathway         pval         padj overlap size
## 1:          Metabolism of proteins 8.933815e-13 1.163718e-09      43 1934
## 2:                         Disease 1.301879e-12 1.163718e-09      41 1791
## 3:             Signal Transduction 1.748199e-12 1.163718e-09      50 2588
## 4: Gene expression (Transcription) 1.615781e-11 8.066785e-09      36 1528
## 5: RNA Polymerase II Transcription 5.674276e-11 2.105760e-08      33 1359
## 6:                   Immune System 6.326771e-11 2.105760e-08      42 2118
##                                        overlapGenes foldenrichment
## 1:        ADAMTS15,ARSK,AXIN2,BRCC3,CAPZA2,CCT4,...       3.484541
## 2:     ADAMTS15,BCL2L11,CAV1,CDKN1A,CUL1,CYP1B1,...       3.587748
## 3: ARHGAP12,ARHGEF39,AXIN2,BCL2L11,BMP10,BMPR1A,...       3.027885
## 4:          BCL2L11,BTG2,CAV1,CBX3,CDKN1A,CSTF1,...       3.692434
## 5:          BCL2L11,BTG2,CAV1,CBX3,CDKN1A,CSTF1,...       3.805644
## 6:       ATP8B4,BCL2L11,BTLA,CAPZA2,CD93,CDKN1A,...       3.107828
nrow(dn_wg_res)
## [1] 1997
# Keep pathways with adjusted p < 0.05 and fold enrichment > 2; report how
# many there are and store their names.
dn_wg_sig <- subset(dn_wg_res, padj < 0.05 & foldenrichment > 2)
nrow(dn_wg_sig)
## [1] 155
dn_wg_res_sets <- dn_wg_sig$pathway

Now try with the proper background: the genes present in the mRNA dataset (`bg`).

setdiff(up_targets,bg)
##   [1] "FOXP3"          "SELE"           "CASR"           "RET"           
##   [5] "XK"             "HIST1H2BC"      "HIST1H2BK"      "TXNDC5"        
##   [9] "PCSK1N"         "HIST1H2BJ"      "HOXA7"          "TBXA2R"        
##  [13] "SP7"            "NUDT3"          "C15orf52"       "FOXD4"         
##  [17] "FOXD4L5"        "FOXD4L4"        "FOXD4L1"        "HSPA6"         
##  [21] "TSPAN1"         "ATP5A1"         "HOXD3"          "IL5RA"         
##  [25] "KIAA0391"       "PYURF"          "GABRB1"         "LILRA2"        
##  [29] "C1QTNF9"        "IL25"           "MTSS1L"         "PRRC2B"        
##  [33] "SFN"            "MAGEA12"        "MAGEA3"         "MAGEA6"        
##  [37] "C17orf99"       "KIAA1737"       "HEATR2"         "FAM134C"       
##  [41] "SMEK2"          "C11orf30"       "PVRL4"          "App"           
##  [45] "MMP3"           "TINCR"          "C14orf101"      "CDX2"          
##  [49] "ZNF763"         "PRAP1"          "NANOG"          "CHRFAM7A"      
##  [53] "RASSF6"         "TSHR"           "C8A"            "TCF21"         
##  [57] "PHOX2A"         "PTPRZ1"         "WNT3A"          "WNT2"          
##  [61] "RTEL1-TNFRSF6B" "S100A1"         "NOL4"           "SIX6"          
##  [65] "OR11A1"         "H3F3B"          "FAM160A2"       "H1F0"          
##  [69] "FAM47B"         "WNT16"          "CST5"           "OFCC1"         
##  [73] "OTX2"           "ROPN1L"         "DCST1"          "CHL1"          
##  [77] "TAAR6"          "SLCO2A1"        "CFI"            "HEY2"          
##  [81] "ACOT12"         "DSCR8"          "UGT3A1"         "HSD17B3"       
##  [85] "KLRC4"          "TBX4"           "TMPRSS11A"      "PRLR"          
##  [89] "MAZ"            "ND2"            "OCA2"           "H2AFY"         
##  [93] "PGR"            "PCDHAC1"        "PCDHAC2"        "PCDHA1"        
##  [97] "PCDHA10"        "PCDHA11"        "PCDHA12"        "PCDHA13"       
## [101] "PCDHA2"         "PCDHA3"         "PCDHA4"         "PCDHA5"        
## [105] "PCDHA6"         "PCDHA7"         "PCDHA8"         "BMP3"          
## [109] "PAPD5"          "44806"          "KIAA1551"       "TERT"          
## [113] "IFNG"           "WIF1"           "CDKL2"          "FAM96A"        
## [117] "ARL5C"          "ZNF788"         "DGS2"           "TCF23"         
## [121] "GJB7"           "PHOX2B"         "MKL2"           "FADS6"         
## [125] "DMRT2"          "IL17REL"        "SLC35G3"        "TNFRSF13B"     
## [129] "ATP1A3"         "WDR72"          "TLDC1"          "GJD3"          
## [133] "PRRG4"          "CFHR3"          "LAIR1"          "PNMA8B"        
## [137] "CHST4"          "EPRS"           "KRTAP21-2"      "FAM192A"       
## [141] "MPP5"           "IL1A"           "HIST1H3D"       "CCL22"         
## [145] "ELMSAN1"        "CEBPA"          "FAM27E2"        "NSG2"          
## [149] "PSD"            "PRAMEF11"       "PRAMEF15"       "PRAMEF26"      
## [153] "PRAMEF4"        "PRAMEF9"        "PROSC"          "PTPLAD1"       
## [157] "KIAA1462"       "NUPL1"          "TMEM257"        "KIAA0101"      
## [161] "SOGA2"          "C12ORF29"       "C2ORF69"        "ADRBK1"        
## [165] "KIAA0195"       "WHSC1"          "FAM73B"         "ZNF487P"       
## [169] "PPAP2B"         "PNMAL2"         "KIAA0196"       "FAM58A"        
## [173] "CARD11"         "LINC00703"      "BMP10"          "CCAT1"         
## [177] "LGI2"           "OLR1"           "IL2RB"          "BSCL2"         
## [181] "GNB1L"          "FAM71B"         "C12orf49"       "DIRAS2"        
## [185] "LEFTY1"         "SNTN"           "RAX"            "C10orf111"     
## [189] "ASGR2"          "CXCL8"          "FGF19"          "TOR2A"
# How many up-miR targets fall outside / inside the mRNA-derived background,
# out of how many in total, and the fraction that is missing.
up_not_in_bg <- setdiff(up_targets, bg)
up_in_bg <- intersect(up_targets, bg)
length(up_not_in_bg)
## [1] 192
length(up_in_bg)
## [1] 881
length(up_targets)
## [1] 1073
length(up_not_in_bg) / length(up_targets)
## [1] 0.1789376
up_bg_res <- fora(
  pathways = reactome, genes = up_targets, universe = bg, minSize = 5
)
## Warning in fora(pathways = reactome, genes = up_targets, universe = bg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes in the background) /
#                   (pathway size / background size).
nbg <- length(bg)
nsel <- length(intersect(up_targets, bg))
up_bg_sel_frac <- up_bg_res$overlap / nsel
up_bg_bg_frac <- up_bg_res$size / nbg
up_bg_res$foldenrichment <- up_bg_sel_frac / up_bg_bg_frac
head(up_bg_res)
##                                   pathway         pval         padj overlap
## 1:                    Signal Transduction 9.620197e-22 1.727787e-18     204
## 2:          Generic Transcription Pathway 2.478318e-19 2.225529e-16     137
## 3:        Gene expression (Transcription) 1.036131e-18 6.202970e-16     158
## 4:        RNA Polymerase II Transcription 8.491643e-18 3.812748e-15     143
## 5:                                Disease 1.652687e-15 5.936451e-13     150
## 6: Signaling by Receptor Tyrosine Kinases 3.576543e-14 1.070579e-11      66
##    size                             overlapGenes foldenrichment
## 1: 1832     ADAM17,ADCY9,AGO1,AGO2,AGO3,AGO4,...       1.929415
## 2: 1076        AGO1,AGO2,AGO3,AGO4,AR,ARID1A,...       2.206120
## 3: 1349      AFF4,AGO1,AGO2,AGO3,AGO4,AKAP8L,...       2.029392
## 4: 1193          AFF4,AGO1,AGO2,AGO3,AGO4,AR,...       2.076904
## 5: 1357 ACAN,ADAM17,ADCY9,AGTRAP,AP1M1,AP2B1,...       1.915280
## 6:  414  ADAM17,AP2B1,ARF6,ATP6V0E1,BAX,BDNF,...       2.762260
nrow(up_bg_res)
## [1] 1796
# Keep pathways with adjusted p < 0.05 and fold enrichment > 2; report how
# many there are and store their names.
up_bg_sig <- subset(up_bg_res, padj < 0.05 & foldenrichment > 2)
nrow(up_bg_sig)
## [1] 227
up_bg_res_sets <- up_bg_sig$pathway

setdiff(dn_targets,bg)
##  [1] "IQGAP2"    "TYRP1"     "HIST1H3B"  "HIST1H2AD" "DCAF12L2"  "MC2R"     
##  [7] "CD93"      "QRFPR"     "BTLA"      "CACNA1B"   "DNAH9"     "BMP10"    
## [13] "DNAH8"     "CFAP65"    "SRXN1"     "CXCL8"     "44626"     "LEP"      
## [19] "CCDC108"   "FAM127A"   "NR5A1"     "SP8"       "SNAP25"    "PRR23A"   
## [25] "SRRM4"     "KIAA1210"  "TPGS1"     "PTAFR"     "CLEC17A"   "HEY2"     
## [31] "Qk"        "CCBL2"     "KIAA1715"  "FAM60A"    "WBSCR22"   "C9orf41"  
## [37] "C19orf68"
# How many down-miR targets fall outside / inside the mRNA-derived background,
# out of how many in total, and the fraction that is missing.
dn_not_in_bg <- setdiff(dn_targets, bg)
dn_in_bg <- intersect(dn_targets, bg)
length(dn_not_in_bg)
## [1] 37
length(dn_in_bg)
## [1] 228
length(dn_targets)
## [1] 265
length(dn_not_in_bg) / length(dn_targets)
## [1] 0.1396226
dn_bg_res <- fora(
  pathways = reactome, genes = dn_targets, universe = bg, minSize = 5
)
## Warning in fora(pathways = reactome, genes = dn_targets, universe = bg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes in the background) /
#                   (pathway size / background size).
nbg <- length(bg)
nsel <- length(intersect(dn_targets, bg))
dn_bg_sel_frac <- dn_bg_res$overlap / nsel
dn_bg_bg_frac <- dn_bg_res$size / nbg
dn_bg_res$foldenrichment <- dn_bg_sel_frac / dn_bg_bg_frac
head(dn_bg_res)
##                                                pathway         pval        padj
## 1:                         FOXO-mediated transcription 1.261597e-06 0.002265829
## 2:     FOXO-mediated transcription of cell cycle genes 2.717299e-06 0.002440135
## 3:                 Cytokine Signaling in Immune system 6.682545e-05 0.040006169
## 4:                                             Disease 1.132751e-04 0.045194440
## 5:                 Regulation of beta-cell development 1.258197e-04 0.045194440
## 6: NOTCH2 intracellular domain regulates transcription 2.584770e-04 0.077370788
##    overlap size                                 overlapGenes foldenrichment
## 1:       8   54     BCL2L11,CAV1,CDKN1A,FOXO1,FOXO4,MSTN,...       9.918778
## 2:       5   16                 CAV1,CDKN1A,FOXO1,FOXO4,MSTN      20.922423
## 3:      21  543 BCL2L11,CDKN1A,CUL1,EIF4A2,FOXO1,HNRNPDL,...       2.589294
## 4:      38 1357 ADAMTS15,BCL2L11,CAV1,CDKN1A,CUL1,CYP1B1,...       1.874846
## 5:       4   18                      FOXO1,MAML3,MAMLD1,RBPJ      14.878168
## 6:       3    9                            MAML3,MAMLD1,RBPJ      22.317251
nrow(dn_bg_res)
## [1] 1796
# Keep pathways with adjusted p < 0.05 and fold enrichment > 2; report how
# many there are and store their names.
dn_bg_sig <- subset(dn_bg_res, padj < 0.05 & foldenrichment > 2)
nrow(dn_bg_sig)
## [1] 4
dn_bg_res_sets <- dn_bg_sig$pathway
# Significant pathway sets from the four runs: whole-genome (WG) versus
# mRNA-derived (BG) background, for up- and down-miR targets.
v1 <- list(
  "WG up" = up_wg_res_sets,
  "BG up" = up_bg_res_sets,
  "WG dn" = dn_wg_res_sets,
  "BG dn" = dn_bg_res_sets
)

v1_fit <- euler(v1)
plot(v1_fit, quantities = TRUE, main = "Effect of background list")

Now compare the targets of the up- and down-regulated miRs with the genes observed to be up- and down-regulated in the mRNA data.

# Gene-level overlap between the differentially expressed genes and the miR
# target lists.
v2 <- list(
  "up DGE" = mrna_up,
  "dn DGE" = mrna_dn,
  "up miR targets" = up_targets,
  "dn miR targets" = dn_targets
)

v2_fit <- euler(v2)
plot(v2_fit, quantities = TRUE, main = "miR targets and DEGs?")

Interesting how low the enrichment is.

Now check with the top 100 genes in each direction.

# First 129 rows of dge_mrna with a positive log2 fold change; per the output
# below, these 129 Ensembl IDs map to 100 unique gene symbols.
# NOTE(review): relies on dge_mrna already being ordered by significance -
# confirm where dge_mrna is built.
up_rows <- subset(dge_mrna, log2FoldChange > 0)
mrna_up <- rownames(head(up_rows, 129))
str(mrna_up)
##  chr [1:129] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" ...
# Translate Ensembl IDs to unique gene symbols and drop the empty ones.
up_id_rows <- which(gt$Gene.stable.ID %in% mrna_up)
up_symbols <- unique(gt[up_id_rows, "Gene.name"])
mrna_up <- up_symbols[up_symbols != ""]
str(mrna_up)
##  chr [1:100] "AGKP1" "APP" "CLTCL1" "SIPA1L1-AS1" "GTF3AP2" "CD226" ...
# Same for the first 105 rows with a negative log2 fold change.
dn_rows <- subset(dge_mrna, log2FoldChange < 0)
mrna_dn <- rownames(head(dn_rows, 105))
str(mrna_dn)
##  chr [1:105] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" ...
dn_id_rows <- which(gt$Gene.stable.ID %in% mrna_dn)
dn_symbols <- unique(gt[dn_id_rows, "Gene.name"])
mrna_dn <- dn_symbols[dn_symbols != ""]
str(mrna_dn)
##  chr [1:100] "MT-RNR1" "MT-CO2" "MT-ND5" "SOHLH2" "PPARGC1A" "FREM2" ...
# Overlap of these gene lists with the miR target lists.
v2 <- list(
  "up DGE" = mrna_up,
  "dn DGE" = mrna_dn,
  "up miR targets" = up_targets,
  "dn miR targets" = dn_targets
)

v2_fit <- euler(v2)
plot(v2_fit, quantities = TRUE, main = "miR targets and DEGs?")

Now check with the top 1000 genes in each direction.

# First 1288 rows of dge_mrna with a positive log2 fold change; per the output
# below, these 1288 Ensembl IDs map to 1000 unique gene symbols.
# NOTE(review): relies on dge_mrna already being ordered by significance -
# confirm where dge_mrna is built.
up_rows <- subset(dge_mrna, log2FoldChange > 0)
mrna_up <- rownames(head(up_rows, 1288))
str(mrna_up)
##  chr [1:1288] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" ...
# Translate Ensembl IDs to unique gene symbols and drop the empty ones.
up_id_rows <- which(gt$Gene.stable.ID %in% mrna_up)
up_symbols <- unique(gt[up_id_rows, "Gene.name"])
mrna_up <- up_symbols[up_symbols != ""]
str(mrna_up)
##  chr [1:1000] "ARSDP1" "CDY4P" "PRKY" "UTY" "AGKP1" "ZNF343" "LINC01669" ...
# Same for the first 1080 rows with a negative log2 fold change.
dn_rows <- subset(dge_mrna, log2FoldChange < 0)
mrna_dn <- rownames(head(dn_rows, 1080))
str(mrna_dn)
##  chr [1:1080] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" ...
dn_id_rows <- which(gt$Gene.stable.ID %in% mrna_dn)
dn_symbols <- unique(gt[dn_id_rows, "Gene.name"])
mrna_dn <- dn_symbols[dn_symbols != ""]
str(mrna_dn)
##  chr [1:1000] "MT-RNR1" "MT-RNR2" "MT-ND1" "MT-CO1" "MT-CO2" "MT-ATP6" ...
# Overlap of these gene lists with the miR target lists.
v2 <- list(
  "up DGE" = mrna_up,
  "dn DGE" = mrna_dn,
  "up miR targets" = up_targets,
  "dn miR targets" = dn_targets
)

v2_fit <- euler(v2)
plot(v2_fit, quantities = TRUE, main = "miR targets and DEGs?")

Now check with the top 250 genes in each direction.

# First 321 rows of dge_mrna with a positive log2 fold change; per the output
# below, these 321 Ensembl IDs map to 250 unique gene symbols.
# NOTE(review): relies on dge_mrna already being ordered by significance -
# confirm where dge_mrna is built.
up_rows <- subset(dge_mrna, log2FoldChange > 0)
mrna_up <- rownames(head(up_rows, 321))
str(mrna_up)
##  chr [1:321] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" ...
# Translate Ensembl IDs to unique gene symbols and drop the empty ones.
up_id_rows <- which(gt$Gene.stable.ID %in% mrna_up)
up_symbols <- unique(gt[up_id_rows, "Gene.name"])
mrna_up <- up_symbols[up_symbols != ""]
str(mrna_up)
##  chr [1:250] "AGKP1" "APP" "CLTCL1" "C21orf62" "APCDD1L" "MN1" "HDAC8" ...
# Same for the first 266 rows with a negative log2 fold change.
dn_rows <- subset(dge_mrna, log2FoldChange < 0)
mrna_dn <- rownames(head(dn_rows, 266))
str(mrna_dn)
##  chr [1:266] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" ...
dn_id_rows <- which(gt$Gene.stable.ID %in% mrna_dn)
dn_symbols <- unique(gt[dn_id_rows, "Gene.name"])
mrna_dn <- dn_symbols[dn_symbols != ""]
str(mrna_dn)
##  chr [1:250] "MT-RNR1" "MT-RNR2" "MT-CO2" "MT-ATP6" "MT-ND3" "MT-ND4" ...

Now pathway analysis of long RNA-seq

Use the top 250 genes in each direction for the enrichment analysis; a paper by Tarca et al. recommends this approach.

# Over-representation test of the up-regulated genes against the mRNA-derived
# background.
mrna_up_res <- fora(
  pathways = reactome, genes = mrna_up, universe = bg, minSize = 5
)
# Fold enrichment = (overlap / selected genes in the background) /
#                   (pathway size / background size).
nbg <- length(bg)
nsel <- length(intersect(mrna_up, bg))
mrna_up_sel_frac <- mrna_up_res$overlap / nsel
mrna_up_bg_frac <- mrna_up_res$size / nbg
mrna_up_res$foldenrichment <- mrna_up_sel_frac / mrna_up_bg_frac
head(mrna_up_res)
##                                                                                                                        pathway
## 1:                                                                                           Extracellular matrix organization
## 2:                                                                                 Collagen biosynthesis and modifying enzymes
## 3:                                                                                                Collagen chain trimerization
## 4:                                                                                                          Collagen formation
## 5: Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)
## 6:                                                                                                        Collagen degradation
##            pval        padj overlap size
## 1: 5.814354e-07 0.001044258      16  217
## 2: 2.898106e-06 0.002602499       8   55
## 3: 1.663602e-05 0.009959432       6   34
## 4: 2.241904e-05 0.010066149       8   72
## 5: 4.851678e-05 0.017427228       8   80
## 6: 7.614645e-05 0.022071903       6   44
##                                        overlapGenes foldenrichment
## 1:    ADAMTS16,APP,BMP4,COL11A1,COL15A1,COL27A1,...       4.502120
## 2: COL11A1,COL15A1,COL27A1,COL4A4,COL8A1,COL8A2,...       8.881455
## 3:     COL11A1,COL15A1,COL27A1,COL4A4,COL8A1,COL8A2      10.775294
## 4: COL11A1,COL15A1,COL27A1,COL4A4,COL8A1,COL8A2,...       6.784444
## 5:                  APP,BMP4,CDH2,FN1,GAS6,MEN1,...       6.106000
## 6:        COL11A1,COL15A1,COL4A4,COL8A1,COL8A2,MMP2       8.326364
nrow(mrna_up_res)
## [1] 1796
# Apply the cut-offs (adjusted p < 0.05, fold enrichment > 1) once, so the
# reported count always describes exactly the pathway set that is stored.
# NOTE(review): the count shown below was rendered with a foldenrichment > 0
# filter; re-knit to confirm it is unchanged.
mrna_up_sig <- subset(mrna_up_res, padj < 0.05 & foldenrichment > 1)
nrow(mrna_up_sig)
## [1] 10
mrna_up_res_sets <- mrna_up_sig$pathway

# Over-representation test of the down-regulated genes against the
# mRNA-derived background.
mrna_dn_res <- fora(
  pathways = reactome, genes = mrna_dn, universe = bg, minSize = 5
)
# Fold enrichment = (overlap / selected genes in the background) /
#                   (pathway size / background size).
nbg <- length(bg)
nsel <- length(intersect(mrna_dn, bg))
mrna_dn_sel_frac <- mrna_dn_res$overlap / nsel
mrna_dn_bg_frac <- mrna_dn_res$size / nbg
mrna_dn_res$foldenrichment <- mrna_dn_sel_frac / mrna_dn_bg_frac
head(mrna_dn_res)
##                                                                          pathway
## 1: FASTK family proteins regulate processing and stability of mitochondrial RNAs
## 2:                                          tRNA processing in the mitochondrion
## 3:                                          rRNA processing in the mitochondrion
## 4:                                                 Mitochondrial RNA degradation
## 5:                                                            Muscle contraction
## 6:                                                               Ion homeostasis
##            pval         padj overlap size
## 1: 1.238109e-08 2.223644e-05       7   19
## 2: 4.018142e-08 3.608291e-05       7   22
## 3: 7.929370e-08 4.747049e-05       7   24
## 4: 1.085999e-07 4.876136e-05       7   25
## 5: 3.030982e-06 1.088729e-03      12  139
## 6: 2.759865e-05 8.261197e-03       6   37
##                                       overlapGenes foldenrichment
## 1: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,...      22.495789
## 2: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,...      19.428182
## 3: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,...      17.809167
## 4: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,...      17.096800
## 5:        ATP2A1,CALM1,CAMK2B,CAMK2D,KCNJ2,MME,...       5.271367
## 6:             ATP2A1,CALM1,CAMK2B,CAMK2D,SLN,TRDN       9.901622
nrow(mrna_dn_res)
## [1] 1796
# Apply the cut-offs (adjusted p < 0.05, fold enrichment > 1) once, so the
# reported count always describes exactly the pathway set that is stored.
# NOTE(review): the count shown below was rendered with a foldenrichment > 0
# filter; re-knit to confirm it is unchanged.
mrna_dn_sig <- subset(mrna_dn_res, padj < 0.05 & foldenrichment > 1)
nrow(mrna_dn_sig)
## [1] 9
mrna_dn_res_sets <- mrna_dn_sig$pathway
# Pathway-level overlap between the miR-target enrichment results (mRNA-derived
# background) and the enrichment results of the differentially expressed genes.
v3 <- list(
  "dn miR target" = dn_bg_res_sets,
  "up miR target" = up_bg_res_sets,
  "up mRNA" = mrna_up_res_sets,
  "dn mRNA" = mrna_dn_res_sets
)

v3_fit <- euler(v3)
plot(v3_fit, quantities = TRUE, main = "miR target and DE pathways")

Conclusions