Source: TBA
Most popular tools that do enrichment analysis of microRNA target genes don’t accept a background list. Let’s see what happens when we use a background list for this type of analysis. GSE188236 was used because it has both miR and mRNA datasets from the same samples, which means we can generate a background for the microRNA target gene pathway enrichment analysis. This dataset examines the effect of TGFB1 treatment, alone and with co-treatment with SB431542, a selective and potent inhibitor of the TGF-beta/Activin/Nodal pathway. For the sake of this analysis we will only be looking at the comparison of control and TGFB1 cells, with 5 replicates each.
library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library("plyr")
##
## Attaching package: 'plyr'
## The following object is masked from 'package:matrixStats':
##
## count
## The following object is masked from 'package:IRanges':
##
## desc
## The following object is masked from 'package:S4Vectors':
##
## rename
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("kableExtra")
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library("readxl")
library("fgsea")
library("eulerr")
Load count data from GEO, conduct DESeq2 and make short list of up and downregulated genes. Select target mRNAs using a database. Discard any target mRNAs that aren’t expressed at sufficient level to be differential. Conduct enrichment analysis on these target mRNAs. Repeat the above but without the background list.
# Load the GSE188236 miRNA and mRNA count tables into df_mir and df_mrna.
#
# Both tables are cached as RDS files in the working directory. If either
# cache file is missing, the GEO supplementary archive is downloaded and
# unpacked, the per-sample tables of the five CTRL and five TGFB1 samples are
# joined into one data frame per assay (feature IDs as row names, one column
# per sample), and both caches are written.
if (file.exists("GSE188236_miRcounts.rds") &&
    file.exists("GSE188236_mRNAcounts.rds")) {
  df_mir <- readRDS("GSE188236_miRcounts.rds")
  df_mrna <- readRDS("GSE188236_mRNAcounts.rds")
} else {
  # The archive is binary and the URL does not end in a recognised archive
  # extension, so request binary mode explicitly; without it the tar file can
  # be corrupted on Windows.
  download.file(
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE188236&format=file",
    destfile = "GSE188236.tar",
    mode = "wb"
  )
  untar("GSE188236.tar")

  # mRNA: one table per sample, combined into a single data frame.
  files_mrna <- c(
    "GSM5673193_CTRL_1.txt.gz", "GSM5673194_CTRL_2.txt.gz",
    "GSM5673195_CTRL_3.txt.gz", "GSM5673196_CTRL_4.txt.gz",
    "GSM5673197_CTRL_5.txt.gz", "GSM5673198_TGFB1_1.txt.gz",
    "GSM5673199_TGFB1_2.txt.gz", "GSM5673200_TGFB1_3.txt.gz",
    "GSM5673201_TGFB1_4.txt.gz", "GSM5673202_TGFB1_5.txt.gz"
  )
  dat_mrna <- lapply(files_mrna, function(f) {
    read.table(f, header = TRUE)
  })
  # join_all() is called without `by`, so the tables are joined on whatever
  # columns they share (presumably only the ID column; verify on a fresh run).
  df_mrna <- join_all(dat_mrna)
  rownames(df_mrna) <- df_mrna$Ensembl_gene_id
  df_mrna$Ensembl_gene_id <- NULL
  saveRDS(object = df_mrna, file = "GSE188236_mRNAcounts.rds")

  # miRNA: same procedure; the ID column is named Ensembl_ID in these files.
  files_mir <- c(
    "GSM5673208_CTRL_1.txt.gz", "GSM5673209_CTRL_2.txt.gz",
    "GSM5673210_CTRL_3.txt.gz", "GSM5673211_CTRL_4.txt.gz",
    "GSM5673212_CTRL_5.txt.gz", "GSM5673213_TGFB1_1.txt.gz",
    "GSM5673214_TGFB1_2.txt.gz", "GSM5673215_TGFB1_3.txt.gz",
    "GSM5673216_TGFB1_4.txt.gz", "GSM5673217_TGFB1_5.txt.gz"
  )
  dat_mir <- lapply(files_mir, function(f) {
    read.table(f, header = TRUE)
  })
  df_mir <- join_all(dat_mir)
  rownames(df_mir) <- df_mir$Ensembl_ID
  df_mir$Ensembl_ID <- NULL
  saveRDS(object = df_mir, file = "GSE188236_miRcounts.rds")

  # Move every extracted per-sample file out of the working directory.
  # showWarnings = FALSE keeps a rerun quiet when the folder already exists;
  # the anchored pattern only picks up names that end in ".txt.gz".
  dir.create("GSE188236", showWarnings = FALSE)
  delfiles <- list.files(".", pattern = "\\.txt\\.gz$")
  file.copy(delfiles, "GSE188236", overwrite = TRUE)
  unlink(delfiles)
}
dim(df_mir)
## [1] 602 10
summary(rowMeans(df_mir))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5 41 200 20923 1700 4214429
# Expression filter: keep miRNAs whose mean count across samples is above 9.
df_mir_f <- df_mir[which(rowMeans(df_mir) > 9), ]
dim(df_mir_f)
## [1] 595 10
# Round to whole numbers before building the DESeq2 dataset (presumably the
# miRNA table holds non-integer values; confirm against the GEO files).
df_mir_f <- round(df_mir_f)
dim(df_mrna)
## [1] 60666 10
summary(rowMeans(df_mrna))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 1 468 17 10326474
# Expression filter for the mRNA table: keep genes with a mean count above 9.
# These genes later define the "expressed" background list.
df_mrna_f <- df_mrna[which(rowMeans(df_mrna) > 9), ]
dim(df_mrna_f)
## [1] 17646 10
summary(rowMeans(df_mrna_f))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 29 120 1606 486 10326474
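Load a gene annotation table to map Ensembl IDs to gene names. It is not certain that Ensembl v105 is the release the dataset was annotated with; it is a best guess, chosen because the count table has 60666 rows and the v105 annotation has 60531, which is quite close. The annotation was downloaded from the Ensembl BioMart archive on 27th Nov 2023.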
# Gene annotation table (tab-separated BioMart export). Later code reads its
# Gene.stable.ID, Gene.stable.ID.version and Gene.name columns.
gt <- read.table("mart_export.txt", sep = "\t", header = TRUE)

# Sample sheet for the miRNA counts: one row per sample, row names equal to
# the sample names, and a two-level treatment factor that is TRUE when the
# sample name contains "TGF".
ss <- data.frame(
  samplename = colnames(df_mir_f),
  row.names = colnames(df_mir_f)
)
ss$trt <- factor(grepl("TGF", ss$samplename))

# DESeq2 dataset with treatment as the only design term.
dds <- DESeqDataSetFromMatrix(
  countData = df_mir_f,
  colData = ss,
  design = ~ trt
)
## converting counts to integer mode
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Differential expression statistics for the miRNA contrast.
z <- results(res)
# Variance-stabilised values, using the design (blind = FALSE) and 200 rows
# for the fit (presumably lowered from the default because fewer than 1000
# miRNAs pass the filter; confirm).
vsd <- vst(dds, blind = FALSE, nsub = 200)
# One row per miRNA: statistics followed by the per-sample stabilised values.
zz <- cbind(as.data.frame(z), assay(vsd))
# Most significant first.
dge_mir <- as.data.frame(zz[order(zz$pvalue), ])
# Show the statistics columns of the top 20 rows.
kable_paper(
  kbl(
    dge_mir[1:20, 1:6],
    caption = "Top miRNA expression differences between ctrl and TGFB1 treated cells"
  ),
  "hover",
  full_width = FALSE
)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000221430.1 | 38.70888 | 1.3966136 | 0.2675422 | 5.220163 | 0.0000002 | 0.0000999 |
ENSG00000207635.1 | 1374.01594 | -3.0763902 | 0.6573548 | -4.679954 | 0.0000029 | 0.0005590 |
ENSG00000215991.1 | 200.10822 | -2.8669389 | 0.6137997 | -4.670805 | 0.0000030 | 0.0005590 |
ENSG00000199157.1 | 22.05865 | -2.0649738 | 0.5161413 | -4.000792 | 0.0000631 | 0.0088225 |
ENSG00000207595.1 | 7570.74021 | 0.9544503 | 0.2425789 | 3.934598 | 0.0000833 | 0.0093170 |
ENSG00000199177.1 | 16601.35509 | 0.9993812 | 0.2621493 | 3.812259 | 0.0001377 | 0.0128293 |
ENSG00000202569.2 | 10823.70658 | -0.9710050 | 0.2784574 | -3.487087 | 0.0004883 | 0.0389953 |
ENSG00000207947.1 | 39369.13777 | 0.4604510 | 0.1362776 | 3.378773 | 0.0007281 | 0.0508760 |
ENSG00000215938.1 | 34.96205 | -1.6676766 | 0.4993528 | -3.339676 | 0.0008388 | 0.0520964 |
ENSG00000208035.1 | 182667.75439 | 1.1575924 | 0.3544068 | 3.266282 | 0.0010897 | 0.0609141 |
ENSG00000201943.1 | 42.93282 | -1.1252750 | 0.3643687 | -3.088287 | 0.0020131 | 0.0991240 |
ENSG00000216009.1 | 865.66647 | -0.7038392 | 0.2291310 | -3.071777 | 0.0021279 | 0.0991240 |
ENSG00000211997.1 | 5393.96134 | 0.5880439 | 0.1953641 | 3.009990 | 0.0026126 | 0.1123404 |
ENSG00000207584.1 | 3061.07930 | 0.3739365 | 0.1304552 | 2.866398 | 0.0041517 | 0.1657724 |
ENSG00000207759.1 | 2503.74820 | 0.5475869 | 0.1989519 | 2.752359 | 0.0059168 | 0.2204983 |
ENSG00000207548.1 | 24.61279 | 1.1562851 | 0.4404271 | 2.625373 | 0.0086554 | 0.2655144 |
ENSG00000207944.1 | 3589.42379 | 0.3380020 | 0.1302380 | 2.595264 | 0.0094518 | 0.2655144 |
ENSG00000207975.1 | 176.52056 | 0.4530077 | 0.1754826 | 2.581497 | 0.0098373 | 0.2655144 |
ENSG00000201143.1 | 41.29776 | -0.9828034 | 0.3824975 | -2.569437 | 0.0101864 | 0.2655144 |
ENSG00000208008.1 | 80986.50162 | 0.4585598 | 0.1787005 | 2.566080 | 0.0102855 | 0.2655144 |
mir_up <- rownames(subset(dge_mir,padj<0.05 & log2FoldChange >0 ))
mir_up
## [1] "ENSG00000221430.1" "ENSG00000207595.1" "ENSG00000199177.1"
# Look up the annotation rows for the up-regulated miRNAs and lower-case the
# gene names (e.g. "MIR31" -> "mir31") for the miRTarBase name conversion.
# Match on the unversioned stable ID: a versioned match drops any gene whose
# version suffix differs between the count table and the annotation release.
mir_up <- gt[which(gt$Gene.stable.ID %in% sub("\\.[0-9]+$", "", mir_up)), ]
mir_up$Gene.name <- tolower(mir_up$Gene.name)
mir_up
## Gene.stable.ID Gene.stable.ID.version Gene.name
## 20177 ENSG00000199177 ENSG00000199177.1 mir31
## 20307 ENSG00000207595 ENSG00000207595.1 mir181a2
## 47185 ENSG00000221430 ENSG00000221430.1 mir1294
mir_dn <- rownames(subset(dge_mir,padj<0.05 & log2FoldChange <0 ))
mir_dn
## [1] "ENSG00000207635.1" "ENSG00000215991.1" "ENSG00000199157.1"
## [4] "ENSG00000202569.2"
# Look up the annotation rows for the down-regulated miRNAs.
# Match on the unversioned stable ID. With a versioned match only two of the
# four significant IDs found an annotation row, presumably because the version
# suffix in the count table differs from the one in the annotation release.
mir_dn <- gt[which(gt$Gene.stable.ID %in% sub("\\.[0-9]+$", "", mir_dn)), ]
mir_dn
## Gene.stable.ID Gene.stable.ID.version Gene.name
## 33471 ENSG00000207635 ENSG00000207635.1 MIR499A
## 49205 ENSG00000215991 ENSG00000215991.1 MIR208B
mir_dn$Gene.name <- tolower(mir_dn$Gene.name)
mir_dn
## Gene.stable.ID Gene.stable.ID.version Gene.name
## 33471 ENSG00000207635 ENSG00000207635.1 mir499a
## 49205 ENSG00000215991 ENSG00000215991.1 mir208b
# Sample sheet for the mRNA counts, built the same way as for the miRNA data:
# row names are the sample names and trt is TRUE when the name contains "TGF".
ss <- data.frame(
  samplename = colnames(df_mrna_f),
  row.names = colnames(df_mrna_f)
)
ss$trt <- factor(grepl("TGF", ss$samplename))

# DESeq2 dataset with treatment as the only design term, then fit the model.
dds <- DESeqDataSetFromMatrix(
  countData = df_mrna_f,
  colData = ss,
  design = ~ trt
)
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Differential expression statistics for the mRNA contrast.
z <- results(res)
# Variance-stabilised values, using the design (blind = FALSE) and 200 rows
# for the fit.
vsd <- vst(dds, blind = FALSE, nsub = 200)
# One row per gene: statistics followed by the per-sample stabilised values.
zz <- cbind(as.data.frame(z), assay(vsd))
# Most significant first.
dge_mrna <- as.data.frame(zz[order(zz$pvalue), ])
# Show the statistics columns of the top 20 rows.
kable_paper(
  kbl(
    dge_mrna[1:20, 1:6],
    caption = "Top mRNA gene expression differences between ctrl and TGFB1 treated cells"
  ),
  "hover",
  full_width = FALSE
)
baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
---|---|---|---|---|---|---|
ENSG00000175899 | 693.18892 | -3.0227028 | 0.3819312 | -7.914259 | 0.00e+00 | 0.0000000 |
ENSG00000130203 | 23.58394 | -2.5336090 | 0.3990153 | -6.349654 | 0.00e+00 | 0.0000019 |
ENSG00000140022 | 110.23686 | -1.6602078 | 0.2730986 | -6.079153 | 0.00e+00 | 0.0000071 |
ENSG00000162849 | 296.52318 | 0.8694710 | 0.1496897 | 5.808489 | 0.00e+00 | 0.0000278 |
ENSG00000155792 | 48.35853 | -1.3544237 | 0.2396370 | -5.651982 | 0.00e+00 | 0.0000559 |
ENSG00000172005 | 27.16968 | -2.5036791 | 0.4766035 | -5.253170 | 1.00e-07 | 0.0004393 |
ENSG00000183230 | 28.83016 | -2.9147741 | 0.5615835 | -5.190277 | 2.00e-07 | 0.0005288 |
ENSG00000025039 | 192.64840 | -2.1110174 | 0.4248466 | -4.968893 | 7.00e-07 | 0.0013385 |
ENSG00000203805 | 19.24304 | 1.6474490 | 0.3317431 | 4.966038 | 7.00e-07 | 0.0013385 |
ENSG00000177354 | 168.52007 | -1.6862593 | 0.3491360 | -4.829807 | 1.40e-06 | 0.0024093 |
ENSG00000102678 | 51.26376 | -2.1852803 | 0.4569946 | -4.781852 | 1.70e-06 | 0.0027836 |
ENSG00000166091 | 16.35597 | -2.5529347 | 0.5438487 | -4.694200 | 2.70e-06 | 0.0039320 |
ENSG00000155011 | 37.71857 | 2.5664640 | 0.5656626 | 4.537093 | 5.70e-06 | 0.0077344 |
ENSG00000149256 | 539.39085 | 0.7748345 | 0.1732289 | 4.472894 | 7.70e-06 | 0.0090408 |
ENSG00000120820 | 312.10446 | 0.7915045 | 0.1770330 | 4.470944 | 7.80e-06 | 0.0090408 |
ENSG00000265727 | 260.91927 | 0.5588482 | 0.1258137 | 4.441871 | 8.90e-06 | 0.0090408 |
ENSG00000197852 | 773.84275 | -0.5856260 | 0.1319888 | -4.436937 | 9.10e-06 | 0.0090408 |
ENSG00000143028 | 157.63287 | -1.8189523 | 0.4101872 | -4.434444 | 9.20e-06 | 0.0090408 |
ENSG00000130766 | 79.01247 | 0.9431118 | 0.2132849 | 4.421840 | 9.80e-06 | 0.0090802 |
ENSG00000167470 | 278.70379 | -0.5306694 | 0.1212890 | -4.375247 | 1.21e-05 | 0.0106100 |
mrna_up <- rownames(subset(dge_mrna,padj<0.05 & log2FoldChange >0 ))
mrna_up
## [1] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" "ENSG00000149256"
## [5] "ENSG00000120820" "ENSG00000265727" "ENSG00000130766" "ENSG00000254951"
## [9] "ENSG00000028277" "ENSG00000286458" "ENSG00000183049" "ENSG00000125430"
## [13] "ENSG00000128965" "ENSG00000157680" "ENSG00000250384" "ENSG00000243753"
## [17] "ENSG00000142871" "ENSG00000144802" "ENSG00000276012" "ENSG00000137809"
## [21] "ENSG00000278880"
# Translate the up-regulated Ensembl gene IDs into gene names: take the names
# of the matching annotation rows, de-duplicate, and drop empty names.
mrna_up <- unique(gt[which(gt$Gene.stable.ID %in% mrna_up), ]$Gene.name)
mrna_up <- mrna_up[mrna_up != ""]
mrna_up
## [1] "RN7SL648P" "DKK2" "CHAC1" "ITGA11" "TENM4" "PLPP4"
## [7] "UBE2CP3" "SESN2" "DGKI" "HS3ST3B1" "CAMK1D" "KIF26B"
## [13] "HLA-L" "GLT8D2" "CCN1" "POU2F2" "NFKBIZ"
mrna_dn <- rownames(subset(dge_mrna,padj<0.05 & log2FoldChange <0 ))
mrna_dn
## [1] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" "ENSG00000155792"
## [5] "ENSG00000172005" "ENSG00000183230" "ENSG00000025039" "ENSG00000177354"
## [9] "ENSG00000102678" "ENSG00000166091" "ENSG00000197852" "ENSG00000143028"
## [13] "ENSG00000167470" "ENSG00000072954" "ENSG00000183520" "ENSG00000122574"
## [17] "ENSG00000077274" "ENSG00000101638" "ENSG00000168556" "ENSG00000197616"
## [21] "ENSG00000120669" "ENSG00000147255" "ENSG00000218416" "ENSG00000077009"
## [25] "ENSG00000100979" "ENSG00000170290" "ENSG00000163249" "ENSG00000228672"
## [29] "ENSG00000105088" "ENSG00000004848"
# Translate the down-regulated Ensembl gene IDs into gene names: take the
# names of the matching annotation rows, de-duplicate, and drop empty names.
mrna_dn <- unique(gt[which(gt$Gene.stable.ID %in% mrna_dn), ]$Gene.name)
mrna_dn <- mrna_dn[mrna_dn != ""]
mrna_dn
## [1] "SOHLH2" "IGSF1" "ST8SIA5" "PLTP" "CMTM5" "ARX"
## [7] "CAPN6" "RRAGD" "SLN" "ING2" "MYH6" "FGF9"
## [13] "DEPTOR" "C10orf71" "UTP11" "A2M" "MAL" "CTNNA3"
## [19] "PROB1" "STON2" "WIPF3" "CCNYL1" "APOE" "OLFM2"
## [25] "MIDN" "NMRK2" "TMEM38A" "INKA2" "SYPL2"
# miRTarBase human miRNA-target table (the 9.0 download is left commented
# out; the spreadsheet is expected in the working directory).
#options(timeout=1000)
#download.file("https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx",
#destfile="hsa_MTI.xlsx",)
mirtarbase <- read_excel("hsa_MTI.xlsx")

# Targets of the up-regulated miRNAs.
# Turn the lower-cased gene names into miRTarBase-style name prefixes:
# "mir31" -> "hsa-miR-31-"; "a2" is collapsed to "a" so that "mir181a2"
# becomes "hsa-miR-181a-".
mir_up_gn <- mir_up$Gene.name
mir_up_gn2 <- gsub("mir", "hsa-miR-", mir_up_gn)
mir_up_gn2 <- gsub("$", "-", mir_up_gn2)
mir_up_gn2 <- gsub("a2", "a", mir_up_gn2)

# For each prefix collect the unique target genes of (a) every miRTarBase
# entry whose miRNA name contains the prefix, i.e. names that continue after
# the trailing "-", and (b) entries whose name equals the prefix without the
# trailing "-".
up_targets <- lapply(mir_up_gn2, function(prefix) {
  prefix_rows <- grep(prefix, mirtarbase$miRNA)
  prefix_targets <- unique(unlist(as.vector(
    mirtarbase[prefix_rows, "Target Gene"]
  )))
  bare_name <- gsub("-$", "", prefix)
  bare_rows <- which(mirtarbase$miRNA %in% bare_name)
  bare_targets <- unique(unlist(as.vector(
    mirtarbase[bare_rows, "Target Gene"]
  )))
  unique(c(prefix_targets, bare_targets))
})
# Number of targets found per miRNA.
lapply(up_targets, length)
## [[1]]
## [1] 314
##
## [[2]]
## [1] 679
##
## [[3]]
## [1] 115
up_targets <- unique(unlist(up_targets))
up_targets
## [1] "RHOA" "PPP2R2A" "LATS2" "FOXP3"
## [5] "SELE" "CASR" "YY1" "RET"
## [9] "NUMB" "NFAT5" "KLF13" "JAZF1"
## [13] "HOXC13" "ETS1" "ITGA5" "MPRIP"
## [17] "MMP16" "RDX" "CXCL12" "ARPC5"
## [21] "FZD3" "DMD" "TIAM1" "ICAM1"
## [25] "DKK1" "DACT3" "WASF3" "HIF1AN"
## [29] "SATB2" "PRKCE" "RASA1" "STK40"
## [33] "MCM2" "CDK1" "CREG1" "MLH1"
## [37] "MET" "INHBA" "RAN" "ACADVL"
## [41] "XK" "CIPC" "PPP2R5C" "MCM4"
## [45] "RICTOR" "C1orf198" "DBR1" "PPIA"
## [49] "ARL6IP5" "NUS1" "TNKS2" "SLC9A6"
## [53] "PPIL2" "ZC3H18" "RPS7" "MED12"
## [57] "CYP27B1" "HIST1H2BC" "PTPRJ" "AGO1"
## [61] "DNAAF5" "HIST1H2BK" "RPA1" "TXNDC5"
## [65] "AKAP8L" "FLNA" "ATP2A2" "NDFIP2"
## [69] "TRIB3" "NUP188" "RPS4Y1" "GLI2"
## [73] "RPL37A" "EXOSC5" "DDX19A" "UBA6"
## [77] "NOL9" "DNAJC5" "RETREG3" "PEX19"
## [81] "PPP4R3B" "TRRAP" "RANGAP1" "TNRC6B"
## [85] "NOP56" "PCSK1N" "ZNF275" "C2CD5"
## [89] "NFATC2IP" "HIST1H2BJ" "SRRM2" "GYG1"
## [93] "ZC3H12C" "LRRC59" "GHITM" "EDC3"
## [97] "MYO1D" "STOML2" "EXOC6" "HOXA7"
## [101] "SFXN1" "ILF2" "SRC" "MAP4K4"
## [105] "RAB27A" "TBXA2R" "EMSY" "SESN2"
## [109] "CRKL" "SP1" "ARID1A" "PLEKHB2"
## [113] "TMEM109" "ZNF805" "CHMP4B" "BACH1"
## [117] "SNRNP27" "NCBP2" "FOXC1" "YWHAE"
## [121] "BAHD1" "ABCB9" "RHOBTB1" "SP7"
## [125] "NUDT3" "C15orf52" "SLC16A9" "FOXD4"
## [129] "FOXD4L5" "FOXD4L4" "FOXD4L1" "MICA"
## [133] "PAPOLG" "PARP11" "ZDHHC6" "RBM38"
## [137] "HSPA6" "SPRED1" "SMG1" "SDC4"
## [141] "RNF111" "PHF12" "PDE4D" "TSPAN1"
## [145] "FBXL5" "DPM2" "ATP5A1" "ARF1"
## [149] "HOXD3" "MZT1" "MXRA7" "REXO2"
## [153] "POLR3E" "IL5RA" "RPL35A" "NF2"
## [157] "ZNF485" "INTU" "TXNIP" "CDC42SE1"
## [161] "KLHL15" "FRS2" "DCK" "RSRC1"
## [165] "ZNF587" "ZNF641" "SLC38A1" "BCAS4"
## [169] "PARP1" "FRK" "KIAA0391" "SFT2D2"
## [173] "FBXL7" "AR" "CNBP" "SLC30A5"
## [177] "ZNF71" "PPIC" "GUF1" "ZNF460"
## [181] "TOR1AIP1" "ZNF331" "C9orf64" "C19orf12"
## [185] "ECHDC1" "EPB41L4B" "TRAF1" "ZNF678"
## [189] "CCDC127" "ZIK1" "SYDE2" "SUPT7L"
## [193] "PYURF" "PUM2" "PTPDC1" "NUP43"
## [197] "LAPTM4A" "RAB18" "CBX3" "FZD1"
## [201] "GTF2E1" "CCNT1" "SPRTN" "CRK"
## [205] "SLC18B1" "RPL7L1" "PRPF38A" "QSER1"
## [209] "CKAP2L" "PAPLN" "GABRB1" "CDH13"
## [213] "SRPX2" "BLOC1S4" "LILRA2" "TET3"
## [217] "LIPG" "IPP" "PPM1L" "RPL27A"
## [221] "RPL12" "AGO2" "XPO6" "ACBD7"
## [225] "JARID2" "AKNA" "AFF1" "PNPT1"
## [229] "MAFF" "NLGN1" "PAX6" "DPYSL5"
## [233] "FAM193A" "MCMBP" "E2F2" "NECTIN4"
## [237] "XRCC5" "C1QTNF9" "DOCK1" "SMAD4"
## [241] "SGPP2" "SOX4" "BAP1" "IL25"
## [245] "SDHA" "SPRED2" "SPRY1" "SPRY3"
## [249] "SPRY4" "GNA13" "FOXO3" "STMN1"
## [253] "DICER1" "SLC1A2" "BRWD1" "LMNB2"
## [257] "ANKRD52" "AP2B1" "CASKIN2" "EFNB1"
## [261] "KLHDC10" "KMT2B" "MTSS1L" "NFE2L1"
## [265] "NFIC" "NPM1" "PHLDA1" "PRRC2B"
## [269] "RAB1B" "RAB5B" "SERTAD2" "SFN"
## [273] "VPS26B" "DNAJB4" "ZNF614" "FOXJ3"
## [277] "GIGYF1" "MAGEA12" "MAGEA3" "MAGEA6"
## [281] "OTUD4" "PLAGL2" "RASA4" "TAPBP"
## [285] "TMEM9" "ZBTB39" "NUCB1" "C17orf99"
## [289] "TMEM182" "KIAA1737" "HEATR2" "FAM134C"
## [293] "SMEK2" "C11orf30" "PVRL4" "HIF1A"
## [297] "GAS1" "App" "BACE1" "BMP2"
## [301] "RUNX2" "PTEN" "ACOX1" "MMP3"
## [305] "SLC20A1" "AGO3" "RASA2" "CELSR2"
## [309] "CGNL1" "TINCR" "WLS" "C14orf101"
## [313] "NXF1" "U2SURP" "NLK" "GATA6"
## [317] "CDX2" "PLAG1" "BCL2" "PROX1"
## [321] "KAT2B" "CDKN1B" "ZNF763" "DDIT4"
## [325] "ATM" "HIPK2" "BCL2L11" "HRAS"
## [329] "RNF2" "RALA" "SIRT1" "PRAP1"
## [333] "DUSP6" "PTPN11" "DUSP5" "PTPN22"
## [337] "FOS" "MTMR3" "KLF6" "MCL1"
## [341] "XIAP" "GPR78" "NANOG" "LFNG"
## [345] "LRRC17" "CHRFAM7A" "CD46" "RASSF6"
## [349] "FXYD6" "KCTD3" "TSHR" "ZNF558"
## [353] "C8A" "ARL6IP6" "ZNF426" "ATF7IP2"
## [357] "PRR4" "TCF21" "PHOX2A" "PLPBP"
## [361] "HACD3" "GSTM2" "FSIP1" "KBTBD3"
## [365] "PTPRZ1" "WNT3A" "TUSC1" "LRRN3"
## [369] "TMEM45A" "ARF6" "C1orf109" "TAF15"
## [373] "PLXDC2" "NMRK2" "WNT2" "ATG10"
## [377] "PRDX3" "ZNF652" "RTEL1-TNFRSF6B" "GCNT1"
## [381] "PCDHB8" "ENAH" "ZNF25" "S100A1"
## [385] "PLA2G4C" "NOL4" "SIX6" "FKBP10"
## [389] "SMCHD1" "OR11A1" "INCENP" "LPGAT1"
## [393] "CLUAP1" "LYSMD3" "CCDC6" "BAG2"
## [397] "GPR83" "PTGS2" "ANKRD13C" "RLF"
## [401] "FBXO28" "ZNF350" "TIAL1" "RNF34"
## [405] "LCLAT1" "JCAD" "ZNF35" "PITPNB"
## [409] "SCD" "H3F3B" "GATAD2B" "LGALSL"
## [413] "TGIF2" "MOB1A" "SLC35B4" "FAM160A2"
## [417] "NUP58" "GPRIN3" "H1F0" "ARHGAP12"
## [421] "SPRY2" "TGFBR3" "TMED4" "MAP2K1"
## [425] "PUM1" "TRIM2" "FBXO33" "NRP1"
## [429] "FAM47B" "CCNG1" "BRMS1L" "OTUD1"
## [433] "ATP6V0E1" "WNT16" "CST5" "SH3BGRL"
## [437] "GPR137B" "OFCC1" "IQCG" "NKX3-2"
## [441] "OTX2" "ROPN1L" "TMEM14A" "TAF2"
## [445] "IDS" "FRA10AC1" "COL27A1" "EPHA5"
## [449] "DCST1" "ZNF562" "EYA4" "CHL1"
## [453] "TAAR6" "SLCO2A1" "HMGB2" "HERC3"
## [457] "BTBD3" "SRPK2" "DNAJC7" "ANKRD1"
## [461] "CFI" "MRPS14" "HEY2" "MTMR12"
## [465] "ACOT12" "PCLAF" "USP28" "AMMECR1"
## [469] "BPGM" "DSCR8" "UGT3A1" "HSD17B3"
## [473] "GADD45G" "FBXO34" "KLRC4" "MOB3B"
## [477] "FKBP7" "TBX4" "TMPRSS11A" "SNAI2"
## [481] "SLC7A11" "NUDT12" "COPS2" "ZNF12"
## [485] "PRLR" "PLCL2" "ZNF594" "METAP1"
## [489] "HSPA13" "NR6A1" "YOD1" "SLC37A3"
## [493] "FBXO11" "ZNF445" "TM9SF3" "ATP8A1"
## [497] "TMEM64" "MOB1B" "GNAI3" "TAB2"
## [501] "SRSF7" "DDX3X" "KRAS" "LBR"
## [505] "KLHL42" "TMEM132B" "AFTPH" "ZNF148"
## [509] "NOTCH2" "NFYB" "NOTCH1" "HOOK3"
## [513] "SIK2" "FAM222B" "RPS8" "STAG2"
## [517] "PFKFB2" "ZEB2" "MAZ" "RPL14"
## [521] "KCTD2" "UBA2" "DDX27" "FAT1"
## [525] "HDAC6" "TMEM192" "LAMA3" "HUWE1"
## [529] "ND2" "HNRNPAB" "OCA2" "AP1M1"
## [533] "UCHL1" "PGD" "ZFP36L2" "AKAP12"
## [537] "PABPC1" "GANAB" "PHPT1" "H2AFY"
## [541] "TEAD4" "BRCA1" "MTCL1" "KIAA0100"
## [545] "PPP1R9A" "MGAT5" "TNIP1" "PBX3"
## [549] "TIMP1" "PGR" "COL16A1" "PPP3CA"
## [553] "ATG5" "CD4" "TGFBRAP1" "TGFBR1"
## [557] "TNFRSF11B" "PCDHAC1" "PCDHAC2" "PCDHA1"
## [561] "PCDHA10" "PCDHA11" "PCDHA12" "PCDHA13"
## [565] "PCDHA2" "PCDHA3" "PCDHA4" "PCDHA5"
## [569] "PCDHA6" "PCDHA7" "PCDHA8" "PDGFRA"
## [573] "BMP3" "SOX5" "MAP3K3" "TAB3"
## [577] "PDAP1" "MAPK1IP1L" "BMPR2" "SMAD2"
## [581] "MADD" "ACAN" "MAP3K10" "PCDHB6"
## [585] "MMP14" "E2F5" "RAP1B" "C12orf29"
## [589] "NHLRC3" "CCNK" "PAPD5" "CARM1"
## [593] "LDLR" "C2orf69" "44806" "TNPO1"
## [597] "PTBP3" "CUL5" "PRRC2C" "PEBP1"
## [601] "PNRC2" "IPO5" "RCOR1" "KIF2C"
## [605] "CHD9" "LRRC8D" "ZNF136" "KIF3B"
## [609] "TTPAL" "NCOA3" "FNDC3B" "PRKCD"
## [613] "SLC25A37" "CHCHD7" "ZBTB33" "GRK2"
## [617] "ELK4" "ZNF268" "ZFAND6" "LMAN1"
## [621] "MRPL34" "PDIA6" "CHMP2B" "DRAM1"
## [625] "TMEM94" "ZNF440" "ZNF439" "NSD2"
## [629] "KIAA1551" "PMAIP1" "FKBP1A" "HIGD2A"
## [633] "ID4" "DAZAP2" "ZFP36L1" "SLC35G2"
## [637] "MTX3" "ABCG2" "PHACTR2" "SASH1"
## [641] "MOSPD1" "SRGAP1" "UBL3" "PHACTR4"
## [645] "EREG" "TERT" "RGS5" "IFNG"
## [649] "AHR" "STAT3" "WIF1" "TWIST1"
## [653] "MAPK1" "CDKL2" "DCAF4" "RSF1"
## [657] "FAM96A" "CAMK4" "VPS4A" "VMP1"
## [661] "UBN2" "SLC25A25" "RPRD2" "PPP2R5E"
## [665] "KLHDC3" "MIGA2" "CBX4" "CAPRIN2"
## [669] "ARL5C" "ZFAND2B" "TAOK1" "FKBP1C"
## [673] "ZNF83" "HSPA1B" "ZNF669" "ZNF788"
## [677] "ZNF781" "ZNF667" "ZNF487" "DGS2"
## [681] "EPS8" "MAN1A2" "FKBP14" "ZNF791"
## [685] "ZFP69B" "NPM3" "TCF23" "HSP90B1"
## [689] "CTC1" "GJB7" "ZNF138" "WT1"
## [693] "ZDHHC15" "XPNPEP3" "MTPAP" "NOX5"
## [697] "ZNF699" "TXLNG" "TRPC5" "TBL1XR1"
## [701] "SEC61A2" "PHOX2B" "PHC3" "MKL2"
## [705] "HEPHL1" "COG5" "ATP8B1" "ANKRD50"
## [709] "PLPP3" "ZNF844" "ZNF780B" "NCOA7"
## [713] "ZNF266" "KRBOX4" "SCAMP2" "TMCC1"
## [717] "FOXL1" "ZNF107" "SPIRE1" "SMU1"
## [721] "RNF6" "HMGA2" "CSNK1A1" "ARSJ"
## [725] "ARPP19" "ZNF415" "ZNF616" "SUV39H2"
## [729] "FAM3C" "ZNF846" "ZNF23" "TUBB2A"
## [733] "MIER3" "GTPBP3" "IRAK1BP1" "SURF6"
## [737] "THYN1" "FADS6" "RABGEF1" "PPP3CB"
## [741] "MYO1C" "DMRT2" "IL17REL" "SLC35G3"
## [745] "GRAMD1B" "UNC5B" "REPS1" "NOS1AP"
## [749] "ZC3HAV1L" "RPS6KA3" "AP5M1" "TNFRSF13B"
## [753] "ATP1A3" "ZNF829" "WDR72" "HFM1"
## [757] "SPTLC3" "ARFIP2" "SCN8A" "SMCR8"
## [761] "ATXN7" "TLDC1" "RBM25" "TRAFD1"
## [765] "ZADH2" "WDR13" "PRR11" "GJD3"
## [769] "AP3M2" "ZNF556" "ZNF852" "MUC20"
## [773] "PRRG4" "TIMM10B" "ZNF253" "DNAJC3"
## [777] "TK1" "WNK1" "TOPBP1" "TEF"
## [781] "RNMT" "ADAM17" "RSBN1L" "DYNLL2"
## [785] "CFHR3" "KLHL24" "PDK3" "ZNF419"
## [789] "RNF187" "ZNF597" "CENPO" "FAM13A"
## [793] "LAIR1" "RLIM" "CYP1B1" "MAPK14"
## [797] "VCAM1" "PNMA8B" "LAPTM4B" "PGK1"
## [801] "MTHFD1L" "MYLK" "CHST4" "ZNF490"
## [805] "ZNF449" "PNKD" "EPRS" "TEAD1"
## [809] "KRTAP21-2" "MSX2" "EN2" "ZNF664"
## [813] "ZNF121" "ZBTB4" "ULK1" "TUBB"
## [817] "TSG101" "TMF1" "TMEM30A" "TFRC"
## [821] "TBC1D7" "TBC1D13" "STX2" "STAG1"
## [825] "SSX2IP" "SRGN" "SORT1" "SLC7A1"
## [829] "SLC38A2" "SLC19A2" "SLC10A7" "SIPA1L1"
## [833] "SHOC2" "RP2" "RHOG" "RGS16"
## [837] "RAB2B" "PURB" "PGAP1" "PER2"
## [841] "PBRM1" "OSBPL3" "NMT2" "FAM192A"
## [845] "NIN" "NCAPG" "NAA50" "MTUS1"
## [849] "MPP5" "KMT2E" "LPCAT1" "LONRF1"
## [853] "KPNA1" "EFCAB14" "WASHC5" "KDM5A"
## [857] "INO80D" "IL1A" "HIST1H3D" "HECW2"
## [861] "GOT1" "GOLGA8B" "GOLGA1" "GNS"
## [865] "GK5" "G3BP2" "FSD1L" "CCNQ"
## [869] "EPS15" "EED" "DYNC1LI2" "DDX52"
## [873] "CPOX" "CPEB4" "CLCC1" "CCL22"
## [877] "CCDC88C" "ELMSAN1" "GSKIP" "BLOC1S2"
## [881] "BAZ2A" "ATP2B1" "ATG2B" "ASB1"
## [885] "ARRDC3" "ARRB2" "APOL6" "ALDH9A1"
## [889] "AFF4" "ACYP1" "CTDSPL" "TUSC3"
## [893] "MEG3" "PHLPP2" "GPD1L" "ALDH1A1"
## [897] "CDKN1A" "BAX" "RASSF1" "INPP4B"
## [901] "CTNNB1" "TCF4" "VEGFA" "PRKN"
## [905] "EGR1" "NRAS" "RUNX1" "CEBPA"
## [909] "SAMHD1" "REPIN1" "SLC7A5" "ZC3H7B"
## [913] "ARHGDIA" "MARK2" "ADCY9" "DCBLD2"
## [917] "RPL13A" "FAM27E2" "NSG2" "OCIAD2"
## [921] "PSD" "SUFU" "CADPS2" "PRAMEF11"
## [925] "PRAMEF15" "PRAMEF26" "PRAMEF4" "PRAMEF9"
## [929] "ATXN2" "BASP1" "HNRNPH1" "WDFY3"
## [933] "BDNF" "HOXA11" "ESR1" "PROSC"
## [937] "PTPLAD1" "TAF6L" "KIAA1462" "NUPL1"
## [941] "TMEM257" "KIAA0101" "SOGA2" "C12ORF29"
## [945] "C2ORF69" "ADRBK1" "KIAA0195" "WHSC1"
## [949] "FAM73B" "ZNF487P" "PPAP2B" "PNMAL2"
## [953] "KIAA0196" "FAM58A" "OCLN" "TFAM"
## [957] "SFRP4" "EGFR" "ZHX2" "POSTN"
## [961] "CDH11" "SERPINB1" "RB1" "CCND1"
## [965] "CARD11" "LINC00703" "BMP10" "AGO4"
## [969] "DNMT3A" "CBLB" "CCAT1" "MECP2"
## [973] "CELSR3" "ITGA6" "LGI2" "IGF1R"
## [977] "SEC24A" "MYC" "CALU" "EDN1"
## [981] "PEX11B" "RRM2" "SGCD" "ZNF774"
## [985] "OLR1" "NUCB2" "RECK" "AGTRAP"
## [989] "LYPLA2" "GLO1" "NECAB3" "ZNRF3"
## [993] "ZNF385A" "YWHAZ" "XPOT" "SP2"
## [997] "SMARCD1" "SLC5A6" "RNF126" "PANK3"
## [1001] "KMT2D" "KLHDC8B" "KIF3A" "KIAA0930"
## [1005] "KCTD21" "IL2RB" "GM2A" "FAM83G"
## [1009] "EFHD2" "E2F3" "CFL2" "BSCL2"
## [1013] "ATG9A" "AREL1" "ABHD2" "ABHD14B"
## [1017] "GNB1L" "FAM71B" "SNX19" "LASP1"
## [1021] "LAMA5" "HSDL1" "NAV1" "RAB11FIP4"
## [1025] "PLCG2" "RBFOX2" "ZNF644" "DIABLO"
## [1029] "C12orf49" "DIRAS2" "CTSB" "DVL3"
## [1033] "LEFTY1" "NUP155" "CBX5" "SURF4"
## [1037] "NCS1" "TTLL7" "EIF2S3" "SNTN"
## [1041] "RAX" "ARL5A" "C10orf111" "ZNF277"
## [1045] "PTPN3" "IPO9" "KCNMB1" "ASGR2"
## [1049] "CTDSP1" "CXCL8" "FGF19" "KDM2A"
## [1053] "MRPL44" "MSX1" "MYH9" "P4HB"
## [1057] "POLL" "PPIB" "SRF" "SUSD1"
## [1061] "TOR2A" "TRAPPC10" "CLDN12" "EIF5AL1"
## [1065] "ESCO1" "HARBI1" "IFITM3" "MEAF6"
## [1069] "NR2F6" "ZBTB34" "REEP3" "SCYL3"
## [1073] "ENO1"
length(up_targets)
## [1] 1073
# Targets of the down-regulated miRNAs.
# Turn the lower-cased gene names into miRTarBase-style name prefixes,
# e.g. "mir499a" -> "hsa-miR-499a-".
mir_dn_gn <- mir_dn$Gene.name
mir_dn_gn2 <- gsub("mir", "hsa-miR-", mir_dn_gn)
mir_dn_gn2 <- gsub("$", "-", mir_dn_gn2)

# For each prefix collect the unique target genes of (a) every miRTarBase
# entry whose miRNA name contains the prefix, i.e. names that continue after
# the trailing "-", and (b) entries whose name equals the prefix without the
# trailing "-".
dn_targets <- lapply(mir_dn_gn2, function(prefix) {
  prefix_rows <- grep(prefix, mirtarbase$miRNA)
  prefix_targets <- unique(unlist(as.vector(
    mirtarbase[prefix_rows, "Target Gene"]
  )))
  bare_name <- gsub("-$", "", prefix)
  bare_rows <- which(mirtarbase$miRNA %in% bare_name)
  bare_targets <- unique(unlist(as.vector(
    mirtarbase[bare_rows, "Target Gene"]
  )))
  unique(c(prefix_targets, bare_targets))
})
# Number of targets found per miRNA.
lapply(dn_targets, length)
## [[1]]
## [1] 139
##
## [[2]]
## [1] 169
dn_targets <- unique(unlist(dn_targets))
dn_targets
## [1] "SOX6" "FOXO4" "PDCD4" "ETS1" "PMAIP1" "TXNIP"
## [7] "ETNK1" "TGOLN2" "ADO" "NAPG" "IGFBP4" "MTDH"
## [13] "DR1" "ABLIM1" "IQGAP2" "MLANA" "METTL7A" "EPC2"
## [19] "CELF1" "STYX" "ZNF844" "ZNF460" "APOBEC3F" "VPS35"
## [25] "TSC22D3" "SSRP1" "SIKE1" "PTGFRN" "PAQR5" "LIN54"
## [31] "LIMA1" "LIFR" "TYRP1" "MYO1D" "ZSCAN16" "SUGT1"
## [37] "MAF" "SLC25A12" "SPTSSA" "POM121C" "KPNA5" "WDR76"
## [43] "UQCRB" "RAB5C" "KLHL15" "HIST1H3B" "EIF4A2" "HIST1H2AD"
## [49] "DCAF12L2" "C11orf54" "MC2R" "SRRM1" "PROSER2" "RPS4Y1"
## [55] "CD93" "INIP" "CAPRIN2" "ASAP3" "GDE1" "PPP6C"
## [61] "NAT8L" "AXIN2" "MDM2" "TRIM59" "YTHDF1" "NT5C3A"
## [67] "PGAM4" "FOXO1" "FIGN" "FEM1B" "GLO1" "QRFPR"
## [73] "UBN2" "UBE2V2" "SLC7A2" "PTMA" "GXYLT1" "EIF2AK4"
## [79] "FXYD6" "ACOT2" "BTLA" "CACNA1B" "ELK4" "ARHGAP12"
## [85] "TFAM" "MAPK1" "BMPR1A" "DGKG" "DNAH9" "BMP10"
## [91] "WWC2" "DNAH8" "UBN1" "EEF1E1" "TBC1D22A" "CFAP65"
## [97] "MAPK10" "NRIP1" "PRR13" "SRXN1" "UTP18" "TEF"
## [103] "PDS5A" "FGD4" "RTL8C" "PDE12" "NLN" "CSTF1"
## [109] "RARS2" "VAV3" "MEF2C" "CXCL8" "DAZAP2" "PLEKHG5"
## [115] "RNMT" "CDKN1A" "MAMLD1" "MDM4" "ASAP1" "BCL2L11"
## [121] "HNRNPC" "44626" "TMEM178B" "TRAF3IP1" "WASHC2C" "ZNF107"
## [127] "LEP" "PSMB9" "MAML3" "TTF2" "CCDC108" "FAM127A"
## [133] "KCNN3" "ALDH1L2" "CUL1" "KIF13A" "LUC7L" "NR5A1"
## [139] "ICE2" "FBXO28" "PI4K2B" "RNF11" "SKIL" "CBX3"
## [145] "RACGAP1" "TPBG" "SP8" "BRCC3" "PPP2R1B" "TUBB2A"
## [151] "CYP1B1" "FAM199X" "SNAP25" "RAPH1" "RPLP1" "GAS1"
## [157] "ANP32B" "RAP1B" "CEP97" "TBRG1" "SLC28A2" "HDHD2"
## [163] "STXBP5L" "SAR1B" "RNF125" "LRRC8A" "JMJD1C" "MYLIP"
## [169] "HMGN2" "ZFP36L1" "PRR23A" "KYAT3" "LNPK" "LRIG3"
## [175] "KPNA3" "DSN1" "ARSK" "SOCS5" "MAP3K5" "SINHCAF"
## [181] "CPEB2" "SYNCRIP" "SON" "BTG2" "KIF21A" "ATP8B4"
## [187] "UFM1" "SRRM4" "BUD23" "FOXP1" "KIAA1210" "UNKL"
## [193] "ATF6" "TPGS1" "ACO1" "RWDD1" "PCDH19" "SLC2A6"
## [199] "SLC35G1" "PAPSS2" "TCN2" "CCT4" "PCDHB11" "FSIP2"
## [205] "CARNMT1" "PKHD1" "PTAFR" "OSTM1" "CLEC17A" "RABGEF1"
## [211] "LRIG1" "FAM9C" "FBLIM1" "ZDHHC20" "UBFD1" "SUMO2"
## [217] "SIN3A" "SENP1" "S1PR1" "RYK" "PPT1" "PPM1F"
## [223] "OLFML2A" "HNRNPDL" "TSC22D2" "CAV1" "HEY2" "MTRF1L"
## [229] "PINX1" "MAK16" "HOMEZ" "ZSWIM9" "KCTD20" "CAPZA2"
## [235] "MIA3" "TRAPPC6B" "IBTK" "ADAMTS15" "CACNA1C" "CACNB2"
## [241] "Qk" "MSTN" "QKI" "AMBRA1" "TUBB" "GPATCH3"
## [247] "RBPJ" "SLIT3" "SPRY4" "ARHGEF39" "GNG5" "CDH2"
## [253] "HMGN3" "LCOR" "RAB11FIP4" "ZNF688" "NUDCD3" "CCBL2"
## [259] "KIAA1715" "FAM60A" "WBSCR22" "C9orf41" "C19orf68" "ADGRL2"
## [265] "STK26"
length(dn_targets)
## [1] 265
reactome <- gmtPathways("ReactomePathways_2023-11-28.gmt")
kegg <- gmtPathways("c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt")
summary(unlist(lapply(reactome,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 5.00 15.00 49.36 44.00 2607.00
summary(unlist(lapply(kegg,length)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 8.00 13.00 15.59 19.00 92.00
# Gene names of every Ensembl ID that passed the mRNA expression filter.
# Some of the Ensembl IDs don't have a symbol (~2000 of ~17000).
gl <- gt$Gene.name[gt$Gene.stable.ID %in% rownames(df_mrna_f)]
# Background list: the non-redundant, non-empty gene symbols.
bg <- unique(gl[which(gl != "")])
length(gl)
## [1] 17628
length(bg)
## [1] 15265
First try with whole genome as background.
# Whole-genome background: every distinct, non-empty gene name in the
# annotation table, regardless of whether the gene is expressed here.
wgbg <- unique(gt$Gene.name[which(gt$Gene.name != "")])
setdiff(up_targets,wgbg)
## [1] "HIST1H2BC" "HIST1H2BK" "HIST1H2BJ" "C15orf52" "ATP5A1" "KIAA0391"
## [7] "MTSS1L" "KIAA1737" "HEATR2" "FAM134C" "SMEK2" "C11orf30"
## [13] "PVRL4" "App" "C14orf101" "H3F3B" "FAM160A2" "H1F0"
## [19] "ND2" "H2AFY" "PAPD5" "44806" "KIAA1551" "FAM96A"
## [25] "ZNF788" "DGS2" "MKL2" "TLDC1" "EPRS" "FAM192A"
## [31] "MPP5" "HIST1H3D" "ELMSAN1" "FAM27E2" "PROSC" "PTPLAD1"
## [37] "KIAA1462" "NUPL1" "TMEM257" "KIAA0101" "SOGA2" "C12ORF29"
## [43] "C2ORF69" "ADRBK1" "KIAA0195" "WHSC1" "FAM73B" "ZNF487P"
## [49] "PPAP2B" "PNMAL2" "KIAA0196" "FAM58A" "CCAT1" "C12orf49"
## [55] "C10orf111"
# How many up_targets symbols are missing from wgbg, and what share of the
# list that represents.
n_up_missing_wg <- length(setdiff(up_targets, wgbg))
n_up_missing_wg
## [1] 55
n_up_missing_wg / length(up_targets)
## [1] 0.05125815
# Over-representation test of up_targets against the Reactome sets, using the
# whole-genome symbol list as the universe.
up_wg_res <- fora(
  pathways = reactome,
  genes = up_targets,
  universe = wgbg,
  minSize = 5
)
## Warning in fora(pathways = reactome, genes = up_targets, universe = wgbg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes) / (set size / universe size).
# Only the selected genes that are present in the universe are counted.
nbg <- length(wgbg)
nsel <- length(intersect(up_targets, wgbg))
up_wg_res$foldenrichment <- with(
  up_wg_res,
  (overlap / nsel) / (size / nbg)
)
head(up_wg_res)
## pathway pval padj overlap size
## 1: Signal Transduction 3.531590e-62 7.052585e-59 227 2588
## 2: Gene expression (Transcription) 1.549856e-56 1.547531e-53 165 1528
## 3: Generic Transcription Pathway 2.491551e-53 1.658542e-50 144 1233
## 4: RNA Polymerase II Transcription 1.912269e-52 9.547004e-50 150 1359
## 5: Disease 1.091849e-42 4.360845e-40 158 1791
## 6: Immune System 1.435853e-35 4.778998e-33 161 2118
## overlapGenes foldenrichment
## 1: ADAM17,ADCY9,AGO1,AGO2,AGO3,AGO4,... 3.416394
## 2: AFF4,AGO1,AGO2,AGO3,AGO4,AKAP8L,... 4.205978
## 3: AGO1,AGO2,AGO3,AGO4,AR,ARID1A,... 4.548894
## 4: AFF4,AGO1,AGO2,AGO3,AGO4,AR,... 4.299106
## 5: ACAN,ADAM17,ADCY9,AGTRAP,AP1M1,AP2B1,... 3.436116
## 6: ADAM17,AP1M1,AP2B1,AREL1,ARF1,ARPC5,... 2.960781
# Number of gene sets tested.
nrow(up_wg_res)
## [1] 1997
# Keep the sets with padj < 0.05 and fold enrichment > 2; report how many
# there are and store their names.
up_wg_sig <- subset(up_wg_res, padj < 0.05 & foldenrichment > 2)
nrow(up_wg_sig)
## [1] 661
up_wg_res_sets <- up_wg_sig$pathway
# Symbols in dn_targets that are absent from the whole-genome background.
dn_missing_wg <- setdiff(dn_targets, wgbg)
dn_missing_wg
## [1] "HIST1H3B" "HIST1H2AD" "44626" "CCDC108" "FAM127A" "Qk"
## [7] "CCBL2" "KIAA1715" "FAM60A" "WBSCR22" "C9orf41" "C19orf68"
length(dn_missing_wg)
## [1] 12
length(dn_missing_wg) / length(dn_targets)
## [1] 0.04528302
# Over-representation test of dn_targets against the Reactome sets, using the
# whole-genome symbol list as the universe.
dn_wg_res <- fora(
  pathways = reactome,
  genes = dn_targets,
  universe = wgbg,
  minSize = 5
)
## Warning in fora(pathways = reactome, genes = dn_targets, universe = wgbg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes) / (set size / universe size).
nbg <- length(wgbg)
nsel <- length(intersect(dn_targets, wgbg))
dn_wg_observed <- dn_wg_res$overlap / nsel
dn_wg_expected <- dn_wg_res$size / nbg
dn_wg_res$foldenrichment <- dn_wg_observed / dn_wg_expected
head(dn_wg_res)
## pathway pval padj overlap size
## 1: Metabolism of proteins 8.933815e-13 1.163718e-09 43 1934
## 2: Disease 1.301879e-12 1.163718e-09 41 1791
## 3: Signal Transduction 1.748199e-12 1.163718e-09 50 2588
## 4: Gene expression (Transcription) 1.615781e-11 8.066785e-09 36 1528
## 5: RNA Polymerase II Transcription 5.674276e-11 2.105760e-08 33 1359
## 6: Immune System 6.326771e-11 2.105760e-08 42 2118
## overlapGenes foldenrichment
## 1: ADAMTS15,ARSK,AXIN2,BRCC3,CAPZA2,CCT4,... 3.484541
## 2: ADAMTS15,BCL2L11,CAV1,CDKN1A,CUL1,CYP1B1,... 3.587748
## 3: ARHGAP12,ARHGEF39,AXIN2,BCL2L11,BMP10,BMPR1A,... 3.027885
## 4: BCL2L11,BTG2,CAV1,CBX3,CDKN1A,CSTF1,... 3.692434
## 5: BCL2L11,BTG2,CAV1,CBX3,CDKN1A,CSTF1,... 3.805644
## 6: ATP8B4,BCL2L11,BTLA,CAPZA2,CD93,CDKN1A,... 3.107828
# Number of gene sets tested.
nrow(dn_wg_res)
## [1] 1997
# Keep the sets with padj < 0.05 and fold enrichment > 2; report how many
# there are and store their names.
dn_wg_sig <- subset(dn_wg_res, padj < 0.05 & foldenrichment > 2)
nrow(dn_wg_sig)
## [1] 155
dn_wg_res_sets <- dn_wg_sig$pathway
Now try with the proper background, i.e. the gene symbols of the genes present in the mRNA dataset (bg).
# List the symbols in up_targets that are not in the background list bg.
setdiff(up_targets,bg)
## [1] "FOXP3" "SELE" "CASR" "RET"
## [5] "XK" "HIST1H2BC" "HIST1H2BK" "TXNDC5"
## [9] "PCSK1N" "HIST1H2BJ" "HOXA7" "TBXA2R"
## [13] "SP7" "NUDT3" "C15orf52" "FOXD4"
## [17] "FOXD4L5" "FOXD4L4" "FOXD4L1" "HSPA6"
## [21] "TSPAN1" "ATP5A1" "HOXD3" "IL5RA"
## [25] "KIAA0391" "PYURF" "GABRB1" "LILRA2"
## [29] "C1QTNF9" "IL25" "MTSS1L" "PRRC2B"
## [33] "SFN" "MAGEA12" "MAGEA3" "MAGEA6"
## [37] "C17orf99" "KIAA1737" "HEATR2" "FAM134C"
## [41] "SMEK2" "C11orf30" "PVRL4" "App"
## [45] "MMP3" "TINCR" "C14orf101" "CDX2"
## [49] "ZNF763" "PRAP1" "NANOG" "CHRFAM7A"
## [53] "RASSF6" "TSHR" "C8A" "TCF21"
## [57] "PHOX2A" "PTPRZ1" "WNT3A" "WNT2"
## [61] "RTEL1-TNFRSF6B" "S100A1" "NOL4" "SIX6"
## [65] "OR11A1" "H3F3B" "FAM160A2" "H1F0"
## [69] "FAM47B" "WNT16" "CST5" "OFCC1"
## [73] "OTX2" "ROPN1L" "DCST1" "CHL1"
## [77] "TAAR6" "SLCO2A1" "CFI" "HEY2"
## [81] "ACOT12" "DSCR8" "UGT3A1" "HSD17B3"
## [85] "KLRC4" "TBX4" "TMPRSS11A" "PRLR"
## [89] "MAZ" "ND2" "OCA2" "H2AFY"
## [93] "PGR" "PCDHAC1" "PCDHAC2" "PCDHA1"
## [97] "PCDHA10" "PCDHA11" "PCDHA12" "PCDHA13"
## [101] "PCDHA2" "PCDHA3" "PCDHA4" "PCDHA5"
## [105] "PCDHA6" "PCDHA7" "PCDHA8" "BMP3"
## [109] "PAPD5" "44806" "KIAA1551" "TERT"
## [113] "IFNG" "WIF1" "CDKL2" "FAM96A"
## [117] "ARL5C" "ZNF788" "DGS2" "TCF23"
## [121] "GJB7" "PHOX2B" "MKL2" "FADS6"
## [125] "DMRT2" "IL17REL" "SLC35G3" "TNFRSF13B"
## [129] "ATP1A3" "WDR72" "TLDC1" "GJD3"
## [133] "PRRG4" "CFHR3" "LAIR1" "PNMA8B"
## [137] "CHST4" "EPRS" "KRTAP21-2" "FAM192A"
## [141] "MPP5" "IL1A" "HIST1H3D" "CCL22"
## [145] "ELMSAN1" "CEBPA" "FAM27E2" "NSG2"
## [149] "PSD" "PRAMEF11" "PRAMEF15" "PRAMEF26"
## [153] "PRAMEF4" "PRAMEF9" "PROSC" "PTPLAD1"
## [157] "KIAA1462" "NUPL1" "TMEM257" "KIAA0101"
## [161] "SOGA2" "C12ORF29" "C2ORF69" "ADRBK1"
## [165] "KIAA0195" "WHSC1" "FAM73B" "ZNF487P"
## [169] "PPAP2B" "PNMAL2" "KIAA0196" "FAM58A"
## [173] "CARD11" "LINC00703" "BMP10" "CCAT1"
## [177] "LGI2" "OLR1" "IL2RB" "BSCL2"
## [181] "GNB1L" "FAM71B" "C12orf49" "DIRAS2"
## [185] "LEFTY1" "SNTN" "RAX" "C10orf111"
## [189] "ASGR2" "CXCL8" "FGF19" "TOR2A"
# Tally the up_targets symbols outside and inside the background list bg.
n_up_missing_bg <- length(setdiff(up_targets, bg))
n_up_missing_bg
## [1] 192
nsel <- length(intersect(up_targets, bg))
nsel
## [1] 881
length(up_targets)
## [1] 1073
n_up_missing_bg / length(up_targets)
## [1] 0.1789376
# Over-representation test of up_targets against the Reactome sets, using bg
# as the universe.
up_bg_res <- fora(
  pathways = reactome,
  genes = up_targets,
  universe = bg,
  minSize = 5
)
## Warning in fora(pathways = reactome, genes = up_targets, universe = bg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes) / (set size / universe size).
# nsel (set above) counts only the selected genes that are present in bg.
nbg <- length(bg)
up_bg_res$foldenrichment <- with(
  up_bg_res,
  (overlap / nsel) / (size / nbg)
)
head(up_bg_res)
## pathway pval padj overlap
## 1: Signal Transduction 9.620197e-22 1.727787e-18 204
## 2: Generic Transcription Pathway 2.478318e-19 2.225529e-16 137
## 3: Gene expression (Transcription) 1.036131e-18 6.202970e-16 158
## 4: RNA Polymerase II Transcription 8.491643e-18 3.812748e-15 143
## 5: Disease 1.652687e-15 5.936451e-13 150
## 6: Signaling by Receptor Tyrosine Kinases 3.576543e-14 1.070579e-11 66
## size overlapGenes foldenrichment
## 1: 1832 ADAM17,ADCY9,AGO1,AGO2,AGO3,AGO4,... 1.929415
## 2: 1076 AGO1,AGO2,AGO3,AGO4,AR,ARID1A,... 2.206120
## 3: 1349 AFF4,AGO1,AGO2,AGO3,AGO4,AKAP8L,... 2.029392
## 4: 1193 AFF4,AGO1,AGO2,AGO3,AGO4,AR,... 2.076904
## 5: 1357 ACAN,ADAM17,ADCY9,AGTRAP,AP1M1,AP2B1,... 1.915280
## 6: 414 ADAM17,AP2B1,ARF6,ATP6V0E1,BAX,BDNF,... 2.762260
# Number of gene sets tested.
nrow(up_bg_res)
## [1] 1796
# Keep the sets with padj < 0.05 and fold enrichment > 2; report how many
# there are and store their names.
up_bg_sig <- subset(up_bg_res, padj < 0.05 & foldenrichment > 2)
nrow(up_bg_sig)
## [1] 227
up_bg_res_sets <- up_bg_sig$pathway
# Symbols in dn_targets that are not in the background list bg.
dn_missing_bg <- setdiff(dn_targets, bg)
dn_missing_bg
## [1] "IQGAP2" "TYRP1" "HIST1H3B" "HIST1H2AD" "DCAF12L2" "MC2R"
## [7] "CD93" "QRFPR" "BTLA" "CACNA1B" "DNAH9" "BMP10"
## [13] "DNAH8" "CFAP65" "SRXN1" "CXCL8" "44626" "LEP"
## [19] "CCDC108" "FAM127A" "NR5A1" "SP8" "SNAP25" "PRR23A"
## [25] "SRRM4" "KIAA1210" "TPGS1" "PTAFR" "CLEC17A" "HEY2"
## [31] "Qk" "CCBL2" "KIAA1715" "FAM60A" "WBSCR22" "C9orf41"
## [37] "C19orf68"
length(dn_missing_bg)
## [1] 37
nsel <- length(intersect(dn_targets, bg))
nsel
## [1] 228
length(dn_targets)
## [1] 265
length(dn_missing_bg) / length(dn_targets)
## [1] 0.1396226
# Over-representation test of dn_targets against the Reactome sets, using bg
# as the universe.
dn_bg_res <- fora(
  pathways = reactome,
  genes = dn_targets,
  universe = bg,
  minSize = 5
)
## Warning in fora(pathways = reactome, genes = dn_targets, universe = bg, : Not
## all of the input genes belong to the universe, such genes were removed
# Fold enrichment = (overlap / selected genes) / (set size / universe size).
# nsel (set above) counts only the selected genes that are present in bg.
nbg <- length(bg)
dn_bg_observed <- dn_bg_res$overlap / nsel
dn_bg_expected <- dn_bg_res$size / nbg
dn_bg_res$foldenrichment <- dn_bg_observed / dn_bg_expected
head(dn_bg_res)
## pathway pval padj
## 1: FOXO-mediated transcription 1.261597e-06 0.002265829
## 2: FOXO-mediated transcription of cell cycle genes 2.717299e-06 0.002440135
## 3: Cytokine Signaling in Immune system 6.682545e-05 0.040006169
## 4: Disease 1.132751e-04 0.045194440
## 5: Regulation of beta-cell development 1.258197e-04 0.045194440
## 6: NOTCH2 intracellular domain regulates transcription 2.584770e-04 0.077370788
## overlap size overlapGenes foldenrichment
## 1: 8 54 BCL2L11,CAV1,CDKN1A,FOXO1,FOXO4,MSTN,... 9.918778
## 2: 5 16 CAV1,CDKN1A,FOXO1,FOXO4,MSTN 20.922423
## 3: 21 543 BCL2L11,CDKN1A,CUL1,EIF4A2,FOXO1,HNRNPDL,... 2.589294
## 4: 38 1357 ADAMTS15,BCL2L11,CAV1,CDKN1A,CUL1,CYP1B1,... 1.874846
## 5: 4 18 FOXO1,MAML3,MAMLD1,RBPJ 14.878168
## 6: 3 9 MAML3,MAMLD1,RBPJ 22.317251
# Number of gene sets tested.
nrow(dn_bg_res)
## [1] 1796
# Keep the sets with padj < 0.05 and fold enrichment > 2; report how many
# there are and store their names.
dn_bg_sig <- subset(dn_bg_res, padj < 0.05 & foldenrichment > 2)
nrow(dn_bg_sig)
## [1] 4
dn_bg_res_sets <- dn_bg_sig$pathway
# Euler diagram of the selected pathway sets from the four miR-target
# analyses: whole-genome (WG) vs. bg (BG) universe, up and dn target lists.
v1 <- list(
  "WG up" = up_wg_res_sets,
  "BG up" = up_bg_res_sets,
  "WG dn" = dn_wg_res_sets,
  "BG dn" = dn_bg_res_sets
)
v1_fit <- euler(v1)
plot(v1_fit, quantities = TRUE, main = "Effect of background list")
Now compare miR targets up and down together with observed up and down.
# Euler diagram of the gene lists themselves: the up/dn DGE gene lists
# against the up/dn miR target lists.
v2 <- list(
  "up DGE" = mrna_up,
  "dn DGE" = mrna_dn,
  "up miR targets" = up_targets,
  "dn miR targets" = dn_targets
)
v2_fit <- euler(v2)
plot(v2_fit, quantities = TRUE, main = "miR targets and DEGs?")
Interesting how low the overlap between the miR targets and the DEGs is.
Now check with the top 100 genes in each direction.
# Up-regulated genes: take the first 129 rows of dge_mrna with a positive
# log2 fold change; per the str() output below this leaves 100 gene symbols.
# NOTE(review): presumably dge_mrna is already ordered by significance - confirm.
up_rows <- subset(dge_mrna, log2FoldChange > 0)
mrna_up <- head(rownames(up_rows), 129)
str(mrna_up)
## chr [1:129] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" ...
# Map the Ensembl IDs to distinct gene symbols and drop the empty ones.
up_symbols <- unique(gt[gt$Gene.stable.ID %in% mrna_up, "Gene.name"])
mrna_up <- up_symbols[up_symbols != ""]
str(mrna_up)
## chr [1:100] "AGKP1" "APP" "CLTCL1" "SIPA1L1-AS1" "GTF3AP2" "CD226" ...
# Down-regulated genes: same procedure with the first 105 rows that have a
# negative log2 fold change.
dn_rows <- subset(dge_mrna, log2FoldChange < 0)
mrna_dn <- head(rownames(dn_rows), 105)
str(mrna_dn)
## chr [1:105] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" ...
dn_symbols <- unique(gt[gt$Gene.stable.ID %in% mrna_dn, "Gene.name"])
mrna_dn <- dn_symbols[dn_symbols != ""]
str(mrna_dn)
## chr [1:100] "MT-RNR1" "MT-CO2" "MT-ND5" "SOHLH2" "PPARGC1A" "FREM2" ...
# Euler diagram of these DGE gene lists against the miR target lists.
v2 <- list(
  "up DGE" = mrna_up,
  "dn DGE" = mrna_dn,
  "up miR targets" = up_targets,
  "dn miR targets" = dn_targets
)
v2_fit <- euler(v2)
plot(v2_fit, quantities = TRUE, main = "miR targets and DEGs?")
Now check with the top 1000 genes in each direction.
# Up-regulated genes: take the first 1288 rows of dge_mrna with a positive
# log2 fold change; per the str() output below this leaves 1000 gene symbols.
# NOTE(review): presumably dge_mrna is already ordered by significance - confirm.
up_rows <- subset(dge_mrna, log2FoldChange > 0)
mrna_up <- head(rownames(up_rows), 1288)
str(mrna_up)
## chr [1:1288] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" ...
# Map the Ensembl IDs to distinct gene symbols and drop the empty ones.
up_symbols <- unique(gt[gt$Gene.stable.ID %in% mrna_up, "Gene.name"])
mrna_up <- up_symbols[up_symbols != ""]
str(mrna_up)
## chr [1:1000] "ARSDP1" "CDY4P" "PRKY" "UTY" "AGKP1" "ZNF343" "LINC01669" ...
# Down-regulated genes: same procedure with the first 1080 rows that have a
# negative log2 fold change.
dn_rows <- subset(dge_mrna, log2FoldChange < 0)
mrna_dn <- head(rownames(dn_rows), 1080)
str(mrna_dn)
## chr [1:1080] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" ...
dn_symbols <- unique(gt[gt$Gene.stable.ID %in% mrna_dn, "Gene.name"])
mrna_dn <- dn_symbols[dn_symbols != ""]
str(mrna_dn)
## chr [1:1000] "MT-RNR1" "MT-RNR2" "MT-ND1" "MT-CO1" "MT-CO2" "MT-ATP6" ...
# Euler diagram of these DGE gene lists against the miR target lists.
v2 <- list(
  "up DGE" = mrna_up,
  "dn DGE" = mrna_dn,
  "up miR targets" = up_targets,
  "dn miR targets" = dn_targets
)
v2_fit <- euler(v2)
plot(v2_fit, quantities = TRUE, main = "miR targets and DEGs?")
Now check with the top 250 genes in each direction.
# Up-regulated genes: take the first 321 rows of dge_mrna with a positive
# log2 fold change; per the str() output below this leaves 250 gene symbols.
# NOTE(review): presumably dge_mrna is already ordered by significance - confirm.
up_rows <- subset(dge_mrna, log2FoldChange > 0)
mrna_up <- head(rownames(up_rows), 321)
str(mrna_up)
## chr [1:321] "ENSG00000162849" "ENSG00000203805" "ENSG00000155011" ...
# Map the Ensembl IDs to distinct gene symbols and drop the empty ones.
up_symbols <- unique(gt[gt$Gene.stable.ID %in% mrna_up, "Gene.name"])
mrna_up <- up_symbols[up_symbols != ""]
str(mrna_up)
## chr [1:250] "AGKP1" "APP" "CLTCL1" "C21orf62" "APCDD1L" "MN1" "HDAC8" ...
# Down-regulated genes: same procedure with the first 266 rows that have a
# negative log2 fold change.
dn_rows <- subset(dge_mrna, log2FoldChange < 0)
mrna_dn <- head(rownames(dn_rows), 266)
str(mrna_dn)
## chr [1:266] "ENSG00000175899" "ENSG00000130203" "ENSG00000140022" ...
dn_symbols <- unique(gt[gt$Gene.stable.ID %in% mrna_dn, "Gene.name"])
mrna_dn <- dn_symbols[dn_symbols != ""]
str(mrna_dn)
## chr [1:250] "MT-RNR1" "MT-RNR2" "MT-CO2" "MT-ATP6" "MT-ND3" "MT-ND4" ...
Use the top 250 genes in each direction for the enrichment analysis, as recommended in a paper by Tarca.
# Over-representation test of the up-regulated gene symbols (mrna_up) against
# the Reactome sets, using bg as the universe.
mrna_up_res <- fora(
  pathways = reactome,
  genes = mrna_up,
  universe = bg,
  minSize = 5
)
# Fold enrichment = (overlap / selected genes) / (set size / universe size).
# Only the selected genes that are present in bg are counted.
nbg <- length(bg)
nsel <- length(intersect(mrna_up, bg))
mrna_up_res$foldenrichment <- with(
  mrna_up_res,
  (overlap / nsel) / (size / nbg)
)
head(mrna_up_res)
## pathway
## 1: Extracellular matrix organization
## 2: Collagen biosynthesis and modifying enzymes
## 3: Collagen chain trimerization
## 4: Collagen formation
## 5: Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)
## 6: Collagen degradation
## pval padj overlap size
## 1: 5.814354e-07 0.001044258 16 217
## 2: 2.898106e-06 0.002602499 8 55
## 3: 1.663602e-05 0.009959432 6 34
## 4: 2.241904e-05 0.010066149 8 72
## 5: 4.851678e-05 0.017427228 8 80
## 6: 7.614645e-05 0.022071903 6 44
## overlapGenes foldenrichment
## 1: ADAMTS16,APP,BMP4,COL11A1,COL15A1,COL27A1,... 4.502120
## 2: COL11A1,COL15A1,COL27A1,COL4A4,COL8A1,COL8A2,... 8.881455
## 3: COL11A1,COL15A1,COL27A1,COL4A4,COL8A1,COL8A2 10.775294
## 4: COL11A1,COL15A1,COL27A1,COL4A4,COL8A1,COL8A2,... 6.784444
## 5: APP,BMP4,CDH2,FN1,GAS6,MEN1,... 6.106000
## 6: COL11A1,COL15A1,COL4A4,COL8A1,COL8A2,MMP2 8.326364
# Number of Reactome sets tested for the up-regulated gene list.
nrow(mrna_up_res)
## [1] 1796
# Select the significant sets once so that the count reported here and the
# pathway names stored for the later Euler plot use the same cut-offs
# (padj < 0.05 and fold enrichment > 1). The count was previously taken with
# foldenrichment > 0, which did not match the selection below.
# Note: the miR-target pathway sets earlier in this report were selected with
# fold enrichment > 2, so the two selections are not on the same threshold.
mrna_up_sig <- subset(mrna_up_res, padj < 0.05 & foldenrichment > 1)
nrow(mrna_up_sig)
## [1] 10
mrna_up_res_sets <- mrna_up_sig$pathway
# Over-representation test of the down-regulated gene symbols (mrna_dn)
# against the Reactome sets, using bg as the universe.
mrna_dn_res <- fora(
  pathways = reactome,
  genes = mrna_dn,
  universe = bg,
  minSize = 5
)
# Fold enrichment = (overlap / selected genes) / (set size / universe size).
# Only the selected genes that are present in bg are counted.
nbg <- length(bg)
nsel <- length(intersect(mrna_dn, bg))
mrna_dn_observed <- mrna_dn_res$overlap / nsel
mrna_dn_expected <- mrna_dn_res$size / nbg
mrna_dn_res$foldenrichment <- mrna_dn_observed / mrna_dn_expected
head(mrna_dn_res)
## pathway
## 1: FASTK family proteins regulate processing and stability of mitochondrial RNAs
## 2: tRNA processing in the mitochondrion
## 3: rRNA processing in the mitochondrion
## 4: Mitochondrial RNA degradation
## 5: Muscle contraction
## 6: Ion homeostasis
## pval padj overlap size
## 1: 1.238109e-08 2.223644e-05 7 19
## 2: 4.018142e-08 3.608291e-05 7 22
## 3: 7.929370e-08 4.747049e-05 7 24
## 4: 1.085999e-07 4.876136e-05 7 25
## 5: 3.030982e-06 1.088729e-03 12 139
## 6: 2.759865e-05 8.261197e-03 6 37
## overlapGenes foldenrichment
## 1: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,... 22.495789
## 2: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,... 19.428182
## 3: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,... 17.809167
## 4: MT-ATP6,MT-CO2,MT-ND3,MT-ND4,MT-ND5,MT-RNR1,... 17.096800
## 5: ATP2A1,CALM1,CAMK2B,CAMK2D,KCNJ2,MME,... 5.271367
## 6: ATP2A1,CALM1,CAMK2B,CAMK2D,SLN,TRDN 9.901622
# Number of Reactome sets tested for the down-regulated gene list.
nrow(mrna_dn_res)
## [1] 1796
# Select the significant sets once so that the count reported here and the
# pathway names stored for the later Euler plot use the same cut-offs
# (padj < 0.05 and fold enrichment > 1). The count was previously taken with
# foldenrichment > 0, which did not match the selection below.
# Note: the miR-target pathway sets earlier in this report were selected with
# fold enrichment > 2, so the two selections are not on the same threshold.
mrna_dn_sig <- subset(mrna_dn_res, padj < 0.05 & foldenrichment > 1)
nrow(mrna_dn_sig)
## [1] 9
mrna_dn_res_sets <- mrna_dn_sig$pathway
# Euler diagram of the selected pathway sets: miR-target analyses (bg
# universe) against the up/dn mRNA analyses.
v3 <- list(
  "dn miR target" = dn_bg_res_sets,
  "up miR target" = up_bg_res_sets,
  "up mRNA" = mrna_up_res_sets,
  "dn mRNA" = mrna_dn_res_sets
)
v3_fit <- euler(v3)
plot(v3_fit, quantities = TRUE, main = "miR target and DE pathways")