The purpose of this analysis is to apply different enrichment analysis approaches, and see which approach is most parsimonious with gene expressoin differences.
We will specifically be testing the GMEA approach versus existing approaches, namely GOseq and mitch.
This study (GSE158433) is a super-series contains matched RNA-seq (GSE158420) and Infinium EPIC array (GSE158422) data for matched control and cancer tissues.
In this script, we will be processing the EPIC DNA methylation data and performing the statistical analysis in limma and saving the results for downstream enrichment analysis.
suppressPackageStartupMessages({
library("plyr")
library("R.utils")
library("missMethyl")
library("limma")
library("DMRcate")
library("DMRcatedata")
library("topconfects")
library("minfi")
library("IlluminaHumanMethylation450kmanifest")
library("RColorBrewer")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("GEOquery")
library("eulerr")
library("plyr")
library("gplots")
library("reshape2")
library("forestplot")
library("beeswarm")
library("RCircos")
library("qqman")
library("ENmix")
})
## Warning: replacing previous import 'matrixStats::rowMedians' by
## 'Biobase::rowMedians' when loading 'DSS'
## Warning: replacing previous import 'matrixStats::anyMissing' by
## 'Biobase::anyMissing' when loading 'DSS'
source("../meth_functions.R")
anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
GSE158422 is the accession number for the array data.
ARRAY_DATA="GSE158422_RAW.tar"
if(!file.exists(ARRAY_DATA)){
download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158422&format=file",
destfile=ARRAY_DATA)
untar(tarfile = ARRAY_DATA)
}
SERIES_MATRIX="GSE158422_series_matrix.txt.gz"
if(!file.exists(SERIES_MATRIX)){
download.file("https://ftp.ncbi.nlm.nih.gov/geo/series/GSE158nnn/GSE158422/matrix/GSE158422_series_matrix.txt.gz",
destfile=SERIES_MATRIX)
}
gse <- getGEO(filename=SERIES_MATRIX)
baseDir <- "IDAT"
sample_metadata <- pData(phenoData(gse))
targets <- sample_metadata
mybase <- unique(gsub("_Red.idat.gz" ,"", gsub("_Grn.idat.gz", "" ,list.files(".",pattern = "GSM",recursive = TRUE))))
targets$Basename <- mybase
head(targets)
## title geo_accession status
## GSM4799888 pat2_tumor [methylation] GSM4799888 Public on Jan 08 2022
## GSM4799889 pat2_normal [methylation] GSM4799889 Public on Jan 08 2022
## GSM4799890 pat3_tumor [methylation] GSM4799890 Public on Jan 08 2022
## GSM4799891 pat3_normal [methylation] GSM4799891 Public on Jan 08 2022
## GSM4799892 pat4_tumor [methylation] GSM4799892 Public on Jan 08 2022
## GSM4799893 pat4_normal [methylation] GSM4799893 Public on Jan 08 2022
## submission_date last_update_date type channel_count
## GSM4799888 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799889 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799890 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799891 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799892 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799893 Sep 23 2020 Jan 09 2022 genomic 1
## source_name_ch1 organism_ch1 characteristics_ch1 molecule_ch1
## GSM4799888 lung biopsies Homo sapiens tumor: tumor genomic DNA
## GSM4799889 lung biopsies Homo sapiens tumor: normal genomic DNA
## GSM4799890 lung biopsies Homo sapiens tumor: tumor genomic DNA
## GSM4799891 lung biopsies Homo sapiens tumor: normal genomic DNA
## GSM4799892 lung biopsies Homo sapiens tumor: tumor genomic DNA
## GSM4799893 lung biopsies Homo sapiens tumor: normal genomic DNA
## extract_protocol_ch1
## GSM4799888 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799889 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799890 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799891 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799892 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799893 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## extract_protocol_ch1.1 label_ch1
## GSM4799888 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799889 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799890 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799891 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799892 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799893 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## label_protocol_ch1 taxid_ch1
## GSM4799888 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799889 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799890 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799891 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799892 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799893 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## hyb_protocol
## GSM4799888 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799889 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799890 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799891 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799892 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799893 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## scan_protocol
## GSM4799888 Arrays were imaged using iScan System.
## GSM4799889 Arrays were imaged using iScan System.
## GSM4799890 Arrays were imaged using iScan System.
## GSM4799891 Arrays were imaged using iScan System.
## GSM4799892 Arrays were imaged using iScan System.
## GSM4799893 Arrays were imaged using iScan System.
## data_processing
## GSM4799888 Cpgsite filtering: Rnbeads package
## GSM4799889 Cpgsite filtering: Rnbeads package
## GSM4799890 Cpgsite filtering: Rnbeads package
## GSM4799891 Cpgsite filtering: Rnbeads package
## GSM4799892 Cpgsite filtering: Rnbeads package
## GSM4799893 Cpgsite filtering: Rnbeads package
## data_processing.1
## GSM4799888 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799889 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799890 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799891 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799892 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799893 Additional filtering: SNP region from KOVA and KRGDB
## data_processing.2 platform_id contact_name
## GSM4799888 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799889 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799890 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799891 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799892 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799893 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## contact_laboratory contact_department contact_institute
## GSM4799888 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799889 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799890 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799891 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799892 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799893 Network Biomedicine Laboratory Biotechnology Yonsei University
## contact_address contact_city contact_zip/postal_code
## GSM4799888 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799889 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799890 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799891 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799892 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799893 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## contact_country
## GSM4799888 South Korea
## GSM4799889 South Korea
## GSM4799890 South Korea
## GSM4799891 South Korea
## GSM4799892 South Korea
## GSM4799893 South Korea
## supplementary_file
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Grn.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Grn.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Grn.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Grn.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Grn.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Grn.idat.gz
## supplementary_file.1
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Red.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Red.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Red.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Red.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Red.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Red.idat.gz
## data_row_count tumor:ch1 Basename
## GSM4799888 651062 tumor IDAT/GSM4799888_201959750039_R02C01
## GSM4799889 651062 normal IDAT/GSM4799889_201959750039_R03C01
## GSM4799890 651062 tumor IDAT/GSM4799890_201959750039_R04C01
## GSM4799891 651062 normal IDAT/GSM4799891_201959750039_R05C01
## GSM4799892 651062 tumor IDAT/GSM4799892_201959750039_R06C01
## GSM4799893 651062 normal IDAT/GSM4799893_201959750039_R07C01
rgSet <- read.metharray.exp(targets = targets)
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
## Warning in readChar(con, nchars = n): truncating string with embedded nuls
rgSet
## class: RGChannelSet
## dim: 1051815 74
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(74): GSM4799888_201959750039_R02C01
## GSM4799889_201959750039_R03C01 ... GSM4799972_201959750223_R04C01
## GSM4799973_201959750223_R05C01
## colData names(36): title geo_accession ... Basename filenames
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
targets$Basename <- gsub("IDAT/","",mybase)
head(targets)
## title geo_accession status
## GSM4799888 pat2_tumor [methylation] GSM4799888 Public on Jan 08 2022
## GSM4799889 pat2_normal [methylation] GSM4799889 Public on Jan 08 2022
## GSM4799890 pat3_tumor [methylation] GSM4799890 Public on Jan 08 2022
## GSM4799891 pat3_normal [methylation] GSM4799891 Public on Jan 08 2022
## GSM4799892 pat4_tumor [methylation] GSM4799892 Public on Jan 08 2022
## GSM4799893 pat4_normal [methylation] GSM4799893 Public on Jan 08 2022
## submission_date last_update_date type channel_count
## GSM4799888 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799889 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799890 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799891 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799892 Sep 23 2020 Jan 09 2022 genomic 1
## GSM4799893 Sep 23 2020 Jan 09 2022 genomic 1
## source_name_ch1 organism_ch1 characteristics_ch1 molecule_ch1
## GSM4799888 lung biopsies Homo sapiens tumor: tumor genomic DNA
## GSM4799889 lung biopsies Homo sapiens tumor: normal genomic DNA
## GSM4799890 lung biopsies Homo sapiens tumor: tumor genomic DNA
## GSM4799891 lung biopsies Homo sapiens tumor: normal genomic DNA
## GSM4799892 lung biopsies Homo sapiens tumor: tumor genomic DNA
## GSM4799893 lung biopsies Homo sapiens tumor: normal genomic DNA
## extract_protocol_ch1
## GSM4799888 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799889 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799890 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799891 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799892 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799893 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## extract_protocol_ch1.1 label_ch1
## GSM4799888 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799889 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799890 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799891 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799892 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799893 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## label_protocol_ch1 taxid_ch1
## GSM4799888 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799889 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799890 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799891 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799892 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## GSM4799893 Standard Illumina labelling protocol (label: Cy3 and Cy5) 9606
## hyb_protocol
## GSM4799888 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799889 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799890 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799891 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799892 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799893 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## scan_protocol
## GSM4799888 Arrays were imaged using iScan System.
## GSM4799889 Arrays were imaged using iScan System.
## GSM4799890 Arrays were imaged using iScan System.
## GSM4799891 Arrays were imaged using iScan System.
## GSM4799892 Arrays were imaged using iScan System.
## GSM4799893 Arrays were imaged using iScan System.
## data_processing
## GSM4799888 Cpgsite filtering: Rnbeads package
## GSM4799889 Cpgsite filtering: Rnbeads package
## GSM4799890 Cpgsite filtering: Rnbeads package
## GSM4799891 Cpgsite filtering: Rnbeads package
## GSM4799892 Cpgsite filtering: Rnbeads package
## GSM4799893 Cpgsite filtering: Rnbeads package
## data_processing.1
## GSM4799888 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799889 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799890 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799891 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799892 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799893 Additional filtering: SNP region from KOVA and KRGDB
## data_processing.2 platform_id contact_name
## GSM4799888 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799889 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799890 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799891 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799892 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## GSM4799893 Normalization: Functional normalization GPL21145 JAE WON,,CHO
## contact_laboratory contact_department contact_institute
## GSM4799888 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799889 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799890 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799891 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799892 Network Biomedicine Laboratory Biotechnology Yonsei University
## GSM4799893 Network Biomedicine Laboratory Biotechnology Yonsei University
## contact_address contact_city contact_zip/postal_code
## GSM4799888 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799889 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799890 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799891 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799892 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## GSM4799893 50 Yonsei-ro, Seodaemun-gu Seoul 03722
## contact_country
## GSM4799888 South Korea
## GSM4799889 South Korea
## GSM4799890 South Korea
## GSM4799891 South Korea
## GSM4799892 South Korea
## GSM4799893 South Korea
## supplementary_file
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Grn.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Grn.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Grn.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Grn.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Grn.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Grn.idat.gz
## supplementary_file.1
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Red.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Red.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Red.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Red.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Red.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Red.idat.gz
## data_row_count tumor:ch1 Basename
## GSM4799888 651062 tumor GSM4799888_201959750039_R02C01
## GSM4799889 651062 normal GSM4799889_201959750039_R03C01
## GSM4799890 651062 tumor GSM4799890_201959750039_R04C01
## GSM4799891 651062 normal GSM4799891_201959750039_R05C01
## GSM4799892 651062 tumor GSM4799892_201959750039_R06C01
## GSM4799893 651062 normal GSM4799893_201959750039_R07C01
mSet <- preprocessRaw(rgSet)
## Loading required package: IlluminaHumanMethylationEPICmanifest
mSetSw <- SWAN(mSet,verbose=FALSE)
## [SWAN] Preparing normalization subset
## EPIC
## [SWAN] Normalizing methylated channel
## [SWAN] Normalizing unmethylated channel
par(mfrow=c(1,2), cex=0.8)
densityByProbeType(mSet[,1], main = "Raw")
densityByProbeType(mSetSw[,1], main = "SWAN")
# include sex chromosomes
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]
# exclude SNP probes
mSetSw <- mapToGenome(mSetSw)
mSetSw_nosnp <- dropLociWithSnps(mSetSw)
# exclude sex chromosomes
xyprobes <- anno$Name[anno$chr %in% c("chrX","chrY")]
mSetFlt <- mSetSw[which(!rownames(mSetSw) %in% xyprobes),]
# include sex chromosomes
meth <- getMeth(mSetSw)
unmeth <- getUnmeth(mSetSw)
Mval <- log2((meth + 100)/(unmeth + 100))
beta <- getBeta(mSetSw)
dim(Mval)
## [1] 858338 74
# exclude sex chromosomes
meth <- getMeth(mSetFlt)
unmeth <- getUnmeth(mSetFlt)
Mval_flt <- log2((meth + 100)/(unmeth + 100))
beta_flt <- getBeta(mSetFlt)
dim(Mval_flt)
## [1] 839473 74
xprobes <- anno$Name[anno$chr %in% "chrX"]
yprobes <- anno$Name[anno$chr %in% "chrY"]
xdat <- Mval[which( rownames(Mval) %in% xprobes ),]
ydat <- Mval[which( rownames(Mval) %in% yprobes ),]
gsm <- sapply( strsplit(colnames(Mval),"_" ) , "[[" , 1)
plot(colMeans(xdat) , colMeans(ydat), cex=1)
text(colMeans(xdat) , colMeans(ydat),labels=gsm )
grid()
abline(v=-0.1,col="red",lty=2)
which( colMeans(xdat) > -0.1 )
## GSM4799928_201959750126_R04C01 GSM4799929_201959750126_R05C01
## 41 42
## GSM4799958_201959750019_R06C01 GSM4799959_201959750019_R07C01
## 59 60
sex_female <- as.numeric(colMeans(xdat) > -0.1)
# scree plot shows the amount of variation in a dataset that is accounted
# for by the first N principal components
myscree <- function(mx,n=10,main="") {
pc <- princomp(mx)$sdev
pcp <- pc/sum(pc)*100
pcp <- pcp[1:10]
barplot(pcp,cex.names = 1,las=2,ylim=c(0,60),
ylab="percent (%) variance explained", main=main)
text((0.5:length(pcp)*1.2),pcp,label=signif(pcp,3),pos=3,cex=0.8)
}
par(mfrow=c(2,1))
myscree(Mval,main="incl sex chr")
myscree(Mval_flt,main="excl sex chr")
par(mfrow=c(1,1))
#MDS plot 1
colnames(targets) <- gsub(" ","_",colnames(targets))
colnames(targets) <- gsub(":","_",colnames(targets))
targets$sex<-factor(sex_female)
targets$case<-factor(targets$`characteristics_ch1`)
sample_groups <- factor(targets$case)
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
## Warning in brewer.pal(n = length(levels(sample_groups)), name = "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
colours <- colour_palette[as.integer(factor(targets$case))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by case/ctrl",cex=0)
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
mydist <- plotMDS(Mval, labels=gsm,col=colours,main="sex chromosomes included")
mydist_flt <- plotMDS(Mval_flt, labels=gsm,col=colours,main="sex chromosomes excluded")
Paired normal versus cancer tissue.
samplesheet<-targets
groups <- factor(samplesheet$case,levels=c("tumor: normal","tumor: tumor"))
sex <- factor(samplesheet$sex,levels=c("0","1"))
dim(Mval)
## [1] 858338 74
dim(beta)
## [1] 858338 74
GSE158422 <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval,name="GSE158422",
myann=myann ,beta= beta)
## Your contrast returned 219137 individually significant probes. We recommend the default setting of pcutoff in dmrcate().
## Fitting chr1...
## Fitting chr2...
## Fitting chr3...
## Fitting chr4...
## Fitting chr5...
## Fitting chr6...
## Fitting chr7...
## Fitting chr8...
## Fitting chr9...
## Fitting chr10...
## Fitting chr11...
## Fitting chr12...
## Fitting chr13...
## Fitting chr14...
## Fitting chr15...
## Fitting chr16...
## Fitting chr17...
## Fitting chr18...
## Fitting chr19...
## Fitting chr20...
## Fitting chr21...
## Fitting chr22...
## Fitting chrX...
## Fitting chrY...
## Demarcating regions...
## Done!
## snapshotDate(): 2022-04-26
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache
##
## RCircos.Core.Components initialized.
## Type ?RCircos.Reset.Plot.Parameters to see how to modify the core components.
## Tiles plot may use more than one track. Please select correct area for next track if necessary.
##
## Tiles plot may use more than one track. Please select correct area for next track if necessary.
head(GSE158422$dma,10)
## Row.names UCSC_RefGene_Name
## 5777 cg00159780 REXO1L2P;REXO1L1
## 468211 cg14205001 NOTCH1
## 61184 cg01772014 C20orf160
## 221392 cg06569947 DCUN1D2
## 444856 cg13531667 MCC
## 577564 cg17655624 ZNF385A;ZNF385A
## 669100 cg20979986 SERINC5;SERINC5;SERINC5;SERINC5;SERINC5
## 657981 cg20568402 DCUN1D2
## 60887 cg01762663 DOCK9;DOCK9
## 760860 cg24334634 DCUN1D2
## Regulatory_Feature_Group Islands_Name
## 5777 chr8:86573421-86575248
## 468211 chr9:139405089-139405292
## 61184 Unclassified_Cell_type_specific chr20:30606492-30607174
## 221392
## 444856 chr5:112823256-112824304
## 577564 Unclassified_Cell_type_specific chr12:54784900-54785238
## 669100 Unclassified_Cell_type_specific
## 657981
## 60887
## 760860
## Relation_to_Island logFC AveExpr t P.Value
## 5777 Island -1.680582 1.119242 -13.42558 1.187923e-21
## 468211 S_Shore 1.063122 1.017852 13.40475 1.290365e-21
## 61184 N_Shore -1.258942 -2.131805 -13.37502 1.452267e-21
## 221392 OpenSea 1.689789 1.602801 13.18873 3.053315e-21
## 444856 S_Shore -1.948149 -2.464149 -13.15484 3.496891e-21
## 577564 S_Shore -1.601538 -2.430925 -13.10683 4.238623e-21
## 669100 OpenSea -1.947790 -1.504807 -13.04706 5.387984e-21
## 657981 OpenSea 1.678729 1.358215 13.03365 5.686433e-21
## 60887 OpenSea 1.415148 1.625403 12.95398 7.835603e-21
## 760860 OpenSea 1.896843 1.347877 12.89046 1.012375e-20
## adj.P.Val B
## 5777 4.155120e-16 38.63506
## 468211 4.155120e-16 38.55528
## 61184 4.155120e-16 38.44125
## 221392 6.003029e-16 37.72414
## 444856 6.003029e-16 37.59318
## 577564 6.063618e-16 37.40744
## 669100 6.101102e-16 37.17573
## 657981 6.101102e-16 37.12366
## 60887 7.472884e-16 36.81395
## 760860 8.689604e-16 36.56639
head(GSE158422$dmr,10)
## GRanges object with 10 ranges and 8 metadata columns:
## seqnames ranges strand | no.cpgs min_smoothed_fdr
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr6 32034322-32059605 * | 215 1.66769e-108
## [2] chr6 33129419-33155135 * | 166 2.75845e-87
## [3] chr6 32013974-32033307 * | 157 1.70467e-94
## [4] chr7 27180888-27185512 * | 48 0.00000e+00
## [5] chr1 50880150-50893984 * | 59 8.70901e-138
## [6] chr2 223161771-223173061 * | 57 7.41084e-82
## [7] chr7 27140797-27148204 * | 46 3.68060e-130
## [8] chr2 176980837-176989586 * | 41 2.11869e-141
## [9] chr3 147105190-147116807 * | 48 1.31679e-71
## [10] chr2 177012117-177017797 * | 33 1.62824e-145
## Stouffer HMFDR Fisher maxdiff meandiff
## <numeric> <numeric> <numeric> <numeric> <numeric>
## [1] 0.00000e+00 2.89730e-06 0.00000e+00 -0.184036 -0.0643286
## [2] 0.00000e+00 4.89484e-09 0.00000e+00 -0.197831 -0.0694411
## [3] 0.00000e+00 1.88937e-07 0.00000e+00 -0.200274 -0.0744661
## [4] 0.00000e+00 3.46237e-10 0.00000e+00 0.292363 0.1980467
## [5] 1.89330e-299 3.98485e-14 1.49408e-316 0.317989 0.1324507
## [6] 2.42511e-284 1.87577e-09 4.10695e-276 0.227861 0.1251198
## [7] 5.60715e-278 4.34808e-12 5.69989e-268 0.284529 0.1601439
## [8] 1.07663e-269 2.06317e-11 4.63765e-256 0.307304 0.1766871
## [9] 6.29131e-247 1.84459e-10 3.08455e-236 0.280936 0.1565660
## [10] 8.08145e-212 6.19307e-10 1.08398e-202 0.246288 0.1739056
## overlapping.genes
## <character>
## [1] TNXB, RNA5SP206
## [2] COL11A2
## [3] TNXB
## [4] HOXA-AS3, HOXA3, RP1..
## [5] DMRTA2
## [6] CCDC140, PAX3
## [7] HOXA-AS2, HOXA2, HOXA3
## [8] HOXD10, HOXD9, HOXD-..
## [9] ZIC4-AS1, ZIC1, ZIC4
## [10] HOXD3, MIR10B, HOXD4
## -------
## seqinfo: 24 sequences from an unspecified genome; no seqlengths
saveRDS(GSE158422,file="GSE158422.rds")
write.table(GSE158422$dma,file="GSE158422_limma.tsv",sep="\t",quote=FALSE)
save.image("GSE158422.Rdata")