Introduction

The purpose of this analysis is to apply different enrichment analysis approaches, and see which approach is most parsimonious with gene expressoin differences.

We will specifically be testing the GMEA approach versus existing approaches, namely GOseq and mitch.

This study (GSE158433) is a super-series contains matched RNA-seq (GSE158420) and Infinium EPIC array (GSE158422) data for matched control and cancer tissues.

In this script, we will be processing the EPIC DNA methylation data and performing the statistical analysis in limma and saving the results for downstream enrichment analysis.

suppressPackageStartupMessages({
  library("plyr")
  library("R.utils")
  library("missMethyl")
  library("limma")
  library("DMRcate")
  library("DMRcatedata")
  library("topconfects")
  library("minfi")
  library("IlluminaHumanMethylation450kmanifest")
  library("RColorBrewer")
  library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
  library("GEOquery")
  library("eulerr")
  library("plyr")
  library("gplots")
  library("reshape2")
  library("forestplot")
  library("beeswarm")
  library("RCircos")
  library("qqman")
  library("ENmix")
})
## Warning: replacing previous import 'matrixStats::rowMedians' by
## 'Biobase::rowMedians' when loading 'DSS'
## Warning: replacing previous import 'matrixStats::anyMissing' by
## 'Biobase::anyMissing' when loading 'DSS'
source("../meth_functions.R")

Obtaining array annotations

anno <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
myann <- data.frame(anno[,c("UCSC_RefGene_Name","Regulatory_Feature_Group","Islands_Name","Relation_to_Island")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)

Obtain array data

GSE158422 is the accession number for the array data.

ARRAY_DATA="GSE158422_RAW.tar"
if(!file.exists(ARRAY_DATA)){
  download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158422&format=file",
    destfile=ARRAY_DATA)
  untar(tarfile = ARRAY_DATA)
}

SERIES_MATRIX="GSE158422_series_matrix.txt.gz"
if(!file.exists(SERIES_MATRIX)){
download.file("https://ftp.ncbi.nlm.nih.gov/geo/series/GSE158nnn/GSE158422/matrix/GSE158422_series_matrix.txt.gz",
  destfile=SERIES_MATRIX)
}
gse <- getGEO(filename=SERIES_MATRIX)
baseDir <- "IDAT"
sample_metadata <- pData(phenoData(gse))
targets <- sample_metadata
mybase <- unique(gsub("_Red.idat.gz" ,"",  gsub("_Grn.idat.gz", "" ,list.files(".",pattern = "GSM",recursive = TRUE))))
targets$Basename <- mybase
head(targets)
##                                title geo_accession                status
## GSM4799888  pat2_tumor [methylation]    GSM4799888 Public on Jan 08 2022
## GSM4799889 pat2_normal [methylation]    GSM4799889 Public on Jan 08 2022
## GSM4799890  pat3_tumor [methylation]    GSM4799890 Public on Jan 08 2022
## GSM4799891 pat3_normal [methylation]    GSM4799891 Public on Jan 08 2022
## GSM4799892  pat4_tumor [methylation]    GSM4799892 Public on Jan 08 2022
## GSM4799893 pat4_normal [methylation]    GSM4799893 Public on Jan 08 2022
##            submission_date last_update_date    type channel_count
## GSM4799888     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799889     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799890     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799891     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799892     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799893     Sep 23 2020      Jan 09 2022 genomic             1
##            source_name_ch1 organism_ch1 characteristics_ch1 molecule_ch1
## GSM4799888   lung biopsies Homo sapiens        tumor: tumor  genomic DNA
## GSM4799889   lung biopsies Homo sapiens       tumor: normal  genomic DNA
## GSM4799890   lung biopsies Homo sapiens        tumor: tumor  genomic DNA
## GSM4799891   lung biopsies Homo sapiens       tumor: normal  genomic DNA
## GSM4799892   lung biopsies Homo sapiens        tumor: tumor  genomic DNA
## GSM4799893   lung biopsies Homo sapiens       tumor: normal  genomic DNA
##                                                                 extract_protocol_ch1
## GSM4799888 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799889 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799890 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799891 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799892 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799893 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
##                                      extract_protocol_ch1.1   label_ch1
## GSM4799888 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799889 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799890 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799891 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799892 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799893 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
##                                                   label_protocol_ch1 taxid_ch1
## GSM4799888 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799889 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799890 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799891 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799892 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799893 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
##                                                                                                                                                    hyb_protocol
## GSM4799888 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799889 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799890 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799891 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799892 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799893 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
##                                     scan_protocol
## GSM4799888 Arrays were imaged using iScan System.
## GSM4799889 Arrays were imaged using iScan System.
## GSM4799890 Arrays were imaged using iScan System.
## GSM4799891 Arrays were imaged using iScan System.
## GSM4799892 Arrays were imaged using iScan System.
## GSM4799893 Arrays were imaged using iScan System.
##                               data_processing
## GSM4799888 Cpgsite filtering: Rnbeads package
## GSM4799889 Cpgsite filtering: Rnbeads package
## GSM4799890 Cpgsite filtering: Rnbeads package
## GSM4799891 Cpgsite filtering: Rnbeads package
## GSM4799892 Cpgsite filtering: Rnbeads package
## GSM4799893 Cpgsite filtering: Rnbeads package
##                                               data_processing.1
## GSM4799888 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799889 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799890 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799891 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799892 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799893 Additional filtering: SNP region from KOVA and KRGDB
##                                  data_processing.2 platform_id contact_name
## GSM4799888 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799889 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799890 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799891 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799892 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799893 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
##                        contact_laboratory contact_department contact_institute
## GSM4799888 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799889 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799890 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799891 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799892 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799893 Network Biomedicine Laboratory      Biotechnology Yonsei University
##                       contact_address contact_city contact_zip/postal_code
## GSM4799888 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799889 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799890 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799891 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799892 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799893 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
##            contact_country
## GSM4799888     South Korea
## GSM4799889     South Korea
## GSM4799890     South Korea
## GSM4799891     South Korea
## GSM4799892     South Korea
## GSM4799893     South Korea
##                                                                                                       supplementary_file
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Grn.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Grn.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Grn.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Grn.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Grn.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Grn.idat.gz
##                                                                                                     supplementary_file.1
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Red.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Red.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Red.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Red.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Red.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Red.idat.gz
##            data_row_count tumor:ch1                            Basename
## GSM4799888         651062     tumor IDAT/GSM4799888_201959750039_R02C01
## GSM4799889         651062    normal IDAT/GSM4799889_201959750039_R03C01
## GSM4799890         651062     tumor IDAT/GSM4799890_201959750039_R04C01
## GSM4799891         651062    normal IDAT/GSM4799891_201959750039_R05C01
## GSM4799892         651062     tumor IDAT/GSM4799892_201959750039_R06C01
## GSM4799893         651062    normal IDAT/GSM4799893_201959750039_R07C01
rgSet <- read.metharray.exp(targets = targets)
## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls

## Warning in readChar(con, nchars = n): truncating string with embedded nuls
rgSet
## class: RGChannelSet 
## dim: 1051815 74 
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(74): GSM4799888_201959750039_R02C01
##   GSM4799889_201959750039_R03C01 ... GSM4799972_201959750223_R04C01
##   GSM4799973_201959750223_R05C01
## colData names(36): title geo_accession ... Basename filenames
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19
targets$Basename <- gsub("IDAT/","",mybase)
head(targets)
##                                title geo_accession                status
## GSM4799888  pat2_tumor [methylation]    GSM4799888 Public on Jan 08 2022
## GSM4799889 pat2_normal [methylation]    GSM4799889 Public on Jan 08 2022
## GSM4799890  pat3_tumor [methylation]    GSM4799890 Public on Jan 08 2022
## GSM4799891 pat3_normal [methylation]    GSM4799891 Public on Jan 08 2022
## GSM4799892  pat4_tumor [methylation]    GSM4799892 Public on Jan 08 2022
## GSM4799893 pat4_normal [methylation]    GSM4799893 Public on Jan 08 2022
##            submission_date last_update_date    type channel_count
## GSM4799888     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799889     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799890     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799891     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799892     Sep 23 2020      Jan 09 2022 genomic             1
## GSM4799893     Sep 23 2020      Jan 09 2022 genomic             1
##            source_name_ch1 organism_ch1 characteristics_ch1 molecule_ch1
## GSM4799888   lung biopsies Homo sapiens        tumor: tumor  genomic DNA
## GSM4799889   lung biopsies Homo sapiens       tumor: normal  genomic DNA
## GSM4799890   lung biopsies Homo sapiens        tumor: tumor  genomic DNA
## GSM4799891   lung biopsies Homo sapiens       tumor: normal  genomic DNA
## GSM4799892   lung biopsies Homo sapiens        tumor: tumor  genomic DNA
## GSM4799893   lung biopsies Homo sapiens       tumor: normal  genomic DNA
##                                                                 extract_protocol_ch1
## GSM4799888 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799889 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799890 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799891 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799892 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
## GSM4799893 Extraction was performed by Quant-iT PicoGreen (Invitrogen) quantitation.
##                                      extract_protocol_ch1.1   label_ch1
## GSM4799888 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799889 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799890 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799891 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799892 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
## GSM4799893 EZ DNA methylation kit (Zymo Research) protocols Cy3 and Cy5
##                                                   label_protocol_ch1 taxid_ch1
## GSM4799888 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799889 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799890 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799891 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799892 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
## GSM4799893 Standard Illumina labelling protocol (label: Cy3 and Cy5)      9606
##                                                                                                                                                    hyb_protocol
## GSM4799888 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799889 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799890 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799891 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799892 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
## GSM4799893 Bisulphite coverted DNA was amplified, fragmented and hybridised to Illumina Infinium Human Methylation850 Beadchip using standard Illumina protocol
##                                     scan_protocol
## GSM4799888 Arrays were imaged using iScan System.
## GSM4799889 Arrays were imaged using iScan System.
## GSM4799890 Arrays were imaged using iScan System.
## GSM4799891 Arrays were imaged using iScan System.
## GSM4799892 Arrays were imaged using iScan System.
## GSM4799893 Arrays were imaged using iScan System.
##                               data_processing
## GSM4799888 Cpgsite filtering: Rnbeads package
## GSM4799889 Cpgsite filtering: Rnbeads package
## GSM4799890 Cpgsite filtering: Rnbeads package
## GSM4799891 Cpgsite filtering: Rnbeads package
## GSM4799892 Cpgsite filtering: Rnbeads package
## GSM4799893 Cpgsite filtering: Rnbeads package
##                                               data_processing.1
## GSM4799888 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799889 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799890 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799891 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799892 Additional filtering: SNP region from KOVA and KRGDB
## GSM4799893 Additional filtering: SNP region from KOVA and KRGDB
##                                  data_processing.2 platform_id contact_name
## GSM4799888 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799889 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799890 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799891 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799892 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
## GSM4799893 Normalization: Functional normalization    GPL21145 JAE WON,,CHO
##                        contact_laboratory contact_department contact_institute
## GSM4799888 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799889 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799890 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799891 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799892 Network Biomedicine Laboratory      Biotechnology Yonsei University
## GSM4799893 Network Biomedicine Laboratory      Biotechnology Yonsei University
##                       contact_address contact_city contact_zip/postal_code
## GSM4799888 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799889 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799890 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799891 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799892 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
## GSM4799893 50 Yonsei-ro, Seodaemun-gu        Seoul                   03722
##            contact_country
## GSM4799888     South Korea
## GSM4799889     South Korea
## GSM4799890     South Korea
## GSM4799891     South Korea
## GSM4799892     South Korea
## GSM4799893     South Korea
##                                                                                                       supplementary_file
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Grn.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Grn.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Grn.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Grn.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Grn.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Grn.idat.gz
##                                                                                                     supplementary_file.1
## GSM4799888 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799888/suppl/GSM4799888_201959750039_R02C01_Red.idat.gz
## GSM4799889 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799889/suppl/GSM4799889_201959750039_R03C01_Red.idat.gz
## GSM4799890 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799890/suppl/GSM4799890_201959750039_R04C01_Red.idat.gz
## GSM4799891 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799891/suppl/GSM4799891_201959750039_R05C01_Red.idat.gz
## GSM4799892 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799892/suppl/GSM4799892_201959750039_R06C01_Red.idat.gz
## GSM4799893 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4799nnn/GSM4799893/suppl/GSM4799893_201959750039_R07C01_Red.idat.gz
##            data_row_count tumor:ch1                       Basename
## GSM4799888         651062     tumor GSM4799888_201959750039_R02C01
## GSM4799889         651062    normal GSM4799889_201959750039_R03C01
## GSM4799890         651062     tumor GSM4799890_201959750039_R04C01
## GSM4799891         651062    normal GSM4799891_201959750039_R05C01
## GSM4799892         651062     tumor GSM4799892_201959750039_R06C01
## GSM4799893         651062    normal GSM4799893_201959750039_R07C01

Filtering and normalisation

mSet <- preprocessRaw(rgSet)
## Loading required package: IlluminaHumanMethylationEPICmanifest
mSetSw <- SWAN(mSet,verbose=FALSE)
## [SWAN] Preparing normalization subset
## EPIC
## [SWAN] Normalizing methylated channel
## [SWAN] Normalizing unmethylated channel
par(mfrow=c(1,2), cex=0.8)
densityByProbeType(mSet[,1], main = "Raw")
densityByProbeType(mSetSw[,1], main = "SWAN")
Figure 1. Normalisation of bead-array data with SWAN.

Figure 1. Normalisation of bead-array data with SWAN.

# include sex chromosomes
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]

# exclude SNP probes
mSetSw <- mapToGenome(mSetSw)
mSetSw_nosnp <- dropLociWithSnps(mSetSw)

# exclude sex chromosomes
xyprobes <- anno$Name[anno$chr %in% c("chrX","chrY")]
mSetFlt <- mSetSw[which(!rownames(mSetSw) %in% xyprobes),]
# include sex chromosomes
meth <- getMeth(mSetSw)
unmeth <- getUnmeth(mSetSw)
Mval <- log2((meth + 100)/(unmeth + 100))
beta <- getBeta(mSetSw)
dim(Mval)
## [1] 858338     74
# exclude sex chromosomes
meth <- getMeth(mSetFlt)
unmeth <- getUnmeth(mSetFlt)
Mval_flt <- log2((meth + 100)/(unmeth + 100))
beta_flt <- getBeta(mSetFlt)
dim(Mval_flt)
## [1] 839473     74

Infer sex

xprobes <- anno$Name[anno$chr %in% "chrX"]
yprobes <- anno$Name[anno$chr %in% "chrY"]

xdat <- Mval[which( rownames(Mval) %in% xprobes ),]
ydat <- Mval[which( rownames(Mval) %in% yprobes ),]
gsm <- sapply( strsplit(colnames(Mval),"_" ) , "[[" , 1)
plot(colMeans(xdat) , colMeans(ydat), cex=1)
text(colMeans(xdat) , colMeans(ydat),labels=gsm )
grid()
abline(v=-0.1,col="red",lty=2)

which( colMeans(xdat)  > -0.1 )
## GSM4799928_201959750126_R04C01 GSM4799929_201959750126_R05C01 
##                             41                             42 
## GSM4799958_201959750019_R06C01 GSM4799959_201959750019_R07C01 
##                             59                             60
sex_female <- as.numeric(colMeans(xdat)  > -0.1)

MDS analysis

# scree plot shows the amount of variation in a dataset that is accounted
# for by the first N principal components
myscree <- function(mx,n=10,main="") {
  pc <- princomp(mx)$sdev
  pcp <- pc/sum(pc)*100
  pcp <- pcp[1:10]
  barplot(pcp,cex.names = 1,las=2,ylim=c(0,60),
      ylab="percent (%) variance explained", main=main)
  text((0.5:length(pcp)*1.2),pcp,label=signif(pcp,3),pos=3,cex=0.8)
}

par(mfrow=c(2,1))
myscree(Mval,main="incl sex chr")
myscree(Mval_flt,main="excl sex chr")
Figure 2. Screeplot shows contribution of top principal components to the overall variation in the experiment.

Figure 2. Screeplot shows contribution of top principal components to the overall variation in the experiment.

par(mfrow=c(1,1))

#MDS plot 1

colnames(targets) <- gsub(" ","_",colnames(targets))
colnames(targets) <- gsub(":","_",colnames(targets))
targets$sex<-factor(sex_female)
targets$case<-factor(targets$`characteristics_ch1`)
sample_groups <- factor(targets$case)
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
## Warning in brewer.pal(n = length(levels(sample_groups)), name = "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
colours <- colour_palette[as.integer(factor(targets$case))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by case/ctrl",cex=0)
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 3. MDS plot coloured by ART classification.

Figure 3. MDS plot coloured by ART classification.

mydist <- plotMDS(Mval, labels=gsm,col=colours,main="sex chromosomes included")
Figure 3. MDS plot coloured by ART classification.

Figure 3. MDS plot coloured by ART classification.

mydist_flt <- plotMDS(Mval_flt, labels=gsm,col=colours,main="sex chromosomes excluded")
Figure 3. MDS plot coloured by ART classification.

Figure 3. MDS plot coloured by ART classification.

Differential analysis

Paired normal versus cancer tissue.

samplesheet<-targets
groups <- factor(samplesheet$case,levels=c("tumor: normal","tumor: tumor"))
sex <- factor(samplesheet$sex,levels=c("0","1"))
dim(Mval)
## [1] 858338     74
dim(beta)
## [1] 858338     74
GSE158422 <- dm_analysis(samplesheet=samplesheet,
  sex=sex,groups=groups,mx=Mval,name="GSE158422",
  myann=myann ,beta= beta)
## Your contrast returned 219137 individually significant probes. We recommend the default setting of pcutoff in dmrcate().
## Fitting chr1...
## Fitting chr2...
## Fitting chr3...
## Fitting chr4...
## Fitting chr5...
## Fitting chr6...
## Fitting chr7...
## Fitting chr8...
## Fitting chr9...
## Fitting chr10...
## Fitting chr11...
## Fitting chr12...
## Fitting chr13...
## Fitting chr14...
## Fitting chr15...
## Fitting chr16...
## Fitting chr17...
## Fitting chr18...
## Fitting chr19...
## Fitting chr20...
## Fitting chr21...
## Fitting chr22...
## Fitting chrX...
## Fitting chrY...
## Demarcating regions...
## Done!
## snapshotDate(): 2022-04-26
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache

## 
## RCircos.Core.Components initialized.
## Type ?RCircos.Reset.Plot.Parameters to see how to modify the core components.
## Tiles plot may use more than one track. Please select correct area for next track if necessary.
## 
## Tiles plot may use more than one track. Please select correct area for next track if necessary.

head(GSE158422$dma,10)
##         Row.names                       UCSC_RefGene_Name
## 5777   cg00159780                        REXO1L2P;REXO1L1
## 468211 cg14205001                                  NOTCH1
## 61184  cg01772014                               C20orf160
## 221392 cg06569947                                 DCUN1D2
## 444856 cg13531667                                     MCC
## 577564 cg17655624                         ZNF385A;ZNF385A
## 669100 cg20979986 SERINC5;SERINC5;SERINC5;SERINC5;SERINC5
## 657981 cg20568402                                 DCUN1D2
## 60887  cg01762663                             DOCK9;DOCK9
## 760860 cg24334634                                 DCUN1D2
##               Regulatory_Feature_Group             Islands_Name
## 5777                                     chr8:86573421-86575248
## 468211                                 chr9:139405089-139405292
## 61184  Unclassified_Cell_type_specific  chr20:30606492-30607174
## 221392                                                         
## 444856                                 chr5:112823256-112824304
## 577564 Unclassified_Cell_type_specific  chr12:54784900-54785238
## 669100 Unclassified_Cell_type_specific                         
## 657981                                                         
## 60887                                                          
## 760860                                                         
##        Relation_to_Island     logFC   AveExpr         t      P.Value
## 5777               Island -1.680582  1.119242 -13.42558 1.187923e-21
## 468211            S_Shore  1.063122  1.017852  13.40475 1.290365e-21
## 61184             N_Shore -1.258942 -2.131805 -13.37502 1.452267e-21
## 221392            OpenSea  1.689789  1.602801  13.18873 3.053315e-21
## 444856            S_Shore -1.948149 -2.464149 -13.15484 3.496891e-21
## 577564            S_Shore -1.601538 -2.430925 -13.10683 4.238623e-21
## 669100            OpenSea -1.947790 -1.504807 -13.04706 5.387984e-21
## 657981            OpenSea  1.678729  1.358215  13.03365 5.686433e-21
## 60887             OpenSea  1.415148  1.625403  12.95398 7.835603e-21
## 760860            OpenSea  1.896843  1.347877  12.89046 1.012375e-20
##           adj.P.Val        B
## 5777   4.155120e-16 38.63506
## 468211 4.155120e-16 38.55528
## 61184  4.155120e-16 38.44125
## 221392 6.003029e-16 37.72414
## 444856 6.003029e-16 37.59318
## 577564 6.063618e-16 37.40744
## 669100 6.101102e-16 37.17573
## 657981 6.101102e-16 37.12366
## 60887  7.472884e-16 36.81395
## 760860 8.689604e-16 36.56639
head(GSE158422$dmr,10)
## GRanges object with 10 ranges and 8 metadata columns:
##        seqnames              ranges strand |   no.cpgs min_smoothed_fdr
##           <Rle>           <IRanges>  <Rle> | <integer>        <numeric>
##    [1]     chr6   32034322-32059605      * |       215     1.66769e-108
##    [2]     chr6   33129419-33155135      * |       166      2.75845e-87
##    [3]     chr6   32013974-32033307      * |       157      1.70467e-94
##    [4]     chr7   27180888-27185512      * |        48      0.00000e+00
##    [5]     chr1   50880150-50893984      * |        59     8.70901e-138
##    [6]     chr2 223161771-223173061      * |        57      7.41084e-82
##    [7]     chr7   27140797-27148204      * |        46     3.68060e-130
##    [8]     chr2 176980837-176989586      * |        41     2.11869e-141
##    [9]     chr3 147105190-147116807      * |        48      1.31679e-71
##   [10]     chr2 177012117-177017797      * |        33     1.62824e-145
##            Stouffer       HMFDR       Fisher   maxdiff   meandiff
##           <numeric>   <numeric>    <numeric> <numeric>  <numeric>
##    [1]  0.00000e+00 2.89730e-06  0.00000e+00 -0.184036 -0.0643286
##    [2]  0.00000e+00 4.89484e-09  0.00000e+00 -0.197831 -0.0694411
##    [3]  0.00000e+00 1.88937e-07  0.00000e+00 -0.200274 -0.0744661
##    [4]  0.00000e+00 3.46237e-10  0.00000e+00  0.292363  0.1980467
##    [5] 1.89330e-299 3.98485e-14 1.49408e-316  0.317989  0.1324507
##    [6] 2.42511e-284 1.87577e-09 4.10695e-276  0.227861  0.1251198
##    [7] 5.60715e-278 4.34808e-12 5.69989e-268  0.284529  0.1601439
##    [8] 1.07663e-269 2.06317e-11 4.63765e-256  0.307304  0.1766871
##    [9] 6.29131e-247 1.84459e-10 3.08455e-236  0.280936  0.1565660
##   [10] 8.08145e-212 6.19307e-10 1.08398e-202  0.246288  0.1739056
##             overlapping.genes
##                   <character>
##    [1]        TNXB, RNA5SP206
##    [2]                COL11A2
##    [3]                   TNXB
##    [4] HOXA-AS3, HOXA3, RP1..
##    [5]                 DMRTA2
##    [6]          CCDC140, PAX3
##    [7] HOXA-AS2, HOXA2, HOXA3
##    [8] HOXD10, HOXD9, HOXD-..
##    [9]   ZIC4-AS1, ZIC1, ZIC4
##   [10]   HOXD3, MIR10B, HOXD4
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths
saveRDS(GSE158422,file="GSE158422.rds")
write.table(GSE158422$dma,file="GSE158422_limma.tsv",sep="\t",quote=FALSE)

save data object

save.image("GSE158422.Rdata")