This script uses the ROADMAP E062 PBMC reference epigenome to assess chromatin state enrichment of the adipose CpGs.


Setup

Load packages

library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)

Load data

load("../GOTO_Data/GOTO_results-full-blood.Rdata")

sig_cpgs <- (limma_base %>% filter(padj_fdr <= 0.05))$cpg
length(sig_cpgs)
## [1] 441

Annotation

manifest_hg19 (fetched on 4/4/2023 from https://zwdzwd.github.io/InfiniumAnnotation)

  • probeID as cpg - CpG ID
  • CpG_chrm as cpg_chr_hg19 - chromosome (hg19)
  • CpG_beg as cpg_start_hg19 - CpG start position (hg19)
  • CpG_end as cpg_end_hg19 - CpG end position (hg19)
  • probe_strand as cpg_strand - strand
  • gene_HGNC
manifest_hg19 <- read_tsv(
  "/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.manifest.tsv.gz")
## Rows: 865918 Columns: 57
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (21): CpG_chrm, probe_strand, probeID, channel, designType, nextBase, ne...
## dbl (24): CpG_beg, CpG_end, address_A, address_B, probeCpGcnt, context35, pr...
## lgl (12): posMatch, MASK_mapping, MASK_typeINextBaseSwitch, MASK_rmsk15, MAS...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
anno <- manifest_hg19 %>% 
  dplyr::select(
    cpg = probeID,
    cpg_chr = CpG_chrm,
    cpg_start = CpG_beg,
    cpg_end = CpG_end,
    cpg_strand = probe_strand,
    gene_HGNC
  ) %>% 
  mutate(
    cpg_chr = substr(cpg_chr,4,5)
  )

anno <- anno %>% 
  dplyr::filter(cpg %in% limma_base$cpg)
manifest_chrom <- read_tsv(
  "/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.REMC.chromHMM.tsv.gz"
)
## Rows: 865918 Columns: 131
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (129): CpG_chrm, probeID, E001, E002, E003, E004, E005, E006, E007, E008...
## dbl   (2): CpG_beg, CpG_end
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
manifest_chrom <- manifest_chrom %>% 
  dplyr::select(
    cpg = probeID,
    E062)

anno <- left_join(
  anno, manifest_chrom,
  by="cpg"
)

limma_base <- left_join(limma_base, anno, by="cpg")

Save

save(limma_base, file="../GOTO_Data/GOTO_results-full-blood.Rdata")

PBMC reference (E062)

Save chromatin states

states <- c("15_Quies", "14_ReprPCWk", "13_ReprPC", 
            "12_EnhBiv", "11_BivFlnk", "10_TssBiv", 
            "9_Het", "8_ZNF/Rpts", "7_Enh", 
            "6_EnhG", "5_TxWk", "4_Tx",  
            "3_TxFlnk", "2_TssAFlnk", "1_TssA")

Test enrichment

for(i in states){
  # Binary indicators
  res_road <- limma_base %>% 
    mutate(
      sig = ifelse(limma_base$cpg %in% sig_cpgs, 1, 0),
      chrom = ifelse(grepl(i, E062), 1, 0)
    )
  
  # GLM
  x <- glm(chrom ~ sig, family=binomial, data=res_road)
  out <- c(coef(summary(x))[2,],
           exp(cbind(coef(x), confint.default(x)))[2,])
  names(out) <- c('logOR', 'SE', 'z', 'p', 'OR', 'low_CI', 'upp_CI')
  out <- as.data.frame(t(out))
  out$Trait = i
  out <- out %>% dplyr::select(Trait, OR, logOR, 
                               low_CI, upp_CI, z, p) 
  
  if(i == states[1]){
    res <- out
  } else {
    res <- rbind(res, out)
  }
}

Adjust p-values

res$padj <- p.adjust(res$p, method='fdr')

Save results

write_csv(res %>% arrange(p, OR),
          file="../GOTO_Data/Tables/ST18.csv")

Data for plot

chrom <- res %>% 
  mutate(
    loglowCI = log(low_CI),
    loguppCI = log(upp_CI),
    padj = p.adjust(p, method='fdr')
  ) %>% 
  filter(OR < 200)

chrom %>% filter(padj < 0.05)
##        Trait        OR      logOR    low_CI    upp_CI         z            p
## 1  13_ReprPC 0.3419538 -1.0730798 0.1832235 0.6381953 -3.370677 0.0007498361
## 2 2_TssAFlnk 1.4998172  0.4053432 1.1682283 1.9255241  3.179678 0.0014743882
##         padj   loglowCI   loguppCI
## 1 0.01105791 -1.6970486 -0.4491109
## 2 0.01105791  0.1554883  0.6551982
chrom$fill <- ifelse(chrom$padj < 0.05, "Enriched", "Not Enriched")
chrom$invlogOR <- -chrom$logOR

Plot

plot <- chrom %>% 
  ggplot(aes(x = logOR, 
             y = reorder(Trait,-invlogOR),
             xmin = loglowCI,
             xmax = loguppCI)) +
  geom_vline(xintercept=0, linewidth=1, 
             color='grey60', linetype='dashed') +
  geom_errorbar(width=0.5,
                linewidth=1,
                position=position_dodge(width=0.9)) +
  geom_point(aes(fill=fill),
             size=3,
             shape=21,
             stroke=1.2,
             position=position_dodge(width=0.9)) +
  xlab('log(OR)') + ylab('') + xlim(c(-4,4)) +
  theme(axis.text = element_text(size=14, color = '#373334'),
        axis.title = element_text(size=16, hjust=0.5, 
                                  color = '#373334'),
        text=element_text(size=14),
        panel.background = element_rect(fill = 'white',
                                        color='#373334'),
        panel.grid.major = element_line(color = 'grey95'),
        panel.grid.minor = element_line(color = 'grey95'),
        plot.background = element_rect(fill = 'white'),
        axis.ticks.x = element_line(size=1))
print(plot)

Save

png("../GOTO_Data/Figures/Figure_5E.png")
print(plot)
dev.off()
## png 
##   2

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] MASS_7.3-60                                        
##  [2] ggpubr_0.4.0                                       
##  [3] ggrepel_0.9.1                                      
##  [4] lmerTest_3.1-3                                     
##  [5] lme4_1.1-30                                        
##  [6] Matrix_1.5-4.1                                     
##  [7] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [8] IlluminaHumanMethylationEPICmanifest_0.3.0         
##  [9] FlowSorted.BloodExtended.EPIC_1.1.1                
## [10] FlowSorted.Blood.EPIC_1.12.1                       
## [11] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [12] nlme_3.1-162                                       
## [13] quadprog_1.5-8                                     
## [14] genefilter_1.76.0                                  
## [15] ExperimentHub_2.2.1                                
## [16] AnnotationHub_3.2.2                                
## [17] BiocFileCache_2.2.1                                
## [18] dbplyr_2.2.1                                       
## [19] MuSiC_0.2.0                                        
## [20] nnls_1.4                                           
## [21] lubridate_1.9.2                                    
## [22] DNAmArray_2.0.0                                    
## [23] pls_2.8-2                                          
## [24] FDb.InfiniumMethylation.hg19_2.2.0                 
## [25] org.Hs.eg.db_3.14.0                                
## [26] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [27] GenomicFeatures_1.46.5                             
## [28] AnnotationDbi_1.56.2                               
## [29] minfi_1.40.0                                       
## [30] bumphunter_1.36.0                                  
## [31] locfit_1.5-9.8                                     
## [32] iterators_1.0.14                                   
## [33] foreach_1.5.2                                      
## [34] Biostrings_2.62.0                                  
## [35] XVector_0.34.0                                     
## [36] SummarizedExperiment_1.24.0                        
## [37] Biobase_2.58.0                                     
## [38] MatrixGenerics_1.10.0                              
## [39] matrixStats_1.0.0                                  
## [40] GenomicRanges_1.46.1                               
## [41] GenomeInfoDb_1.34.9                                
## [42] IRanges_2.32.0                                     
## [43] S4Vectors_0.36.2                                   
## [44] BiocGenerics_0.44.0                                
## [45] forcats_0.5.2                                      
## [46] stringr_1.5.0                                      
## [47] dplyr_1.1.3                                        
## [48] purrr_0.3.4                                        
## [49] readr_2.1.2                                        
## [50] tidyr_1.2.1                                        
## [51] tibble_3.2.1                                       
## [52] ggplot2_3.4.3                                      
## [53] tidyverse_1.3.2                                    
## [54] cli_3.6.1                                          
## [55] htmltools_0.5.5                                    
## [56] rlang_1.1.1                                        
## [57] rmarkdown_2.16                                     
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                    tidyselect_1.2.0             
##   [3] RSQLite_2.2.17                grid_4.2.2                   
##   [5] BiocParallel_1.32.6           cinaR_0.2.3                  
##   [7] munsell_0.5.0                 codetools_0.2-19             
##   [9] preprocessCore_1.60.2         withr_2.5.0                  
##  [11] colorspace_2.1-0              filelock_1.0.2               
##  [13] highr_0.10                    knitr_1.43                   
##  [15] rstudioapi_0.14               ggsignif_0.6.3               
##  [17] labeling_0.4.2                GenomeInfoDbData_1.2.9       
##  [19] MCMCpack_1.6-3                farver_2.1.1                 
##  [21] bit64_4.0.5                   rhdf5_2.42.1                 
##  [23] coda_0.19-4                   vctrs_0.6.3                  
##  [25] generics_0.1.3                xfun_0.39                    
##  [27] timechange_0.2.0              R6_2.5.1                     
##  [29] illuminaio_0.40.0             bitops_1.0-7                 
##  [31] rhdf5filters_1.10.1           cachem_1.0.8                 
##  [33] reshape_0.8.9                 DelayedArray_0.24.0          
##  [35] assertthat_0.2.1              vroom_1.5.7                  
##  [37] promises_1.2.0.1              BiocIO_1.8.0                 
##  [39] scales_1.2.1                  googlesheets4_1.0.1          
##  [41] gtable_0.3.3                  mcmc_0.9-7                   
##  [43] MatrixModels_0.5-1            splines_4.2.2                
##  [45] rstatix_0.7.0                 rtracklayer_1.54.0           
##  [47] gargle_1.5.0                  GEOquery_2.62.2              
##  [49] htm2txt_2.2.2                 broom_1.0.1                  
##  [51] abind_1.4-5                   BiocManager_1.30.21          
##  [53] yaml_2.3.7                    reshape2_1.4.4               
##  [55] modelr_0.1.9                  backports_1.4.1              
##  [57] httpuv_1.6.11                 tools_4.2.2                  
##  [59] nor1mix_1.3-0                 ellipsis_0.3.2               
##  [61] jquerylib_0.1.4               RColorBrewer_1.1-3           
##  [63] siggenes_1.68.0               Rcpp_1.0.10                  
##  [65] plyr_1.8.8                    sparseMatrixStats_1.10.0     
##  [67] progress_1.2.2                zlibbioc_1.44.0              
##  [69] RCurl_1.98-1.12               prettyunits_1.1.1            
##  [71] openssl_2.0.6                 haven_2.5.1                  
##  [73] fs_1.6.2                      magrittr_2.0.3               
##  [75] data.table_1.14.8             SparseM_1.81                 
##  [77] reprex_2.0.2                  googledrive_2.0.0            
##  [79] mime_0.12                     hms_1.1.2                    
##  [81] evaluate_0.21                 xtable_1.8-4                 
##  [83] XML_3.99-0.14                 mclust_6.0.0                 
##  [85] readxl_1.4.1                  compiler_4.2.2               
##  [87] biomaRt_2.50.3                crayon_1.5.2                 
##  [89] minqa_1.2.5                   later_1.3.1                  
##  [91] tzdb_0.4.0                    DBI_1.1.3                    
##  [93] rappdirs_0.3.3                boot_1.3-28.1                
##  [95] car_3.1-0                     pkgconfig_2.0.3              
##  [97] GenomicAlignments_1.30.0      numDeriv_2016.8-1.1          
##  [99] xml2_1.3.4                    annotate_1.72.0              
## [101] bslib_0.5.0                   rngtools_1.5.2               
## [103] multtest_2.50.0               beanplot_1.3.1               
## [105] rvest_1.0.3                   doRNG_1.8.6                  
## [107] scrime_1.3.5                  digest_0.6.31                
## [109] base64_2.0.1                  cellranger_1.1.0             
## [111] edgeR_3.40.2                  DelayedMatrixStats_1.16.0    
## [113] restfulr_0.0.15               curl_5.0.1                   
## [115] shiny_1.7.2                   Rsamtools_2.10.0             
## [117] quantreg_5.94                 nloptr_2.0.3                 
## [119] rjson_0.2.21                  lifecycle_1.0.3              
## [121] jsonlite_1.8.5                Rhdf5lib_1.20.0              
## [123] carData_3.0-5                 askpass_1.1                  
## [125] limma_3.54.2                  fansi_1.0.4                  
## [127] pillar_1.9.0                  lattice_0.21-8               
## [129] KEGGREST_1.34.0               fastmap_1.1.1                
## [131] httr_1.4.6                    survival_3.5-5               
## [133] interactiveDisplayBase_1.32.0 glue_1.6.2                   
## [135] png_0.1-8                     BiocVersion_3.16.0           
## [137] bit_4.0.5                     stringi_1.7.12               
## [139] sass_0.4.6                    HDF5Array_1.22.1             
## [141] blob_1.2.4                    memoise_2.0.1

Clear

rm(list=ls())