This script uses the ROADMAP E062 PBMC reference epigenome to assess chromatin state enrichment of the adipose CpGs.
Load packages
library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)
load("../GOTO_Data/GOTO_results-full-blood.Rdata")
sig_cpgs <- (limma_base %>% filter(padj_fdr <= 0.05))$cpg
length(sig_cpgs)
## [1] 441
manifest_hg19
(fetched on 4/4/2023 from https://zwdzwd.github.io/InfiniumAnnotation)
probeID
as cpg
- CpG IDCpG_chrm
as cpg_chr_hg19
- chromosome (hg19)CpG_beg
as cpg_start_hg19
- CpG start position (hg19)CpG_end
as cpg_end_hg19
- CpG end position (hg19)probe_strand
as cpg_strand
- strandgene_HGNC
manifest_hg19 <- read_tsv(
"/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.manifest.tsv.gz")
## Rows: 865918 Columns: 57
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (21): CpG_chrm, probe_strand, probeID, channel, designType, nextBase, ne...
## dbl (24): CpG_beg, CpG_end, address_A, address_B, probeCpGcnt, context35, pr...
## lgl (12): posMatch, MASK_mapping, MASK_typeINextBaseSwitch, MASK_rmsk15, MAS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
anno <- manifest_hg19 %>%
dplyr::select(
cpg = probeID,
cpg_chr = CpG_chrm,
cpg_start = CpG_beg,
cpg_end = CpG_end,
cpg_strand = probe_strand,
gene_HGNC
) %>%
mutate(
cpg_chr = substr(cpg_chr,4,5)
)
anno <- anno %>%
dplyr::filter(cpg %in% limma_base$cpg)
manifest_chrom <- read_tsv(
"/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.REMC.chromHMM.tsv.gz"
)
## Rows: 865918 Columns: 131
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (129): CpG_chrm, probeID, E001, E002, E003, E004, E005, E006, E007, E008...
## dbl (2): CpG_beg, CpG_end
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
manifest_chrom <- manifest_chrom %>%
dplyr::select(
cpg = probeID,
E062)
anno <- left_join(
anno, manifest_chrom,
by="cpg"
)
limma_base <- left_join(limma_base, anno, by="cpg")
Save
save(limma_base, file="../GOTO_Data/GOTO_results-full-blood.Rdata")
Save chromatin states
states <- c("15_Quies", "14_ReprPCWk", "13_ReprPC",
"12_EnhBiv", "11_BivFlnk", "10_TssBiv",
"9_Het", "8_ZNF/Rpts", "7_Enh",
"6_EnhG", "5_TxWk", "4_Tx",
"3_TxFlnk", "2_TssAFlnk", "1_TssA")
Test enrichment
for(i in states){
# Binary indicators
res_road <- limma_base %>%
mutate(
sig = ifelse(limma_base$cpg %in% sig_cpgs, 1, 0),
chrom = ifelse(grepl(i, E062), 1, 0)
)
# GLM
x <- glm(chrom ~ sig, family=binomial, data=res_road)
out <- c(coef(summary(x))[2,],
exp(cbind(coef(x), confint.default(x)))[2,])
names(out) <- c('logOR', 'SE', 'z', 'p', 'OR', 'low_CI', 'upp_CI')
out <- as.data.frame(t(out))
out$Trait = i
out <- out %>% dplyr::select(Trait, OR, logOR,
low_CI, upp_CI, z, p)
if(i == states[1]){
res <- out
} else {
res <- rbind(res, out)
}
}
Adjust p-values
res$padj <- p.adjust(res$p, method='fdr')
Save results
write_csv(res %>% arrange(p, OR),
file="../GOTO_Data/Tables/ST18.csv")
Data for plot
chrom <- res %>%
mutate(
loglowCI = log(low_CI),
loguppCI = log(upp_CI),
padj = p.adjust(p, method='fdr')
) %>%
filter(OR < 200)
chrom %>% filter(padj < 0.05)
## Trait OR logOR low_CI upp_CI z p
## 1 13_ReprPC 0.3419538 -1.0730798 0.1832235 0.6381953 -3.370677 0.0007498361
## 2 2_TssAFlnk 1.4998172 0.4053432 1.1682283 1.9255241 3.179678 0.0014743882
## padj loglowCI loguppCI
## 1 0.01105791 -1.6970486 -0.4491109
## 2 0.01105791 0.1554883 0.6551982
chrom$fill <- ifelse(chrom$padj < 0.05, "Enriched", "Not Enriched")
chrom$invlogOR <- -chrom$logOR
Plot
plot <- chrom %>%
ggplot(aes(x = logOR,
y = reorder(Trait,-invlogOR),
xmin = loglowCI,
xmax = loguppCI)) +
geom_vline(xintercept=0, linewidth=1,
color='grey60', linetype='dashed') +
geom_errorbar(width=0.5,
linewidth=1,
position=position_dodge(width=0.9)) +
geom_point(aes(fill=fill),
size=3,
shape=21,
stroke=1.2,
position=position_dodge(width=0.9)) +
xlab('log(OR)') + ylab('') + xlim(c(-4,4)) +
theme(axis.text = element_text(size=14, color = '#373334'),
axis.title = element_text(size=16, hjust=0.5,
color = '#373334'),
text=element_text(size=14),
panel.background = element_rect(fill = 'white',
color='#373334'),
panel.grid.major = element_line(color = 'grey95'),
panel.grid.minor = element_line(color = 'grey95'),
plot.background = element_rect(fill = 'white'),
axis.ticks.x = element_line(size=1))
print(plot)
Save
png("../GOTO_Data/Figures/Figure_5E.png")
print(plot)
dev.off()
## png
## 2
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] MASS_7.3-60
## [2] ggpubr_0.4.0
## [3] ggrepel_0.9.1
## [4] lmerTest_3.1-3
## [5] lme4_1.1-30
## [6] Matrix_1.5-4.1
## [7] IlluminaHumanMethylation450kmanifest_0.4.0
## [8] IlluminaHumanMethylationEPICmanifest_0.3.0
## [9] FlowSorted.BloodExtended.EPIC_1.1.1
## [10] FlowSorted.Blood.EPIC_1.12.1
## [11] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [12] nlme_3.1-162
## [13] quadprog_1.5-8
## [14] genefilter_1.76.0
## [15] ExperimentHub_2.2.1
## [16] AnnotationHub_3.2.2
## [17] BiocFileCache_2.2.1
## [18] dbplyr_2.2.1
## [19] MuSiC_0.2.0
## [20] nnls_1.4
## [21] lubridate_1.9.2
## [22] DNAmArray_2.0.0
## [23] pls_2.8-2
## [24] FDb.InfiniumMethylation.hg19_2.2.0
## [25] org.Hs.eg.db_3.14.0
## [26] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [27] GenomicFeatures_1.46.5
## [28] AnnotationDbi_1.56.2
## [29] minfi_1.40.0
## [30] bumphunter_1.36.0
## [31] locfit_1.5-9.8
## [32] iterators_1.0.14
## [33] foreach_1.5.2
## [34] Biostrings_2.62.0
## [35] XVector_0.34.0
## [36] SummarizedExperiment_1.24.0
## [37] Biobase_2.58.0
## [38] MatrixGenerics_1.10.0
## [39] matrixStats_1.0.0
## [40] GenomicRanges_1.46.1
## [41] GenomeInfoDb_1.34.9
## [42] IRanges_2.32.0
## [43] S4Vectors_0.36.2
## [44] BiocGenerics_0.44.0
## [45] forcats_0.5.2
## [46] stringr_1.5.0
## [47] dplyr_1.1.3
## [48] purrr_0.3.4
## [49] readr_2.1.2
## [50] tidyr_1.2.1
## [51] tibble_3.2.1
## [52] ggplot2_3.4.3
## [53] tidyverse_1.3.2
## [54] cli_3.6.1
## [55] htmltools_0.5.5
## [56] rlang_1.1.1
## [57] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 tidyselect_1.2.0
## [3] RSQLite_2.2.17 grid_4.2.2
## [5] BiocParallel_1.32.6 cinaR_0.2.3
## [7] munsell_0.5.0 codetools_0.2-19
## [9] preprocessCore_1.60.2 withr_2.5.0
## [11] colorspace_2.1-0 filelock_1.0.2
## [13] highr_0.10 knitr_1.43
## [15] rstudioapi_0.14 ggsignif_0.6.3
## [17] labeling_0.4.2 GenomeInfoDbData_1.2.9
## [19] MCMCpack_1.6-3 farver_2.1.1
## [21] bit64_4.0.5 rhdf5_2.42.1
## [23] coda_0.19-4 vctrs_0.6.3
## [25] generics_0.1.3 xfun_0.39
## [27] timechange_0.2.0 R6_2.5.1
## [29] illuminaio_0.40.0 bitops_1.0-7
## [31] rhdf5filters_1.10.1 cachem_1.0.8
## [33] reshape_0.8.9 DelayedArray_0.24.0
## [35] assertthat_0.2.1 vroom_1.5.7
## [37] promises_1.2.0.1 BiocIO_1.8.0
## [39] scales_1.2.1 googlesheets4_1.0.1
## [41] gtable_0.3.3 mcmc_0.9-7
## [43] MatrixModels_0.5-1 splines_4.2.2
## [45] rstatix_0.7.0 rtracklayer_1.54.0
## [47] gargle_1.5.0 GEOquery_2.62.2
## [49] htm2txt_2.2.2 broom_1.0.1
## [51] abind_1.4-5 BiocManager_1.30.21
## [53] yaml_2.3.7 reshape2_1.4.4
## [55] modelr_0.1.9 backports_1.4.1
## [57] httpuv_1.6.11 tools_4.2.2
## [59] nor1mix_1.3-0 ellipsis_0.3.2
## [61] jquerylib_0.1.4 RColorBrewer_1.1-3
## [63] siggenes_1.68.0 Rcpp_1.0.10
## [65] plyr_1.8.8 sparseMatrixStats_1.10.0
## [67] progress_1.2.2 zlibbioc_1.44.0
## [69] RCurl_1.98-1.12 prettyunits_1.1.1
## [71] openssl_2.0.6 haven_2.5.1
## [73] fs_1.6.2 magrittr_2.0.3
## [75] data.table_1.14.8 SparseM_1.81
## [77] reprex_2.0.2 googledrive_2.0.0
## [79] mime_0.12 hms_1.1.2
## [81] evaluate_0.21 xtable_1.8-4
## [83] XML_3.99-0.14 mclust_6.0.0
## [85] readxl_1.4.1 compiler_4.2.2
## [87] biomaRt_2.50.3 crayon_1.5.2
## [89] minqa_1.2.5 later_1.3.1
## [91] tzdb_0.4.0 DBI_1.1.3
## [93] rappdirs_0.3.3 boot_1.3-28.1
## [95] car_3.1-0 pkgconfig_2.0.3
## [97] GenomicAlignments_1.30.0 numDeriv_2016.8-1.1
## [99] xml2_1.3.4 annotate_1.72.0
## [101] bslib_0.5.0 rngtools_1.5.2
## [103] multtest_2.50.0 beanplot_1.3.1
## [105] rvest_1.0.3 doRNG_1.8.6
## [107] scrime_1.3.5 digest_0.6.31
## [109] base64_2.0.1 cellranger_1.1.0
## [111] edgeR_3.40.2 DelayedMatrixStats_1.16.0
## [113] restfulr_0.0.15 curl_5.0.1
## [115] shiny_1.7.2 Rsamtools_2.10.0
## [117] quantreg_5.94 nloptr_2.0.3
## [119] rjson_0.2.21 lifecycle_1.0.3
## [121] jsonlite_1.8.5 Rhdf5lib_1.20.0
## [123] carData_3.0-5 askpass_1.1
## [125] limma_3.54.2 fansi_1.0.4
## [127] pillar_1.9.0 lattice_0.21-8
## [129] KEGGREST_1.34.0 fastmap_1.1.1
## [131] httr_1.4.6 survival_3.5-5
## [133] interactiveDisplayBase_1.32.0 glue_1.6.2
## [135] png_0.1-8 BiocVersion_3.16.0
## [137] bit_4.0.5 stringi_1.7.12
## [139] sass_0.4.6 HDF5Array_1.22.1
## [141] blob_1.2.4 memoise_2.0.1
Clear
rm(list=ls())