This script uses the ROADMAP E063 adipose reference epigenome to assess chromatin state enrichment of the adipose CpGs.
Load packages
library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)
# Load the results file (expected to provide `limma_base`) and collect the
# IDs of CpGs whose padj_fdr is at most 0.05
load("../GOTO_Data/GOTO_results-full-fat.Rdata")
sig_cpgs <- limma_base %>%
  filter(padj_fdr <= 0.05) %>%
  pull(cpg)
length(sig_cpgs)
## [1] 230
manifest_hg19 (fetched on 4/4/2023 from https://zwdzwd.github.io/InfiniumAnnotation)
- probeID as cpg - CpG ID
- CpG_chrm as cpg_chr_hg19 - chromosome (hg19)
- CpG_beg as cpg_start_hg19 - CpG start position (hg19)
- CpG_end as cpg_end_hg19 - CpG end position (hg19)
- probe_strand as cpg_strand - strand
- gene_HGNC
# Read the gzipped, tab-separated EPIC hg19 manifest
manifest_hg19 <- read_tsv(
  file = "/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.manifest.tsv.gz"
)
## Rows: 865918 Columns: 57
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (21): CpG_chrm, probe_strand, probeID, channel, designType, nextBase, ne...
## dbl (24): CpG_beg, CpG_end, address_A, address_B, probeCpGcnt, context35, pr...
## lgl (12): posMatch, MASK_mapping, MASK_typeINextBaseSwitch, MASK_rmsk15, MAS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build the CpG annotation in a single pipeline:
#   1. keep and rename the probe ID, position, strand and gene columns
#   2. keep characters 4-5 of the chromosome name (drops a leading "chr";
#      assumes names of the form "chr1", "chrX", ...)
#   3. restrict to CpGs that are present in limma_base
anno <- manifest_hg19 %>%
  dplyr::select(
    cpg = probeID,
    cpg_chr = CpG_chrm,
    cpg_start = CpG_beg,
    cpg_end = CpG_end,
    cpg_strand = probe_strand,
    gene_HGNC
  ) %>%
  mutate(cpg_chr = substr(cpg_chr, start = 4, stop = 5)) %>%
  dplyr::filter(cpg %in% limma_base$cpg)
# Read the gzipped, tab-separated chromHMM annotation of the EPIC probes
manifest_chrom <- read_tsv(
  file = "/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.REMC.chromHMM.tsv.gz"
)
## Rows: 865918 Columns: 131
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (129): CpG_chrm, probeID, E001, E002, E003, E004, E005, E006, E007, E008...
## dbl (2): CpG_beg, CpG_end
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the probe ID and the E063 chromatin state column
manifest_chrom <- manifest_chrom %>%
  dplyr::select(cpg = probeID, E063)

# Add the E063 chromatin state to the CpG annotation
anno <- left_join(anno, manifest_chrom, by = "cpg")

# Add the annotation to the results. Only columns that limma_base does not
# already carry are joined: this script writes limma_base back to the file it
# was loaded from, so on a re-run a plain join would create suffixed duplicates
# (e.g. E063.x / E063.y) and the later references to E063 would fail.
limma_base <- left_join(
  limma_base,
  dplyr::select(
    anno,
    cpg,
    all_of(setdiff(colnames(anno), colnames(limma_base)))
  ),
  by = "cpg"
)
Save
# Write the annotated limma_base back to the results file (only this object
# is stored in the file)
save(
  list = "limma_base",
  file = "../GOTO_Data/GOTO_results-full-fat.Rdata"
)
Define chromatin states
# Chromatin state labels, written in ascending order and reversed so that the
# vector runs from "15_Quies" down to "1_TssA"
states <- rev(c(
  "1_TssA", "2_TssAFlnk", "3_TxFlnk",
  "4_Tx", "5_TxWk", "6_EnhG",
  "7_Enh", "8_ZNF/Rpts", "9_Het",
  "10_TssBiv", "11_BivFlnk", "12_EnhBiv",
  "13_ReprPC", "14_ReprPCWk", "15_Quies"
))
Test enrichment
# Enrichment of the significant CpGs in each chromatin state.
# For every state a logistic regression of state membership (chrom) on
# significance (sig) is fitted; the `sig` coefficient is the log odds ratio.
# The result `res` has one row per state with columns
# Trait, OR, logOR, low_CI, upp_CI, z, p.

# The significance indicator is the same for every state, so build it once
res_road <- limma_base
res_road$sig <- as.numeric(res_road$cpg %in% sig_cpgs)

res <- lapply(states, function(state) {
  # Binary indicator: 1 when the E063 annotation contains the state name.
  # fixed = TRUE matches the name literally rather than as a regular
  # expression; a missing annotation yields 0
  state_data <- res_road
  state_data$chrom <- as.numeric(
    grepl(state, state_data$E063, fixed = TRUE)
  )

  # GLM
  fit <- glm(chrom ~ sig, family = binomial, data = state_data)

  # Select the `sig` coefficient by name instead of by row position
  est <- coef(summary(fit))["sig", ]
  # Wald interval on the log-odds scale; exponentiated to the OR scale below
  ci <- confint.default(fit)["sig", ]

  data.frame(
    Trait = state,
    OR = exp(est[["Estimate"]]),
    logOR = est[["Estimate"]],
    low_CI = exp(ci[[1]]),
    upp_CI = exp(ci[[2]]),
    z = est[["z value"]],
    p = est[["Pr(>|z|)"]]
  )
})

# Combine the per-state rows in one step instead of growing with rbind()
res <- bind_rows(res)
Adjust p-values
# FDR-adjust the p-values over all rows of the enrichment table
res <- res %>%
  mutate(padj = p.adjust(p, method = "fdr"))
Save results
# Sort by p-value, then by odds ratio, and write the table to disk
res %>%
  arrange(p, OR) %>%
  write_csv(file = "../GOTO_Data/Tables/ST12.csv")
Data for plot
# Plot data: confidence limits on the log scale plus FDR-adjusted p-values.
# The adjustment is done before filtering so it covers every row of `res`.
chrom <- res %>%
  mutate(
    loglowCI = log(low_CI),
    loguppCI = log(upp_CI),
    padj = p.adjust(p, method = "fdr")
  )

# Drop rows with an odds ratio of 200 or more
chrom <- chrom %>%
  filter(OR < 200)

# Show the rows with an adjusted p-value below 0.05
filter(chrom, padj < 0.05)
## Trait OR logOR low_CI upp_CI z p
## 1 15_Quies 1.6534934 0.5028903 1.25125770 2.1850339 3.536066 4.061332e-04
## 2 14_ReprPCWk 1.5007791 0.4059844 1.06344590 2.1179620 2.309969 2.088988e-02
## 3 13_ReprPC 7.7623481 2.0492849 5.81655137 10.3590675 13.918364 4.899897e-44
## 4 7_Enh 0.3798228 -0.9680503 0.19507278 0.7395465 -2.847444 4.407182e-03
## 5 5_TxWk 0.5178196 -0.6581283 0.30711735 0.8730773 -2.469209 1.354120e-02
## 6 4_Tx 0.3265452 -1.1191868 0.15403190 0.6922708 -2.919271 3.508511e-03
## 7 2_TssAFlnk 0.1271027 -2.0627601 0.04788246 0.3373905 -4.141309 3.453294e-05
## 8 1_TssA 0.1050503 -2.2533156 0.03366033 0.3278510 -3.880451 1.042631e-04
## padj loglowCI loguppCI
## 1 1.523000e-03 0.22414921 0.7816314
## 2 3.916852e-02 0.06151448 0.7504543
## 3 7.349845e-43 1.76070754 2.3378622
## 4 1.101796e-02 -1.63438255 -0.3017181
## 5 2.901685e-02 -1.18052535 -0.1357312
## 6 1.052553e-02 -1.87059553 -0.3677780
## 7 2.589971e-04 -3.03900593 -1.0865143
## 8 5.213154e-04 -3.39143535 -1.1151959
# Label each row by significance (used as point fill) and store the negated
# log odds ratio (used for ordering the y axis)
chrom <- chrom %>%
  mutate(
    fill = ifelse(padj < 0.05, "Enriched", "Not Enriched"),
    invlogOR = -logOR
  )
Plot
# Forest plot of the log odds ratio per chromatin state with its confidence
# interval; points are filled by the `fill` column
plot <- chrom %>%
  ggplot(aes(
    x = logOR,
    # -invlogOR is the log odds ratio itself, so states are ordered by it
    y = reorder(Trait, -invlogOR),
    xmin = loglowCI,
    xmax = loguppCI
  )) +
  # Reference line at log(OR) = 0, i.e. no enrichment or depletion
  geom_vline(
    xintercept = 0, linewidth = 1,
    color = 'grey60', linetype = 'dashed'
  ) +
  geom_errorbar(
    width = 0.5,
    linewidth = 1,
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    aes(fill = fill),
    size = 3,
    shape = 21,
    stroke = 1.2,
    position = position_dodge(width = 0.9)
  ) +
  xlab('log(OR)') + ylab('') + xlim(c(-4, 4)) +
  theme(
    axis.text = element_text(size = 14, color = '#373334'),
    axis.title = element_text(size = 16, hjust = 0.5, color = '#373334'),
    text = element_text(size = 14),
    panel.background = element_rect(fill = 'white', color = '#373334'),
    panel.grid.major = element_line(color = 'grey95'),
    panel.grid.minor = element_line(color = 'grey95'),
    plot.background = element_rect(fill = 'white'),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated since ggplot2 3.4.0 and inconsistent with the geoms above
    axis.ticks.x = element_line(linewidth = 1)
  )
print(plot)
Save
# Render the plot into a PNG file (device opened with its default size),
# then close the device so the file is written
png(filename = "../GOTO_Data/Figures/Figure_3C.png")
print(plot)
dev.off()
## png
## 2
# Print the R version, platform and package versions used for this run
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] circlize_0.4.15
## [2] ComplexHeatmap_2.14.0
## [3] RColorBrewer_1.1-3
## [4] pheatmap_1.0.12
## [5] clusterProfiler_4.2.2
## [6] AnnotationHub_3.2.2
## [7] BiocFileCache_2.2.1
## [8] dbplyr_2.2.1
## [9] cinaR_0.2.3
## [10] edgeR_3.40.2
## [11] ggpubr_0.4.0
## [12] GEOquery_2.62.2
## [13] MuSiC_0.2.0
## [14] nnls_1.4
## [15] gplots_3.1.3
## [16] plotly_4.10.1
## [17] SeuratObject_4.1.3
## [18] Seurat_4.3.0
## [19] gridExtra_2.3
## [20] lattice_0.21-8
## [21] bacon_1.22.0
## [22] ellipse_0.4.5
## [23] methylGSA_1.12.0
## [24] sva_3.42.0
## [25] genefilter_1.76.0
## [26] mgcv_1.8-42
## [27] nlme_3.1-162
## [28] limma_3.54.2
## [29] lmerTest_3.1-3
## [30] lme4_1.1-30
## [31] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [32] snpStats_1.44.0
## [33] survival_3.5-5
## [34] ggrepel_0.9.1
## [35] ggfortify_0.4.14
## [36] irlba_2.3.5.1
## [37] Matrix_1.5-4.1
## [38] omicsPrint_1.14.0
## [39] MASS_7.3-60
## [40] DNAmArray_2.0.0
## [41] pls_2.8-2
## [42] FDb.InfiniumMethylation.hg19_2.2.0
## [43] org.Hs.eg.db_3.14.0
## [44] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [45] GenomicFeatures_1.46.5
## [46] AnnotationDbi_1.56.2
## [47] IlluminaHumanMethylationEPICmanifest_0.3.0
## [48] minfi_1.40.0
## [49] bumphunter_1.36.0
## [50] locfit_1.5-9.8
## [51] iterators_1.0.14
## [52] foreach_1.5.2
## [53] Biostrings_2.62.0
## [54] XVector_0.34.0
## [55] SummarizedExperiment_1.24.0
## [56] Biobase_2.58.0
## [57] MatrixGenerics_1.10.0
## [58] matrixStats_1.0.0
## [59] GenomicRanges_1.46.1
## [60] GenomeInfoDb_1.34.9
## [61] IRanges_2.32.0
## [62] S4Vectors_0.36.2
## [63] BiocGenerics_0.44.0
## [64] BiocParallel_1.32.6
## [65] MethylAid_1.28.0
## [66] forcats_0.5.2
## [67] stringr_1.5.0
## [68] dplyr_1.1.3
## [69] purrr_0.3.4
## [70] readr_2.1.2
## [71] tidyr_1.2.1
## [72] tibble_3.2.1
## [73] ggplot2_3.4.3
## [74] tidyverse_1.3.2
## [75] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] graphlayouts_0.8.1
## [2] pbapply_1.7-0
## [3] haven_2.5.1
## [4] vctrs_0.6.3
## [5] beanplot_1.3.1
## [6] blob_1.2.4
## [7] spatstat.data_3.0-1
## [8] later_1.3.1
## [9] nloptr_2.0.3
## [10] DBI_1.1.3
## [11] rappdirs_0.3.3
## [12] uwot_0.1.14
## [13] zlibbioc_1.44.0
## [14] MatrixModels_0.5-1
## [15] GlobalOptions_0.1.2
## [16] htmlwidgets_1.5.4
## [17] future_1.32.0
## [18] leiden_0.4.3
## [19] illuminaio_0.40.0
## [20] tidygraph_1.2.2
## [21] Rcpp_1.0.10
## [22] KernSmooth_2.23-21
## [23] promises_1.2.0.1
## [24] DelayedArray_0.24.0
## [25] magick_2.7.4
## [26] fs_1.6.2
## [27] fastmatch_1.1-3
## [28] digest_0.6.31
## [29] png_0.1-8
## [30] nor1mix_1.3-0
## [31] sctransform_0.3.5
## [32] scatterpie_0.1.8
## [33] cowplot_1.1.1
## [34] DOSE_3.20.1
## [35] ggraph_2.0.6
## [36] pkgconfig_2.0.3
## [37] GO.db_3.14.0
## [38] gridBase_0.4-7
## [39] spatstat.random_3.1-5
## [40] DelayedMatrixStats_1.16.0
## [41] minqa_1.2.5
## [42] reticulate_1.30
## [43] GetoptLong_1.0.5
## [44] xfun_0.39
## [45] bslib_0.5.0
## [46] zoo_1.8-12
## [47] tidyselect_1.2.0
## [48] reshape2_1.4.4
## [49] ica_1.0-3
## [50] viridisLite_0.4.2
## [51] rtracklayer_1.54.0
## [52] rlang_1.1.1
## [53] hexbin_1.28.3
## [54] jquerylib_0.1.4
## [55] glue_1.6.2
## [56] modelr_0.1.9
## [57] ggsignif_0.6.3
## [58] labeling_0.4.2
## [59] SparseM_1.81
## [60] httpuv_1.6.11
## [61] preprocessCore_1.60.2
## [62] reactome.db_1.77.0
## [63] DO.db_2.9
## [64] annotate_1.72.0
## [65] jsonlite_1.8.5
## [66] bit_4.0.5
## [67] mime_0.12
## [68] Rsamtools_2.10.0
## [69] stringi_1.7.12
## [70] spatstat.sparse_3.0-1
## [71] scattermore_0.8
## [72] spatstat.explore_3.1-0
## [73] yulab.utils_0.0.6
## [74] quadprog_1.5-8
## [75] bitops_1.0-7
## [76] cli_3.6.1
## [77] rhdf5filters_1.10.1
## [78] RSQLite_2.2.17
## [79] data.table_1.14.8
## [80] timechange_0.2.0
## [81] rstudioapi_0.14
## [82] GenomicAlignments_1.30.0
## [83] qvalue_2.26.0
## [84] listenv_0.9.0
## [85] miniUI_0.1.1.1
## [86] gridGraphics_0.5-1
## [87] readxl_1.4.1
## [88] lifecycle_1.0.3
## [89] htm2txt_2.2.2
## [90] munsell_0.5.0
## [91] cellranger_1.1.0
## [92] caTools_1.18.2
## [93] codetools_0.2-19
## [94] coda_0.19-4
## [95] MultiAssayExperiment_1.20.0
## [96] lmtest_0.9-40
## [97] missMethyl_1.28.0
## [98] xtable_1.8-4
## [99] ROCR_1.0-11
## [100] googlesheets4_1.0.1
## [101] BiocManager_1.30.21
## [102] abind_1.4-5
## [103] farver_2.1.1
## [104] parallelly_1.36.0
## [105] RANN_2.6.1
## [106] aplot_0.1.7
## [107] askpass_1.1
## [108] ggtree_3.2.1
## [109] BiocIO_1.8.0
## [110] RcppAnnoy_0.0.20
## [111] goftest_1.2-3
## [112] patchwork_1.1.2
## [113] cluster_2.1.4
## [114] future.apply_1.11.0
## [115] tidytree_0.4.0
## [116] ellipsis_0.3.2
## [117] prettyunits_1.1.1
## [118] lubridate_1.9.2
## [119] ggridges_0.5.4
## [120] googledrive_2.0.0
## [121] reprex_2.0.2
## [122] mclust_6.0.0
## [123] igraph_1.4.3
## [124] multtest_2.50.0
## [125] fgsea_1.20.0
## [126] gargle_1.5.0
## [127] spatstat.utils_3.0-3
## [128] htmltools_0.5.5
## [129] yaml_2.3.7
## [130] utf8_1.2.3
## [131] MCMCpack_1.6-3
## [132] interactiveDisplayBase_1.32.0
## [133] XML_3.99-0.14
## [134] withr_2.5.0
## [135] fitdistrplus_1.1-11
## [136] bit64_4.0.5
## [137] rngtools_1.5.2
## [138] doRNG_1.8.6
## [139] progressr_0.13.0
## [140] GOSemSim_2.20.0
## [141] memoise_2.0.1
## [142] evaluate_0.21
## [143] tzdb_0.4.0
## [144] curl_5.0.1
## [145] fansi_1.0.4
## [146] highr_0.10
## [147] tensor_1.5
## [148] cachem_1.0.8
## [149] deldir_1.0-9
## [150] rjson_0.2.21
## [151] rstatix_0.7.0
## [152] clue_0.3-64
## [153] tools_4.2.2
## [154] sass_0.4.6
## [155] magrittr_2.0.3
## [156] RCurl_1.98-1.12
## [157] car_3.1-0
## [158] ape_5.7-1
## [159] ggplotify_0.1.0
## [160] xml2_1.3.4
## [161] httr_1.4.6
## [162] assertthat_0.2.1
## [163] boot_1.3-28.1
## [164] globals_0.16.2
## [165] R6_2.5.1
## [166] Rhdf5lib_1.20.0
## [167] progress_1.2.2
## [168] KEGGREST_1.34.0
## [169] treeio_1.18.1
## [170] shape_1.4.6
## [171] gtools_3.9.4
## [172] statmod_1.5.0
## [173] BiocVersion_3.16.0
## [174] HDF5Array_1.22.1
## [175] rhdf5_2.42.1
## [176] splines_4.2.2
## [177] carData_3.0-5
## [178] ggfun_0.0.7
## [179] colorspace_2.1-0
## [180] generics_0.1.3
## [181] RobustRankAggreg_1.2.1
## [182] pillar_1.9.0
## [183] tweenr_2.0.2
## [184] sp_1.6-1
## [185] GenomeInfoDbData_1.2.9
## [186] plyr_1.8.8
## [187] gtable_0.3.3
## [188] rvest_1.0.3
## [189] restfulr_0.0.15
## [190] knitr_1.43
## [191] shadowtext_0.1.2
## [192] biomaRt_2.50.3
## [193] fastmap_1.1.1
## [194] Cairo_1.6-0
## [195] doParallel_1.0.17
## [196] quantreg_5.94
## [197] broom_1.0.1
## [198] openssl_2.0.6
## [199] scales_1.2.1
## [200] filelock_1.0.2
## [201] backports_1.4.1
## [202] RaggedExperiment_1.18.0
## [203] base64_2.0.1
## [204] vroom_1.5.7
## [205] enrichplot_1.14.2
## [206] mcmc_0.9-7
## [207] hms_1.1.2
## [208] ggforce_0.3.4
## [209] scrime_1.3.5
## [210] Rtsne_0.16
## [211] shiny_1.7.2
## [212] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [213] polyclip_1.10-4
## [214] numDeriv_2016.8-1.1
## [215] siggenes_1.68.0
## [216] lazyeval_0.2.2
## [217] crayon_1.5.2
## [218] downloader_0.4
## [219] sparseMatrixStats_1.10.0
## [220] viridis_0.6.2
## [221] reshape_0.8.9
## [222] compiler_4.2.2
## [223] spatstat.geom_3.2-1
Clear
# Remove every object that ls() lists in the current environment
rm(list=ls())