This script uses the ROADMAP E107 and E108 muscle reference epigenomes to assess chromatin state enrichment of the muscle CpGs.
Load packages
library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)
# Load the saved results (expected to provide `limma_base`) and collect the
# IDs of the CpGs whose FDR-adjusted p-value is at most 0.05.
load(file = "../GOTO_Data/GOTO_results-full-muscle-adj.Rdata")
sig_cpgs <- limma_base %>%
  filter(padj_fdr <= 0.05) %>%
  dplyr::pull(cpg)
length(sig_cpgs)
## [1] 162
manifest_hg19 (fetched in 2023 from https://zwdzwd.github.io/InfiniumAnnotation)
probeID as cpg - CpG ID
CpG_chrm as cpg_chr_hg19 - chromosome (hg19)
CpG_beg as cpg_start_hg19 - CpG start position (hg19)
CpG_end as cpg_end_hg19 - CpG end position (hg19)
probe_strand as cpg_strand - strand
gene_HGNC
manifest_hg19 <- read_tsv(
"/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.manifest.tsv.gz")
## Rows: 865918 Columns: 57
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (21): CpG_chrm, probe_strand, probeID, channel, designType, nextBase, ne...
## dbl (24): CpG_beg, CpG_end, address_A, address_B, probeCpGcnt, context35, pr...
## lgl (12): posMatch, MASK_mapping, MASK_typeINextBaseSwitch, MASK_rmsk15, MAS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build the per-CpG annotation table in one pipeline:
#   1. keep and rename the probe ID, coordinates, strand and gene columns,
#   2. keep characters 4-5 of the chromosome name (e.g. "chr12" -> "12"),
#   3. restrict to CpGs that are present in `limma_base`.
anno <- manifest_hg19 %>%
  dplyr::select(
    cpg        = probeID,
    cpg_chr    = CpG_chrm,
    cpg_start  = CpG_beg,
    cpg_end    = CpG_end,
    cpg_strand = probe_strand,
    gene_HGNC
  ) %>%
  dplyr::mutate(cpg_chr = substr(cpg_chr, start = 4, stop = 5)) %>%
  dplyr::filter(cpg %in% limma_base$cpg)
# Read the per-probe chromHMM annotation table (tab-separated, gzipped)
manifest_chrom <- read_tsv(
  file = "/exports/molepi/users/ljsinke/LLS/Shared_Data/Manifests/EPIC.hg19.REMC.chromHMM.tsv.gz"
)
## Rows: 865918 Columns: 131
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (129): CpG_chrm, probeID, E001, E002, E003, E004, E005, E006, E007, E008...
## dbl (2): CpG_beg, CpG_end
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the probe ID and the chromatin-state columns E107 and E108
manifest_chrom <- dplyr::select(manifest_chrom, cpg = probeID, E107, E108)

# Attach the chromatin states to the annotation, then the annotation to the
# results; both joins keep every row of the left-hand table.
anno <- anno %>%
  left_join(manifest_chrom, by = "cpg")
limma_base <- limma_base %>%
  left_join(anno, by = "cpg")
Save
# Writes only `limma_base` (now carrying the joined annotation columns) to the
# same .Rdata path that is loaded at the top of this script, overwriting it.
save(limma_base, file="../GOTO_Data/GOTO_results-full-muscle-adj.Rdata")
Define chromatin states
# Chromatin state labels, listed from state 1 upwards and then reversed so
# that the vector runs from "15_Quies" down to "1_TssA".
states <- rev(c(
  "1_TssA",
  "2_TssAFlnk",
  "3_TxFlnk",
  "4_Tx",
  "5_TxWk",
  "6_EnhG",
  "7_Enh",
  "8_ZNF/Rpts",
  "9_Het",
  "10_TssBiv",
  "11_BivFlnk",
  "12_EnhBiv",
  "13_ReprPC",
  "14_ReprPCWk",
  "15_Quies"
))
Test enrichment in E107 chromatin states
# Enrichment of the significant CpGs in each E107 chromatin state.
# For every state a logistic regression of state membership (`chrom`) on
# significance (`sig`) is fitted; one result row per state is collected in
# `res` (columns: Trait, OR, logOR, low_CI, upp_CI, z, p).

# Loop-invariant indicator, computed once: 1 if the CpG is in `sig_cpgs`, else 0
sig_ind <- as.numeric(limma_base$cpg %in% sig_cpgs)

res <- do.call(rbind, lapply(states, function(state) {
  # Binary indicators
  res_road <- data.frame(
    sig = sig_ind,
    # fixed = TRUE matches the state label literally instead of as a regex
    chrom = as.numeric(grepl(state, limma_base$E107, fixed = TRUE))
  )

  # GLM
  fit <- glm(chrom ~ sig, family = binomial, data = res_road)

  # Row 2 is the `sig` term: estimate, SE, z and p from the coefficient
  # table, followed by the exponentiated estimate and its confint.default()
  # interval bounds.
  out <- c(
    coef(summary(fit))[2, ],
    exp(cbind(coef(fit), confint.default(fit)))[2, ]
  )
  names(out) <- c("logOR", "SE", "z", "p", "OR", "low_CI", "upp_CI")

  out <- as.data.frame(t(out))
  out$Trait <- state
  out[, c("Trait", "OR", "logOR", "low_CI", "upp_CI", "z", "p")]
}))
Adjust p-values
# FDR-adjust the per-state p-values
res[["padj"]] <- p.adjust(res[["p"]], method = "fdr")
Save results
# Write the results table, sorted by p-value and then by odds ratio
write_csv(
  x = arrange(res, p, OR),
  file = "../GOTO_Data/Tables/ST05-107.csv"
)
Data for plot
# Plot data: add log-scale confidence limits and (re)compute the FDR-adjusted
# p-values, then keep only states with an odds ratio below 200.
chrom <- res
chrom[["loglowCI"]] <- log(chrom[["low_CI"]])
chrom[["loguppCI"]] <- log(chrom[["upp_CI"]])
chrom[["padj"]] <- p.adjust(chrom[["p"]], method = "fdr")
chrom <- filter(chrom, OR < 200)

# Show the states that are significant after adjustment
filter(chrom, padj < 0.05)
## Trait OR logOR low_CI upp_CI z p
## 1 7_Enh 5.52936708 1.7100734 3.98189231 7.6782338 10.208702 1.812735e-24
## 2 6_EnhG 3.84521055 1.3468284 2.08444600 7.0933208 4.311002 1.625168e-05
## 3 2_TssAFlnk 2.07582827 0.7303602 1.36446835 3.1580527 3.411572 6.458947e-04
## 4 1_TssA 0.08294084 -2.4896277 0.02059591 0.3340073 -3.502841 4.603240e-04
## padj loglowCI loguppCI
## 1 2.719103e-23 1.3817572 2.038390
## 2 1.218876e-04 0.7345031 1.959154
## 3 2.422105e-03 0.3107649 1.149956
## 4 2.301620e-03 -3.8826628 -1.096593
# Label each state by adjusted significance and store the negated log(OR)
chrom <- chrom %>%
  mutate(
    fill = ifelse(padj < 0.05, "Enriched", "Not Enriched"),
    invlogOR = -logOR
  )
Plot
# Forest plot: log odds ratio (point) and confidence interval (horizontal
# error bar) per chromatin state, with point fill marking significance.
plot <- chrom %>%
  ggplot(aes(
    x = logOR,
    # invlogOR is the negated log(OR), so this orders states by log(OR)
    y = reorder(Trait, -invlogOR),
    xmin = loglowCI,
    xmax = loguppCI
  )) +
  # Reference line at log(OR) = 0, i.e. no enrichment or depletion
  geom_vline(
    xintercept = 0, linewidth = 1,
    color = "grey60", linetype = "dashed"
  ) +
  geom_errorbar(
    width = 0.5,
    linewidth = 1,
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    aes(fill = fill),
    size = 3,
    shape = 21,
    stroke = 1.2,
    position = position_dodge(width = 0.9)
  ) +
  # NOTE(review): xlim() sets scale limits, so states whose estimate or
  # interval falls outside [-4, 4] are expected to be dropped - confirm
  # this is intended.
  xlab("log(OR)") + ylab("") + xlim(c(-4, 4)) +
  theme(
    axis.text = element_text(size = 14, color = "#373334"),
    axis.title = element_text(size = 16, hjust = 0.5, color = "#373334"),
    text = element_text(size = 14),
    panel.background = element_rect(fill = "white", color = "#373334"),
    panel.grid.major = element_line(color = "grey95"),
    panel.grid.minor = element_line(color = "grey95"),
    plot.background = element_rect(fill = "white"),
    # `linewidth` replaces the `size` argument deprecated for lines in
    # ggplot2 3.4.0
    axis.ticks.x = element_line(linewidth = 1)
  )
print(plot)
Save
# Render the plot into a PNG file using the device's default settings
png(filename = "../GOTO_Data/Figures/Figure_1C.png")
print(plot)
dev.off()
## png
## 2
Test for enrichment in E108 chromatin states
# Enrichment of the significant CpGs in each E108 chromatin state.
# For every state a logistic regression of state membership (`chrom`) on
# significance (`sig`) is fitted; one result row per state is collected in
# `res` (columns: Trait, OR, logOR, low_CI, upp_CI, z, p).

# Loop-invariant indicator, computed once: 1 if the CpG is in `sig_cpgs`, else 0
sig_ind <- as.numeric(limma_base$cpg %in% sig_cpgs)

res <- do.call(rbind, lapply(states, function(state) {
  # Binary indicators
  res_road <- data.frame(
    sig = sig_ind,
    # fixed = TRUE matches the state label literally instead of as a regex
    chrom = as.numeric(grepl(state, limma_base$E108, fixed = TRUE))
  )

  # GLM
  fit <- glm(chrom ~ sig, family = binomial, data = res_road)

  # Row 2 is the `sig` term: estimate, SE, z and p from the coefficient
  # table, followed by the exponentiated estimate and its confint.default()
  # interval bounds.
  out <- c(
    coef(summary(fit))[2, ],
    exp(cbind(coef(fit), confint.default(fit)))[2, ]
  )
  names(out) <- c("logOR", "SE", "z", "p", "OR", "low_CI", "upp_CI")

  out <- as.data.frame(t(out))
  out$Trait <- state
  out[, c("Trait", "OR", "logOR", "low_CI", "upp_CI", "z", "p")]
}))
Adjust p-values
# FDR-adjust the per-state p-values
res[["padj"]] <- p.adjust(res[["p"]], method = "fdr")
Save results
# Write the E108 results table, sorted by p-value and then by odds ratio.
# It gets its own file: writing to "ST05-107.csv" here would overwrite the
# E107 table saved earlier in this script.
# NOTE(review): file name chosen by analogy with the E107 table - confirm.
write_csv(res %>% arrange(p, OR),
          file = "../GOTO_Data/Tables/ST05-108.csv")
Data for plot
# Plot data: add log-scale confidence limits and (re)compute the FDR-adjusted
# p-values, then keep only states with an odds ratio below 200.
chrom <- res
chrom[["loglowCI"]] <- log(chrom[["low_CI"]])
chrom[["loguppCI"]] <- log(chrom[["upp_CI"]])
chrom[["padj"]] <- p.adjust(chrom[["p"]], method = "fdr")
chrom <- filter(chrom, OR < 200)

# Show the states that are significant after adjustment
filter(chrom, padj < 0.05)
## Trait OR logOR low_CI upp_CI z p
## 1 15_Quies 0.47371394 -0.7471516 0.29950465 0.7492535 -3.194055 1.402893e-03
## 2 14_ReprPCWk 0.39588069 -0.9266424 0.19552955 0.8015235 -2.574684 1.003317e-02
## 3 7_Enh 7.15391566 1.9676599 5.23590000 9.7745391 12.355917 4.525656e-35
## 4 6_EnhG 4.34097650 1.4680993 2.35312865 8.0080946 4.698955 2.614959e-06
## 5 1_TssA 0.08925065 -2.4163066 0.02216274 0.3594175 -3.399678 6.746525e-04
## padj loglowCI loguppCI
## 1 5.260849e-03 -1.2056253 -0.2886779
## 2 3.009950e-02 -1.6320438 -0.2212410
## 3 6.788485e-34 1.6555387 2.2797810
## 4 1.961219e-05 0.8557458 2.0804529
## 5 3.373263e-03 -3.8093426 -1.0232706
# Label each state by adjusted significance and store the negated log(OR)
chrom <- chrom %>%
  mutate(
    fill = ifelse(padj < 0.05, "Enriched", "Not Enriched"),
    invlogOR = -logOR
  )
Plot
# Forest plot: log odds ratio (point) and confidence interval (horizontal
# error bar) per chromatin state, with point fill marking significance.
plot <- chrom %>%
  ggplot(aes(
    x = logOR,
    # invlogOR is the negated log(OR), so this orders states by log(OR)
    y = reorder(Trait, -invlogOR),
    xmin = loglowCI,
    xmax = loguppCI
  )) +
  # Reference line at log(OR) = 0, i.e. no enrichment or depletion
  geom_vline(
    xintercept = 0, linewidth = 1,
    color = "grey60", linetype = "dashed"
  ) +
  geom_errorbar(
    width = 0.5,
    linewidth = 1,
    position = position_dodge(width = 0.9)
  ) +
  geom_point(
    aes(fill = fill),
    size = 3,
    shape = 21,
    stroke = 1.2,
    position = position_dodge(width = 0.9)
  ) +
  # NOTE(review): xlim() sets scale limits, so states whose estimate or
  # interval falls outside [-4, 4] are expected to be dropped - confirm
  # this is intended.
  xlab("log(OR)") + ylab("") + xlim(c(-4, 4)) +
  theme(
    axis.text = element_text(size = 14, color = "#373334"),
    axis.title = element_text(size = 16, hjust = 0.5, color = "#373334"),
    text = element_text(size = 14),
    panel.background = element_rect(fill = "white", color = "#373334"),
    panel.grid.major = element_line(color = "grey95"),
    panel.grid.minor = element_line(color = "grey95"),
    plot.background = element_rect(fill = "white"),
    # `linewidth` replaces the `size` argument deprecated for lines in
    # ggplot2 3.4.0
    axis.ticks.x = element_line(linewidth = 1)
  )
print(plot)
# Print the R version, platform and the attached/loaded package versions
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggpubr_0.4.0
## [2] GEOquery_2.62.2
## [3] MuSiC_0.2.0
## [4] nnls_1.4
## [5] gplots_3.1.3
## [6] plotly_4.10.1
## [7] SeuratObject_4.1.3
## [8] Seurat_4.3.0
## [9] gridExtra_2.3
## [10] lattice_0.21-8
## [11] bacon_1.22.0
## [12] ellipse_0.4.5
## [13] methylGSA_1.12.0
## [14] sva_3.42.0
## [15] genefilter_1.76.0
## [16] mgcv_1.8-42
## [17] nlme_3.1-162
## [18] limma_3.54.2
## [19] lmerTest_3.1-3
## [20] lme4_1.1-30
## [21] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [22] snpStats_1.44.0
## [23] survival_3.5-5
## [24] ggrepel_0.9.1
## [25] ggfortify_0.4.14
## [26] irlba_2.3.5.1
## [27] Matrix_1.5-4.1
## [28] omicsPrint_1.14.0
## [29] MASS_7.3-60
## [30] DNAmArray_2.0.0
## [31] pls_2.8-2
## [32] FDb.InfiniumMethylation.hg19_2.2.0
## [33] org.Hs.eg.db_3.14.0
## [34] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [35] GenomicFeatures_1.46.5
## [36] AnnotationDbi_1.56.2
## [37] IlluminaHumanMethylationEPICmanifest_0.3.0
## [38] minfi_1.40.0
## [39] bumphunter_1.36.0
## [40] locfit_1.5-9.8
## [41] iterators_1.0.14
## [42] foreach_1.5.2
## [43] Biostrings_2.62.0
## [44] XVector_0.34.0
## [45] SummarizedExperiment_1.24.0
## [46] Biobase_2.58.0
## [47] MatrixGenerics_1.10.0
## [48] matrixStats_1.0.0
## [49] GenomicRanges_1.46.1
## [50] GenomeInfoDb_1.34.9
## [51] IRanges_2.32.0
## [52] S4Vectors_0.36.2
## [53] BiocGenerics_0.44.0
## [54] BiocParallel_1.32.6
## [55] MethylAid_1.28.0
## [56] forcats_0.5.2
## [57] stringr_1.5.0
## [58] dplyr_1.1.3
## [59] purrr_0.3.4
## [60] readr_2.1.2
## [61] tidyr_1.2.1
## [62] tibble_3.2.1
## [63] ggplot2_3.4.3
## [64] tidyverse_1.3.2
## [65] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] ica_1.0-3
## [2] Rsamtools_2.10.0
## [3] cinaR_0.2.3
## [4] lmtest_0.9-40
## [5] crayon_1.5.2
## [6] rhdf5filters_1.10.1
## [7] backports_1.4.1
## [8] reprex_2.0.2
## [9] GOSemSim_2.20.0
## [10] rlang_1.1.1
## [11] ROCR_1.0-11
## [12] readxl_1.4.1
## [13] SparseM_1.81
## [14] nloptr_2.0.3
## [15] filelock_1.0.2
## [16] rjson_0.2.21
## [17] bit64_4.0.5
## [18] glue_1.6.2
## [19] sctransform_0.3.5
## [20] rngtools_1.5.2
## [21] spatstat.sparse_3.0-1
## [22] mcmc_0.9-7
## [23] spatstat.geom_3.2-1
## [24] DOSE_3.20.1
## [25] haven_2.5.1
## [26] tidyselect_1.2.0
## [27] fitdistrplus_1.1-11
## [28] XML_3.99-0.14
## [29] zoo_1.8-12
## [30] GenomicAlignments_1.30.0
## [31] MatrixModels_0.5-1
## [32] xtable_1.8-4
## [33] magrittr_2.0.3
## [34] evaluate_0.21
## [35] cli_3.6.1
## [36] zlibbioc_1.44.0
## [37] miniUI_0.1.1.1
## [38] rstudioapi_0.14
## [39] doRNG_1.8.6
## [40] sp_1.6-1
## [41] MultiAssayExperiment_1.20.0
## [42] bslib_0.5.0
## [43] fastmatch_1.1-3
## [44] treeio_1.18.1
## [45] shiny_1.7.2
## [46] xfun_0.39
## [47] askpass_1.1
## [48] multtest_2.50.0
## [49] cluster_2.1.4
## [50] caTools_1.18.2
## [51] tidygraph_1.2.2
## [52] KEGGREST_1.34.0
## [53] quantreg_5.94
## [54] base64_2.0.1
## [55] ape_5.7-1
## [56] scrime_1.3.5
## [57] listenv_0.9.0
## [58] png_0.1-8
## [59] reshape_0.8.9
## [60] future_1.32.0
## [61] withr_2.5.0
## [62] bitops_1.0-7
## [63] ggforce_0.3.4
## [64] plyr_1.8.8
## [65] cellranger_1.1.0
## [66] coda_0.19-4
## [67] pillar_1.9.0
## [68] cachem_1.0.8
## [69] fs_1.6.2
## [70] clusterProfiler_4.2.2
## [71] DelayedMatrixStats_1.16.0
## [72] vctrs_0.6.3
## [73] ellipsis_0.3.2
## [74] generics_0.1.3
## [75] tools_4.2.2
## [76] munsell_0.5.0
## [77] tweenr_2.0.2
## [78] fgsea_1.20.0
## [79] DelayedArray_0.24.0
## [80] abind_1.4-5
## [81] fastmap_1.1.1
## [82] compiler_4.2.2
## [83] httpuv_1.6.11
## [84] rtracklayer_1.54.0
## [85] beanplot_1.3.1
## [86] MCMCpack_1.6-3
## [87] GenomeInfoDbData_1.2.9
## [88] edgeR_3.40.2
## [89] deldir_1.0-9
## [90] utf8_1.2.3
## [91] later_1.3.1
## [92] RobustRankAggreg_1.2.1
## [93] BiocFileCache_2.2.1
## [94] jsonlite_1.8.5
## [95] scales_1.2.1
## [96] carData_3.0-5
## [97] pbapply_1.7-0
## [98] tidytree_0.4.0
## [99] sparseMatrixStats_1.10.0
## [100] lazyeval_0.2.2
## [101] promises_1.2.0.1
## [102] car_3.1-0
## [103] goftest_1.2-3
## [104] spatstat.utils_3.0-3
## [105] reticulate_1.30
## [106] htm2txt_2.2.2
## [107] nor1mix_1.3-0
## [108] cowplot_1.1.1
## [109] statmod_1.5.0
## [110] siggenes_1.68.0
## [111] Rtsne_0.16
## [112] downloader_0.4
## [113] uwot_0.1.14
## [114] igraph_1.4.3
## [115] HDF5Array_1.22.1
## [116] numDeriv_2016.8-1.1
## [117] yaml_2.3.7
## [118] htmltools_0.5.5
## [119] memoise_2.0.1
## [120] BiocIO_1.8.0
## [121] graphlayouts_0.8.1
## [122] quadprog_1.5-8
## [123] viridisLite_0.4.2
## [124] digest_0.6.31
## [125] assertthat_0.2.1
## [126] mime_0.12
## [127] rappdirs_0.3.3
## [128] RSQLite_2.2.17
## [129] yulab.utils_0.0.6
## [130] future.apply_1.11.0
## [131] data.table_1.14.8
## [132] blob_1.2.4
## [133] preprocessCore_1.60.2
## [134] splines_4.2.2
## [135] labeling_0.4.2
## [136] Rhdf5lib_1.20.0
## [137] illuminaio_0.40.0
## [138] googledrive_2.0.0
## [139] RaggedExperiment_1.18.0
## [140] RCurl_1.98-1.12
## [141] broom_1.0.1
## [142] hms_1.1.2
## [143] modelr_0.1.9
## [144] rhdf5_2.42.1
## [145] colorspace_2.1-0
## [146] aplot_0.1.7
## [147] sass_0.4.6
## [148] Rcpp_1.0.10
## [149] mclust_6.0.0
## [150] RANN_2.6.1
## [151] enrichplot_1.14.2
## [152] fansi_1.0.4
## [153] tzdb_0.4.0
## [154] parallelly_1.36.0
## [155] R6_2.5.1
## [156] grid_4.2.2
## [157] ggridges_0.5.4
## [158] lifecycle_1.0.3
## [159] ggsignif_0.6.3
## [160] curl_5.0.1
## [161] googlesheets4_1.0.1
## [162] minqa_1.2.5
## [163] leiden_0.4.3
## [164] jquerylib_0.1.4
## [165] DO.db_2.9
## [166] qvalue_2.26.0
## [167] RcppAnnoy_0.0.20
## [168] RColorBrewer_1.1-3
## [169] spatstat.explore_3.1-0
## [170] htmlwidgets_1.5.4
## [171] polyclip_1.10-4
## [172] biomaRt_2.50.3
## [173] missMethyl_1.28.0
## [174] shadowtext_0.1.2
## [175] timechange_0.2.0
## [176] gridGraphics_0.5-1
## [177] reactome.db_1.77.0
## [178] rvest_1.0.3
## [179] globals_0.16.2
## [180] openssl_2.0.6
## [181] spatstat.random_3.1-5
## [182] patchwork_1.1.2
## [183] progressr_0.13.0
## [184] codetools_0.2-19
## [185] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [186] lubridate_1.9.2
## [187] GO.db_3.14.0
## [188] gtools_3.9.4
## [189] prettyunits_1.1.1
## [190] dbplyr_2.2.1
## [191] gridBase_0.4-7
## [192] gtable_0.3.3
## [193] DBI_1.1.3
## [194] tensor_1.5
## [195] ggfun_0.0.7
## [196] httr_1.4.6
## [197] highr_0.10
## [198] KernSmooth_2.23-21
## [199] stringi_1.7.12
## [200] vroom_1.5.7
## [201] progress_1.2.2
## [202] reshape2_1.4.4
## [203] farver_2.1.1
## [204] annotate_1.72.0
## [205] viridis_0.6.2
## [206] hexbin_1.28.3
## [207] ggtree_3.2.1
## [208] xml2_1.3.4
## [209] boot_1.3-28.1
## [210] restfulr_0.0.15
## [211] scattermore_0.8
## [212] ggplotify_0.1.0
## [213] bit_4.0.5
## [214] spatstat.data_3.0-1
## [215] scatterpie_0.1.8
## [216] ggraph_2.0.6
## [217] pkgconfig_2.0.3
## [218] gargle_1.5.0
## [219] rstatix_0.7.0
## [220] knitr_1.43
Clear
# Remove every (non-hidden) object from the current environment.
# NOTE(review): presumably so that a following script run in the same session
# starts clean - confirm; this also discards anything the caller had defined.
rm(list=ls())