This script assesses if health improvements are associated with differential methylation in blood tissue.
Load packages
library(edgeR)
library(cinaR)
library(limma)
library(tidyverse)
library(GenomicFeatures)
library(AnnotationHub)
library(SummarizedExperiment)
library(lme4)
library(lmerTest)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
Load top CpG results and methData
load('../GOTO_Data/GOTO_methData-filtered.Rdata')
load('../GOTO_Data/GOTO_results-top-blood.Rdata')
Save only blood samples
methData <- methData[ , methData$tissue == 'fasted blood']
Convert betas to data frame
beta_rows <- methData$Basename
betas <- as.data.frame(t(assay(methData)))
rownames(betas) <- beta_rows
Save only CpGs to be tested
betas <- betas %>% dplyr::select(any_of(top_cpgs$cpg))
dim(betas)
## [1] 196 441
Load data
load("../GOTO_Data/GOTO_targets-filtered.Rdata")
Filter blood
targets <- targets %>%
filter(tissue == 'fasted blood')
Save covariates
targets <- targets %>%
dplyr::select(IOP2_ID, timepoint, age, sex, smoke,
plate, array_row, Basename,
bmi, wc,
perc_body_fat, fasting_insulin,
leptin, adiponectin, interleukin_6,
hdl_size_fasted, hdl_c, systolic_bp) %>%
mutate(ID = paste0(IOP2_ID, '_', timepoint))
Ensure DNAm data for all samples
targets <- targets %>% filter(Basename %in% rownames(betas))
print(paste0('We have complete covariate and DNAm data for ',
nrow(targets), ' samples'))
## [1] "We have complete covariate and DNAm data for 196 samples"
Order uuid alphabetically
targets <- targets[order(targets$Basename),]
rownames(targets) <- targets$Basename
dim(targets)
## [1] 196 19
Order DNAm data
betas <- betas[rownames(betas) %in% targets$Basename, ]
betas <- betas[order(rownames(betas)), ]
dim(betas)
## [1] 196 441
Bind data frames
lm_df <- cbind(targets,
betas)
Save traits
traits <- c('bmi',
'wc',
'perc_body_fat',
'fasting_insulin',
'leptin',
'adiponectin',
'interleukin_6',
'hdl_size_fasted',
'hdl_c',
'systolic_bp')
Save CpGs
cpgs <- top_cpgs$cpg
length(cpgs)
## [1] 441
Expand pairwise comparisons
df <- expand.grid(traits, cpgs)
colnames(df) <- c('trait', 'cpg')
Run models
for(i in 1:nrow(df)){
cpg <- as.character(df$cpg[i])
trait <- as.character(df$trait[i])
fit <- lmer(substitute(cpg ~ trait + age + sex + smoke +
plate + array_row + (1 | IOP2_ID),
list(cpg = as.name(cpg),
trait = as.name(trait))),
data=lm_df)
if(i == 1){
out <- data.frame(
cpg = cpg,
trait = trait,
es = coef(summary(fit))[2,1],
se = coef(summary(fit))[2,2],
p = coef(summary(fit))[2,5]
)
} else {
out <- rbind(out,
data.frame(
cpg = cpg,
trait = trait,
es = coef(summary(fit))[2,1],
se = coef(summary(fit))[2,2],
p = coef(summary(fit))[2,5]))
}
}
Adjust p-values
out <- out %>% group_by(cpg) %>%
mutate(padj = p.adjust(p, method='fdr')) %>%
ungroup()
Inspect top
out <- out %>% arrange(padj)
head(out)
## # A tibble: 6 × 6
## cpg trait es se p padj
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cg16209776 wc 0.00109 0.000231 0.00000480 0.0000480
## 2 cg09222461 hdl_size_fasted -0.0277 0.00615 0.0000162 0.000162
## 3 cg16657397 perc_body_fat -0.00140 0.000328 0.0000338 0.000169
## 4 cg16657397 leptin -0.000787 0.000181 0.0000273 0.000169
## 5 cg06612452 wc 0.000335 0.0000803 0.0000458 0.000458
## 6 cg07735194 interleukin_6 0.00862 0.00209 0.0000582 0.000582
Save
save(out, file='../GOTO_Data/Traits/GOTO-blood_trait.Rdata')
write_csv(out, "../GOTO_Data/Tables/ST21.csv")
Format data
out <- out %>%
mutate(trait = case_when(
trait == "wc" ~ "WC",
trait == "perc_body_fat" ~ "Total Body Fat %",
trait == "leptin" ~ "Leptin",
trait == "hdl_size_fasted" ~ "HDL size",
trait == "bmi" ~ "BMI",
trait == "adiponectin" ~ "Adiponectin",
trait == "interleukin_6" ~ "Interleukin-6",
trait == "systolic_bp" ~ "SBP",
trait == "fasting_insulin" ~ "Fasting insulin",
trait == "hdl_c" ~ "HDL-C"))
unique(out$trait)
## [1] "WC" "HDL size" "Total Body Fat %" "Leptin"
## [5] "Interleukin-6" "BMI" "SBP" "Fasting insulin"
## [9] "HDL-C" "Adiponectin"
Make df
heat_df <- reshape2::dcast(out, trait ~ cpg,
value.var = "padj")
heat_df <- heat_df %>% column_to_rownames(var='trait')
heat_df <- as.matrix(heat_df)
Draw heatmap
hm <- draw(Heatmap(
heat_df,
column_title_gp = gpar(fontsize = 20, fontface = "bold"),
name = "padj",
col = colorRamp2(c(0, 0.001, 0.05, 0.1, 0.2, 1), c("#ab1226", "#e8213b", "#ef6678", "#b07f8b", "grey85", "grey95")),
show_row_dend = FALSE,
show_column_names = FALSE,
column_dend_height = unit(1, "cm"),
column_dend_side = "top",
width = ncol(heat_df)*unit(0.3, "mm"),
height = nrow(heat_df)*unit(3, "mm"),
column_km = 1, column_gap = unit(2, "mm"), border = TRUE,
row_names_gp = gpar(fontsize = 8),
heatmap_legend_param = list(
legend_direction = "horizontal",
legend_width = unit(3, "cm"),
border = 'black')),
heatmap_legend_side = "bottom")
Plot
print(hm)
png("../GOTO_Data/Figures/Figure_5E.pdf")
print(hm)
dev.off()
## png
## 2
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] circlize_0.4.15 ComplexHeatmap_2.14.0
## [3] RColorBrewer_1.1-3 pheatmap_1.0.12
## [5] lmerTest_3.1-3 lme4_1.1-30
## [7] Matrix_1.5-4.1 SummarizedExperiment_1.24.0
## [9] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [11] AnnotationHub_3.2.2 BiocFileCache_2.2.1
## [13] dbplyr_2.2.1 GenomicFeatures_1.46.5
## [15] AnnotationDbi_1.56.2 Biobase_2.58.0
## [17] GenomicRanges_1.46.1 GenomeInfoDb_1.34.9
## [19] IRanges_2.32.0 S4Vectors_0.36.2
## [21] BiocGenerics_0.44.0 forcats_0.5.2
## [23] stringr_1.5.0 dplyr_1.1.3
## [25] purrr_0.3.4 readr_2.1.2
## [27] tidyr_1.2.1 tibble_3.2.1
## [29] ggplot2_3.4.3 tidyverse_1.3.2
## [31] cinaR_0.2.3 edgeR_3.40.2
## [33] limma_3.54.2 rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.1 backports_1.4.1
## [3] plyr_1.8.8 splines_4.2.2
## [5] BiocParallel_1.32.6 digest_0.6.31
## [7] foreach_1.5.2 htmltools_0.5.5
## [9] magick_2.7.4 fansi_1.0.4
## [11] magrittr_2.0.3 memoise_2.0.1
## [13] cluster_2.1.4 doParallel_1.0.17
## [15] googlesheets4_1.0.1 tzdb_0.4.0
## [17] Biostrings_2.62.0 modelr_0.1.9
## [19] vroom_1.5.7 timechange_0.2.0
## [21] prettyunits_1.1.1 colorspace_2.1-0
## [23] blob_1.2.4 rvest_1.0.3
## [25] rappdirs_0.3.3 haven_2.5.1
## [27] xfun_0.39 crayon_1.5.2
## [29] RCurl_1.98-1.12 jsonlite_1.8.5
## [31] iterators_1.0.14 glue_1.6.2
## [33] gtable_0.3.3 gargle_1.5.0
## [35] zlibbioc_1.44.0 XVector_0.34.0
## [37] GetoptLong_1.0.5 DelayedArray_0.24.0
## [39] shape_1.4.6 scales_1.2.1
## [41] DBI_1.1.3 Rcpp_1.0.10
## [43] xtable_1.8-4 progress_1.2.2
## [45] clue_0.3-64 bit_4.0.5
## [47] httr_1.4.6 ellipsis_0.3.2
## [49] pkgconfig_2.0.3 XML_3.99-0.14
## [51] sass_0.4.6 locfit_1.5-9.8
## [53] utf8_1.2.3 reshape2_1.4.4
## [55] tidyselect_1.2.0 rlang_1.1.1
## [57] later_1.3.1 munsell_0.5.0
## [59] BiocVersion_3.16.0 cellranger_1.1.0
## [61] tools_4.2.2 cachem_1.0.8
## [63] cli_3.6.1 generics_0.1.3
## [65] RSQLite_2.2.17 broom_1.0.1
## [67] evaluate_0.21 fastmap_1.1.1
## [69] yaml_2.3.7 knitr_1.43
## [71] bit64_4.0.5 fs_1.6.2
## [73] KEGGREST_1.34.0 nlme_3.1-162
## [75] mime_0.12 xml2_1.3.4
## [77] biomaRt_2.50.3 compiler_4.2.2
## [79] rstudioapi_0.14 filelock_1.0.2
## [81] curl_5.0.1 png_0.1-8
## [83] interactiveDisplayBase_1.32.0 reprex_2.0.2
## [85] bslib_0.5.0 stringi_1.7.12
## [87] highr_0.10 lattice_0.21-8
## [89] nloptr_2.0.3 vctrs_0.6.3
## [91] pillar_1.9.0 lifecycle_1.0.3
## [93] BiocManager_1.30.21 GlobalOptions_0.1.2
## [95] jquerylib_0.1.4 bitops_1.0-7
## [97] httpuv_1.6.11 rtracklayer_1.54.0
## [99] R6_2.5.1 BiocIO_1.8.0
## [101] promises_1.2.0.1 codetools_0.2-19
## [103] boot_1.3-28.1 MASS_7.3-60
## [105] assertthat_0.2.1 rjson_0.2.21
## [107] withr_2.5.0 GenomicAlignments_1.30.0
## [109] Rsamtools_2.10.0 GenomeInfoDbData_1.2.9
## [111] parallel_4.2.2 hms_1.1.2
## [113] minqa_1.2.5 googledrive_2.0.0
## [115] Cairo_1.6-0 numDeriv_2016.8-1.1
## [117] shiny_1.7.2 lubridate_1.9.2
## [119] restfulr_0.0.15
Clear
rm(list=ls())