This script assesses if health improvements are associated with differential methylation in blood tissue.


Setup

Load packages

library(edgeR)
library(cinaR)
library(limma)
library(tidyverse)
library(GenomicFeatures)
library(AnnotationHub)
library(SummarizedExperiment)
library(lme4)
library(lmerTest)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)

DNAm data

Load top CpG results and methData

load('../GOTO_Data/GOTO_methData-filtered.Rdata')
load('../GOTO_Data/GOTO_results-top-blood.Rdata')

Save only blood samples

methData <- methData[ , methData$tissue == 'fasted blood']

Convert betas to data frame

beta_rows <- methData$Basename
betas <- as.data.frame(t(assay(methData)))
rownames(betas) <- beta_rows

Save only CpGs to be tested

betas <- betas %>% dplyr::select(any_of(top_cpgs$cpg))
dim(betas)
## [1] 196 441

Phenotype data

Load data

load("../GOTO_Data/GOTO_targets-filtered.Rdata")

Filter blood

targets <- targets %>% 
  filter(tissue == 'fasted blood')

Save covariates

targets <- targets %>% 
  dplyr::select(IOP2_ID, timepoint, age, sex, smoke,
                plate, array_row, Basename, 
                bmi, wc, 
                perc_body_fat, fasting_insulin,
                leptin, adiponectin, interleukin_6,
                hdl_size_fasted, hdl_c, systolic_bp) %>% 
  mutate(ID = paste0(IOP2_ID, '_', timepoint))

Ordering and filtering

Ensure DNAm data for all samples

targets <- targets %>% filter(Basename %in% rownames(betas))

print(paste0('We have complete covariate and DNAm data for ',
             nrow(targets), ' samples'))
## [1] "We have complete covariate and DNAm data for 196 samples"

Order uuid alphabetically

targets <- targets[order(targets$Basename),]
rownames(targets) <- targets$Basename
dim(targets)
## [1] 196  19

Order DNAm data

betas <- betas[rownames(betas) %in% targets$Basename, ]
betas <- betas[order(rownames(betas)), ]
dim(betas)
## [1] 196 441

Analysis

Bind data frames

lm_df <- cbind(targets,
               betas)

Save traits

traits <- c('bmi', 
            'wc',
            'perc_body_fat', 
            'fasting_insulin',
            'leptin', 
            'adiponectin', 
            'interleukin_6',
            'hdl_size_fasted', 
            'hdl_c', 
            'systolic_bp')

Save CpGs

cpgs <- top_cpgs$cpg
length(cpgs)
## [1] 441

Expand pairwise comparisons

df <- expand.grid(traits, cpgs)
colnames(df) <- c('trait', 'cpg')

Run models

for(i in 1:nrow(df)){
  cpg <- as.character(df$cpg[i])
  trait <- as.character(df$trait[i])
  
  fit <- lmer(substitute(cpg ~ trait + age + sex + smoke +
                           plate + array_row + (1 | IOP2_ID),
                         list(cpg = as.name(cpg),
                              trait = as.name(trait))),
              data=lm_df)
  
  if(i == 1){
    out <- data.frame(
      cpg = cpg,
      trait = trait,
      es = coef(summary(fit))[2,1],
      se = coef(summary(fit))[2,2],
      p = coef(summary(fit))[2,5]
    )
  } else {
    out <- rbind(out,
                 data.frame(
                   cpg = cpg,
                   trait = trait,
                   es = coef(summary(fit))[2,1],
                   se = coef(summary(fit))[2,2],
                   p = coef(summary(fit))[2,5]))
  }
}

Adjust p-values

out <- out %>% group_by(cpg) %>% 
  mutate(padj = p.adjust(p, method='fdr')) %>% 
  ungroup()

Inspect top

out <- out %>% arrange(padj)
head(out)
## # A tibble: 6 × 6
##   cpg        trait                  es        se          p      padj
##   <chr>      <chr>               <dbl>     <dbl>      <dbl>     <dbl>
## 1 cg16209776 wc               0.00109  0.000231  0.00000480 0.0000480
## 2 cg09222461 hdl_size_fasted -0.0277   0.00615   0.0000162  0.000162 
## 3 cg16657397 perc_body_fat   -0.00140  0.000328  0.0000338  0.000169 
## 4 cg16657397 leptin          -0.000787 0.000181  0.0000273  0.000169 
## 5 cg06612452 wc               0.000335 0.0000803 0.0000458  0.000458 
## 6 cg07735194 interleukin_6    0.00862  0.00209   0.0000582  0.000582

Save

save(out, file='../GOTO_Data/Traits/GOTO-blood_trait.Rdata')
write_csv(out, "../GOTO_Data/Tables/ST21.csv")

Heatmap

Format data

out <- out %>% 
  mutate(trait = case_when(
    trait == "wc" ~ "WC",
    trait == "perc_body_fat" ~ "Total Body Fat %",
    trait == "leptin" ~ "Leptin",
    trait == "hdl_size_fasted" ~ "HDL size",
    trait == "bmi" ~ "BMI",
    trait == "adiponectin" ~ "Adiponectin",
    trait == "interleukin_6" ~ "Interleukin-6",
    trait == "systolic_bp" ~ "SBP",
    trait == "fasting_insulin" ~ "Fasting insulin",
    trait == "hdl_c" ~ "HDL-C"))

unique(out$trait)
##  [1] "WC"               "HDL size"         "Total Body Fat %" "Leptin"          
##  [5] "Interleukin-6"    "BMI"              "SBP"              "Fasting insulin" 
##  [9] "HDL-C"            "Adiponectin"

Make df

heat_df <- reshape2::dcast(out, trait ~ cpg, 
                           value.var = "padj")
heat_df <- heat_df %>% column_to_rownames(var='trait')
heat_df <- as.matrix(heat_df)

Draw heatmap

hm <- draw(Heatmap(
  heat_df,
  column_title_gp = gpar(fontsize = 20, fontface = "bold"),
  name = "padj",
  col = colorRamp2(c(0, 0.001, 0.05, 0.1, 0.2, 1), c("#ab1226", "#e8213b", "#ef6678", "#b07f8b", "grey85", "grey95")),
  show_row_dend = FALSE,
  show_column_names = FALSE,  
  column_dend_height = unit(1, "cm"), 
  column_dend_side = "top", 
  width = ncol(heat_df)*unit(0.3, "mm"), 
  height = nrow(heat_df)*unit(3, "mm"),
  column_km = 1, column_gap = unit(2, "mm"), border = TRUE,
  row_names_gp = gpar(fontsize = 8),
  heatmap_legend_param = list(
               legend_direction = "horizontal", 
               legend_width = unit(3, "cm"),
               border = 'black')), 
  heatmap_legend_side = "bottom")

Plot

print(hm)


Save

png("../GOTO_Data/Figures/Figure_5E.pdf")
print(hm)
dev.off()
## png 
##   2

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] circlize_0.4.15             ComplexHeatmap_2.14.0      
##  [3] RColorBrewer_1.1-3          pheatmap_1.0.12            
##  [5] lmerTest_3.1-3              lme4_1.1-30                
##  [7] Matrix_1.5-4.1              SummarizedExperiment_1.24.0
##  [9] MatrixGenerics_1.10.0       matrixStats_1.0.0          
## [11] AnnotationHub_3.2.2         BiocFileCache_2.2.1        
## [13] dbplyr_2.2.1                GenomicFeatures_1.46.5     
## [15] AnnotationDbi_1.56.2        Biobase_2.58.0             
## [17] GenomicRanges_1.46.1        GenomeInfoDb_1.34.9        
## [19] IRanges_2.32.0              S4Vectors_0.36.2           
## [21] BiocGenerics_0.44.0         forcats_0.5.2              
## [23] stringr_1.5.0               dplyr_1.1.3                
## [25] purrr_0.3.4                 readr_2.1.2                
## [27] tidyr_1.2.1                 tibble_3.2.1               
## [29] ggplot2_3.4.3               tidyverse_1.3.2            
## [31] cinaR_0.2.3                 edgeR_3.40.2               
## [33] limma_3.54.2                rmarkdown_2.16             
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.1                  backports_1.4.1              
##   [3] plyr_1.8.8                    splines_4.2.2                
##   [5] BiocParallel_1.32.6           digest_0.6.31                
##   [7] foreach_1.5.2                 htmltools_0.5.5              
##   [9] magick_2.7.4                  fansi_1.0.4                  
##  [11] magrittr_2.0.3                memoise_2.0.1                
##  [13] cluster_2.1.4                 doParallel_1.0.17            
##  [15] googlesheets4_1.0.1           tzdb_0.4.0                   
##  [17] Biostrings_2.62.0             modelr_0.1.9                 
##  [19] vroom_1.5.7                   timechange_0.2.0             
##  [21] prettyunits_1.1.1             colorspace_2.1-0             
##  [23] blob_1.2.4                    rvest_1.0.3                  
##  [25] rappdirs_0.3.3                haven_2.5.1                  
##  [27] xfun_0.39                     crayon_1.5.2                 
##  [29] RCurl_1.98-1.12               jsonlite_1.8.5               
##  [31] iterators_1.0.14              glue_1.6.2                   
##  [33] gtable_0.3.3                  gargle_1.5.0                 
##  [35] zlibbioc_1.44.0               XVector_0.34.0               
##  [37] GetoptLong_1.0.5              DelayedArray_0.24.0          
##  [39] shape_1.4.6                   scales_1.2.1                 
##  [41] DBI_1.1.3                     Rcpp_1.0.10                  
##  [43] xtable_1.8-4                  progress_1.2.2               
##  [45] clue_0.3-64                   bit_4.0.5                    
##  [47] httr_1.4.6                    ellipsis_0.3.2               
##  [49] pkgconfig_2.0.3               XML_3.99-0.14                
##  [51] sass_0.4.6                    locfit_1.5-9.8               
##  [53] utf8_1.2.3                    reshape2_1.4.4               
##  [55] tidyselect_1.2.0              rlang_1.1.1                  
##  [57] later_1.3.1                   munsell_0.5.0                
##  [59] BiocVersion_3.16.0            cellranger_1.1.0             
##  [61] tools_4.2.2                   cachem_1.0.8                 
##  [63] cli_3.6.1                     generics_0.1.3               
##  [65] RSQLite_2.2.17                broom_1.0.1                  
##  [67] evaluate_0.21                 fastmap_1.1.1                
##  [69] yaml_2.3.7                    knitr_1.43                   
##  [71] bit64_4.0.5                   fs_1.6.2                     
##  [73] KEGGREST_1.34.0               nlme_3.1-162                 
##  [75] mime_0.12                     xml2_1.3.4                   
##  [77] biomaRt_2.50.3                compiler_4.2.2               
##  [79] rstudioapi_0.14               filelock_1.0.2               
##  [81] curl_5.0.1                    png_0.1-8                    
##  [83] interactiveDisplayBase_1.32.0 reprex_2.0.2                 
##  [85] bslib_0.5.0                   stringi_1.7.12               
##  [87] highr_0.10                    lattice_0.21-8               
##  [89] nloptr_2.0.3                  vctrs_0.6.3                  
##  [91] pillar_1.9.0                  lifecycle_1.0.3              
##  [93] BiocManager_1.30.21           GlobalOptions_0.1.2          
##  [95] jquerylib_0.1.4               bitops_1.0-7                 
##  [97] httpuv_1.6.11                 rtracklayer_1.54.0           
##  [99] R6_2.5.1                      BiocIO_1.8.0                 
## [101] promises_1.2.0.1              codetools_0.2-19             
## [103] boot_1.3-28.1                 MASS_7.3-60                  
## [105] assertthat_0.2.1              rjson_0.2.21                 
## [107] withr_2.5.0                   GenomicAlignments_1.30.0     
## [109] Rsamtools_2.10.0              GenomeInfoDbData_1.2.9       
## [111] parallel_4.2.2                hms_1.1.2                    
## [113] minqa_1.2.5                   googledrive_2.0.0            
## [115] Cairo_1.6-0                   numDeriv_2016.8-1.1          
## [117] shiny_1.7.2                   lubridate_1.9.2              
## [119] restfulr_0.0.15

Clear

rm(list=ls())