This script estimates Horvath cAge, phenoAge, and grimAge from blood DNAm data


Setup

Load packages

library(tidyverse)
library(methylclock)
library(SummarizedExperiment)

Load beta values

load('../GOTO_Data/Processing/GOTO_methData-unfiltered.Rdata')
methData <- methData_unfiltered
methData
## class: SummarizedExperiment 
## dim: 865859 534 
## metadata(0):
## assays(1): beta
## rownames(865859): cg14817997 cg26928153 ... cg07587934 cg16855331
## rowData names(4): cpg chr start end
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename smoke

Save betas and targets

betas <- assay(methData)
targets <- as.data.frame(colData(methData))

Transform sex to female

targets$Female <- ifelse(targets$sex == 'female', 1, 0)
xtabs(~targets$Female+targets$sex)
##               targets$sex
## targets$Female male female
##              0  266      0
##              1    0    268

Reference CpGs

Save available CpGs

cpgs_in_GOTO <- as.vector(rownames(betas))
head(cpgs_in_GOTO)
## [1] "cg14817997" "cg26928153" "cg16269199" "cg13869341" "cg14008030"
## [6] "cg12045430"

Load in gold standard

gold <- read.csv('../GOTO_Data/Clocks/Clock_Data/datMiniAnnotation3_Gold.csv')
row.names(gold)<-gold$CpG
dim(gold)
## [1] 30084     2

Determine missing CpGs

missing_cpgs <-setdiff(rownames(gold), cpgs_in_GOTO) 
length(missing_cpgs)
## [1] 2558

Making a df with the mean value of gold for participants in our betas dataset

gold_missing <- gold[missing_cpgs,]
gold_missing <- as.numeric(gold_missing[,2])
gold_missing<- replicate(ncol(betas), gold_missing)
rownames(gold_missing)<-missing_cpgs

Combine measured betas with gold dataset

betas_final <- rbind(betas, gold_missing)
betas_final <- betas_final[gold$CpG,]

Save final betas object

write.csv(betas_final, file="../GOTO_Data/Clocks/Clock_Data/betas_imputed_with_gold_standard.csv")

DNAmAge

Create input for DNAmAge

input_methylclock <- as.data.frame(betas_final)
input_methylclock <- input_methylclock %>% mutate_all(as.numeric)
input_methylclock <- input_methylclock %>% rownames_to_column(var='CpG')

Calculate Horvath cAge and PhenoAge

methylclock <- DNAmAge(input_methylclock, 
                       clocks = c("Horvath", "Levine")) 
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache

Save

save(methylclock, file="../GOTO_Data/Clocks/Clock_Out/methylclock.RData")

GrimAge

Check

identical(colnames(betas_final), rownames(targets))
## [1] TRUE

Prepare data

grimage <- cbind(SampleID = colnames(betas_final), 
                 Age = targets$age, 
                 Female = targets$Female, t(betas_final))
r=19
n=dim(grimage)[1]

Define groups

f <- rep(seq_len(ceiling(n/r)),each = r, length.out = n) 
ldf <- split(grimage, f = f)

Save

for(i in unique(f)){
  tmp <- grimage[which(f %in% i),]
    write.csv(tmp,
              paste0("../GOTO_Data/Clocks/Clock_Data/GrimAgeFile",i,".csv"))
}

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] methylclock_1.4.0           quadprog_1.5-8             
##  [3] devtools_2.4.4              usethis_2.1.6              
##  [5] methylclockData_1.6.0       futile.logger_1.4.3        
##  [7] circlize_0.4.15             ComplexHeatmap_2.14.0      
##  [9] RColorBrewer_1.1-3          pheatmap_1.0.12            
## [11] lmerTest_3.1-3              lme4_1.1-30                
## [13] Matrix_1.5-4.1              SummarizedExperiment_1.24.0
## [15] MatrixGenerics_1.10.0       matrixStats_1.0.0          
## [17] AnnotationHub_3.2.2         BiocFileCache_2.2.1        
## [19] dbplyr_2.2.1                GenomicFeatures_1.46.5     
## [21] AnnotationDbi_1.56.2        Biobase_2.58.0             
## [23] GenomicRanges_1.46.1        GenomeInfoDb_1.34.9        
## [25] IRanges_2.32.0              S4Vectors_0.36.2           
## [27] BiocGenerics_0.44.0         forcats_0.5.2              
## [29] stringr_1.5.0               dplyr_1.1.3                
## [31] purrr_0.3.4                 readr_2.1.2                
## [33] tidyr_1.2.1                 tibble_3.2.1               
## [35] ggplot2_3.4.3               tidyverse_1.3.2            
## [37] cinaR_0.2.3                 edgeR_3.40.2               
## [39] limma_3.54.2                rmarkdown_2.16             
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                SparseM_1.81                 
##   [3] rtracklayer_1.54.0            AnnotationForge_1.36.0       
##   [5] bumphunter_1.36.0             ggpmisc_0.5.4-1              
##   [7] minfi_1.40.0                  bit64_4.0.5                  
##   [9] knitr_1.43                    DelayedArray_0.24.0          
##  [11] data.table_1.14.8             GEOquery_2.62.2              
##  [13] KEGGREST_1.34.0               RCurl_1.98-1.12              
##  [15] doParallel_1.0.17             generics_0.1.3               
##  [17] preprocessCore_1.60.2         callr_3.7.3                  
##  [19] lambda.r_1.2.4                RSQLite_2.2.17               
##  [21] bit_4.0.5                     tzdb_0.4.0                   
##  [23] xml2_1.3.4                    lubridate_1.9.2              
##  [25] httpuv_1.6.11                 ggpp_0.5.4                   
##  [27] assertthat_0.2.1              gargle_1.5.0                 
##  [29] xfun_0.39                     hms_1.1.2                    
##  [31] jquerylib_0.1.4               evaluate_0.21                
##  [33] promises_1.2.0.1              scrime_1.3.5                 
##  [35] fansi_1.0.4                   restfulr_0.0.15              
##  [37] progress_1.2.2                readxl_1.4.1                 
##  [39] DBI_1.1.3                     htmlwidgets_1.5.4            
##  [41] reshape_0.8.9                 stringdist_0.9.10            
##  [43] googledrive_2.0.0             ellipsis_0.3.2               
##  [45] ggpubr_0.4.0                  backports_1.4.1              
##  [47] annotate_1.72.0               sparseMatrixStats_1.10.0     
##  [49] biomaRt_2.50.3                vctrs_0.6.3                  
##  [51] quantreg_5.94                 remotes_2.4.2                
##  [53] Cairo_1.6-0                   abind_1.4-5                  
##  [55] cachem_1.0.8                  withr_2.5.0                  
##  [57] vroom_1.5.7                   AnnotationHubData_1.28.0     
##  [59] GenomicAlignments_1.30.0      xts_0.13.1                   
##  [61] prettyunits_1.1.1             mclust_6.0.0                 
##  [63] cluster_2.1.4                 ExperimentHub_2.2.1          
##  [65] RPMM_1.25                     crayon_1.5.2                 
##  [67] genefilter_1.76.0             pkgconfig_2.0.3              
##  [69] nlme_3.1-162                  pkgload_1.3.2                
##  [71] rlang_1.1.1                   lifecycle_1.0.3              
##  [73] miniUI_0.1.1.1                MatrixModels_0.5-1           
##  [75] filelock_1.0.2                modelr_0.1.9                 
##  [77] cellranger_1.1.0              rngtools_1.5.2               
##  [79] graph_1.76.0                  base64_2.0.1                 
##  [81] carData_3.0-5                 Rhdf5lib_1.20.0              
##  [83] boot_1.3-28.1                 zoo_1.8-12                   
##  [85] reprex_2.0.2                  GlobalOptions_0.1.2          
##  [87] processx_3.8.1                googlesheets4_1.0.1          
##  [89] png_0.1-8                     rjson_0.2.21                 
##  [91] bitops_1.0-7                  rhdf5filters_1.10.1          
##  [93] Biostrings_2.62.0             DelayedMatrixStats_1.16.0    
##  [95] blob_1.2.4                    doRNG_1.8.6                  
##  [97] shape_1.4.6                   nor1mix_1.3-0                
##  [99] rstatix_0.7.0                 ggsignif_0.6.3               
## [101] scales_1.2.1                  memoise_2.0.1                
## [103] magrittr_2.0.3                plyr_1.8.8                   
## [105] zlibbioc_1.44.0               compiler_4.2.2               
## [107] BiocIO_1.8.0                  illuminaio_0.40.0            
## [109] clue_0.3-64                   Rsamtools_2.10.0             
## [111] cli_3.6.1                     XVector_0.34.0               
## [113] urlchecker_1.0.1              ps_1.7.5                     
## [115] PerformanceAnalytics_2.0.4    formatR_1.14                 
## [117] MASS_7.3-60                   tidyselect_1.2.0             
## [119] stringi_1.7.12                highr_0.10                   
## [121] yaml_2.3.7                    askpass_1.1                  
## [123] locfit_1.5-9.8                biocViews_1.66.3             
## [125] sass_0.4.6                    polynom_1.4-1                
## [127] tools_4.2.2                   timechange_0.2.0             
## [129] parallel_4.2.2                rstudioapi_0.14              
## [131] foreach_1.5.2                 gridExtra_2.3                
## [133] digest_0.6.31                 BiocManager_1.30.21          
## [135] shiny_1.7.2                   Rcpp_1.0.10                  
## [137] siggenes_1.68.0               car_3.1-0                    
## [139] broom_1.0.1                   BiocVersion_3.16.0           
## [141] later_1.3.1                   OrganismDbi_1.36.0           
## [143] httr_1.4.6                    colorspace_2.1-0             
## [145] rvest_1.0.3                   XML_3.99-0.14                
## [147] fs_1.6.2                      splines_4.2.2                
## [149] BiocCheck_1.34.3              RBGL_1.74.0                  
## [151] multtest_2.50.0               sessioninfo_1.2.2            
## [153] xtable_1.8-4                  jsonlite_1.8.5               
## [155] nloptr_2.0.3                  futile.options_1.0.1         
## [157] dynamicTreeCut_1.63-1         R6_2.5.1                     
## [159] RUnit_0.4.32                  profvis_0.3.7                
## [161] ExperimentHubData_1.24.0      pillar_1.9.0                 
## [163] htmltools_0.5.5               mime_0.12                    
## [165] glue_1.6.2                    fastmap_1.1.1                
## [167] minqa_1.2.5                   BiocParallel_1.32.6          
## [169] beanplot_1.3.1                interactiveDisplayBase_1.32.0
## [171] codetools_0.2-19              pkgbuild_1.4.1               
## [173] utf8_1.2.3                    lattice_0.21-8               
## [175] bslib_0.5.0                   numDeriv_2016.8-1.1          
## [177] curl_5.0.1                    magick_2.7.4                 
## [179] openssl_2.0.6                 survival_3.5-5               
## [181] munsell_0.5.0                 rhdf5_2.42.1                 
## [183] GetoptLong_1.0.5              GenomeInfoDbData_1.2.9       
## [185] iterators_1.0.14              HDF5Array_1.22.1             
## [187] impute_1.72.3                 haven_2.5.1                  
## [189] reshape2_1.4.4                gtable_0.3.3

Clear

rm(list=ls())