This script estimates Horvath cAge, phenoAge, and grimAge from blood DNAm data
Load packages
library(tidyverse)
library(methylclock)
library(SummarizedExperiment)
Load beta values
load('../GOTO_Data/Processing/GOTO_methData-unfiltered.Rdata')
methData <- methData_unfiltered
methData
## class: SummarizedExperiment
## dim: 865859 534
## metadata(0):
## assays(1): beta
## rownames(865859): cg14817997 cg26928153 ... cg07587934 cg16855331
## rowData names(4): cpg chr start end
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename smoke
Save betas and targets
betas <- assay(methData)
targets <- as.data.frame(colData(methData))
Transform sex to female
targets$Female <- ifelse(targets$sex == 'female', 1, 0)
xtabs(~targets$Female+targets$sex)
## targets$sex
## targets$Female male female
## 0 266 0
## 1 0 268
Save available CpGs
cpgs_in_GOTO <- as.vector(rownames(betas))
head(cpgs_in_GOTO)
## [1] "cg14817997" "cg26928153" "cg16269199" "cg13869341" "cg14008030"
## [6] "cg12045430"
Load in gold standard
gold <- read.csv('../GOTO_Data/Clocks/Clock_Data/datMiniAnnotation3_Gold.csv')
row.names(gold)<-gold$CpG
dim(gold)
## [1] 30084 2
Determine missing CpGs
missing_cpgs <-setdiff(rownames(gold), cpgs_in_GOTO)
length(missing_cpgs)
## [1] 2558
Making a df with the mean value of gold for participants in our betas dataset
gold_missing <- gold[missing_cpgs,]
gold_missing <- as.numeric(gold_missing[,2])
gold_missing<- replicate(ncol(betas), gold_missing)
rownames(gold_missing)<-missing_cpgs
Combine measured betas with gold dataset
betas_final <- rbind(betas, gold_missing)
betas_final <- betas_final[gold$CpG,]
Save final betas object
write.csv(betas_final, file="../GOTO_Data/Clocks/Clock_Data/betas_imputed_with_gold_standard.csv")
Create input for DNAmAge
input_methylclock <- as.data.frame(betas_final)
input_methylclock <- input_methylclock %>% mutate_all(as.numeric)
input_methylclock <- input_methylclock %>% rownames_to_column(var='CpG')
Calculate Horvath cAge and PhenoAge
methylclock <- DNAmAge(input_methylclock,
clocks = c("Horvath", "Levine"))
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
## snapshotDate(): 2022-10-31
## see ?methylclockData and browseVignettes('methylclockData') for documentation
## loading from cache
Save
save(methylclock, file="../GOTO_Data/Clocks/Clock_Out/methylclock.RData")
Check
identical(colnames(betas_final), rownames(targets))
## [1] TRUE
Prepare data
grimage <- cbind(SampleID = colnames(betas_final),
Age = targets$age,
Female = targets$Female, t(betas_final))
r=19
n=dim(grimage)[1]
Define groups
f <- rep(seq_len(ceiling(n/r)),each = r, length.out = n)
ldf <- split(grimage, f = f)
Save
for(i in unique(f)){
tmp <- grimage[which(f %in% i),]
write.csv(tmp,
paste0("../GOTO_Data/Clocks/Clock_Data/GrimAgeFile",i,".csv"))
}
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] methylclock_1.4.0 quadprog_1.5-8
## [3] devtools_2.4.4 usethis_2.1.6
## [5] methylclockData_1.6.0 futile.logger_1.4.3
## [7] circlize_0.4.15 ComplexHeatmap_2.14.0
## [9] RColorBrewer_1.1-3 pheatmap_1.0.12
## [11] lmerTest_3.1-3 lme4_1.1-30
## [13] Matrix_1.5-4.1 SummarizedExperiment_1.24.0
## [15] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [17] AnnotationHub_3.2.2 BiocFileCache_2.2.1
## [19] dbplyr_2.2.1 GenomicFeatures_1.46.5
## [21] AnnotationDbi_1.56.2 Biobase_2.58.0
## [23] GenomicRanges_1.46.1 GenomeInfoDb_1.34.9
## [25] IRanges_2.32.0 S4Vectors_0.36.2
## [27] BiocGenerics_0.44.0 forcats_0.5.2
## [29] stringr_1.5.0 dplyr_1.1.3
## [31] purrr_0.3.4 readr_2.1.2
## [33] tidyr_1.2.1 tibble_3.2.1
## [35] ggplot2_3.4.3 tidyverse_1.3.2
## [37] cinaR_0.2.3 edgeR_3.40.2
## [39] limma_3.54.2 rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 SparseM_1.81
## [3] rtracklayer_1.54.0 AnnotationForge_1.36.0
## [5] bumphunter_1.36.0 ggpmisc_0.5.4-1
## [7] minfi_1.40.0 bit64_4.0.5
## [9] knitr_1.43 DelayedArray_0.24.0
## [11] data.table_1.14.8 GEOquery_2.62.2
## [13] KEGGREST_1.34.0 RCurl_1.98-1.12
## [15] doParallel_1.0.17 generics_0.1.3
## [17] preprocessCore_1.60.2 callr_3.7.3
## [19] lambda.r_1.2.4 RSQLite_2.2.17
## [21] bit_4.0.5 tzdb_0.4.0
## [23] xml2_1.3.4 lubridate_1.9.2
## [25] httpuv_1.6.11 ggpp_0.5.4
## [27] assertthat_0.2.1 gargle_1.5.0
## [29] xfun_0.39 hms_1.1.2
## [31] jquerylib_0.1.4 evaluate_0.21
## [33] promises_1.2.0.1 scrime_1.3.5
## [35] fansi_1.0.4 restfulr_0.0.15
## [37] progress_1.2.2 readxl_1.4.1
## [39] DBI_1.1.3 htmlwidgets_1.5.4
## [41] reshape_0.8.9 stringdist_0.9.10
## [43] googledrive_2.0.0 ellipsis_0.3.2
## [45] ggpubr_0.4.0 backports_1.4.1
## [47] annotate_1.72.0 sparseMatrixStats_1.10.0
## [49] biomaRt_2.50.3 vctrs_0.6.3
## [51] quantreg_5.94 remotes_2.4.2
## [53] Cairo_1.6-0 abind_1.4-5
## [55] cachem_1.0.8 withr_2.5.0
## [57] vroom_1.5.7 AnnotationHubData_1.28.0
## [59] GenomicAlignments_1.30.0 xts_0.13.1
## [61] prettyunits_1.1.1 mclust_6.0.0
## [63] cluster_2.1.4 ExperimentHub_2.2.1
## [65] RPMM_1.25 crayon_1.5.2
## [67] genefilter_1.76.0 pkgconfig_2.0.3
## [69] nlme_3.1-162 pkgload_1.3.2
## [71] rlang_1.1.1 lifecycle_1.0.3
## [73] miniUI_0.1.1.1 MatrixModels_0.5-1
## [75] filelock_1.0.2 modelr_0.1.9
## [77] cellranger_1.1.0 rngtools_1.5.2
## [79] graph_1.76.0 base64_2.0.1
## [81] carData_3.0-5 Rhdf5lib_1.20.0
## [83] boot_1.3-28.1 zoo_1.8-12
## [85] reprex_2.0.2 GlobalOptions_0.1.2
## [87] processx_3.8.1 googlesheets4_1.0.1
## [89] png_0.1-8 rjson_0.2.21
## [91] bitops_1.0-7 rhdf5filters_1.10.1
## [93] Biostrings_2.62.0 DelayedMatrixStats_1.16.0
## [95] blob_1.2.4 doRNG_1.8.6
## [97] shape_1.4.6 nor1mix_1.3-0
## [99] rstatix_0.7.0 ggsignif_0.6.3
## [101] scales_1.2.1 memoise_2.0.1
## [103] magrittr_2.0.3 plyr_1.8.8
## [105] zlibbioc_1.44.0 compiler_4.2.2
## [107] BiocIO_1.8.0 illuminaio_0.40.0
## [109] clue_0.3-64 Rsamtools_2.10.0
## [111] cli_3.6.1 XVector_0.34.0
## [113] urlchecker_1.0.1 ps_1.7.5
## [115] PerformanceAnalytics_2.0.4 formatR_1.14
## [117] MASS_7.3-60 tidyselect_1.2.0
## [119] stringi_1.7.12 highr_0.10
## [121] yaml_2.3.7 askpass_1.1
## [123] locfit_1.5-9.8 biocViews_1.66.3
## [125] sass_0.4.6 polynom_1.4-1
## [127] tools_4.2.2 timechange_0.2.0
## [129] parallel_4.2.2 rstudioapi_0.14
## [131] foreach_1.5.2 gridExtra_2.3
## [133] digest_0.6.31 BiocManager_1.30.21
## [135] shiny_1.7.2 Rcpp_1.0.10
## [137] siggenes_1.68.0 car_3.1-0
## [139] broom_1.0.1 BiocVersion_3.16.0
## [141] later_1.3.1 OrganismDbi_1.36.0
## [143] httr_1.4.6 colorspace_2.1-0
## [145] rvest_1.0.3 XML_3.99-0.14
## [147] fs_1.6.2 splines_4.2.2
## [149] BiocCheck_1.34.3 RBGL_1.74.0
## [151] multtest_2.50.0 sessioninfo_1.2.2
## [153] xtable_1.8-4 jsonlite_1.8.5
## [155] nloptr_2.0.3 futile.options_1.0.1
## [157] dynamicTreeCut_1.63-1 R6_2.5.1
## [159] RUnit_0.4.32 profvis_0.3.7
## [161] ExperimentHubData_1.24.0 pillar_1.9.0
## [163] htmltools_0.5.5 mime_0.12
## [165] glue_1.6.2 fastmap_1.1.1
## [167] minqa_1.2.5 BiocParallel_1.32.6
## [169] beanplot_1.3.1 interactiveDisplayBase_1.32.0
## [171] codetools_0.2-19 pkgbuild_1.4.1
## [173] utf8_1.2.3 lattice_0.21-8
## [175] bslib_0.5.0 numDeriv_2016.8-1.1
## [177] curl_5.0.1 magick_2.7.4
## [179] openssl_2.0.6 survival_3.5-5
## [181] munsell_0.5.0 rhdf5_2.42.1
## [183] GetoptLong_1.0.5 GenomeInfoDbData_1.2.9
## [185] iterators_1.0.14 HDF5Array_1.22.1
## [187] impute_1.72.3 haven_2.5.1
## [189] reshape2_1.4.4 gtable_0.3.3
Clear
rm(list=ls())