This script analyses the intervention effect on DNAm in 160 muscle samples.
Load required packages
library(DNAmArray)
library(SummarizedExperiment)
library(tidyverse)
library(limma)
library(sva)
library(methylGSA)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
library(minfi)
library(bacon)
library(lattice)
library(ggrepel)
library(irlba)
library(BiocParallel)
library(FDb.InfiniumMethylation.hg19)
library(ggfortify)
library(GenomicRanges)
library(ggrepel)
library(irlba)
Load data
load("../GOTO_Data/GOTO_targets-filtered.Rdata")
load('../GOTO_Data/GOTO_methData-filtered.Rdata')
Subset
methData <- methData[ , methData$tissue == 'muscle']
targets <- targets %>% filter(tissue == "muscle")
methData
## class: SummarizedExperiment
## dim: 755777 160
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames: NULL
## colData names(32): DNA_labnr IOP2_ID ... hdl_c systolic_bp
Complete betas
complete_betas <- na.omit(assay(methData))
dim(complete_betas)
## [1] 548242 160
Calculate PCs
pc <- prcomp_irlba(
t(complete_betas),
n=5)
Inspect
summary(pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 14.580 12.3372 8.69421 5.87293 5.37469
## Proportion of Variance 0.217 0.1554 0.07717 0.03521 0.02949
## Cumulative Proportion 0.217 0.3724 0.44960 0.48481 0.51431
dim(pc$x)
## [1] 160 5
head(pc$x)
## PC1 PC2 PC3 PC4 PC5
## [1,] 13.288778 -8.553599 -6.181866 -1.116066 -5.517333
## [2,] 5.507855 -10.950377 -9.354874 8.201130 -6.598547
## [3,] 17.635972 -6.109277 -17.902844 4.392906 -0.643676
## [4,] 2.702749 -11.361153 -17.414077 -3.715791 -4.159595
## [5,] -29.920359 2.856232 -19.563603 -6.007014 -3.429310
## [6,] 12.540727 19.144574 -7.048747 4.729128 -2.173684
Merge
targets <- as.data.frame(colData(methData))
targets <- cbind(
targets,
pc$x)
colData(methData) <- DataFrame(targets)
Formula
formula <- ~timepoint + age + sex + smoke + plate + array_row +
PC1 + PC2 + PC3 + PC4 + PC5
Design
design <- model.matrix(formula,
data=colData(methData))
Correlation for random effect
dupcor <- duplicateCorrelation(assay(methData),
design,
block = colData(methData)$IOP2_ID)
Fit models
fit <- lmFit(assay(methData), design,
block = colData(methData)$IOP2_ID,
correlation = dupcor$consensus.correlation)
## Warning: Partial NA coefficients for 1213 probe(s)
Save
coef <- fit$coefficients[, 2]
se <- fit$stdev.unscaled[, 2] * fit$sigma
tstat <- coef / se
pval <- 2 * pt(-abs(tstat), fit$df.residual)
n <- ncol(design) + fit$df.residual
Apply bacon to estimate bias and inflation of test statistics
bc <- bacon::bacon(teststatistics = tstat,
effectsizes = coef,
standarderrors = se,
verbose = TRUE)
## Use multinomial weighted sampling...
## threshold = -5.4009
## Starting values:
## p0 = 1.0000, p1 = 0.0000, p2 = 0.0000
## mu0 = 0.0511, mu1 = 5.9471, mu2 = -5.8449
## sigma0 = 1.0917, sigma1 = 1.0917, sigma2 = 1.0917
bc
## Bacon-object containing 1 set(s) of 755777 test-statistics.
## ...estimated bias: 0.054.
## ...estimated inflation: 1.1.
##
## Empirical null estimates are based on 5000 iterations with a burnin-period of 2000.
bacon::inflation(bc)
## sigma.0
## 1.081619
bacon::bias(bc)
## mu.0
## 0.0540214
Save bacon adjusted p-values and t statistics
pval <- bacon::pval(bc)
tstat <- bacon::tstat(bc)
Rerun bacon to check bias and inflation
bc <- bacon::bacon(teststatistics = tstat,
effectsizes = coef,
standarderrors = se,
verbose = TRUE)
## Use multinomial weighted sampling...
## threshold = -5.4009
## Starting values:
## p0 = 1.0000, p1 = 0.0000, p2 = 0.0000
## mu0 = -0.0027, mu1 = 5.4484, mu2 = -5.4538
## sigma0 = 1.0093, sigma1 = 1.0093, sigma2 = 1.0093
bc
## Bacon-object containing 1 set(s) of 755777 test-statistics.
## ...estimated bias: 0.0013.
## ...estimated inflation: 1.
##
## Empirical null estimates are based on 5000 iterations with a burnin-period of 2000.
bacon::inflation(bc)
## sigma.0
## 1.006139
bacon::bias(bc)
## mu.0
## 0.001283238
Adjust p-values
padj_fdr <- p.adjust(pval, method="fdr")
Save bacon-adjusted results
limma_base <- data.frame(cpg = rownames(fit$coefficients),
beta = coef, SE = se,
p = pval, padj_fdr = padj_fdr,
t = tstat, N = n)
Look at top CpGs
top_cpgs <- limma_base %>%
filter(padj_fdr <= 0.05) %>%
arrange(padj_fdr)
print(paste0("There are ",
nrow(top_cpgs), " significant CpGs in muscle."))
## [1] "There are 354 significant CpGs in muscle."
print(paste0(nrow(top_cpgs %>% filter(beta<0)),
" of these are hypomethylated, and ",
nrow(top_cpgs %>% filter(beta>0)),
" are hypermethylated."))
## [1] "310 of these are hypomethylated, and 44 are hypermethylated."
save(top_cpgs, file='../GOTO_Data/GOTO_results-top-muscle.Rdata')
save(limma_base, file='../GOTO_Data/GOTO_results-full-muscle.Rdata')
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] lattice_0.21-8
## [2] bacon_1.22.0
## [3] ellipse_0.4.5
## [4] methylGSA_1.12.0
## [5] sva_3.42.0
## [6] genefilter_1.76.0
## [7] mgcv_1.8-42
## [8] nlme_3.1-162
## [9] limma_3.54.2
## [10] lmerTest_3.1-3
## [11] lme4_1.1-30
## [12] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [13] snpStats_1.44.0
## [14] survival_3.5-5
## [15] ggrepel_0.9.1
## [16] ggfortify_0.4.14
## [17] irlba_2.3.5.1
## [18] Matrix_1.5-4.1
## [19] omicsPrint_1.14.0
## [20] MASS_7.3-60
## [21] DNAmArray_2.0.0
## [22] pls_2.8-2
## [23] FDb.InfiniumMethylation.hg19_2.2.0
## [24] org.Hs.eg.db_3.14.0
## [25] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [26] GenomicFeatures_1.46.5
## [27] AnnotationDbi_1.56.2
## [28] IlluminaHumanMethylationEPICmanifest_0.3.0
## [29] minfi_1.40.0
## [30] bumphunter_1.36.0
## [31] locfit_1.5-9.8
## [32] iterators_1.0.14
## [33] foreach_1.5.2
## [34] Biostrings_2.62.0
## [35] XVector_0.34.0
## [36] SummarizedExperiment_1.24.0
## [37] Biobase_2.58.0
## [38] MatrixGenerics_1.10.0
## [39] matrixStats_1.0.0
## [40] GenomicRanges_1.46.1
## [41] GenomeInfoDb_1.34.9
## [42] IRanges_2.32.0
## [43] S4Vectors_0.36.2
## [44] BiocGenerics_0.44.0
## [45] BiocParallel_1.32.6
## [46] MethylAid_1.28.0
## [47] forcats_0.5.2
## [48] stringr_1.5.0
## [49] dplyr_1.1.3
## [50] purrr_0.3.4
## [51] readr_2.1.2
## [52] tidyr_1.2.1
## [53] tibble_3.2.1
## [54] ggplot2_3.4.3
## [55] tidyverse_1.3.2
## [56] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3
## [2] rtracklayer_1.54.0
## [3] bit64_4.0.5
## [4] knitr_1.43
## [5] DelayedArray_0.24.0
## [6] data.table_1.14.8
## [7] KEGGREST_1.34.0
## [8] RCurl_1.98-1.12
## [9] GEOquery_2.62.2
## [10] generics_0.1.3
## [11] preprocessCore_1.60.2
## [12] RSQLite_2.2.17
## [13] shadowtext_0.1.2
## [14] bit_4.0.5
## [15] tzdb_0.4.0
## [16] enrichplot_1.14.2
## [17] xml2_1.3.4
## [18] lubridate_1.9.2
## [19] httpuv_1.6.11
## [20] assertthat_0.2.1
## [21] viridis_0.6.2
## [22] gargle_1.5.0
## [23] xfun_0.39
## [24] hms_1.1.2
## [25] jquerylib_0.1.4
## [26] missMethyl_1.28.0
## [27] evaluate_0.21
## [28] promises_1.2.0.1
## [29] fansi_1.0.4
## [30] restfulr_0.0.15
## [31] scrime_1.3.5
## [32] progress_1.2.2
## [33] dbplyr_2.2.1
## [34] readxl_1.4.1
## [35] igraph_1.4.3
## [36] DBI_1.1.3
## [37] reshape_0.8.9
## [38] googledrive_2.0.0
## [39] ellipsis_0.3.2
## [40] backports_1.4.1
## [41] annotate_1.72.0
## [42] gridBase_0.4-7
## [43] biomaRt_2.50.3
## [44] sparseMatrixStats_1.10.0
## [45] vctrs_0.6.3
## [46] cachem_1.0.8
## [47] withr_2.5.0
## [48] ggforce_0.3.4
## [49] vroom_1.5.7
## [50] treeio_1.18.1
## [51] GenomicAlignments_1.30.0
## [52] prettyunits_1.1.1
## [53] MultiAssayExperiment_1.20.0
## [54] mclust_6.0.0
## [55] DOSE_3.20.1
## [56] lazyeval_0.2.2
## [57] ape_5.7-1
## [58] crayon_1.5.2
## [59] edgeR_3.40.2
## [60] pkgconfig_2.0.3
## [61] labeling_0.4.2
## [62] tweenr_2.0.2
## [63] rlang_1.1.1
## [64] lifecycle_1.0.3
## [65] downloader_0.4
## [66] filelock_1.0.2
## [67] BiocFileCache_2.2.1
## [68] modelr_0.1.9
## [69] polyclip_1.10-4
## [70] cellranger_1.1.0
## [71] rngtools_1.5.2
## [72] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [73] aplot_0.1.7
## [74] base64_2.0.1
## [75] Rhdf5lib_1.20.0
## [76] boot_1.3-28.1
## [77] reprex_2.0.2
## [78] googlesheets4_1.0.1
## [79] viridisLite_0.4.2
## [80] png_0.1-8
## [81] rjson_0.2.21
## [82] bitops_1.0-7
## [83] rhdf5filters_1.10.1
## [84] blob_1.2.4
## [85] DelayedMatrixStats_1.16.0
## [86] doRNG_1.8.6
## [87] qvalue_2.26.0
## [88] nor1mix_1.3-0
## [89] gridGraphics_0.5-1
## [90] reactome.db_1.77.0
## [91] scales_1.2.1
## [92] memoise_2.0.1
## [93] magrittr_2.0.3
## [94] plyr_1.8.8
## [95] hexbin_1.28.3
## [96] RobustRankAggreg_1.2.1
## [97] zlibbioc_1.44.0
## [98] scatterpie_0.1.8
## [99] compiler_4.2.2
## [100] BiocIO_1.8.0
## [101] RColorBrewer_1.1-3
## [102] illuminaio_0.40.0
## [103] Rsamtools_2.10.0
## [104] cli_3.6.1
## [105] patchwork_1.1.2
## [106] tidyselect_1.2.0
## [107] stringi_1.7.12
## [108] highr_0.10
## [109] yaml_2.3.7
## [110] GOSemSim_2.20.0
## [111] askpass_1.1
## [112] grid_4.2.2
## [113] sass_0.4.6
## [114] fastmatch_1.1-3
## [115] tools_4.2.2
## [116] RaggedExperiment_1.18.0
## [117] timechange_0.2.0
## [118] rstudioapi_0.14
## [119] gridExtra_2.3
## [120] farver_2.1.1
## [121] ggraph_2.0.6
## [122] digest_0.6.31
## [123] shiny_1.7.2
## [124] quadprog_1.5-8
## [125] Rcpp_1.0.10
## [126] siggenes_1.68.0
## [127] broom_1.0.1
## [128] later_1.3.1
## [129] httr_1.4.6
## [130] colorspace_2.1-0
## [131] rvest_1.0.3
## [132] XML_3.99-0.14
## [133] fs_1.6.2
## [134] splines_4.2.2
## [135] statmod_1.5.0
## [136] yulab.utils_0.0.6
## [137] tidytree_0.4.0
## [138] graphlayouts_0.8.1
## [139] multtest_2.50.0
## [140] ggplotify_0.1.0
## [141] xtable_1.8-4
## [142] ggtree_3.2.1
## [143] jsonlite_1.8.5
## [144] nloptr_2.0.3
## [145] tidygraph_1.2.2
## [146] ggfun_0.0.7
## [147] R6_2.5.1
## [148] pillar_1.9.0
## [149] htmltools_0.5.5
## [150] mime_0.12
## [151] glue_1.6.2
## [152] fastmap_1.1.1
## [153] minqa_1.2.5
## [154] clusterProfiler_4.2.2
## [155] beanplot_1.3.1
## [156] htm2txt_2.2.2
## [157] codetools_0.2-19
## [158] fgsea_1.20.0
## [159] utf8_1.2.3
## [160] bslib_0.5.0
## [161] numDeriv_2016.8-1.1
## [162] curl_5.0.1
## [163] GO.db_3.14.0
## [164] openssl_2.0.6
## [165] munsell_0.5.0
## [166] DO.db_2.9
## [167] rhdf5_2.42.1
## [168] GenomeInfoDbData_1.2.9
## [169] HDF5Array_1.22.1
## [170] haven_2.5.1
## [171] reshape2_1.4.4
## [172] gtable_0.3.3
Clear
rm(list=ls())