This script corrects the sample mismatches identified using omicsPrint and removes non-compliers.
Load packages
library(SummarizedExperiment)
library(tidyverse)
Two muscle samples were mixed up: each belongs to a different individual than the one it is currently assigned to.
We correct this by saving the person-level data of both samples and swapping it between them.
Load in the targets, methylation data and RGset
# Restore the unfiltered objects from disk. The rest of the script expects
# these files to provide `targets`, `methData_unfiltered` and `RGset`
# (the same objects are written back to these paths at the end).
load(file = '../GOTO_Data/Processing/GOTO_targets-unfiltered.Rdata')
load(file = '../GOTO_Data/Processing/GOTO_methData-unfiltered.Rdata')
load(file = '../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata')
Show the mixed up IDs:
203548970088_R08C01 and 203548980011_R03C01. Person-level variables: IOP2_ID, sex, age, bmi, op_status
Save the variables of interest
# Swap the person-level annotation between the two mixed-up muscle samples.
#
# NOTE(review): `targets` is later saved back to the same .Rdata path it was
# loaded from, so running this script a second time on its own output would
# swap the values back -- confirm it is only ever run once on fresh input.
person_vars <- c('IOP2_ID', 'sex', 'age', 'bmi', 'op_status')
swapped_samples <- c('203548970088_R08C01', '203548980011_R03C01')

# Fail early if either sample is absent: indexing a data.frame by an unknown
# row name yields an all-NA row, and assigning to it appends a new row
# instead of raising an error.
stopifnot(all(swapped_samples %in% rownames(targets)))

# Keep a copy of each sample's current values so that neither is lost when
# the rows are overwritten below.
person_60366 <- targets[swapped_samples[1], person_vars]
person_60365 <- targets[swapped_samples[2], person_vars]

# Swap values
targets[swapped_samples[1], person_vars] <- person_60365
targets[swapped_samples[2], person_vars] <- person_60366
Add back to methData
# Replace the sample annotation of methData_unfiltered with the corrected
# targets. The rows of targets must be in the same sample order as the assay
# columns; check this first so annotation cannot end up attached to the wrong
# samples without any error.
# NOTE(review): only methData_unfiltered is updated here; the colData of RGset
# is left as loaded -- confirm nothing downstream reads the swapped
# person-level variables from RGset.
stopifnot(identical(rownames(targets), colnames(methData_unfiltered)))
colData(methData_unfiltered) <- DataFrame(targets)
Remove the mismatched fat sample and its pair
# Basenames of the mismatched fat sample and its paired sample.
mismatched_fat_pair <- c('203548970042_R06C01', '203548970042_R05C01')

# Keep every column whose name is not one of the two excluded basenames,
# in both methData_unfiltered and RGset.
keep_meth <- !(colnames(methData_unfiltered) %in% mismatched_fat_pair)
methData_unfiltered <- methData_unfiltered[, keep_meth]
keep_rg <- !(colnames(RGset) %in% mismatched_fat_pair)
RGset <- RGset[, keep_rg]

# Print the object summary to check the remaining number of samples.
methData_unfiltered
## class: SummarizedExperiment
## dim: 865859 546
## metadata(0):
## assays(1): beta
## rownames(865859): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(7): cpg chr ... gene_HGNC MASK_general
## colnames(546): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename smoke
# Print the RGset summary to check the remaining number of samples.
RGset
## class: RGChannelSetExtended
## dim: 1051815 546
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(546): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
# Drop the same two basenames from the sample sheet so that targets stays in
# step with methData_unfiltered and RGset. filter() is namespace-qualified,
# as in the non-complier step further down, so a filter() from another
# attached package cannot be picked up by accident.
targets <- targets %>%
  dplyr::filter(!Basename %in% c('203548970042_R06C01', '203548970042_R05C01'))
Remove non-compliers
# IOP2_IDs of the participants to exclude as non-compliers.
non_comply <- c(
  "60227", "60220", "61792",
  "61311", "61283", "61284",
  "60474", "61763", "62238"
)

# Collect the basenames of all samples belonging to those participants
# before the rows are removed from targets.
is_non_complier <- targets$IOP2_ID %in% non_comply
non_comply_basenames <- targets$Basename[is_non_complier]

# Remove the non-compliers from the sample sheet ...
targets <- dplyr::filter(targets, !(IOP2_ID %in% non_comply))

# ... and the matching columns from the methylation data.
meth_keep <- !(colnames(methData_unfiltered) %in% non_comply_basenames)
methData_unfiltered <- methData_unfiltered[, meth_keep]

# Print the object summary to check the remaining number of samples.
methData_unfiltered
## class: SummarizedExperiment
## dim: 865859 534
## metadata(0):
## assays(1): beta
## rownames(865859): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(7): cpg chr ... gene_HGNC MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename smoke
# Apply the same non-complier exclusion to RGset, then print its summary.
rg_keep <- !(colnames(RGset) %in% non_comply_basenames)
RGset <- RGset[, rg_keep]
RGset
## class: RGChannelSetExtended
## dim: 1051815 534
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
# Write the corrected objects back to disk.
# NOTE(review): these are the same paths the script loads from, so the
# original unfiltered inputs are overwritten -- confirm this is intended.
save(targets, file = '../GOTO_Data/Processing/GOTO_targets-unfiltered.Rdata')
save(methData_unfiltered, file = '../GOTO_Data/Processing/GOTO_methData-unfiltered.Rdata')
save(RGset, file = '../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata')

# Record the R version and package versions used for this run.
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] snpStats_1.44.0
## [2] survival_3.5-5
## [3] ggrepel_0.9.1
## [4] ggfortify_0.4.14
## [5] irlba_2.3.5.1
## [6] Matrix_1.5-4.1
## [7] omicsPrint_1.14.0
## [8] MASS_7.3-60
## [9] DNAmArray_2.0.0
## [10] pls_2.8-2
## [11] FDb.InfiniumMethylation.hg19_2.2.0
## [12] org.Hs.eg.db_3.14.0
## [13] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [14] GenomicFeatures_1.46.5
## [15] AnnotationDbi_1.56.2
## [16] IlluminaHumanMethylationEPICmanifest_0.3.0
## [17] minfi_1.40.0
## [18] bumphunter_1.36.0
## [19] locfit_1.5-9.8
## [20] iterators_1.0.14
## [21] foreach_1.5.2
## [22] Biostrings_2.62.0
## [23] XVector_0.34.0
## [24] SummarizedExperiment_1.24.0
## [25] Biobase_2.58.0
## [26] MatrixGenerics_1.10.0
## [27] matrixStats_1.0.0
## [28] GenomicRanges_1.46.1
## [29] GenomeInfoDb_1.34.9
## [30] IRanges_2.32.0
## [31] S4Vectors_0.36.2
## [32] BiocGenerics_0.44.0
## [33] BiocParallel_1.32.6
## [34] MethylAid_1.28.0
## [35] forcats_0.5.2
## [36] stringr_1.5.0
## [37] dplyr_1.1.3
## [38] purrr_0.3.4
## [39] readr_2.1.2
## [40] tidyr_1.2.1
## [41] tibble_3.2.1
## [42] ggplot2_3.4.3
## [43] tidyverse_1.3.2
## [44] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 tidyselect_1.2.0
## [3] RSQLite_2.2.17 grid_4.2.2
## [5] munsell_0.5.0 codetools_0.2-19
## [7] preprocessCore_1.60.2 withr_2.5.0
## [9] colorspace_2.1-0 filelock_1.0.2
## [11] highr_0.10 knitr_1.43
## [13] rstudioapi_0.14 labeling_0.4.2
## [15] GenomeInfoDbData_1.2.9 farver_2.1.1
## [17] bit64_4.0.5 rhdf5_2.42.1
## [19] vctrs_0.6.3 generics_0.1.3
## [21] xfun_0.39 timechange_0.2.0
## [23] BiocFileCache_2.2.1 R6_2.5.1
## [25] illuminaio_0.40.0 bitops_1.0-7
## [27] rhdf5filters_1.10.1 cachem_1.0.8
## [29] reshape_0.8.9 DelayedArray_0.24.0
## [31] assertthat_0.2.1 vroom_1.5.7
## [33] promises_1.2.0.1 BiocIO_1.8.0
## [35] scales_1.2.1 googlesheets4_1.0.1
## [37] gtable_0.3.3 rlang_1.1.1
## [39] genefilter_1.76.0 splines_4.2.2
## [41] rtracklayer_1.54.0 gargle_1.5.0
## [43] GEOquery_2.62.2 htm2txt_2.2.2
## [45] hexbin_1.28.3 broom_1.0.1
## [47] yaml_2.3.7 reshape2_1.4.4
## [49] RaggedExperiment_1.18.0 modelr_0.1.9
## [51] backports_1.4.1 httpuv_1.6.11
## [53] tools_4.2.2 gridBase_0.4-7
## [55] nor1mix_1.3-0 ellipsis_0.3.2
## [57] jquerylib_0.1.4 RColorBrewer_1.1-3
## [59] siggenes_1.68.0 MultiAssayExperiment_1.20.0
## [61] Rcpp_1.0.10 plyr_1.8.8
## [63] sparseMatrixStats_1.10.0 progress_1.2.2
## [65] zlibbioc_1.44.0 RCurl_1.98-1.12
## [67] prettyunits_1.1.1 openssl_2.0.6
## [69] haven_2.5.1 fs_1.6.2
## [71] magrittr_2.0.3 data.table_1.14.8
## [73] reprex_2.0.2 googledrive_2.0.0
## [75] hms_1.1.2 mime_0.12
## [77] evaluate_0.21 xtable_1.8-4
## [79] XML_3.99-0.14 mclust_6.0.0
## [81] readxl_1.4.1 gridExtra_2.3
## [83] compiler_4.2.2 biomaRt_2.50.3
## [85] crayon_1.5.2 htmltools_0.5.5
## [87] later_1.3.1 tzdb_0.4.0
## [89] lubridate_1.9.2 DBI_1.1.3
## [91] dbplyr_2.2.1 rappdirs_0.3.3
## [93] cli_3.6.1 quadprog_1.5-8
## [95] pkgconfig_2.0.3 GenomicAlignments_1.30.0
## [97] xml2_1.3.4 annotate_1.72.0
## [99] bslib_0.5.0 rngtools_1.5.2
## [101] multtest_2.50.0 beanplot_1.3.1
## [103] rvest_1.0.3 doRNG_1.8.6
## [105] scrime_1.3.5 digest_0.6.31
## [107] base64_2.0.1 cellranger_1.1.0
## [109] DelayedMatrixStats_1.16.0 restfulr_0.0.15
## [111] curl_5.0.1 shiny_1.7.2
## [113] Rsamtools_2.10.0 rjson_0.2.21
## [115] lifecycle_1.0.3 nlme_3.1-162
## [117] jsonlite_1.8.5 Rhdf5lib_1.20.0
## [119] askpass_1.1 limma_3.54.2
## [121] fansi_1.0.4 pillar_1.9.0
## [123] lattice_0.21-8 KEGGREST_1.34.0
## [125] fastmap_1.1.1 httr_1.4.6
## [127] glue_1.6.2 png_0.1-8
## [129] bit_4.0.5 stringi_1.7.12
## [131] sass_0.4.6 HDF5Array_1.22.1
## [133] blob_1.2.4 memoise_2.0.1
Clear
# Remove all (non-hidden) objects from the current environment.
rm(list = ls())