This script corrects the sample mismatches identified using omicsPrint and removes non-compliers.


Setup

Load packages

library(SummarizedExperiment)
library(tidyverse)

Muscle Sample Mixup

Two muscle samples were mixed up: each belongs to a different individual than the one it is currently assigned to.

We correct this by saving the person-level data of both samples and swapping it between them.

Load in the targets, methylation data, and RGset

# Restore the unfiltered objects written by earlier processing. The code
# below relies on `targets`, `methData_unfiltered` and `RGset` existing after
# these calls (presumably one object per file -- confirm against the files).
load(file = "../GOTO_Data/Processing/GOTO_targets-unfiltered.Rdata")
load(file = "../GOTO_Data/Processing/GOTO_methData-unfiltered.Rdata")
load(file = "../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata")

Show the mixed up IDs:

  • We want to swap 203548970088_R08C01 and 203548980011_R03C01

Person level variables: IOP2_ID, sex, age, bmi, op_status

Save the variables of interest

# Person-level columns: these describe the individual rather than the array,
# so they are the values that must move when two samples are exchanged.
person_vars <- c("IOP2_ID", "sex", "age", "bmi", "op_status")

# Fail early if a sample or column is absent. Indexing a data frame by an
# unknown row name does not raise an error: it returns an all-NA row, and a
# later assignment to that row name would append a new row to targets.
stopifnot(
  all(c("203548970088_R08C01", "203548980011_R03C01") %in% rownames(targets)),
  all(person_vars %in% colnames(targets))
)

# Person-level values currently stored on each of the two mixed-up samples
person_60366 <- targets["203548970088_R08C01", person_vars]

person_60365 <- targets["203548980011_R03C01", person_vars]

Swap values

# Columns exchanged between the two samples
swap_cols <- c("IOP2_ID", "sex", "age", "bmi", "op_status")

# Each sample receives the person-level values that were saved from the other
targets["203548970088_R08C01", swap_cols] <- person_60365

targets["203548980011_R03C01", swap_cols] <- person_60366

Add back to methData

# The colData<- setter pairs rows with columns by position and takes the
# column names from the row names of the new value. Align targets to the
# existing column order explicitly, so that a differently ordered sample
# sheet cannot silently relabel the samples.
stopifnot(
  nrow(targets) == ncol(methData_unfiltered),
  all(colnames(methData_unfiltered) %in% rownames(targets))
)
colData(methData_unfiltered) <- DataFrame(
  targets[colnames(methData_unfiltered), , drop = FALSE]
)
# NOTE(review): colData(RGset) is not updated anywhere in this script, so the
# RGset presumably still carries the pre-swap person-level values -- confirm
# whether downstream code reads them.

Fat Sample Removal

Remove the mismatched fat sample and its pair

# Basenames of the two fat samples to be dropped
fat_drop <- c("203548970042_R06C01", "203548970042_R05C01")

# Keep every column whose name is not in fat_drop, in both objects
keep_meth <- !(colnames(methData_unfiltered) %in% fat_drop)
methData_unfiltered <- methData_unfiltered[, keep_meth]
keep_rg <- !(colnames(RGset) %in% fat_drop)
RGset <- RGset[, keep_rg]

methData_unfiltered
## class: SummarizedExperiment 
## dim: 865859 546 
## metadata(0):
## assays(1): beta
## rownames(865859): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(7): cpg chr ... gene_HGNC MASK_general
## colnames(546): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename smoke
# Display the RGset after the two fat samples have been dropped
RGset
## class: RGChannelSetExtended 
## dim: 1051815 546 
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(546): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19
# Drop the same two fat samples from the sample sheet. filter is called as
# dplyr::filter, matching the other filter calls in this script, so that a
# same-named function from another attached package cannot be picked up.
targets <- targets %>%
  dplyr::filter(
    !Basename %in% c("203548970042_R06C01", "203548970042_R05C01")
  )

Compliance

Remove non-compliers

# IOP2_ID values of the participants to exclude as non-compliers
non_comply <- c(
  "60227", "60220", "61792", "61311", "61283",
  "61284", "60474", "61763", "62238"
)

# Basenames of every sample from those participants. Collected before
# targets is filtered, because the array objects are subset with them later.
non_comply_basenames <- targets %>%
  dplyr::filter(IOP2_ID %in% non_comply) %>%
  dplyr::pull(Basename)

# Keep only the samples of the remaining participants
targets <- targets %>%
  dplyr::filter(!(IOP2_ID %in% non_comply))

# Flag the columns that belong to non-compliers, then keep the rest
is_non_comply_meth <- colnames(methData_unfiltered) %in% non_comply_basenames
methData_unfiltered <- methData_unfiltered[, !is_non_comply_meth]

methData_unfiltered
## class: SummarizedExperiment 
## dim: 865859 534 
## metadata(0):
## assays(1): beta
## rownames(865859): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(7): cpg chr ... gene_HGNC MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename smoke
# Flag the columns that belong to non-compliers, then keep the rest
is_non_comply_rg <- colnames(RGset) %in% non_comply_basenames
RGset <- RGset[, !is_non_comply_rg]

RGset
## class: RGChannelSetExtended 
## dim: 1051815 534 
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19

Save

# Write the corrected objects back to disk, one object per file.
# NOTE(review): these are the same paths that are loaded at the top of the
# script, so the inputs are overwritten. The swap step is unconditional, so
# running the script again on these outputs would exchange the two muscle
# samples back -- confirm the script is only run once per input.
save(
  list = "targets",
  file = "../GOTO_Data/Processing/GOTO_targets-unfiltered.Rdata"
)
save(
  list = "methData_unfiltered",
  file = "../GOTO_Data/Processing/GOTO_methData-unfiltered.Rdata"
)
save(
  list = "RGset",
  file = "../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata"
)

Session Info

# Record the R version, platform and package versions used for this run
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] snpStats_1.44.0                           
##  [2] survival_3.5-5                            
##  [3] ggrepel_0.9.1                             
##  [4] ggfortify_0.4.14                          
##  [5] irlba_2.3.5.1                             
##  [6] Matrix_1.5-4.1                            
##  [7] omicsPrint_1.14.0                         
##  [8] MASS_7.3-60                               
##  [9] DNAmArray_2.0.0                           
## [10] pls_2.8-2                                 
## [11] FDb.InfiniumMethylation.hg19_2.2.0        
## [12] org.Hs.eg.db_3.14.0                       
## [13] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2   
## [14] GenomicFeatures_1.46.5                    
## [15] AnnotationDbi_1.56.2                      
## [16] IlluminaHumanMethylationEPICmanifest_0.3.0
## [17] minfi_1.40.0                              
## [18] bumphunter_1.36.0                         
## [19] locfit_1.5-9.8                            
## [20] iterators_1.0.14                          
## [21] foreach_1.5.2                             
## [22] Biostrings_2.62.0                         
## [23] XVector_0.34.0                            
## [24] SummarizedExperiment_1.24.0               
## [25] Biobase_2.58.0                            
## [26] MatrixGenerics_1.10.0                     
## [27] matrixStats_1.0.0                         
## [28] GenomicRanges_1.46.1                      
## [29] GenomeInfoDb_1.34.9                       
## [30] IRanges_2.32.0                            
## [31] S4Vectors_0.36.2                          
## [32] BiocGenerics_0.44.0                       
## [33] BiocParallel_1.32.6                       
## [34] MethylAid_1.28.0                          
## [35] forcats_0.5.2                             
## [36] stringr_1.5.0                             
## [37] dplyr_1.1.3                               
## [38] purrr_0.3.4                               
## [39] readr_2.1.2                               
## [40] tidyr_1.2.1                               
## [41] tibble_3.2.1                              
## [42] ggplot2_3.4.3                             
## [43] tidyverse_1.3.2                           
## [44] rmarkdown_2.16                            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                  tidyselect_1.2.0           
##   [3] RSQLite_2.2.17              grid_4.2.2                 
##   [5] munsell_0.5.0               codetools_0.2-19           
##   [7] preprocessCore_1.60.2       withr_2.5.0                
##   [9] colorspace_2.1-0            filelock_1.0.2             
##  [11] highr_0.10                  knitr_1.43                 
##  [13] rstudioapi_0.14             labeling_0.4.2             
##  [15] GenomeInfoDbData_1.2.9      farver_2.1.1               
##  [17] bit64_4.0.5                 rhdf5_2.42.1               
##  [19] vctrs_0.6.3                 generics_0.1.3             
##  [21] xfun_0.39                   timechange_0.2.0           
##  [23] BiocFileCache_2.2.1         R6_2.5.1                   
##  [25] illuminaio_0.40.0           bitops_1.0-7               
##  [27] rhdf5filters_1.10.1         cachem_1.0.8               
##  [29] reshape_0.8.9               DelayedArray_0.24.0        
##  [31] assertthat_0.2.1            vroom_1.5.7                
##  [33] promises_1.2.0.1            BiocIO_1.8.0               
##  [35] scales_1.2.1                googlesheets4_1.0.1        
##  [37] gtable_0.3.3                rlang_1.1.1                
##  [39] genefilter_1.76.0           splines_4.2.2              
##  [41] rtracklayer_1.54.0          gargle_1.5.0               
##  [43] GEOquery_2.62.2             htm2txt_2.2.2              
##  [45] hexbin_1.28.3               broom_1.0.1                
##  [47] yaml_2.3.7                  reshape2_1.4.4             
##  [49] RaggedExperiment_1.18.0     modelr_0.1.9               
##  [51] backports_1.4.1             httpuv_1.6.11              
##  [53] tools_4.2.2                 gridBase_0.4-7             
##  [55] nor1mix_1.3-0               ellipsis_0.3.2             
##  [57] jquerylib_0.1.4             RColorBrewer_1.1-3         
##  [59] siggenes_1.68.0             MultiAssayExperiment_1.20.0
##  [61] Rcpp_1.0.10                 plyr_1.8.8                 
##  [63] sparseMatrixStats_1.10.0    progress_1.2.2             
##  [65] zlibbioc_1.44.0             RCurl_1.98-1.12            
##  [67] prettyunits_1.1.1           openssl_2.0.6              
##  [69] haven_2.5.1                 fs_1.6.2                   
##  [71] magrittr_2.0.3              data.table_1.14.8          
##  [73] reprex_2.0.2                googledrive_2.0.0          
##  [75] hms_1.1.2                   mime_0.12                  
##  [77] evaluate_0.21               xtable_1.8-4               
##  [79] XML_3.99-0.14               mclust_6.0.0               
##  [81] readxl_1.4.1                gridExtra_2.3              
##  [83] compiler_4.2.2              biomaRt_2.50.3             
##  [85] crayon_1.5.2                htmltools_0.5.5            
##  [87] later_1.3.1                 tzdb_0.4.0                 
##  [89] lubridate_1.9.2             DBI_1.1.3                  
##  [91] dbplyr_2.2.1                rappdirs_0.3.3             
##  [93] cli_3.6.1                   quadprog_1.5-8             
##  [95] pkgconfig_2.0.3             GenomicAlignments_1.30.0   
##  [97] xml2_1.3.4                  annotate_1.72.0            
##  [99] bslib_0.5.0                 rngtools_1.5.2             
## [101] multtest_2.50.0             beanplot_1.3.1             
## [103] rvest_1.0.3                 doRNG_1.8.6                
## [105] scrime_1.3.5                digest_0.6.31              
## [107] base64_2.0.1                cellranger_1.1.0           
## [109] DelayedMatrixStats_1.16.0   restfulr_0.0.15            
## [111] curl_5.0.1                  shiny_1.7.2                
## [113] Rsamtools_2.10.0            rjson_0.2.21               
## [115] lifecycle_1.0.3             nlme_3.1-162               
## [117] jsonlite_1.8.5              Rhdf5lib_1.20.0            
## [119] askpass_1.1                 limma_3.54.2               
## [121] fansi_1.0.4                 pillar_1.9.0               
## [123] lattice_0.21-8              KEGGREST_1.34.0            
## [125] fastmap_1.1.1               httr_1.4.6                 
## [127] glue_1.6.2                  png_0.1-8                  
## [129] bit_4.0.5                   stringi_1.7.12             
## [131] sass_0.4.6                  HDF5Array_1.22.1           
## [133] blob_1.2.4                  memoise_2.0.1

Clear

# Remove every object from the current environment now that the results
# have been saved to disk
rm(list=ls())