This script loads in sample data and splits it into three studies: (i) GOTO, (ii) CD4+ T-cell experiments, and (iii) TwinLife.


Setup

Load packages

library(tidyverse)

Load in the sample sheet from HMU merged with study sample information

# load() restores the objects stored in this file into the current
# environment; the code below expects it to supply an object named `targets`.
load(file = "../GOTO_Data/Sample_Sheets/GOTO_wave1-targets.Rdata")

Create a Basename variable (SentrixBarcode_A and SentrixPosition_A joined by "_"), which points to the related IDAT files

# Join the array barcode and array position with "_" into a new Basename
# column; remove = FALSE keeps both source columns in the data.
targets <- unite(
  targets,
  Basename,
  c(SentrixBarcode_A, SentrixPosition_A),
  sep = "_",
  remove = FALSE
)

Split the targets file by study. The resulting list is indexed by position in the sections below, in this order:

  • 1: GOTO
  • 2: CD4+ T-cell functional experiments
  • 3: TwinLife pilot
# One list element per distinct value of `study`. The elements are extracted
# by position later in the script, so their order matters.
targets <- split(targets, f = targets$study)

GOTO

Get targets for GOTO

# First element of the split list.
# NOTE(review): assumes GOTO sorts first among the `study` values - confirm.
targets_goto <- targets[[1L]]

Clean targets

# Build the cleaned GOTO sample sheet: recode identifiers and covariates as
# factors, derive the array row, keep only the columns listed in select(),
# then print the resulting number of variables and samples.
targets_goto <- targets_goto %>%
  mutate(
    # Identifiers; old_ID is coerced to numeric before becoming a factor
    DNA_labnr = factor(DNA_labnr),
    IOP2_ID = factor(as.numeric(old_ID)),
    HMU_ID = factor(ID),
    # Explicit levels fix the level order; any value not listed becomes NA
    timepoint = factor(timepoint, levels = c("before", "after")),
    tissue = factor(tissue, levels = c("fasted blood", "fat", "muscle")),
    op_status = factor(group, levels = c("partner", "offspring")),
    sex = factor(old_sex, levels = c("male", "female")),
    # Plate and array layout
    plate = factor(Sample_Plate),
    well = factor(Sample_Well),
    array_n = factor(as.character(SentrixBarcode_A)),
    # Read both row digits (characters 2-3) so that rows >= 10 are not
    # truncated to their last digit; rows 01-09 give the same value as before.
    # NOTE(review): assumes positions look like "R01C01" - confirm.
    array_row = as.numeric(substr(SentrixPosition_A, 2, 3))
  ) %>%
  select(
    DNA_labnr, IOP2_ID, HMU_ID,
    tissue, timepoint, sex,
    age, bmi, op_status,
    plate, well, isolationdate,
    conc_ngul, A260280, volume,
    array_n, array_row, Basename
  )
print(paste0(
  "There is data on ",
  ncol(targets_goto),
  " variables for ",
  nrow(targets_goto),
  " samples in GOTO"
))
## [1] "There is data on 18 variables for 562 samples in GOTO"

Save targets

# Write the cleaned GOTO sample sheet to disk as an .Rdata file
save(
  targets_goto,
  file = "../GOTO_Data/Processing/GOTO_targets-unfiltered.Rdata"
)

CD4+ T-cell functional experiments

Get targets for CD4+ T-cell functional experiments

# Second element of the split list.
# NOTE(review): assumes the CD4+ T-cell study sorts second among the `study`
# values - confirm.
targets_cd4t <- targets[[2L]]

Clean targets

# Build the cleaned CD4+ T-cell sample sheet: recode identifiers and
# experimental covariates as factors, derive the array row, keep only the
# columns listed in select(), then print the resulting dimensions.
targets_cd4t <- targets_cd4t %>%
  mutate(
    # Identifiers
    donor_ID = factor(old_ID),
    HMU_ID = factor(ID),
    # Explicit levels fix the level order; any value not listed becomes NA
    timepoint = factor(
      timepoint,
      levels = c("30m", "3h", "24h", "48h", "72h")
    ),
    stim_status = factor(group, levels = c("Ethanol", "Oleic Acid")),
    # Plate and array layout
    plate = factor(Sample_Plate),
    well = factor(Sample_Well),
    array_n = factor(as.character(SentrixBarcode_A)),
    # Read both row digits (characters 2-3) so that rows >= 10 are not
    # truncated to their last digit; rows 01-09 give the same value as before.
    # NOTE(review): assumes positions look like "R01C01" - confirm.
    array_row = as.numeric(substr(SentrixPosition_A, 2, 3))
  ) %>%
  select(
    donor_ID, HMU_ID, timepoint,
    stim_status, plate, well,
    isolationdate, conc_ngul, A260280,
    volume, array_n, array_row,
    Basename
  )
print(paste0(
  "There is data on ",
  ncol(targets_cd4t),
  " variables for ",
  nrow(targets_cd4t),
  " samples from the CD4+ T-cell experiments"
))
## [1] "There is data on 13 variables for 90 samples from the CD4+ T-cell experiments"

Save targets

# Write the cleaned CD4+ T-cell sample sheet to disk as an .Rdata file
save(
  targets_cd4t,
  file = "../Study2_CD4T/CD4T_data-targets.Rdata"
)

TwinLife pilot

Get targets for TwinLife pilot

# Third element of the split list.
# NOTE(review): assumes TwinLife sorts third among the `study` values -
# confirm.
targets_twinlife <- targets[[3L]]

Clean targets

# Build the cleaned TwinLife sample sheet: recode identifiers and covariates
# as factors, derive the array row, keep only the columns listed in select(),
# then print the resulting dimensions.
targets_twinlife <- targets_twinlife %>%
  mutate(
    # Identifiers; for this study old_ID supplies pair_ID and the timepoint
    # column supplies twin_n
    pair_ID = factor(old_ID),
    twin_n = factor(timepoint),
    HMU_ID = factor(ID),
    # Explicit levels fix the level order; any value not listed becomes NA
    dx = factor(group, levels = c("No", "Yes")),
    sex = factor(old_sex, levels = c("male", "female")),
    # Plate and array layout
    plate = factor(Sample_Plate),
    well = factor(Sample_Well),
    array_n = factor(as.character(SentrixBarcode_A)),
    # Read both row digits (characters 2-3) so that rows >= 10 are not
    # truncated to their last digit; rows 01-09 give the same value as before.
    # NOTE(review): assumes positions look like "R01C01" - confirm.
    array_row = as.numeric(substr(SentrixPosition_A, 2, 3))
  ) %>%
  select(
    pair_ID, twin_n, HMU_ID,
    # The bmi column is carried over under the name weight_g.
    # NOTE(review): presumably it holds a weight in grams here - verify.
    weight_g = bmi, dx, sex,
    plate, well, conc_ngul,
    A260280, volume, array_n,
    array_row, Basename
  )
print(paste0(
  "There is data on ",
  ncol(targets_twinlife),
  " variables for ",
  nrow(targets_twinlife),
  " samples in TwinLife"
))
## [1] "There is data on 14 variables for 20 samples in TwinLife"

Save targets

# Write the cleaned TwinLife sample sheet to disk as an .Rdata file
save(
  targets_twinlife,
  file = "../Study3_TwinLife/TwinLife_data-targets.Rdata"
)

Session Info

# Print the R version, platform, locale and package versions used for this run
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.2   stringr_1.5.0   dplyr_1.1.3     purrr_0.3.4    
##  [5] readr_2.1.2     tidyr_1.2.1     tibble_3.2.1    ggplot2_3.4.3  
##  [9] tidyverse_1.3.2 rmarkdown_2.16 
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0    xfun_0.39           bslib_0.5.0        
##  [4] haven_2.5.1         gargle_1.5.0        colorspace_2.1-0   
##  [7] vctrs_0.6.3         generics_0.1.3      htmltools_0.5.5    
## [10] yaml_2.3.7          utf8_1.2.3          rlang_1.1.1        
## [13] jquerylib_0.1.4     pillar_1.9.0        withr_2.5.0        
## [16] glue_1.6.2          DBI_1.1.3           dbplyr_2.2.1       
## [19] modelr_0.1.9        readxl_1.4.1        lifecycle_1.0.3    
## [22] munsell_0.5.0       gtable_0.3.3        cellranger_1.1.0   
## [25] rvest_1.0.3         evaluate_0.21       knitr_1.43         
## [28] tzdb_0.4.0          fastmap_1.1.1       fansi_1.0.4        
## [31] broom_1.0.1         backports_1.4.1     scales_1.2.1       
## [34] googlesheets4_1.0.1 cachem_1.0.8        jsonlite_1.8.5     
## [37] fs_1.6.2            hms_1.1.2           digest_0.6.31      
## [40] stringi_1.7.12      grid_4.2.2          cli_3.6.1          
## [43] tools_4.2.2         magrittr_2.0.3      sass_0.4.6         
## [46] crayon_1.5.2        pkgconfig_2.0.3     ellipsis_0.3.2     
## [49] xml2_1.3.4          reprex_2.0.2        googledrive_2.0.0  
## [52] lubridate_1.9.2     timechange_0.2.0    assertthat_0.2.1   
## [55] httr_1.4.6          rstudioapi_0.14     R6_2.5.1           
## [58] compiler_4.2.2

Cleanup

# Remove every object that ls() lists in the current environment; attached
# packages are left in place.
rm(list = ls())