This script loads the sample sheet and splits it by study into three sets of targets: (i) GOTO, (ii) CD4+ T-cell experiments, and (iii) TwinLife.
Load packages
library(tidyverse)
Load in the sample sheet from HMU merged with study sample information
# Restore the objects stored in the sample-sheet .Rdata file into the
# current environment (the code below uses `targets` from it).
load(file = "../GOTO_Data/Sample_Sheets/GOTO_wave1-targets.Rdata")
Create a `Basename` variable, which points to the related IDAT files
# Build `Basename` by joining the Sentrix barcode and position with "_";
# remove = FALSE keeps both input columns in the data.
targets <- unite(
  data = targets,
  col = Basename,
  c(SentrixBarcode_A, SentrixPosition_A),
  sep = "_",
  remove = FALSE
)
Split the targets file by Study:
# Replace `targets` by a list of data frames, one per value of `study`.
targets <- split(x = targets, f = targets$study)
Get targets for GOTO
# First element of the split list.
# NOTE(review): selected by position, so this presumably relies on the GOTO
# study sorting first among the `study` values - confirm.
targets_goto <- targets[[1L]]
Clean targets
# Recode the GOTO annotation columns. Where `levels` is given, the factor
# uses that level order and any value not listed becomes NA.
targets_goto <- mutate(
  targets_goto,
  # Identifiers
  DNA_labnr = factor(DNA_labnr),
  HMU_ID = factor(ID),
  # old_ID is coerced to numeric before it is turned into a factor
  IOP2_ID = factor(as.numeric(old_ID)),
  # Design variables
  tissue = factor(tissue, levels = c("fasted blood", "fat", "muscle")),
  timepoint = factor(timepoint, levels = c("before", "after")),
  sex = factor(old_sex, levels = c("male", "female")),
  op_status = factor(group, levels = c("partner", "offspring")),
  # Plate and array layout
  plate = factor(Sample_Plate),
  well = factor(Sample_Well),
  array_n = factor(as.character(SentrixBarcode_A)),
  # NOTE(review): only the third character of SentrixPosition_A is read, so
  # this presumably expects single-digit rows (e.g. "R01".."R08") - confirm.
  array_row = as.numeric(substr(SentrixPosition_A, 3, 3))
)

# Keep only the variables below, in this column order.
targets_goto <- select(
  targets_goto,
  DNA_labnr, IOP2_ID, HMU_ID,
  tissue, timepoint, sex,
  age, bmi, op_status,
  plate, well, isolationdate,
  conc_ngul, A260280, volume,
  array_n, array_row, Basename
)
# Report how many variables (columns) and samples (rows) remain for GOTO.
print(
  paste0(
    "There is data on ",
    dim(targets_goto)[2],
    " variables for ",
    dim(targets_goto)[1],
    " samples in GOTO"
  )
)
## [1] "There is data on 18 variables for 562 samples in GOTO"
Save targets
# Write the cleaned GOTO targets to disk under the object name `targets_goto`.
save(
  list = "targets_goto",
  file = "../GOTO_Data/Processing/GOTO_targets-unfiltered.Rdata"
)
Get targets for CD4+ T-cell functional experiments
# Second element of the split list.
# NOTE(review): selected by position, so this presumably relies on the CD4+
# T-cell study sorting second among the `study` values - confirm.
targets_cd4t <- targets[[2L]]
Clean targets
# Recode the CD4+ T-cell annotation columns. Where `levels` is given, the
# factor uses that level order and any value not listed becomes NA.
targets_cd4t <- mutate(
  targets_cd4t,
  # Identifiers
  donor_ID = factor(old_ID),
  HMU_ID = factor(ID),
  # Design variables
  timepoint = factor(timepoint, levels = c("30m", "3h", "24h", "48h", "72h")),
  stim_status = factor(group, levels = c("Ethanol", "Oleic Acid")),
  # Plate and array layout
  plate = factor(Sample_Plate),
  well = factor(Sample_Well),
  array_n = factor(as.character(SentrixBarcode_A)),
  # NOTE(review): only the third character of SentrixPosition_A is read, so
  # this presumably expects single-digit rows (e.g. "R01".."R08") - confirm.
  array_row = as.numeric(substr(SentrixPosition_A, 3, 3))
)

# Keep only the variables below, in this column order.
targets_cd4t <- select(
  targets_cd4t,
  donor_ID, HMU_ID, timepoint,
  stim_status, plate, well,
  isolationdate, conc_ngul, A260280,
  volume, array_n, array_row,
  Basename
)
# Report how many variables (columns) and samples (rows) remain for the
# CD4+ T-cell experiments.
print(
  paste0(
    "There is data on ",
    dim(targets_cd4t)[2],
    " variables for ",
    dim(targets_cd4t)[1],
    " samples from the CD4+ T-cell experiments"
  )
)
## [1] "There is data on 13 variables for 90 samples from the CD4+ T-cell experiments"
Save targets
# Write the cleaned CD4+ T-cell targets to disk under the object name
# `targets_cd4t`.
save(
  list = "targets_cd4t",
  file = "../Study2_CD4T/CD4T_data-targets.Rdata"
)
Get targets for TwinLife pilot
# Third element of the split list.
# NOTE(review): selected by position, so this presumably relies on the
# TwinLife study sorting third among the `study` values - confirm.
targets_twinlife <- targets[[3L]]
Clean targets
# Recode the TwinLife annotation columns. Where `levels` is given, the
# factor uses that level order and any value not listed becomes NA.
targets_twinlife <- mutate(
  targets_twinlife,
  # Identifiers; for this study `twin_n` is taken from the `timepoint` column
  pair_ID = factor(old_ID),
  twin_n = factor(timepoint),
  HMU_ID = factor(ID),
  # Design variables
  dx = factor(group, levels = c("No", "Yes")),
  sex = factor(old_sex, levels = c("male", "female")),
  # Plate and array layout
  plate = factor(Sample_Plate),
  well = factor(Sample_Well),
  array_n = factor(as.character(SentrixBarcode_A)),
  # NOTE(review): only the third character of SentrixPosition_A is read, so
  # this presumably expects single-digit rows (e.g. "R01".."R08") - confirm.
  array_row = as.numeric(substr(SentrixPosition_A, 3, 3))
)

# Keep only the variables below, in this column order; `bmi` is renamed to
# `weight_g` on the way out.
# NOTE(review): presumably the `bmi` column holds a weight in grams for this
# study - confirm.
targets_twinlife <- select(
  targets_twinlife,
  pair_ID, twin_n, HMU_ID,
  weight_g = bmi, dx, sex,
  plate, well, conc_ngul,
  A260280, volume, array_n,
  array_row, Basename
)
# Report how many variables (columns) and samples (rows) remain for TwinLife.
print(
  paste0(
    "There is data on ",
    dim(targets_twinlife)[2],
    " variables for ",
    dim(targets_twinlife)[1],
    " samples in TwinLife"
  )
)
## [1] "There is data on 14 variables for 20 samples in TwinLife"
Save targets
# Write the cleaned TwinLife targets to disk under the object name
# `targets_twinlife`.
save(
  list = "targets_twinlife",
  file = "../Study3_TwinLife/TwinLife_data-targets.Rdata"
)
# Print the R version, platform and attached/loaded packages for this run.
utils::sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.2 stringr_1.5.0 dplyr_1.1.3 purrr_0.3.4
## [5] readr_2.1.2 tidyr_1.2.1 tibble_3.2.1 ggplot2_3.4.3
## [9] tidyverse_1.3.2 rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 xfun_0.39 bslib_0.5.0
## [4] haven_2.5.1 gargle_1.5.0 colorspace_2.1-0
## [7] vctrs_0.6.3 generics_0.1.3 htmltools_0.5.5
## [10] yaml_2.3.7 utf8_1.2.3 rlang_1.1.1
## [13] jquerylib_0.1.4 pillar_1.9.0 withr_2.5.0
## [16] glue_1.6.2 DBI_1.1.3 dbplyr_2.2.1
## [19] modelr_0.1.9 readxl_1.4.1 lifecycle_1.0.3
## [22] munsell_0.5.0 gtable_0.3.3 cellranger_1.1.0
## [25] rvest_1.0.3 evaluate_0.21 knitr_1.43
## [28] tzdb_0.4.0 fastmap_1.1.1 fansi_1.0.4
## [31] broom_1.0.1 backports_1.4.1 scales_1.2.1
## [34] googlesheets4_1.0.1 cachem_1.0.8 jsonlite_1.8.5
## [37] fs_1.6.2 hms_1.1.2 digest_0.6.31
## [40] stringi_1.7.12 grid_4.2.2 cli_3.6.1
## [43] tools_4.2.2 magrittr_2.0.3 sass_0.4.6
## [46] crayon_1.5.2 pkgconfig_2.0.3 ellipsis_0.3.2
## [49] xml2_1.3.4 reprex_2.0.2 googledrive_2.0.0
## [52] lubridate_1.9.2 timechange_0.2.0 assertthat_0.2.1
## [55] httr_1.4.6 rstudioapi_0.14 R6_2.5.1
## [58] compiler_4.2.2
Cleanup
# Remove every object that ls() lists in the current environment (names
# starting with "." are not listed by default and therefore stay).
rm(list = ls())