This script predicts blood cell-type proportions from DNA methylation (DNAm) data using the IDOL and IDOLext algorithms, and uses MuSiC with a single-cell RNA-seq (scRNA) reference to deconvolute bulk RNA-seq data.


Setup

Load required packages

library(rlang)
library(htmltools)
library(rmarkdown)
library(cli)
library(tidyverse)
library(DNAmArray)
library(lubridate)
library(Biobase)
library(MuSiC)
library(SummarizedExperiment)
library(minfi)
library(ExperimentHub)
library(FlowSorted.Blood.EPIC)
library(FlowSorted.BloodExtended.EPIC)
library(IlluminaHumanMethylationEPICmanifest)
library(IlluminaHumanMethylation450kmanifest)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)

Load DNA methylation data

# Restore previously saved objects into the workspace. The code below relies on
# `targets`, `methData` and `RGset` existing afterwards (presumably one object
# per file, judging by the file names — confirm).
load("../GOTO_Data/GOTO_targets-filtered.Rdata")
load("../GOTO_Data/GOTO_methData-filtered.Rdata")
load("../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata")
# Label the methylation columns with the array Basename from the sample sheet.
# Assumes the rows of `targets` are in the same order as the columns of
# `methData` — TODO confirm.
colnames(methData) <- targets$Basename

methData
## class: SummarizedExperiment 
## dim: 755777 534 
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(45): DNA_labnr IOP2_ID ... m1_macro m2_macro
RGset
## class: RGChannelSetExtended 
## dim: 1051815 534 
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19

Load measured cell count data alongside other variables on medication use and start dates.

# Import the measured cell counts and medication variables (column types are
# guessed by readr)
blood_df <- readr::read_csv(
  file = "../GOTO_Data/Cell_Counts/Blood/GOTO_Cellcounts-Medication_20210401.csv"
)
## Rows: 326 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): visit_date
## dbl (22): IOP2_ID, timepoint, med_lipidlowering, med_antihypertensive, cc_hb...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Measured Cell Counts

Blood cell counts were measured using a differential test.

Make visit date a date

# Parse the visit date strings (month/day/year) into Date values
blood_df[["visit_date"]] <- lubridate::as_date(
  blood_df[["visit_date"]],
  format = "%m/%d/%Y"
)

Create start date

# Attach each participant's baseline (timepoint 0) visit date as `start_date`.
#
# The baseline date is looked up by IOP2_ID rather than by row position, so the
# result does not depend on every participant having exactly two rows or on how
# the rows happen to sort. left_join() keeps the rows of `blood_df` in their
# current order, so no temporary ordering column is needed.
baseline_dates <- blood_df %>%
  dplyr::filter(timepoint == 0) %>%
  dplyr::select(IOP2_ID, start_date = visit_date)

# One baseline row per participant is required for an unambiguous lookup
stopifnot(anyDuplicated(baseline_dates$IOP2_ID) == 0)

# Participants without a baseline visit get start_date = NA
blood_df <- blood_df %>%
  left_join(baseline_dates, by = "IOP2_ID")

Create factors

# Treat participant ID and timepoint as categorical variables
blood_df <- blood_df %>%
  mutate(across(c(IOP2_ID, timepoint), as.factor))

Save ID list and timepoint order from methData

# Participant/timepoint keys, in the row order of the methylation sample sheet
ID_list <- targets[, c("IOP2_ID", "timepoint")]
dim(ID_list)
## [1] 534   2

Merge the imported cell count data to get the same order.

# Attach the measured counts to every (IOP2_ID, timepoint) row of the sample
# sheet. left_join() keeps the rows of ID_list in order, which the later
# column-bind onto `targets` depends on. Declaring the relationship makes dplyr
# stop if blood_df holds more than one row per key, which would otherwise
# duplicate samples and break that row alignment.
blood_df <- left_join(ID_list, blood_df,
                      by = c("IOP2_ID", "timepoint"),
                      relationship = "many-to-one")

Keep only the start date and the measured cell percentages (this drops the ID and timepoint columns used for ordering)

# Retain the baseline date and the five measured percentages, in this order
blood_df <- blood_df[, c("start_date",
                         "cc_eos_perc", "cc_baso_perc", "cc_neut_perc",
                         "cc_lymph_perc", "cc_mono_perc")]

Combine data frames

# Append the cell-count columns to the sample sheet by position. This relies on
# blood_df having been built with a left join from the rows of `targets`, so
# row i of both objects describes the same sample.
targets <- cbind(targets, blood_df)
# Expect the same number of rows as before and six additional columns
dim(targets)
## [1] 534  51

Set measured cell counts to missing (NA) for non-blood tissues

# The counts were measured in blood, so blank them on rows of other tissues
targets <- targets %>%
  mutate(across(
    c(cc_eos_perc, cc_baso_perc, cc_neut_perc, cc_lymph_perc, cc_mono_perc),
    ~ ifelse(tissue != "fasted blood", NA, .x)
  ))

Save variables of interest

# Give the measured counts their final names, then move the key sample
# variables and those counts to the front; all remaining columns follow in
# their existing order.
targets <- targets %>%
  dplyr::rename(
    cc_blood_meas_eos = cc_eos_perc,
    cc_blood_meas_baso = cc_baso_perc,
    cc_blood_meas_neut = cc_neut_perc,
    cc_blood_meas_lymph = cc_lymph_perc,
    cc_blood_meas_mono = cc_mono_perc
  ) %>%
  dplyr::select(
    DNA_labnr, IOP2_ID, tissue, timepoint,
    age, sex, bmi, op_status,
    start_date, visit_date, isolationdate,
    plate, well, array_n, array_row,
    Basename,
    cc_blood_meas_eos, cc_blood_meas_baso, cc_blood_meas_neut,
    cc_blood_meas_lymph, cc_blood_meas_mono,
    everything()
  )

MuSiC

Expression Set for scRNA

Read in the scRNA data from GSE143704

# Read the tab-separated single-cell table without a header, so the first row
# (cell labels) and first column (gene symbols) arrive as ordinary data
sc_blood <- read.table(
  file = "../GOTO_Data/Cell_Counts/Blood/scRNA-blood_GSE143704.tsv",
  sep = "\t"
)

Save cell names

# First row minus its first entry holds one cell label per column; store the
# labels as a one-column data frame named "Cell"
cell_names <- setNames(as.data.frame(t(sc_blood[1, -1])), "Cell")

Remove the first row with cell names

# Drop the first row, which holds the cell labels already saved in cell_names
sc_blood <- as.data.frame(sc_blood[-1,])

Save rownames

# The first column carries the gene identifiers; keep them as a plain vector
gene_names <- sc_blood[[1]]

Remove GeneSymbol column

# Remove the identifier column so only per-cell values remain
sc_blood[1] <- NULL

Create an expression matrix

# Convert every column to numeric, one column at a time; building a fresh data
# frame resets the row names to 1..n
sc_blood <- as.data.frame(lapply(sc_blood, as.numeric))

Add rownames

# Use the saved gene identifiers as row names (they must be unique)
row.names(sc_blood) <- gene_names

Make column names

# Label the cells with running numbers, left-padded with zeros to at least
# three characters ("001", "002", ...). seq_len() keeps this well-defined even
# if there are no columns, where 1:ncol() would count 1, 0.
colnames(sc_blood) <- str_pad(seq_len(ncol(sc_blood)), width = 3, pad = "0")

Look

sc_blood[1:5, 1:5]
##            001 002 003 004 005
## OR4F5        0   0   0   0   0
## FO538757.3   0   0   0   0   0
## FO538757.2   0   0   0   0   0
## OR4F29       0   0   0   0   0
## OR4F16       0   0   0   0   0

Add sample labels to cell_names

# Give each cell the same running label used for the columns of sc_blood, both
# as a `Sample` column and as row names. The label is computed once and reused;
# seq_len() avoids the 1:0 pitfall of 1:ncol().
cell_names$Sample <- str_pad(seq_len(ncol(sc_blood)), width = 3, pad = "0")
rownames(cell_names) <- cell_names$Sample
head(cell_names)
##     Cell Sample
## 001  BNK    001
## 002  BNK    002
## 003  BNK    003
## 004  BNK    004
## 005  BNK    005
## 006  BNK    006

GOTO Expression Data

Load functions

# Load project helper functions; read.gotornaseq(), used below, is presumably
# defined in this file — confirm.
source('../GOTO_Data/RNAseq/goto.rnaseq.functions.R')

Load in RNAseq from complete blood pairs

# Locations of the merged gene-count data and of the sample covariate sheet
pathIN_dat <- "../GOTO_Data/RNAseq/merge.gene.counts_biopet_13052016.RData"
pathIN_cov <- "../GOTO_Data/RNAseq/datasheet_RNAseq_blood_V2.csv"

# Sample filters, given as one "|"-separated string; the log printed by the
# reader shows them being applied one after another.
filt.samp <- "tissue_blood|qc_sexswitch|qc_multdim2|qc_rep1|complete_pairs"

# Read, merge and subset the RNA-seq data with the project helper. Note that
# pathIN_cov is passed positionally; presumably it fills the covariate-path
# argument — confirm against the helper's signature.
goto_exp <- read.gotornaseq(pathIN_dat = pathIN_dat, pathIN_cov, filt.samp = filt.samp, quiet = FALSE)
## ||| PREPARING GOTO RNASEQ DATA 
## || READING DATA 
## | Loading RNASEQ .. OK! 
##    [555 samples x 56520 features] 
## | Reading COVARIATES .. OK! 
##    [maintaining 379 samples x 84 features] 
## | Merging data .. OK! 
##    [555 samples x 56604 features] 
## || SUBSETTING SAMPLES 
## | Subsetting SAMPLES on ['tissue_blood']; PASS: 379 out of 555
## | Subsetting SAMPLES on ['qc_sexswitch']; PASS: 379 out of 379
## | Subsetting SAMPLES on ['qc_multdim2']; PASS: 379 out of 379
## | Subsetting SAMPLES on ['qc_rep1']; PASS: 376 out of 379
## | Subsetting SAMPLES on ['complete_pairs']; PASS: 376 out of 376
## | DONE!
# Keep only the "dat" element of the returned object (presumably the merged
# sample-by-variable data frame — confirm)
goto_exp <- goto_exp[["dat"]]

Filter pre-challenge samples and save counts

# Keep rows with nutridrink == 0, reduce to the sample identifiers plus the
# columns whose names start with "ENS", and build a combined sample ID of the
# form "<sampID2>_<intervention>".
goto_exp <- goto_exp %>%
  dplyr::filter(nutridrink == 0) %>%
  dplyr::select(sampID2, intervention, starts_with("ENS")) %>%
  mutate(ID = str_c(sampID2, "_", intervention))

Save IDs

# Sample IDs as a vector, and as a one-column data frame whose row names are
# the same IDs
ID_name <- goto_exp$ID
ID_df <- data.frame(Sample = ID_name, row.names = ID_name)

Remove non-gene variables

# Drop the identifier columns, flip to one row per gene and one column per
# sample, and label the columns with the sample IDs
goto_exp <- goto_exp %>%
  dplyr::select(-c(ID, sampID2, intervention)) %>%
  t() %>%
  as.data.frame() %>%
  setNames(ID_name)

Map to gene name

# Translate the Ensembl gene IDs (row names of goto_exp) to gene symbols using
# the `grch37` table from cinaR; IDs without a match give NA.
ens2gene <- cinaR::grch37
m <- match(rownames(goto_exp), ens2gene$ensgene)
mapped.genes <- ens2gene$symbol[m]

# Drop rows whose symbol is missing, repeats an earlier symbol (the first
# occurrence is kept), or starts with "MT".
# NOTE(review): "^MT" also matches symbols such as MTOR or MTHFR; if only
# mitochondrial genes are meant, "^MT-" may be the intended pattern — confirm.
removed.genes <- duplicated(mapped.genes) | is.na(mapped.genes) | grepl("^MT", mapped.genes)
goto_exp <- goto_exp[!removed.genes,]
rownames(goto_exp) <- mapped.genes[!removed.genes]

Subset

# Restrict both tables to genes present in the other one. Each table keeps its
# own row order, so the two are not necessarily sorted alike afterwards.
goto_exp <- goto_exp[is.element(rownames(goto_exp), rownames(sc_blood)), ]
sc_blood <- sc_blood[is.element(rownames(sc_blood), rownames(goto_exp)), ]

Expression Sets

Create an expression set for the single cell data

# Single-cell reference: expression values as a matrix, with the per-cell
# annotation (Cell, Sample) as phenoData
C.eset <- Biobase::ExpressionSet(
  phenoData = Biobase::AnnotatedDataFrame(data = cell_names),
  assayData = as.matrix(sc_blood)
)
C.eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 18531 features, 7643 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: 001 002 ... 7643 (7643 total)
##   varLabels: Cell Sample
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

Make expression set for bulk RNAseq from GOTO

# Bulk RNA-seq: expression values as a matrix, with the sample IDs as phenoData
T.eset <- Biobase::ExpressionSet(
  phenoData = Biobase::AnnotatedDataFrame(data = ID_df),
  assayData = as.matrix(goto_exp)
)
T.eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 18531 features, 183 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: 61482_1 62340_0 ... 61789_0 (183 total)
##   varLabels: Sample
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

MuSiC

Deconvolute

# Estimate cell-type proportions of the bulk samples with MuSiC, using the
# single-cell set as reference: cell types come from the `Cell` column and the
# grouping variable from the `Sample` column of its phenoData. Only the
# weighted estimate (Est.prop.weighted) is kept.
# NOTE(review): `Sample` holds a distinct label for every cell, so each cell is
# treated as its own "sample" — confirm this is the intended grouping.
# `verbose` is spelled FALSE because `F` is an ordinary variable that can be
# reassigned.
deconv <- music_prop(
  bulk.eset = T.eset,
  sc.eset = C.eset,
  clusters = "Cell",
  markers = NULL,
  normalize = FALSE,
  samples = "Sample",
  verbose = FALSE
)$Est.prop.weighted

summary(deconv)
##       BNK         CD4T               CD8T             claM              CLP   
##  Min.   :0   Min.   :0.000000   Min.   :0.0000   Min.   :0.00000   Min.   :0  
##  1st Qu.:0   1st Qu.:0.000000   1st Qu.:0.1224   1st Qu.:0.00000   1st Qu.:0  
##  Median :0   Median :0.000000   Median :0.1765   Median :0.00000   Median :0  
##  Mean   :0   Mean   :0.008223   Mean   :0.1808   Mean   :0.01352   Mean   :0  
##  3rd Qu.:0   3rd Qu.:0.000000   3rd Qu.:0.2293   3rd Qu.:0.02043   3rd Qu.:0  
##  Max.   :0   Max.   :0.421140   Max.   :0.5105   Max.   :0.13934   Max.   :0  
##       cMOP                CMP         ery                 GMP           
##  Min.   :0.0000000   Min.   :0   Min.   :0.0000000   Min.   :0.000e+00  
##  1st Qu.:0.0000000   1st Qu.:0   1st Qu.:0.0000000   1st Qu.:0.000e+00  
##  Median :0.0000000   Median :0   Median :0.0004511   Median :0.000e+00  
##  Mean   :0.0007752   Mean   :0   Mean   :0.0014828   Mean   :3.112e-06  
##  3rd Qu.:0.0004220   3rd Qu.:0   3rd Qu.:0.0023557   3rd Qu.:0.000e+00  
##  Max.   :0.0404196   Max.   :0   Max.   :0.0156175   Max.   :3.736e-04  
##       hMDP                HSC         immB              interM       
##  Min.   :0.0000000   Min.   :0   Min.   :0.000000   Min.   :0.03935  
##  1st Qu.:0.0000000   1st Qu.:0   1st Qu.:0.001233   1st Qu.:0.17359  
##  Median :0.0000000   Median :0   Median :0.006863   Median :0.21533  
##  Mean   :0.0002782   Mean   :0   Mean   :0.010768   Mean   :0.20676  
##  3rd Qu.:0.0000000   3rd Qu.:0   3rd Qu.:0.013084   3rd Qu.:0.24116  
##  Max.   :0.0157085   Max.   :0   Max.   :0.174863   Max.   :0.33207  
##      kineNK       LMPP      matureN            memB        MEP   
##  Min.   :0   Min.   :0   Min.   :0.1228   Min.   :0   Min.   :0  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0.2630   1st Qu.:0   1st Qu.:0  
##  Median :0   Median :0   Median :0.2948   Median :0   Median :0  
##  Mean   :0   Mean   :0   Mean   :0.2931   Mean   :0   Mean   :0  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.3260   3rd Qu.:0   3rd Qu.:0  
##  Max.   :0   Max.   :0   Max.   :0.4218   Max.   :0   Max.   :0  
##      metaN              MLP         MPP         myeN         
##  Min.   :0.08611   Min.   :0   Min.   :0   Min.   :0.000000  
##  1st Qu.:0.20954   1st Qu.:0   1st Qu.:0   1st Qu.:0.000000  
##  Median :0.24646   Median :0   Median :0   Median :0.000000  
##  Mean   :0.24488   Mean   :0   Mean   :0   Mean   :0.000529  
##  3rd Qu.:0.28270   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.000000  
##  Max.   :0.37715   Max.   :0   Max.   :0   Max.   :0.063761  
##       naiB                NKP         nonM              plasma         
##  Min.   :0.0000000   Min.   :0   Min.   :0.000000   Min.   :0.000e+00  
##  1st Qu.:0.0000000   1st Qu.:0   1st Qu.:0.000000   1st Qu.:0.000e+00  
##  Median :0.0000000   Median :0   Median :0.000000   Median :4.373e-05  
##  Mean   :0.0008892   Mean   :0   Mean   :0.006094   Mean   :3.212e-04  
##  3rd Qu.:0.0000000   3rd Qu.:0   3rd Qu.:0.002170   3rd Qu.:4.158e-04  
##  Max.   :0.0920783   Max.   :0   Max.   :0.098688   Max.   :4.052e-03  
##       preB        preM              proB        proN        regB          
##  Min.   :0   Min.   :0.00000   Min.   :0   Min.   :0   Min.   :0.000e+00  
##  1st Qu.:0   1st Qu.:0.01988   1st Qu.:0   1st Qu.:0   1st Qu.:0.000e+00  
##  Median :0   Median :0.02970   Median :0   Median :0   Median :0.000e+00  
##  Mean   :0   Mean   :0.03143   Mean   :0   Mean   :0   Mean   :7.168e-06  
##  3rd Qu.:0   3rd Qu.:0.04244   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.000e+00  
##  Max.   :0   Max.   :0.08264   Max.   :0   Max.   :0   Max.   :1.057e-03  
##      toxiNK         
##  Min.   :0.0000000  
##  1st Qu.:0.0000000  
##  Median :0.0000000  
##  Mean   :0.0001496  
##  3rd Qu.:0.0000000  
##  Max.   :0.0062376

Heatmap

# Clustered heatmap of the estimated proportions, with widened margins for the
# labels
heatmap(x = deconv, margins = c(12, 8))

Save

# Store the estimates as returned by MuSiC, before the columns are renamed and
# rescaled below
save(deconv, file="../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-Music.Rdata")

Add to targets

Make ID variable in targets

# Build a key of the form "<IOP2_ID>_<code>" for joining the MuSiC estimates.
# as.numeric(timepoint) - 1 yields 0/1 only if timepoint is a two-level factor
# (level codes 1/2) — presumably the case here; confirm.
targets$ID <- paste0(targets$IOP2_ID, "_", as.numeric(targets$timepoint) - 1)

Make variable names

# Prefix every cell-type column so its origin is clear once merged
colnames(deconv) <- sprintf("cc_blood_music_%s", colnames(deconv))

Make percentages

# Express the proportions (0-1) as percentages
deconv <- deconv %>%
  as.data.frame() %>%
  mutate(across(where(is.numeric), ~ .x * 100))

Make ID variable in deconv

# Move the sample IDs from the row names into an `ID` column for joining
deconv <- rownames_to_column(as.data.frame(deconv), var = "ID")

Merge

# Add the MuSiC columns to the sample sheet; rows without a matching ID get NA
targets <- targets %>%
  left_join(deconv, by = "ID")

Set MuSiC estimates to missing (NA) for non-blood tissues

# The MuSiC estimates were joined on a participant/timepoint key, so every
# tissue row sharing that key received the blood-derived values; blank them on
# rows that are not fasted blood. Selecting the columns by prefix covers
# whatever cell types the reference produced, instead of a hand-maintained list
# of 32 column names.
targets <- targets %>%
  mutate(across(
    starts_with("cc_blood_music_"),
    ~ ifelse(tissue != "fasted blood", NA, .x)
  ))

IDOL

Save the RGset for blood only

# Keep only the arrays whose sample annotation marks them as fasted blood
RGset_blood <- RGset[, colData(RGset)$tissue == "fasted blood"]

Minfi

# Create the ExperimentHub object used below to look up and fetch the
# reference data set
hub <- ExperimentHub()  
## snapshotDate(): 2022-10-31
query(hub, "FlowSorted.Blood.EPIC")  
## ExperimentHub with 1 record
## # snapshotDate(): 2022-10-31
## # names(): EH1136
## # package(): FlowSorted.Blood.EPIC
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: RGChannelSet
## # $rdatadateadded: 2018-04-20
## # $title: FlowSorted.Blood.EPIC: Illumina Human Methylation data from EPIC o...
## # $description: The FlowSorted.Blood.EPIC package contains Illumina HumanMet...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: tar.gz
## # $sourceurl: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110554
## # $sourcesize: NA
## # $tags: c("ExperimentData", "Homo_sapiens_Data", "Tissue",
## #   "MicroarrayData", "Genome", "TissueMicroarrayData",
## #   "MethylationArrayData") 
## # retrieve record with 'object[["EH1136"]]'
# Fetch record EH1136 (the FlowSorted.Blood.EPIC entry listed by the query
# above) and bind it to a workspace object of the same name.
# NOTE(review): presumably estimateCellCounts() picks the reference up under
# this name — confirm.
FlowSorted.Blood.EPIC <- hub[["EH1136"]]  
## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation
## loading from cache

Calculate cell counts

# Estimate the proportions of six blood cell types for the blood arrays against
# the EPIC flow-sorted reference.
# NOTE(review): the progress messages are tagged [estimateCellCounts], which
# looks like minfi's estimator rather than an IDOL-specific routine — confirm
# that the "IDOL" label on these results is accurate.
idol_blood <- estimateCellCounts(
  rgSet = RGset_blood,
  referencePlatform = "IlluminaHumanMethylationEPIC",
  cellTypes = c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Neu"),
  verbose = TRUE,
  meanPlot = TRUE
)
## [estimateCellCounts] Combining user data with reference (flow sorted) data.
## [estimateCellCounts] Processing user and reference data together.
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
## [estimateCellCounts] Picking probes for composition estimation.
## [estimateCellCounts] Estimating composition.

Save

# Store the estimates as returned, before they are reshaped and rescaled below
save(idol_blood, 
     file="../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-IDOL.Rdata")

Join with targets

Create merging variable

# Move the array identifiers from the row names into a `Basename` column so the
# estimates can be joined to the sample sheet
idol_blood <- rownames_to_column(as.data.frame(idol_blood), var = "Basename")
summary(idol_blood)
##    Basename              CD8T              CD4T               NK         
##  Length:196         Min.   :0.03718   Min.   :0.03219   Min.   :0.00370  
##  Class :character   1st Qu.:0.10224   1st Qu.:0.12119   1st Qu.:0.03752  
##  Mode  :character   Median :0.12223   Median :0.16240   Median :0.05209  
##                     Mean   :0.13009   Mean   :0.15779   Mean   :0.05783  
##                     3rd Qu.:0.15545   3rd Qu.:0.19628   3rd Qu.:0.07413  
##                     Max.   :0.28816   Max.   :0.33827   Max.   :0.24852  
##      Bcell              Mono              Neu        
##  Min.   :0.01079   Min.   :0.04267   Min.   :0.1633  
##  1st Qu.:0.04604   1st Qu.:0.08572   1st Qu.:0.4720  
##  Median :0.06048   Median :0.09969   Median :0.5201  
##  Mean   :0.06562   Mean   :0.10159   Mean   :0.5179  
##  3rd Qu.:0.07527   3rd Qu.:0.11647   3rd Qu.:0.5723  
##  Max.   :0.45958   Max.   :0.19203   Max.   :0.7167

Make percentages

# Express the proportions (0-1) as percentages; `Basename` is character and is
# left untouched
idol_blood <- idol_blood %>%
  mutate(across(where(is.numeric), ~ .x * 100))

Column names

# Prefix every estimate column by name rather than overwriting all column names
# by position, so a change in the column order returned by the estimator cannot
# silently mislabel cell types. `Basename` is left untouched as the join key.
idol_blood <- idol_blood %>%
  dplyr::rename_with(~ paste0("cc_blood_idol_", .x), .cols = -Basename)

Merge

# Add the estimates by array identifier; arrays without an estimate get NA
targets <- targets %>%
  left_join(idol_blood, by = "Basename")

IDOL Extended

Load data

# Restore precomputed extended estimates; the code below expects this file to
# provide `idol_ext` (presumably with array Basenames as row names — confirm).
load("../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-IDOLext.Rdata")

Make percentages

# Multiply every numeric column by 100 to obtain percentages
idol_ext <- idol_ext %>%
  mutate(across(where(is.numeric), ~ .x * 100))

Look at it

summary(idol_ext)
##       Bas              Bmem             Bnv            CD4mem      
##  Min.   :0.0000   Min.   : 0.000   Min.   :0.000   Min.   : 0.570  
##  1st Qu.:0.0300   1st Qu.: 1.127   1st Qu.:2.485   1st Qu.: 7.862  
##  Median :0.6700   Median : 1.630   Median :3.405   Median :10.560  
##  Mean   :0.7264   Mean   : 2.345   Mean   :3.639   Mean   :10.594  
##  3rd Qu.:1.0825   3rd Qu.: 2.312   3rd Qu.:4.582   3rd Qu.:12.835  
##  Max.   :4.9600   Max.   :48.990   Max.   :9.350   Max.   :24.820  
##      CD4nv            CD8mem           CD8nv             Eos         
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.0000   Min.   : 0.0000  
##  1st Qu.: 3.167   1st Qu.: 3.915   1st Qu.:0.0000   1st Qu.: 0.5125  
##  Median : 6.765   Median : 5.910   Median :0.0000   Median : 1.5750  
##  Mean   : 6.748   Mean   : 7.232   Mean   :0.8024   Mean   : 2.3382  
##  3rd Qu.: 9.682   3rd Qu.: 9.180   3rd Qu.:1.1625   3rd Qu.: 3.1450  
##  Max.   :18.780   Max.   :36.450   Max.   :5.6700   Max.   :12.3900  
##       Mono             Neu              NK              Treg       
##  Min.   : 2.690   Min.   :10.79   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.: 5.928   1st Qu.:41.97   1st Qu.: 3.958   1st Qu.:0.5475  
##  Median : 7.280   Median :47.98   Median : 5.190   Median :1.1550  
##  Mean   : 7.369   Mean   :47.64   Mean   : 5.521   Mean   :1.2803  
##  3rd Qu.: 8.545   3rd Qu.:53.74   3rd Qu.: 6.520   3rd Qu.:1.8375  
##  Max.   :14.140   Max.   :74.11   Max.   :15.840   Max.   :4.3900

Create join variable

# Move the row names into a `Basename` column to serve as the join key
idol_ext <- rownames_to_column(idol_ext, var = "Basename")

Set colnames

# Prefix every estimate column by name rather than overwriting all column names
# by position, so the labels stay correct whatever order the columns of
# `idol_ext` arrive in. `Basename` is left untouched as the join key.
idol_ext <- idol_ext %>%
  dplyr::rename_with(~ paste0("cc_blood_ext_", .x), .cols = -Basename)

Merge

# Add the extended estimates by array identifier; unmatched arrays get NA
targets <- targets %>%
  left_join(idol_ext, by = "Basename")

Add to methData and RGset

# Tabulate, position by position, whether the sample sheet and the methylation
# columns name the same array
check <- colnames(methData) == targets$Basename
xtabs(~ check)
## check
## TRUE 
##  534
# Same positional comparison between the sample sheet and the RGset columns
check <- colnames(RGset) == targets$Basename
xtabs(~ check)
## check
## TRUE 
##  534
# And between the two data objects themselves
check <- colnames(methData) == colnames(RGset)
xtabs(~ check)
## check
## TRUE 
##  534

Reorder targets

# Align the sample sheet with the columns of methData and attach it as colData
# to both data objects. The column order is held in `sample_order` so that
# base::order() is not masked.
sample_order <- colnames(methData)
row_idx <- match(sample_order, targets$Basename)

# An array missing from the sample sheet would yield an all-NA row and leave
# that sample without annotation, so stop instead
stopifnot(!anyNA(row_idx))
# RGset receives the same sheet, so its columns must be in the same order
stopifnot(identical(colnames(RGset), sample_order))

targets <- targets[row_idx, ]
rownames(targets) <- targets$Basename
colData(methData) <- DataFrame(targets)
colData(RGset) <- DataFrame(targets)

Look

methData
## class: SummarizedExperiment 
## dim: 755777 534 
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(102): DNA_labnr IOP2_ID ... cc_blood_ext_NK
##   cc_blood_ext_Treg
RGset
## class: RGChannelSetExtended 
## dim: 1051815 534 
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(102): DNA_labnr IOP2_ID ... cc_blood_ext_NK
##   cc_blood_ext_Treg
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19

Save

# Write the extended sample sheet and methylation object back to disk.
# NOTE(review): these are the same paths the script loaded from at the top, so
# the inputs are overwritten; a second run would start from the already
# extended objects — confirm this is intended.
save(targets, file="../GOTO_Data/GOTO_targets-filtered.Rdata")
save(methData, file="../GOTO_Data/GOTO_methData-filtered.Rdata")

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [2] IlluminaHumanMethylationEPICmanifest_0.3.0         
##  [3] FlowSorted.BloodExtended.EPIC_1.1.1                
##  [4] FlowSorted.Blood.EPIC_1.12.1                       
##  [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
##  [6] nlme_3.1-162                                       
##  [7] quadprog_1.5-8                                     
##  [8] genefilter_1.76.0                                  
##  [9] ExperimentHub_2.2.1                                
## [10] AnnotationHub_3.2.2                                
## [11] BiocFileCache_2.2.1                                
## [12] dbplyr_2.2.1                                       
## [13] MuSiC_0.2.0                                        
## [14] nnls_1.4                                           
## [15] lubridate_1.9.2                                    
## [16] DNAmArray_2.0.0                                    
## [17] pls_2.8-2                                          
## [18] FDb.InfiniumMethylation.hg19_2.2.0                 
## [19] org.Hs.eg.db_3.14.0                                
## [20] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [21] GenomicFeatures_1.46.5                             
## [22] AnnotationDbi_1.56.2                               
## [23] minfi_1.40.0                                       
## [24] bumphunter_1.36.0                                  
## [25] locfit_1.5-9.8                                     
## [26] iterators_1.0.14                                   
## [27] foreach_1.5.2                                      
## [28] Biostrings_2.62.0                                  
## [29] XVector_0.34.0                                     
## [30] SummarizedExperiment_1.24.0                        
## [31] Biobase_2.58.0                                     
## [32] MatrixGenerics_1.10.0                              
## [33] matrixStats_1.0.0                                  
## [34] GenomicRanges_1.46.1                               
## [35] GenomeInfoDb_1.34.9                                
## [36] IRanges_2.32.0                                     
## [37] S4Vectors_0.36.2                                   
## [38] BiocGenerics_0.44.0                                
## [39] forcats_0.5.2                                      
## [40] stringr_1.5.0                                      
## [41] dplyr_1.1.3                                        
## [42] purrr_0.3.4                                        
## [43] readr_2.1.2                                        
## [44] tidyr_1.2.1                                        
## [45] tibble_3.2.1                                       
## [46] ggplot2_3.4.3                                      
## [47] tidyverse_1.3.2                                    
## [48] cli_3.6.1                                          
## [49] htmltools_0.5.5                                    
## [50] rlang_1.1.1                                        
## [51] rmarkdown_2.16                                     
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                    tidyselect_1.2.0             
##   [3] RSQLite_2.2.17                grid_4.2.2                   
##   [5] BiocParallel_1.32.6           cinaR_0.2.3                  
##   [7] munsell_0.5.0                 codetools_0.2-19             
##   [9] preprocessCore_1.60.2         withr_2.5.0                  
##  [11] colorspace_2.1-0              filelock_1.0.2               
##  [13] highr_0.10                    knitr_1.43                   
##  [15] rstudioapi_0.14               GenomeInfoDbData_1.2.9       
##  [17] MCMCpack_1.6-3                bit64_4.0.5                  
##  [19] rhdf5_2.42.1                  coda_0.19-4                  
##  [21] vctrs_0.6.3                   generics_0.1.3               
##  [23] xfun_0.39                     timechange_0.2.0             
##  [25] R6_2.5.1                      illuminaio_0.40.0            
##  [27] bitops_1.0-7                  rhdf5filters_1.10.1          
##  [29] cachem_1.0.8                  reshape_0.8.9                
##  [31] DelayedArray_0.24.0           assertthat_0.2.1             
##  [33] vroom_1.5.7                   promises_1.2.0.1             
##  [35] BiocIO_1.8.0                  scales_1.2.1                 
##  [37] googlesheets4_1.0.1           gtable_0.3.3                 
##  [39] mcmc_0.9-7                    MatrixModels_0.5-1           
##  [41] splines_4.2.2                 rtracklayer_1.54.0           
##  [43] gargle_1.5.0                  GEOquery_2.62.2              
##  [45] htm2txt_2.2.2                 broom_1.0.1                  
##  [47] BiocManager_1.30.21           yaml_2.3.7                   
##  [49] reshape2_1.4.4                modelr_0.1.9                 
##  [51] backports_1.4.1               httpuv_1.6.11                
##  [53] tools_4.2.2                   nor1mix_1.3-0                
##  [55] ellipsis_0.3.2                jquerylib_0.1.4              
##  [57] RColorBrewer_1.1-3            siggenes_1.68.0              
##  [59] Rcpp_1.0.10                   plyr_1.8.8                   
##  [61] sparseMatrixStats_1.10.0      progress_1.2.2               
##  [63] zlibbioc_1.44.0               RCurl_1.98-1.12              
##  [65] prettyunits_1.1.1             openssl_2.0.6                
##  [67] haven_2.5.1                   fs_1.6.2                     
##  [69] magrittr_2.0.3                data.table_1.14.8            
##  [71] SparseM_1.81                  reprex_2.0.2                 
##  [73] googledrive_2.0.0             mime_0.12                    
##  [75] hms_1.1.2                     evaluate_0.21                
##  [77] xtable_1.8-4                  XML_3.99-0.14                
##  [79] mclust_6.0.0                  readxl_1.4.1                 
##  [81] compiler_4.2.2                biomaRt_2.50.3               
##  [83] crayon_1.5.2                  later_1.3.1                  
##  [85] tzdb_0.4.0                    DBI_1.1.3                    
##  [87] MASS_7.3-60                   rappdirs_0.3.3               
##  [89] Matrix_1.5-4.1                pkgconfig_2.0.3              
##  [91] GenomicAlignments_1.30.0      xml2_1.3.4                   
##  [93] annotate_1.72.0               bslib_0.5.0                  
##  [95] rngtools_1.5.2                multtest_2.50.0              
##  [97] beanplot_1.3.1                rvest_1.0.3                  
##  [99] doRNG_1.8.6                   scrime_1.3.5                 
## [101] digest_0.6.31                 base64_2.0.1                 
## [103] cellranger_1.1.0              edgeR_3.40.2                 
## [105] DelayedMatrixStats_1.16.0     restfulr_0.0.15              
## [107] curl_5.0.1                    shiny_1.7.2                  
## [109] Rsamtools_2.10.0              quantreg_5.94                
## [111] rjson_0.2.21                  lifecycle_1.0.3              
## [113] jsonlite_1.8.5                Rhdf5lib_1.20.0              
## [115] askpass_1.1                   limma_3.54.2                 
## [117] fansi_1.0.4                   pillar_1.9.0                 
## [119] lattice_0.21-8                KEGGREST_1.34.0              
## [121] fastmap_1.1.1                 httr_1.4.6                   
## [123] survival_3.5-5                interactiveDisplayBase_1.32.0
## [125] glue_1.6.2                    png_0.1-8                    
## [127] BiocVersion_3.16.0            bit_4.0.5                    
## [129] stringi_1.7.12                sass_0.4.6                   
## [131] HDF5Array_1.22.1              blob_1.2.4                   
## [133] memoise_2.0.1

Clear

# Remove every object from the global environment at the end of the script.
# NOTE(review): in an interactive session this also deletes unrelated objects.
rm(list=ls())