This script predicts proportions of blood cell-types from DNAm data using the IDOL and IDOLext algorithms, and also uses MuSiC alongside a scRNA reference to deconvolute bulk RNAseq data.

Setup

Load required packages

library(rlang)
library(htmltools)
library(rmarkdown)
library(cli)
library(tidyverse)
library(DNAmArray)
library(lubridate)
library(Biobase)
library(MuSiC)
library(SummarizedExperiment)
library(minfi)
library(ExperimentHub)
library(FlowSorted.Blood.EPIC)
library(FlowSorted.BloodExtended.EPIC)
library(IlluminaHumanMethylationEPICmanifest)
library(IlluminaHumanMethylation450kmanifest)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)

Load DNA methylation data

# Load the saved R objects used below (presumably `targets`, `methData` and
# `RGset`, judging by the names referenced later in this script).
load(file = "../GOTO_Data/GOTO_targets-filtered.Rdata")
load(file = "../GOTO_Data/GOTO_methData-filtered.Rdata")
load(file = "../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata")

# Label the methData columns with the array Basename.
# NOTE(review): assumes the rows of `targets` are already in the same order as
# the columns of `methData`; nothing checks this at this point.
colnames(methData) <- targets[["Basename"]]

# Print the object summary of the methylation data
methData

## class: SummarizedExperiment 
## dim: 755777 534 
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(45): DNA_labnr IOP2_ID ... m1_macro m2_macro

# Print the object summary of the raw red/green channel set
RGset

## class: RGChannelSetExtended 
## dim: 1051815 534 
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19

Load measured cell count data alongside other variables on medication use and start dates.

# Read the cell count / medication CSV into a tibble; column types are
# guessed by readr (see the column specification message below).
blood_df <- read_csv(
  file = "../GOTO_Data/Cell_Counts/Blood/GOTO_Cellcounts-Medication_20210401.csv"
)

## Rows: 326 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): visit_date
## dbl (22): IOP2_ID, timepoint, med_lipidlowering, med_antihypertensive, cc_hb...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Measured Cell Counts

Blood cell counts were measured using a differential test.

Make visit date a date

# Convert the month/day/year strings in visit_date to Date values
blood_df <- blood_df %>%
  mutate(visit_date = as_date(visit_date, format = "%m/%d/%Y"))

Create start date

# Save the original row order so it can be restored after the join
blood_df$num <- seq_len(nrow(blood_df))

# Baseline (timepoint 0) visit date per participant, used as the start date
start_date <- blood_df %>%
  filter(timepoint == 0) %>%
  dplyr::select(IOP2_ID, start_date = visit_date)

# Attach the start date by participant ID instead of by row position, so a
# participant with a missing or extra visit cannot shift the dates of others.
# `relationship = "many-to-one"` errors if a participant has more than one
# baseline row rather than silently duplicating visits.
blood_df <- blood_df %>%
  left_join(start_date, by = "IOP2_ID", relationship = "many-to-one") %>%
  arrange(num)

Create factors

# Convert the two join keys to factors
blood_df <- blood_df %>%
  mutate(across(c(IOP2_ID, timepoint), as.factor))

Save the participant ID and timepoint of every sample, in the row order of targets (the sample sheet for methData)

# Keep only the join keys, in the row order of `targets`
ID_list <- dplyr::select(targets, IOP2_ID, timepoint)
dim(ID_list)

## [1] 534   2

Merge the imported cell count data to get the same order.

# Expand the cell count data to one row per row of `targets`, in the same
# order. The result is bound column-wise to `targets` further down, so the row
# count must not change: `relationship = "many-to-one"` makes the join fail
# loudly if blood_df holds duplicate (IOP2_ID, timepoint) rows.
blood_df <- left_join(
  ID_list, blood_df,
  by = c("IOP2_ID", "timepoint"),
  relationship = "many-to-one"
)

Remove the ordering variables

# Keep the start date and the five measured cell percentages; the join keys
# and remaining columns are dropped.
blood_df <- blood_df %>%
  dplyr::select(all_of(c(
    "start_date",
    "cc_eos_perc", "cc_baso_perc", "cc_neut_perc",
    "cc_lymph_perc", "cc_mono_perc"
  )))

Combine data frames

# Column-bind by position: relies on blood_df having been expanded to the row
# order of `targets` by the preceding join on (IOP2_ID, timepoint).
targets <- cbind(targets, blood_df)
dim(targets)

## [1] 534  51

Set the measured cell counts to missing for non-blood tissues

# Measured counts only apply to fasted blood samples; set them to NA for every
# other tissue.
targets <- targets %>%
  mutate(across(
    all_of(c(
      "cc_eos_perc", "cc_baso_perc", "cc_neut_perc",
      "cc_lymph_perc", "cc_mono_perc"
    )),
    ~ ifelse(tissue != "fasted blood", NA, .x)
  ))

Save variables of interest

# Rename the measured counts to the cc_blood_meas_* convention, then move the
# key variables to the front; all remaining columns follow unchanged.
targets <- targets %>%
  dplyr::rename(
    cc_blood_meas_eos = cc_eos_perc,
    cc_blood_meas_baso = cc_baso_perc,
    cc_blood_meas_neut = cc_neut_perc,
    cc_blood_meas_lymph = cc_lymph_perc,
    cc_blood_meas_mono = cc_mono_perc
  ) %>%
  dplyr::select(
    DNA_labnr, IOP2_ID, tissue, timepoint,
    age, sex, bmi, op_status,
    start_date, visit_date, isolationdate,
    plate, well, array_n, array_row,
    Basename,
    cc_blood_meas_eos, cc_blood_meas_baso, cc_blood_meas_neut,
    cc_blood_meas_lymph, cc_blood_meas_mono,
    everything()
  )

MuSiC

Expression Set for scRNA

Read in the scRNA data from GSE143704

# Read the tab-separated scRNA table without a header: the first row (cell
# labels) and first column (gene symbols) are split off in the steps below.
sc_blood <- read.table(
  file = "../GOTO_Data/Cell_Counts/Blood/scRNA-blood_GSE143704.tsv",
  sep = "\t"
)

Save cell names

# First row minus the first column holds the cell label of every column;
# transpose it into a one-column data frame named "Cell".
cell_names <- sc_blood[1, -1] %>%
  t() %>%
  as.data.frame() %>%
  setNames("Cell")

Remove the first row with cell names

# Drop the cell label row now that it is stored in cell_names
sc_blood <- as.data.frame(sc_blood[-1, , drop = FALSE])

Save rownames

# The first column holds the gene symbols
gene_names <- sc_blood[[1]]

Remove GeneSymbol column

# Keep only the per-cell expression columns
sc_blood <- sc_blood[, -1, drop = FALSE]

Create an expression matrix

# Values were read as text because of the label row; convert every column to
# numeric, column by column.
sc_blood <- as.data.frame(lapply(sc_blood, as.numeric))

Add rownames

# Gene symbols become the row names (must be unique for this to succeed)
row.names(sc_blood) <- gene_names

Make column names

# Give every cell a sequential ID, left-padded with zeros to at least three
# characters. seq_len() is safe when there are zero columns, where 1:ncol()
# would yield c(1, 0).
colnames(sc_blood) <- str_pad(seq_len(ncol(sc_blood)), width = 3, pad = "0")

Look

# Show the top-left corner of the expression table
sc_blood[1:5, 1:5]

##            001 002 003 004 005
## OR4F5        0   0   0   0   0
## FO538757.3   0   0   0   0   0
## FO538757.2   0   0   0   0   0
## OR4F29       0   0   0   0   0
## OR4F16       0   0   0   0   0

Add sample labels to cell_names

# Reuse the cell IDs already assigned as the column names of sc_blood, so the
# phenotype rows are guaranteed to match the expression columns when the
# ExpressionSet is built (instead of regenerating the same sequence twice).
cell_names$Sample <- colnames(sc_blood)
rownames(cell_names) <- colnames(sc_blood)
head(cell_names)

##     Cell Sample
## 001  BNK    001
## 002  BNK    002
## 003  BNK    003
## 004  BNK    004
## 005  BNK    005
## 006  BNK    006

GOTO Expression Data

Load functions

# Project helper functions for the RNAseq data (provides read.gotornaseq,
# which is called below)
source(file = "../GOTO_Data/RNAseq/goto.rnaseq.functions.R")

Load in RNAseq from complete blood pairs

# Input files: merged gene counts and the RNAseq sample sheet
pathIN_dat <- "../GOTO_Data/RNAseq/merge.gene.counts_biopet_13052016.RData"
pathIN_cov <- "../GOTO_Data/RNAseq/datasheet_RNAseq_blood_V2.csv"

# Sample filters, applied one after another (see the log output below)
filt.samp <- "tissue_blood|qc_sexswitch|qc_multdim2|qc_rep1|complete_pairs"

# The covariate path is passed positionally, as in the original call
goto_exp <- read.gotornaseq(
  pathIN_dat = pathIN_dat,
  pathIN_cov,
  filt.samp = filt.samp,
  quiet = FALSE
)

## ||| PREPARING GOTO RNASEQ DATA 
## || READING DATA 
## | Loading RNASEQ .. OK! 
##    [555 samples x 56520 features] 
## | Reading COVARIATES .. OK! 
##    [maintaining 379 samples x 84 features] 
## | Merging data .. OK! 
##    [555 samples x 56604 features] 
## || SUBSETTING SAMPLES 
## | Subsetting SAMPLES on ['tissue_blood']; PASS: 379 out of 555
## | Subsetting SAMPLES on ['qc_sexswitch']; PASS: 379 out of 379
## | Subsetting SAMPLES on ['qc_multdim2']; PASS: 379 out of 379
## | Subsetting SAMPLES on ['qc_rep1']; PASS: 376 out of 379
## | Subsetting SAMPLES on ['complete_pairs']; PASS: 376 out of 376
## | DONE!

# Keep only the "dat" element of the returned list
# (presumably the merged samples x features table; verify against the helper)
goto_exp <- goto_exp[["dat"]]

Filter pre-challenge samples and save counts

# Keep samples with nutridrink == 0, build a "<sampID2>_<intervention>" ID,
# and retain only the ID parts, the ENS* count columns and the new ID.
goto_exp <- goto_exp %>%
  dplyr::filter(nutridrink == 0) %>%
  mutate(ID = str_c(sampID2, '_', intervention)) %>%
  dplyr::select(sampID2, intervention, starts_with('ENS'), ID)

Save IDs

# Sample IDs, also stored as a one-column phenotype table keyed by ID
ID_name <- goto_exp$ID
ID_df <- data.frame(Sample = ID_name, row.names = ID_name)

Remove non-gene variables

# Drop the identifier columns and transpose to genes (rows) x samples (columns)
goto_exp <- goto_exp %>%
  dplyr::select(-c(ID, sampID2, intervention)) %>%
  t() %>%
  as.data.frame()
colnames(goto_exp) <- ID_name

Map to gene name

# Map Ensembl gene IDs (row names) to gene symbols using the GRCh37 table
ens2gene <- cinaR::grch37
m <- match(rownames(goto_exp), ens2gene$ensgene)
mapped.genes <- ens2gene$symbol[m]

# Drop genes with no symbol, repeated symbols (only the first occurrence is
# kept) and mitochondrial genes. Mitochondrial symbols carry the "MT-" prefix;
# the previous pattern "^MT" also discarded nuclear genes such as MTOR, MTHFR
# and the metallothioneins (MT1A, MT2A, ...).
# NOTE(review): this keeps more genes than before, so the feature counts and
# MuSiC estimates shown in the rendered output will change on re-run.
removed.genes <- duplicated(mapped.genes) |
  is.na(mapped.genes) |
  grepl("^MT-", mapped.genes)
goto_exp <- goto_exp[!removed.genes, ]
rownames(goto_exp) <- mapped.genes[!removed.genes]

Subset

# Restrict both tables to the genes they have in common; each table keeps its
# own row order.
shared_genes <- intersect(rownames(goto_exp), rownames(sc_blood))
goto_exp <- goto_exp[rownames(goto_exp) %in% shared_genes, ]
sc_blood <- sc_blood[rownames(sc_blood) %in% shared_genes, ]

Expression Sets

Create an expression set for the single cell data

# Single-cell reference: expression matrix plus per-cell labels
sc_pheno <- Biobase::AnnotatedDataFrame(cell_names)
C.eset <- Biobase::ExpressionSet(
  assayData = as.matrix(sc_blood),
  phenoData = sc_pheno
)
C.eset

## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 18531 features, 7643 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: 001 002 ... 7643 (7643 total)
##   varLabels: Cell Sample
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

Make expression set for bulk RNAseq from GOTO

# Bulk data: expression matrix plus per-sample IDs
bulk_pheno <- Biobase::AnnotatedDataFrame(ID_df)
T.eset <- Biobase::ExpressionSet(
  assayData = as.matrix(goto_exp),
  phenoData = bulk_pheno
)
T.eset

## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 18531 features, 183 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: 61482_1 62340_0 ... 61789_0 (183 total)
##   varLabels: Sample
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

MuSiC

Deconvolute

# Estimate cell type proportions in the bulk samples from the single-cell
# reference, grouping reference cells by `Cell`. Only the weighted estimate
# (Est.prop.weighted) is kept.
deconv <- music_prop(
  bulk.eset = T.eset,
  sc.eset = C.eset,
  clusters = 'Cell',
  markers = NULL,
  normalize = FALSE,
  samples = 'Sample',
  verbose = FALSE  # spelled out: `F` is an ordinary variable that can be reassigned
)$Est.prop.weighted

summary(deconv)

##       BNK         CD4T               CD8T             claM              CLP   
##  Min.   :0   Min.   :0.000000   Min.   :0.0000   Min.   :0.00000   Min.   :0  
##  1st Qu.:0   1st Qu.:0.000000   1st Qu.:0.1224   1st Qu.:0.00000   1st Qu.:0  
##  Median :0   Median :0.000000   Median :0.1765   Median :0.00000   Median :0  
##  Mean   :0   Mean   :0.008223   Mean   :0.1808   Mean   :0.01352   Mean   :0  
##  3rd Qu.:0   3rd Qu.:0.000000   3rd Qu.:0.2293   3rd Qu.:0.02043   3rd Qu.:0  
##  Max.   :0   Max.   :0.421140   Max.   :0.5105   Max.   :0.13934   Max.   :0  
##       cMOP                CMP         ery                 GMP           
##  Min.   :0.0000000   Min.   :0   Min.   :0.0000000   Min.   :0.000e+00  
##  1st Qu.:0.0000000   1st Qu.:0   1st Qu.:0.0000000   1st Qu.:0.000e+00  
##  Median :0.0000000   Median :0   Median :0.0004511   Median :0.000e+00  
##  Mean   :0.0007752   Mean   :0   Mean   :0.0014828   Mean   :3.112e-06  
##  3rd Qu.:0.0004220   3rd Qu.:0   3rd Qu.:0.0023557   3rd Qu.:0.000e+00  
##  Max.   :0.0404196   Max.   :0   Max.   :0.0156175   Max.   :3.736e-04  
##       hMDP                HSC         immB              interM       
##  Min.   :0.0000000   Min.   :0   Min.   :0.000000   Min.   :0.03935  
##  1st Qu.:0.0000000   1st Qu.:0   1st Qu.:0.001233   1st Qu.:0.17359  
##  Median :0.0000000   Median :0   Median :0.006863   Median :0.21533  
##  Mean   :0.0002782   Mean   :0   Mean   :0.010768   Mean   :0.20676  
##  3rd Qu.:0.0000000   3rd Qu.:0   3rd Qu.:0.013084   3rd Qu.:0.24116  
##  Max.   :0.0157085   Max.   :0   Max.   :0.174863   Max.   :0.33207  
##      kineNK       LMPP      matureN            memB        MEP   
##  Min.   :0   Min.   :0   Min.   :0.1228   Min.   :0   Min.   :0  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0.2630   1st Qu.:0   1st Qu.:0  
##  Median :0   Median :0   Median :0.2948   Median :0   Median :0  
##  Mean   :0   Mean   :0   Mean   :0.2931   Mean   :0   Mean   :0  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.3260   3rd Qu.:0   3rd Qu.:0  
##  Max.   :0   Max.   :0   Max.   :0.4218   Max.   :0   Max.   :0  
##      metaN              MLP         MPP         myeN         
##  Min.   :0.08611   Min.   :0   Min.   :0   Min.   :0.000000  
##  1st Qu.:0.20954   1st Qu.:0   1st Qu.:0   1st Qu.:0.000000  
##  Median :0.24646   Median :0   Median :0   Median :0.000000  
##  Mean   :0.24488   Mean   :0   Mean   :0   Mean   :0.000529  
##  3rd Qu.:0.28270   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.000000  
##  Max.   :0.37715   Max.   :0   Max.   :0   Max.   :0.063761  
##       naiB                NKP         nonM              plasma         
##  Min.   :0.0000000   Min.   :0   Min.   :0.000000   Min.   :0.000e+00  
##  1st Qu.:0.0000000   1st Qu.:0   1st Qu.:0.000000   1st Qu.:0.000e+00  
##  Median :0.0000000   Median :0   Median :0.000000   Median :4.373e-05  
##  Mean   :0.0008892   Mean   :0   Mean   :0.006094   Mean   :3.212e-04  
##  3rd Qu.:0.0000000   3rd Qu.:0   3rd Qu.:0.002170   3rd Qu.:4.158e-04  
##  Max.   :0.0920783   Max.   :0   Max.   :0.098688   Max.   :4.052e-03  
##       preB        preM              proB        proN        regB          
##  Min.   :0   Min.   :0.00000   Min.   :0   Min.   :0   Min.   :0.000e+00  
##  1st Qu.:0   1st Qu.:0.01988   1st Qu.:0   1st Qu.:0   1st Qu.:0.000e+00  
##  Median :0   Median :0.02970   Median :0   Median :0   Median :0.000e+00  
##  Mean   :0   Mean   :0.03143   Mean   :0   Mean   :0   Mean   :7.168e-06  
##  3rd Qu.:0   3rd Qu.:0.04244   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0.000e+00  
##  Max.   :0   Max.   :0.08264   Max.   :0   Max.   :0   Max.   :1.057e-03  
##      toxiNK         
##  Min.   :0.0000000  
##  1st Qu.:0.0000000  
##  Median :0.0000000  
##  Mean   :0.0001496  
##  3rd Qu.:0.0000000  
##  Max.   :0.0062376

Heatmap

# Clustered heatmap of the estimated proportions, with wide margins so the
# labels are not cut off
heatmap(x = deconv, margins = c(12, 8))

Save

# Store the raw MuSiC proportions, before renaming and scaling to percentages
save(
  deconv,
  file = "../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-Music.Rdata"
)

Add to targets

Make ID variable in targets

# Build the "<IOP2_ID>_<timepoint>" key used to join the MuSiC estimates.
# NOTE(review): as.numeric() on a factor returns the level index, so the "- 1"
# assumes `timepoint` is a factor whose levels are exactly "0", "1", ... in
# that order; confirm before changing how timepoint is coded.
targets$ID <- paste0(
  targets$IOP2_ID, "_", as.numeric(targets$timepoint) - 1
)

Make variable names

# Prefix every cell type column so its source is clear once merged
colnames(deconv) <- str_c("cc_blood_music_", colnames(deconv))

Make percentages

# Convert proportions (0-1) to percentages. across(where()) replaces the
# superseded mutate_if(); row names (the sample IDs) are preserved.
deconv <- as.data.frame(deconv) %>%
  mutate(across(where(is.numeric), ~ .x * 100))

Make ID variable in deconv

# Move the sample IDs from the row names into an explicit ID column
deconv <- rownames_to_column(as.data.frame(deconv), var = "ID")

Merge

# Add the MuSiC estimates to every row of targets with a matching ID
targets <- targets %>%
  left_join(deconv, by = "ID")

Set the MuSiC estimates to missing for non-blood tissues

# The MuSiC estimates come from blood RNAseq, so set them to NA for every
# other tissue. All columns carrying the cc_blood_music_ prefix are handled in
# one pass, so the list cannot drift out of sync with the cell types present
# in the reference.
targets <- targets %>%
  mutate(across(
    starts_with("cc_blood_music_"),
    ~ ifelse(tissue != "fasted blood", NA, .x)
  ))

IDOL

Save the RGset for blood only

# Keep only the samples (columns) taken from fasted blood
is_blood <- RGset$tissue == 'fasted blood'
RGset_blood <- RGset[, is_blood]

Minfi

# Connect to ExperimentHub (uses the local cache / network)
hub <- ExperimentHub()

## snapshotDate(): 2022-10-31

# Look up the record ID of the FlowSorted.Blood.EPIC reference data
query(hub, "FlowSorted.Blood.EPIC")

## ExperimentHub with 1 record
## # snapshotDate(): 2022-10-31
## # names(): EH1136
## # package(): FlowSorted.Blood.EPIC
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: RGChannelSet
## # $rdatadateadded: 2018-04-20
## # $title: FlowSorted.Blood.EPIC: Illumina Human Methylation data from EPIC o...
## # $description: The FlowSorted.Blood.EPIC package contains Illumina HumanMet...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: tar.gz
## # $sourceurl: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110554
## # $sourcesize: NA
## # $tags: c("ExperimentData", "Homo_sapiens_Data", "Tissue",
## #   "MicroarrayData", "Genome", "TissueMicroarrayData",
## #   "MethylationArrayData") 
## # retrieve record with 'object[["EH1136"]]'

# Retrieve the reference set by the record ID reported by the query above
FlowSorted.Blood.EPIC <- hub[["EH1136"]]

## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation

## loading from cache

Calculate cell counts

# Estimate the proportions of six blood cell types from the blood RGset
# against the EPIC reference platform.
# NOTE(review): this calls estimateCellCounts(), not estimateCellCounts2();
# given the "IDOL" naming, confirm that the IDOL-optimised probe set is
# actually what gets used here.
idol_blood <- estimateCellCounts(
  rgSet = RGset_blood,
  referencePlatform = 'IlluminaHumanMethylationEPIC',
  cellTypes = c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Neu"),
  meanPlot = TRUE,
  verbose = TRUE
)

## [estimateCellCounts] Combining user data with reference (flow sorted) data.

## [estimateCellCounts] Processing user and reference data together.

## [preprocessQuantile] Mapping to genome.

## [preprocessQuantile] Fixing outliers.

## [preprocessQuantile] Quantile normalizing.

## [estimateCellCounts] Picking probes for composition estimation.

## [estimateCellCounts] Estimating composition.

Save

# Store the raw estimates, before renaming and scaling to percentages
save(
  idol_blood,
  file = "../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-IDOL.Rdata"
)

Join with targets

Create merging variable

# Move the sample names from the row names into a Basename column for joining
idol_blood <- rownames_to_column(as.data.frame(idol_blood), var = 'Basename')
summary(idol_blood)

##    Basename              CD8T              CD4T               NK         
##  Length:196         Min.   :0.03718   Min.   :0.03219   Min.   :0.00370  
##  Class :character   1st Qu.:0.10224   1st Qu.:0.12119   1st Qu.:0.03752  
##  Mode  :character   Median :0.12223   Median :0.16240   Median :0.05209  
##                     Mean   :0.13009   Mean   :0.15779   Mean   :0.05783  
##                     3rd Qu.:0.15545   3rd Qu.:0.19628   3rd Qu.:0.07413  
##                     Max.   :0.28816   Max.   :0.33827   Max.   :0.24852  
##      Bcell              Mono              Neu        
##  Min.   :0.01079   Min.   :0.04267   Min.   :0.1633  
##  1st Qu.:0.04604   1st Qu.:0.08572   1st Qu.:0.4720  
##  Median :0.06048   Median :0.09969   Median :0.5201  
##  Mean   :0.06562   Mean   :0.10159   Mean   :0.5179  
##  3rd Qu.:0.07527   3rd Qu.:0.11647   3rd Qu.:0.5723  
##  Max.   :0.45958   Max.   :0.19203   Max.   :0.7167

Make percentages

# Convert proportions (0-1) to percentages; the character Basename column is
# left untouched. across(where()) replaces the superseded mutate_if().
idol_blood <- idol_blood %>%
  mutate(across(where(is.numeric), ~ .x * 100))

Column names

# Prefix every cell type column with cc_blood_idol_. Renaming by the existing
# column name rather than by position means a change in the order or number of
# estimated cell types cannot mislabel a column.
idol_blood <- idol_blood %>%
  dplyr::rename_with(~ paste0("cc_blood_idol_", .x), .cols = -Basename)

Merge

# Add the estimates to targets; rows without a matching Basename get NA
targets <- targets %>%
  left_join(idol_blood, by = "Basename")

IDOL Extended

Load data

# Load the precomputed extended estimates
# (presumably the `idol_ext` object used below; it is not computed in this script)
load("../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-IDOLext.Rdata")

Make percentages

# Multiply every numeric column by 100. across(where()) replaces the
# superseded mutate_if(); row names (used as the join key below) are kept.
idol_ext <- idol_ext %>%
  mutate(across(where(is.numeric), ~ .x * 100))

Look at it

# Per-column distribution of the extended estimates
summary(idol_ext)

##       Bas              Bmem             Bnv            CD4mem      
##  Min.   :0.0000   Min.   : 0.000   Min.   :0.000   Min.   : 0.570  
##  1st Qu.:0.0300   1st Qu.: 1.127   1st Qu.:2.485   1st Qu.: 7.862  
##  Median :0.6700   Median : 1.630   Median :3.405   Median :10.560  
##  Mean   :0.7264   Mean   : 2.345   Mean   :3.639   Mean   :10.594  
##  3rd Qu.:1.0825   3rd Qu.: 2.312   3rd Qu.:4.582   3rd Qu.:12.835  
##  Max.   :4.9600   Max.   :48.990   Max.   :9.350   Max.   :24.820  
##      CD4nv            CD8mem           CD8nv             Eos         
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.0000   Min.   : 0.0000  
##  1st Qu.: 3.167   1st Qu.: 3.915   1st Qu.:0.0000   1st Qu.: 0.5125  
##  Median : 6.765   Median : 5.910   Median :0.0000   Median : 1.5750  
##  Mean   : 6.748   Mean   : 7.232   Mean   :0.8024   Mean   : 2.3382  
##  3rd Qu.: 9.682   3rd Qu.: 9.180   3rd Qu.:1.1625   3rd Qu.: 3.1450  
##  Max.   :18.780   Max.   :36.450   Max.   :5.6700   Max.   :12.3900  
##       Mono             Neu              NK              Treg       
##  Min.   : 2.690   Min.   :10.79   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.: 5.928   1st Qu.:41.97   1st Qu.: 3.958   1st Qu.:0.5475  
##  Median : 7.280   Median :47.98   Median : 5.190   Median :1.1550  
##  Mean   : 7.369   Mean   :47.64   Mean   : 5.521   Mean   :1.2803  
##  3rd Qu.: 8.545   3rd Qu.:53.74   3rd Qu.: 6.520   3rd Qu.:1.8375  
##  Max.   :14.140   Max.   :74.11   Max.   :15.840   Max.   :4.3900

Create join variable

# Move the sample names from the row names into a Basename column for joining
idol_ext <- rownames_to_column(idol_ext, var = "Basename")

Set colnames

# Prefix every cell type column with cc_blood_ext_. Renaming by the existing
# column name rather than by position means a change in the order or number of
# columns in the loaded object cannot mislabel a column.
idol_ext <- idol_ext %>%
  dplyr::rename_with(~ paste0("cc_blood_ext_", .x), .cols = -Basename)

Merge

# Add the extended estimates to targets; rows without a match get NA
targets <- targets %>%
  left_join(idol_ext, by = "Basename")

Add to methData and RGset

# Tabulate whether targets rows line up with methData columns
# (informational only: a mismatch is displayed, not raised as an error)
check <- colnames(methData) == targets$Basename
xtabs(~ check)

## check
## TRUE 
##  534

# Tabulate whether targets rows line up with RGset columns
# (informational only: a mismatch is displayed, not raised as an error)
check <- colnames(RGset) == targets$Basename
xtabs(~ check)

## check
## TRUE 
##  534

# Tabulate whether RGset and methData columns are in the same order
# (informational only: a mismatch is displayed, not raised as an error)
check <- colnames(methData) == colnames(RGset)
xtabs(~ check)

## check
## TRUE 
##  534

Reorder targets

# Sample order to impose on targets (named so it does not mask base::order)
sample_order <- colnames(methData)

# Fail loudly instead of silently writing misaligned or NA-filled sample
# data: every sample must be present in targets, and RGset must share the
# column order of methData because it receives the same table.
row_idx <- match(sample_order, targets$Basename)
stopifnot(
  !anyNA(row_idx),
  identical(colnames(RGset), sample_order)
)

targets <- targets[row_idx, ]
rownames(targets) <- targets$Basename

# Replace the sample-level data of both objects with the extended targets
colData(methData) <- DataFrame(targets)
colData(RGset) <- DataFrame(targets)

Look

# Print the object summary to confirm the added colData columns
methData

## class: SummarizedExperiment 
## dim: 755777 534 
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(102): DNA_labnr IOP2_ID ... cc_blood_ext_NK
##   cc_blood_ext_Treg

# Print the object summary to confirm the added colData columns
RGset

## class: RGChannelSetExtended 
## dim: 1051815 534 
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
##   203550300093_R07C01 203550300093_R08C01
## colData names(102): DNA_labnr IOP2_ID ... cc_blood_ext_NK
##   cc_blood_ext_Treg
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19

Save

# NOTE(review): both files are the same paths loaded at the top of this
# script, so the inputs are overwritten and a re-run starts from the already
# extended targets.
# NOTE(review): RGset received the new colData above but is not saved here;
# confirm that this is intended.
save(targets, file = "../GOTO_Data/GOTO_targets-filtered.Rdata")
save(methData, file = "../GOTO_Data/GOTO_methData-filtered.Rdata")

Session Info

# Record the R version and package versions used for this run
sessionInfo()

## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] IlluminaHumanMethylation450kmanifest_0.4.0         
##  [2] IlluminaHumanMethylationEPICmanifest_0.3.0         
##  [3] FlowSorted.BloodExtended.EPIC_1.1.1                
##  [4] FlowSorted.Blood.EPIC_1.12.1                       
##  [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
##  [6] nlme_3.1-162                                       
##  [7] quadprog_1.5-8                                     
##  [8] genefilter_1.76.0                                  
##  [9] ExperimentHub_2.2.1                                
## [10] AnnotationHub_3.2.2                                
## [11] BiocFileCache_2.2.1                                
## [12] dbplyr_2.2.1                                       
## [13] MuSiC_0.2.0                                        
## [14] nnls_1.4                                           
## [15] lubridate_1.9.2                                    
## [16] DNAmArray_2.0.0                                    
## [17] pls_2.8-2                                          
## [18] FDb.InfiniumMethylation.hg19_2.2.0                 
## [19] org.Hs.eg.db_3.14.0                                
## [20] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [21] GenomicFeatures_1.46.5                             
## [22] AnnotationDbi_1.56.2                               
## [23] minfi_1.40.0                                       
## [24] bumphunter_1.36.0                                  
## [25] locfit_1.5-9.8                                     
## [26] iterators_1.0.14                                   
## [27] foreach_1.5.2                                      
## [28] Biostrings_2.62.0                                  
## [29] XVector_0.34.0                                     
## [30] SummarizedExperiment_1.24.0                        
## [31] Biobase_2.58.0                                     
## [32] MatrixGenerics_1.10.0                              
## [33] matrixStats_1.0.0                                  
## [34] GenomicRanges_1.46.1                               
## [35] GenomeInfoDb_1.34.9                                
## [36] IRanges_2.32.0                                     
## [37] S4Vectors_0.36.2                                   
## [38] BiocGenerics_0.44.0                                
## [39] forcats_0.5.2                                      
## [40] stringr_1.5.0                                      
## [41] dplyr_1.1.3                                        
## [42] purrr_0.3.4                                        
## [43] readr_2.1.2                                        
## [44] tidyr_1.2.1                                        
## [45] tibble_3.2.1                                       
## [46] ggplot2_3.4.3                                      
## [47] tidyverse_1.3.2                                    
## [48] cli_3.6.1                                          
## [49] htmltools_0.5.5                                    
## [50] rlang_1.1.1                                        
## [51] rmarkdown_2.16                                     
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                    tidyselect_1.2.0             
##   [3] RSQLite_2.2.17                grid_4.2.2                   
##   [5] BiocParallel_1.32.6           cinaR_0.2.3                  
##   [7] munsell_0.5.0                 codetools_0.2-19             
##   [9] preprocessCore_1.60.2         withr_2.5.0                  
##  [11] colorspace_2.1-0              filelock_1.0.2               
##  [13] highr_0.10                    knitr_1.43                   
##  [15] rstudioapi_0.14               GenomeInfoDbData_1.2.9       
##  [17] MCMCpack_1.6-3                bit64_4.0.5                  
##  [19] rhdf5_2.42.1                  coda_0.19-4                  
##  [21] vctrs_0.6.3                   generics_0.1.3               
##  [23] xfun_0.39                     timechange_0.2.0             
##  [25] R6_2.5.1                      illuminaio_0.40.0            
##  [27] bitops_1.0-7                  rhdf5filters_1.10.1          
##  [29] cachem_1.0.8                  reshape_0.8.9                
##  [31] DelayedArray_0.24.0           assertthat_0.2.1             
##  [33] vroom_1.5.7                   promises_1.2.0.1             
##  [35] BiocIO_1.8.0                  scales_1.2.1                 
##  [37] googlesheets4_1.0.1           gtable_0.3.3                 
##  [39] mcmc_0.9-7                    MatrixModels_0.5-1           
##  [41] splines_4.2.2                 rtracklayer_1.54.0           
##  [43] gargle_1.5.0                  GEOquery_2.62.2              
##  [45] htm2txt_2.2.2                 broom_1.0.1                  
##  [47] BiocManager_1.30.21           yaml_2.3.7                   
##  [49] reshape2_1.4.4                modelr_0.1.9                 
##  [51] backports_1.4.1               httpuv_1.6.11                
##  [53] tools_4.2.2                   nor1mix_1.3-0                
##  [55] ellipsis_0.3.2                jquerylib_0.1.4              
##  [57] RColorBrewer_1.1-3            siggenes_1.68.0              
##  [59] Rcpp_1.0.10                   plyr_1.8.8                   
##  [61] sparseMatrixStats_1.10.0      progress_1.2.2               
##  [63] zlibbioc_1.44.0               RCurl_1.98-1.12              
##  [65] prettyunits_1.1.1             openssl_2.0.6                
##  [67] haven_2.5.1                   fs_1.6.2                     
##  [69] magrittr_2.0.3                data.table_1.14.8            
##  [71] SparseM_1.81                  reprex_2.0.2                 
##  [73] googledrive_2.0.0             mime_0.12                    
##  [75] hms_1.1.2                     evaluate_0.21                
##  [77] xtable_1.8-4                  XML_3.99-0.14                
##  [79] mclust_6.0.0                  readxl_1.4.1                 
##  [81] compiler_4.2.2                biomaRt_2.50.3               
##  [83] crayon_1.5.2                  later_1.3.1                  
##  [85] tzdb_0.4.0                    DBI_1.1.3                    
##  [87] MASS_7.3-60                   rappdirs_0.3.3               
##  [89] Matrix_1.5-4.1                pkgconfig_2.0.3              
##  [91] GenomicAlignments_1.30.0      xml2_1.3.4                   
##  [93] annotate_1.72.0               bslib_0.5.0                  
##  [95] rngtools_1.5.2                multtest_2.50.0              
##  [97] beanplot_1.3.1                rvest_1.0.3                  
##  [99] doRNG_1.8.6                   scrime_1.3.5                 
## [101] digest_0.6.31                 base64_2.0.1                 
## [103] cellranger_1.1.0              edgeR_3.40.2                 
## [105] DelayedMatrixStats_1.16.0     restfulr_0.0.15              
## [107] curl_5.0.1                    shiny_1.7.2                  
## [109] Rsamtools_2.10.0              quantreg_5.94                
## [111] rjson_0.2.21                  lifecycle_1.0.3              
## [113] jsonlite_1.8.5                Rhdf5lib_1.20.0              
## [115] askpass_1.1                   limma_3.54.2                 
## [117] fansi_1.0.4                   pillar_1.9.0                 
## [119] lattice_0.21-8                KEGGREST_1.34.0              
## [121] fastmap_1.1.1                 httr_1.4.6                   
## [123] survival_3.5-5                interactiveDisplayBase_1.32.0
## [125] glue_1.6.2                    png_0.1-8                    
## [127] BiocVersion_3.16.0            bit_4.0.5                    
## [129] stringi_1.7.12                sass_0.4.6                   
## [131] HDF5Array_1.22.1              blob_1.2.4                   
## [133] memoise_2.0.1

Clear

# Remove every object from the current environment.
# NOTE(review): when run interactively this also wipes any unrelated objects
# in the session.
rm(list = ls())

Script 34: Predict cell types in bulk samples

Compiled: June 26, 2024

Setup

Measured Cell Counts

MuSiC

Expression Set for scRNA

GOTO Expression Data

Subset

Expression Sets

MuSiC

Heatmap

Add to targets

IDOL

Join with targets

IDOL Extended

Add to methData and RGset

Session Info