This script predicts blood cell-type proportions from DNA methylation (DNAm) data using the IDOL and IDOLext algorithms, and uses MuSiC with a single-cell RNA-seq (scRNA) reference to deconvolve bulk RNA-seq data.
Load required packages
library(rlang)
library(htmltools)
library(rmarkdown)
library(cli)
library(tidyverse)
library(DNAmArray)
library(lubridate)
library(Biobase)
library(MuSiC)
library(SummarizedExperiment)
library(minfi)
library(ExperimentHub)
library(FlowSorted.Blood.EPIC)
library(FlowSorted.BloodExtended.EPIC)
library(IlluminaHumanMethylationEPICmanifest)
library(IlluminaHumanMethylation450kmanifest)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
Load DNA methylation data
# Restore the saved R objects; the code below uses `targets`, `methData` and
# `RGset`, which are presumably what these three files provide.
load("../GOTO_Data/GOTO_targets-filtered.Rdata")
load("../GOTO_Data/GOTO_methData-filtered.Rdata")
load("../GOTO_Data/Processing/GOTO_RGset-unfiltered.Rdata")
# Label the methylation columns with the array basenames from the sample sheet
# (relies on `targets` rows being in the same order as the methData columns)
colnames(methData) <- targets$Basename
# Print the object summary
methData
## class: SummarizedExperiment
## dim: 755777 534
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(45): DNA_labnr IOP2_ID ... m1_macro m2_macro
# Print the summary of the raw red/green channel set
RGset
## class: RGChannelSetExtended
## dim: 1051815 534
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(19): DNA_labnr IOP2_ID ... Basename filenames
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
Load measured cell count data alongside other variables on medication use and start dates.
# Import measured cell counts and medication variables (one row per visit)
blood_df <- read_csv("../GOTO_Data/Cell_Counts/Blood/GOTO_Cellcounts-Medication_20210401.csv")
## Rows: 326 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): visit_date
## dbl (22): IOP2_ID, timepoint, med_lipidlowering, med_antihypertensive, cc_hb...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Blood cell counts were measured using a differential test.
Convert the visit date from text to a Date
# visit_date is read as text in month/day/year form; parse it into a Date
blood_df <- blood_df %>%
  mutate(visit_date = as_date(visit_date, format = "%m/%d/%Y"))
Create start date
# Attach each participant's baseline (timepoint == 0) visit date as
# `start_date`.
#
# The baseline date is looked up by IOP2_ID instead of being repeated twice
# per sorted participant. The positional version silently shifted dates onto
# the wrong person whenever a participant did not have exactly two rows or
# lacked a baseline row. Participants without a baseline visit now get NA.
# Row order is left untouched, so no helper ordering column is needed.
start_date <- blood_df %>%
  filter(timepoint == 0) %>%
  distinct(IOP2_ID, .keep_all = TRUE) %>%
  dplyr::select(IOP2_ID, start_date = visit_date)

blood_df <- blood_df %>%
  left_join(start_date, by = "IOP2_ID") %>%
  as.data.frame()
Create factors
# Participant ID and timepoint are categorical join keys, not quantities
blood_df <- blood_df %>%
  mutate(across(c(IOP2_ID, timepoint), as.factor))
Save ID list and timepoint order from methData
# Keep the participant/timepoint keys in the row order of the sample sheet
ID_list <- targets[, c("IOP2_ID", "timepoint")]
dim(ID_list)
## [1] 534 2
Merge the imported cell count data to get the same order.
# Bring the cell counts into the row order of `targets`.
# Several samples (tissues) may share one participant/timepoint, but each
# key must match at most one cell-count row: the result is later cbind-ed
# onto `targets` row by row, so any fan-out would misalign the data.
# `relationship` turns such a duplicate into an error instead of extra rows.
blood_df <- left_join(
  ID_list, blood_df,
  by = c("IOP2_ID", "timepoint"),
  relationship = "many-to-one"
)
Keep only the start date and the measured cell percentages, removing the ordering variables
# Retain the baseline date and the five measured differential percentages
blood_df <- blood_df[, c(
  "start_date",
  "cc_eos_perc", "cc_baso_perc", "cc_neut_perc",
  "cc_lymph_perc", "cc_mono_perc"
)]
Combine data frames
# Append the cell-count columns to the sample sheet by position; this is
# only valid because blood_df was joined into the row order of `targets`
targets <- cbind(targets, blood_df)
# Confirm the number of samples is unchanged
dim(targets)
## [1] 534 51
Set the measured cell percentages to missing for non-blood tissues
# Measured blood counts only describe fasted blood samples; blank them
# for samples from any other tissue
targets <- targets %>%
  mutate(
    across(
      c(cc_eos_perc, cc_baso_perc, cc_neut_perc, cc_lymph_perc, cc_mono_perc),
      ~ ifelse(tissue != "fasted blood", NA, .x)
    )
  )
Save variables of interest
# Give the measured counts their final cc_blood_meas_* names, then move the
# key sample variables to the front and keep all remaining columns after them
targets <- targets %>%
  dplyr::rename(
    cc_blood_meas_eos = cc_eos_perc,
    cc_blood_meas_baso = cc_baso_perc,
    cc_blood_meas_neut = cc_neut_perc,
    cc_blood_meas_lymph = cc_lymph_perc,
    cc_blood_meas_mono = cc_mono_perc
  ) %>%
  dplyr::select(
    DNA_labnr, IOP2_ID, tissue, timepoint,
    age, sex, bmi, op_status,
    start_date, visit_date, isolationdate,
    plate, well, array_n, array_row,
    Basename,
    cc_blood_meas_eos, cc_blood_meas_baso, cc_blood_meas_neut,
    cc_blood_meas_lymph, cc_blood_meas_mono,
    everything()
  )
Read in the scRNA data from GSE143704
# Read the tab-separated single-cell table. No header is requested, so the
# line holding the cell labels arrives as the first data row (it is split
# off further down) and every column is read as text.
sc_blood <- read.table('../GOTO_Data/Cell_Counts/Blood/scRNA-blood_GSE143704.tsv', sep = "\t")
Save cell names
# Row 1 (minus the gene-symbol column) holds one cell-type label per cell;
# turn it into a one-column data frame called "Cell"
cell_names <- setNames(as.data.frame(t(sc_blood[1, -1])), "Cell")
Remove the first row with cell names
# Drop row 1, which held the cell labels rather than expression values
sc_blood <- as.data.frame(sc_blood[-1,])
Save the gene names (used later as row names)
# The first column carries the gene symbols
gene_names <- sc_blood[[1]]
Remove GeneSymbol column
# Remove the gene-symbol column so only per-cell expression columns remain
sc_blood <- sc_blood[,-1]
Create an expression matrix
# The table was read as text; convert every cell's column to numeric
sc_blood <- data.frame(lapply(sc_blood, as.numeric))
Add rownames
# Label rows with the gene symbols saved earlier (must be unique)
rownames(sc_blood) <- gene_names
Make column names
# Number the cells 001, 002, ... (indices above 999 keep their full width).
# seq_len() is used so an empty table yields no labels instead of c(1, 0).
colnames(sc_blood) <- str_pad(seq_len(ncol(sc_blood)), width = 3, pad = "0")
Look
# Preview the first five genes for the first five cells
sc_blood[1:5, 1:5]
## 001 002 003 004 005
## OR4F5 0 0 0 0 0
## FO538757.3 0 0 0 0 0
## FO538757.2 0 0 0 0 0
## OR4F29 0 0 0 0 0
## OR4F16 0 0 0 0 0
Add sample labels to cell_names
# Give every cell its own sample label, matching the column labels of
# sc_blood, and use the same labels as row names so the phenotype table
# lines up with the expression matrix. The label vector is built once and
# reused; seq_len() avoids the c(1, 0) sequence on an empty table.
cell_names$Sample <- str_pad(seq_len(ncol(sc_blood)), width = 3, pad = "0")
rownames(cell_names) <- cell_names$Sample
head(cell_names)
## Cell Sample
## 001 BNK 001
## 002 BNK 002
## 003 BNK 003
## 004 BNK 004
## 005 BNK 005
## 006 BNK 006
Load functions
# Load project helper functions; read.gotornaseq(), used below, is
# presumably defined in this file
source('../GOTO_Data/RNAseq/goto.rnaseq.functions.R')
Load in RNAseq from complete blood pairs
# Paths to the merged gene counts and the RNA-seq covariate sheet
pathIN_dat <- "../GOTO_Data/RNAseq/merge.gene.counts_biopet_13052016.RData"
pathIN_cov <- "../GOTO_Data/RNAseq/datasheet_RNAseq_blood_V2.csv"
# Pipe-separated list of sample filters; the log below shows them being
# applied one after another
filt.samp <- "tissue_blood|qc_sexswitch|qc_multdim2|qc_rep1|complete_pairs"
# NOTE(review): pathIN_cov is passed positionally while its neighbours are
# named — confirm it lands on the intended parameter of read.gotornaseq()
goto_exp <- read.gotornaseq(pathIN_dat = pathIN_dat, pathIN_cov, filt.samp = filt.samp, quiet = FALSE)
## ||| PREPARING GOTO RNASEQ DATA
## || READING DATA
## | Loading RNASEQ .. OK!
## [555 samples x 56520 features]
## | Reading COVARIATES .. OK!
## [maintaining 379 samples x 84 features]
## | Merging data .. OK!
## [555 samples x 56604 features]
## || SUBSETTING SAMPLES
## | Subsetting SAMPLES on ['tissue_blood']; PASS: 379 out of 555
## | Subsetting SAMPLES on ['qc_sexswitch']; PASS: 379 out of 379
## | Subsetting SAMPLES on ['qc_multdim2']; PASS: 379 out of 379
## | Subsetting SAMPLES on ['qc_rep1']; PASS: 376 out of 379
## | Subsetting SAMPLES on ['complete_pairs']; PASS: 376 out of 376
## | DONE!
# Keep only the "dat" element of the returned list
goto_exp <- goto_exp[["dat"]]
Filter pre-challenge samples and save counts
# Keep samples with nutridrink == 0, build a "<sampID2>_<intervention>" ID,
# and retain only the identifiers plus the Ensembl gene columns
goto_exp <- goto_exp %>%
  dplyr::filter(nutridrink == 0) %>%
  dplyr::mutate(ID = str_c(sampID2, "_", intervention)) %>%
  dplyr::select(sampID2, intervention, starts_with("ENS"), ID)
Save IDs
# Phenotype table for the bulk samples: one row per sample, keyed by its ID
ID_name <- goto_exp$ID
ID_df <- data.frame(Sample = ID_name, row.names = ID_name)
Remove non-gene variables
# Drop the identifier columns, then flip to genes x samples and label the
# columns with the sample IDs
goto_exp <- goto_exp %>%
  dplyr::select(-c(ID, sampID2, intervention)) %>%
  t() %>%
  as.data.frame()
colnames(goto_exp) <- ID_name
Map to gene name
# Translate Ensembl gene IDs to gene symbols with the GRCh37 annotation
ens2gene <- cinaR::grch37
m <- match(rownames(goto_exp), ens2gene$ensgene)
mapped.genes <- ens2gene$symbol[m]

# Discard genes without a symbol, repeated symbols, and mitochondrial genes.
# Mitochondrially encoded genes carry the "MT-" prefix; the earlier pattern
# "^MT" also removed nuclear genes whose symbol merely starts with MT
# (e.g. MTOR, MTHFR). Feature counts printed elsewhere in this document were
# produced with the broader pattern and may differ on the next run.
removed.genes <- duplicated(mapped.genes) |
  is.na(mapped.genes) |
  grepl("^MT-", mapped.genes)
goto_exp <- goto_exp[!removed.genes, ]
rownames(goto_exp) <- mapped.genes[!removed.genes]

# Restrict bulk and single-cell data to the genes present in both
goto_exp <- goto_exp[rownames(goto_exp) %in% rownames(sc_blood), ]
sc_blood <- sc_blood[rownames(sc_blood) %in% rownames(goto_exp), ]
Create an expression set for the single cell data
# Single-cell reference: genes x cells matrix plus the per-cell phenotype
# table (cell type in "Cell", per-cell label in "Sample")
C.eset <- Biobase::ExpressionSet(
assayData = as.matrix(sc_blood),
phenoData = Biobase::AnnotatedDataFrame(cell_names))
# Print the object summary
C.eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 18531 features, 7643 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: 001 002 ... 7643 (7643 total)
## varLabels: Cell Sample
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:
Make expression set for bulk RNAseq from GOTO
# Bulk data: genes x samples matrix plus the sample ID table
T.eset <- Biobase::ExpressionSet(assayData = as.matrix(goto_exp),
phenoData = Biobase::AnnotatedDataFrame(ID_df))
# Print the object summary
T.eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 18531 features, 183 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: 61482_1 62340_0 ... 61789_0 (183 total)
## varLabels: Sample
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:
Deconvolve the bulk RNA-seq data with MuSiC
# Estimate cell-type proportions for every bulk sample and keep the
# weighted estimates (Est.prop.weighted).
# `clusters` and `samples` name the phenoData columns of the single-cell
# reference; note that "Sample" holds a distinct label for every cell.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
deconv <- music_prop(
  bulk.eset = T.eset,
  sc.eset = C.eset,
  clusters = "Cell",
  markers = NULL,
  normalize = FALSE,
  samples = "Sample",
  verbose = FALSE
)$Est.prop.weighted
summary(deconv)
## BNK CD4T CD8T claM CLP
## Min. :0 Min. :0.000000 Min. :0.0000 Min. :0.00000 Min. :0
## 1st Qu.:0 1st Qu.:0.000000 1st Qu.:0.1224 1st Qu.:0.00000 1st Qu.:0
## Median :0 Median :0.000000 Median :0.1765 Median :0.00000 Median :0
## Mean :0 Mean :0.008223 Mean :0.1808 Mean :0.01352 Mean :0
## 3rd Qu.:0 3rd Qu.:0.000000 3rd Qu.:0.2293 3rd Qu.:0.02043 3rd Qu.:0
## Max. :0 Max. :0.421140 Max. :0.5105 Max. :0.13934 Max. :0
## cMOP CMP ery GMP
## Min. :0.0000000 Min. :0 Min. :0.0000000 Min. :0.000e+00
## 1st Qu.:0.0000000 1st Qu.:0 1st Qu.:0.0000000 1st Qu.:0.000e+00
## Median :0.0000000 Median :0 Median :0.0004511 Median :0.000e+00
## Mean :0.0007752 Mean :0 Mean :0.0014828 Mean :3.112e-06
## 3rd Qu.:0.0004220 3rd Qu.:0 3rd Qu.:0.0023557 3rd Qu.:0.000e+00
## Max. :0.0404196 Max. :0 Max. :0.0156175 Max. :3.736e-04
## hMDP HSC immB interM
## Min. :0.0000000 Min. :0 Min. :0.000000 Min. :0.03935
## 1st Qu.:0.0000000 1st Qu.:0 1st Qu.:0.001233 1st Qu.:0.17359
## Median :0.0000000 Median :0 Median :0.006863 Median :0.21533
## Mean :0.0002782 Mean :0 Mean :0.010768 Mean :0.20676
## 3rd Qu.:0.0000000 3rd Qu.:0 3rd Qu.:0.013084 3rd Qu.:0.24116
## Max. :0.0157085 Max. :0 Max. :0.174863 Max. :0.33207
## kineNK LMPP matureN memB MEP
## Min. :0 Min. :0 Min. :0.1228 Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.2630 1st Qu.:0 1st Qu.:0
## Median :0 Median :0 Median :0.2948 Median :0 Median :0
## Mean :0 Mean :0 Mean :0.2931 Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.3260 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0 Max. :0.4218 Max. :0 Max. :0
## metaN MLP MPP myeN
## Min. :0.08611 Min. :0 Min. :0 Min. :0.000000
## 1st Qu.:0.20954 1st Qu.:0 1st Qu.:0 1st Qu.:0.000000
## Median :0.24646 Median :0 Median :0 Median :0.000000
## Mean :0.24488 Mean :0 Mean :0 Mean :0.000529
## 3rd Qu.:0.28270 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000000
## Max. :0.37715 Max. :0 Max. :0 Max. :0.063761
## naiB NKP nonM plasma
## Min. :0.0000000 Min. :0 Min. :0.000000 Min. :0.000e+00
## 1st Qu.:0.0000000 1st Qu.:0 1st Qu.:0.000000 1st Qu.:0.000e+00
## Median :0.0000000 Median :0 Median :0.000000 Median :4.373e-05
## Mean :0.0008892 Mean :0 Mean :0.006094 Mean :3.212e-04
## 3rd Qu.:0.0000000 3rd Qu.:0 3rd Qu.:0.002170 3rd Qu.:4.158e-04
## Max. :0.0920783 Max. :0 Max. :0.098688 Max. :4.052e-03
## preB preM proB proN regB
## Min. :0 Min. :0.00000 Min. :0 Min. :0 Min. :0.000e+00
## 1st Qu.:0 1st Qu.:0.01988 1st Qu.:0 1st Qu.:0 1st Qu.:0.000e+00
## Median :0 Median :0.02970 Median :0 Median :0 Median :0.000e+00
## Mean :0 Mean :0.03143 Mean :0 Mean :0 Mean :7.168e-06
## 3rd Qu.:0 3rd Qu.:0.04244 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000e+00
## Max. :0 Max. :0.08264 Max. :0 Max. :0 Max. :1.057e-03
## toxiNK
## Min. :0.0000000
## 1st Qu.:0.0000000
## Median :0.0000000
## Mean :0.0001496
## 3rd Qu.:0.0000000
## Max. :0.0062376
# Clustered heatmap of the estimated proportions (samples x cell types)
heatmap(deconv, margins=c(12,8))
Save
# Store the raw proportion estimates before they are renamed and rescaled
save(deconv, file="../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-Music.Rdata")
Make ID variable in targets
# Build the "<IOP2_ID>_<timepoint>" key that matches the bulk RNA-seq sample
# IDs ("<sampID2>_<intervention>").
# The timepoint label itself is used. The earlier `as.numeric(timepoint) - 1`
# relied on the factor's internal integer codes, which only equal the labels
# when the levels happen to be exactly "0", "1", ... in that order.
# NOTE(review): assumes the timepoint labels are the 0/1 intervention codes,
# as implied by the earlier join with the numeric cell-count table.
targets <- targets %>%
  mutate(
    ID = paste0(IOP2_ID, "_", as.character(timepoint))
  )
Make variable names
# Prefix every cell-type column so its origin is clear once merged
colnames(deconv) <- paste0("cc_blood_music_", colnames(deconv))
Make percentages
# Express the proportions as percentages
deconv <- as.data.frame(deconv) %>%
  mutate(across(where(is.numeric), ~ .x * 100))
Make ID variable in deconv
# Move the sample IDs from the row names into an "ID" column for joining
deconv <- rownames_to_column(as.data.frame(deconv), var = "ID")
Merge
# Add the MuSiC estimates to every sample sharing the participant/timepoint ID
targets <- targets %>%
  left_join(deconv, by = "ID")
Set the MuSiC estimates to missing for non-blood tissues
# The join above attaches the blood-derived estimates to every tissue of a
# participant/timepoint; blank them for anything that is not fasted blood.
# Columns are selected by their shared prefix rather than listed one by one,
# so the masking stays complete if the reference gains or loses cell types.
targets <- targets %>%
  mutate(
    across(
      starts_with("cc_blood_music_"),
      ~ ifelse(tissue != "fasted blood", NA, .x)
    )
  )
Save the RGset for blood only
# Subset the raw intensities to fasted blood samples (uses the tissue
# column of the RGset's own colData)
RGset_blood <- RGset[ , RGset$tissue == 'fasted blood']
Minfi: retrieve the FlowSorted.Blood.EPIC reference data from ExperimentHub
# Connect to ExperimentHub
hub <- ExperimentHub()
## snapshotDate(): 2022-10-31
# Look up the record holding the flow-sorted blood EPIC reference
query(hub, "FlowSorted.Blood.EPIC")
## ExperimentHub with 1 record
## # snapshotDate(): 2022-10-31
## # names(): EH1136
## # package(): FlowSorted.Blood.EPIC
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: RGChannelSet
## # $rdatadateadded: 2018-04-20
## # $title: FlowSorted.Blood.EPIC: Illumina Human Methylation data from EPIC o...
## # $description: The FlowSorted.Blood.EPIC package contains Illumina HumanMet...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: tar.gz
## # $sourceurl: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110554
## # $sourcesize: NA
## # $tags: c("ExperimentData", "Homo_sapiens_Data", "Tissue",
## # "MicroarrayData", "Genome", "TissueMicroarrayData",
## # "MethylationArrayData")
## # retrieve record with 'object[["EH1136"]]'
# Fetch the reference set by the record ID reported by the query above
FlowSorted.Blood.EPIC <- hub[["EH1136"]]
## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation
## loading from cache
Calculate cell counts
# Estimate proportions of six cell types for the blood samples against the
# EPIC reference; meanPlot = TRUE additionally draws a diagnostic plot.
# NOTE(review): this is estimateCellCounts(), and the log shows probes being
# picked from the reference data. The IDOL-optimised CpG library is
# normally used through estimateCellCounts2() — confirm that labelling
# these estimates "IDOL" is intended.
idol_blood <- estimateCellCounts(
rgSet = RGset_blood,
referencePlatform = 'IlluminaHumanMethylationEPIC',
cellTypes = c("CD8T", "CD4T", "NK", "Bcell",
"Mono", "Neu"),
verbose = TRUE, meanPlot = TRUE)
## [estimateCellCounts] Combining user data with reference (flow sorted) data.
## [estimateCellCounts] Processing user and reference data together.
## [preprocessQuantile] Mapping to genome.
## [preprocessQuantile] Fixing outliers.
## [preprocessQuantile] Quantile normalizing.
## [estimateCellCounts] Picking probes for composition estimation.
## [estimateCellCounts] Estimating composition.
Save
# Store the raw estimates before they are renamed and rescaled
save(idol_blood,
file="../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-IDOL.Rdata")
Create merging variable
# Move the array basenames from the row names into a "Basename" column so
# the estimates can be joined to the sample sheet, then inspect the ranges
idol_blood <- rownames_to_column(as.data.frame(idol_blood), var = "Basename")
summary(idol_blood)
## Basename CD8T CD4T NK
## Length:196 Min. :0.03718 Min. :0.03219 Min. :0.00370
## Class :character 1st Qu.:0.10224 1st Qu.:0.12119 1st Qu.:0.03752
## Mode :character Median :0.12223 Median :0.16240 Median :0.05209
## Mean :0.13009 Mean :0.15779 Mean :0.05783
## 3rd Qu.:0.15545 3rd Qu.:0.19628 3rd Qu.:0.07413
## Max. :0.28816 Max. :0.33827 Max. :0.24852
## Bcell Mono Neu
## Min. :0.01079 Min. :0.04267 Min. :0.1633
## 1st Qu.:0.04604 1st Qu.:0.08572 1st Qu.:0.4720
## Median :0.06048 Median :0.09969 Median :0.5201
## Mean :0.06562 Mean :0.10159 Mean :0.5179
## 3rd Qu.:0.07527 3rd Qu.:0.11647 3rd Qu.:0.5723
## Max. :0.45958 Max. :0.19203 Max. :0.7167
Make percentages
# Express the proportions as percentages (Basename is text and is skipped)
idol_blood <- idol_blood %>%
  mutate(across(where(is.numeric), ~ .x * 100))
Column names
# Prefix every cell-type column with "cc_blood_idol_", leaving the join key
# untouched. Names are derived from the existing column names instead of
# being assigned by position, so a change in column order or cell-type set
# can no longer attach a label to the wrong estimate.
idol_blood <- idol_blood %>%
  rename_with(~ paste0("cc_blood_idol_", .x), .cols = -Basename)
Merge
# Add the estimates to the sample sheet; non-blood arrays have no match
# and receive NA
targets <- targets %>%
  left_join(idol_blood, by = "Basename")
Load data
# Restore previously computed extended estimates; the code below uses an
# object named `idol_ext`, presumably provided by this file
load("../GOTO_Data/Cell_Counts/Blood/GOTO_Blood-IDOLext.Rdata")
Make percentages
# Express the proportions as percentages
idol_ext <- idol_ext %>%
  mutate(across(where(is.numeric), ~ .x * 100))
Look at it
# Inspect the range of each estimated cell type (now in percent)
summary(idol_ext)
## Bas Bmem Bnv CD4mem
## Min. :0.0000 Min. : 0.000 Min. :0.000 Min. : 0.570
## 1st Qu.:0.0300 1st Qu.: 1.127 1st Qu.:2.485 1st Qu.: 7.862
## Median :0.6700 Median : 1.630 Median :3.405 Median :10.560
## Mean :0.7264 Mean : 2.345 Mean :3.639 Mean :10.594
## 3rd Qu.:1.0825 3rd Qu.: 2.312 3rd Qu.:4.582 3rd Qu.:12.835
## Max. :4.9600 Max. :48.990 Max. :9.350 Max. :24.820
## CD4nv CD8mem CD8nv Eos
## Min. : 0.000 Min. : 0.000 Min. :0.0000 Min. : 0.0000
## 1st Qu.: 3.167 1st Qu.: 3.915 1st Qu.:0.0000 1st Qu.: 0.5125
## Median : 6.765 Median : 5.910 Median :0.0000 Median : 1.5750
## Mean : 6.748 Mean : 7.232 Mean :0.8024 Mean : 2.3382
## 3rd Qu.: 9.682 3rd Qu.: 9.180 3rd Qu.:1.1625 3rd Qu.: 3.1450
## Max. :18.780 Max. :36.450 Max. :5.6700 Max. :12.3900
## Mono Neu NK Treg
## Min. : 2.690 Min. :10.79 Min. : 0.000 Min. :0.0000
## 1st Qu.: 5.928 1st Qu.:41.97 1st Qu.: 3.958 1st Qu.:0.5475
## Median : 7.280 Median :47.98 Median : 5.190 Median :1.1550
## Mean : 7.369 Mean :47.64 Mean : 5.521 Mean :1.2803
## 3rd Qu.: 8.545 3rd Qu.:53.74 3rd Qu.: 6.520 3rd Qu.:1.8375
## Max. :14.140 Max. :74.11 Max. :15.840 Max. :4.3900
Create join variable
# Move the array basenames from the row names into a "Basename" column
idol_ext <- rownames_to_column(idol_ext, var = "Basename")
Set colnames
# Prefix every cell-type column with "cc_blood_ext_", leaving the join key
# untouched. Names are derived from the existing column names instead of
# being assigned by position, so a change in column order or cell-type set
# can no longer attach a label to the wrong estimate.
idol_ext <- idol_ext %>%
  rename_with(~ paste0("cc_blood_ext_", .x), .cols = -Basename)
Merge
# Add the extended estimates to the sample sheet
targets <- targets %>%
  left_join(idol_ext, by = "Basename")

# Sanity check: the sample sheet must still follow the methData column order
check <- targets$Basename == colnames(methData)
xtabs(~check)
## check
## TRUE
## 534
# Sanity check: the sample sheet must also follow the RGset column order
check <- targets$Basename == colnames(RGset)
xtabs(~check)
## check
## TRUE
## 534
# Sanity check: RGset and methData must list the samples in the same order
check <- colnames(RGset) == colnames(methData)
xtabs(~check)
## check
## TRUE
## 534
Reorder targets
# Put the sample sheet in the column order of methData and attach it as
# colData to both objects.
# Guard first: every array must appear exactly once in `targets`, otherwise
# match() would return NA rows or silently drop duplicates.
stopifnot(
  !anyDuplicated(targets$Basename),
  setequal(targets$Basename, colnames(methData))
)
# The column order is used directly rather than stored in a variable named
# `order`, which would shadow base::order().
targets <- targets[match(colnames(methData), targets$Basename), ]
rownames(targets) <- targets$Basename
colData(methData) <- DataFrame(targets)
# RGset shares the same column order (checked where the joins are verified)
colData(RGset) <- DataFrame(targets)
Look
# Print the summary; colData now includes the cell-count columns
methData
## class: SummarizedExperiment
## dim: 755777 534
## metadata(0):
## assays(1): beta
## rownames(755777): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(57): cpg chr ... MASK_extBase MASK_general
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(102): DNA_labnr IOP2_ID ... cc_blood_ext_NK
## cc_blood_ext_Treg
# Print the summary; colData now includes the cell-count columns
RGset
## class: RGChannelSetExtended
## dim: 1051815 534
## metadata(0):
## assays(5): Green Red GreenSD RedSD NBeads
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(534): 203527980082_R01C01 203527980082_R02C01 ...
## 203550300093_R07C01 203550300093_R08C01
## colData names(102): DNA_labnr IOP2_ID ... cc_blood_ext_NK
## cc_blood_ext_Treg
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
Save
# Persist the annotated sample sheet and methylation object.
# NOTE(review): both paths are the same files loaded at the top of this
# script, so the inputs are overwritten and a re-run would start from
# already-annotated targets — confirm this is intended.
# NOTE(review): RGset received the new colData above but is not saved here.
save(targets, file="../GOTO_Data/GOTO_targets-filtered.Rdata")
save(methData, file="../GOTO_Data/GOTO_methData-filtered.Rdata")
# Record the R and package versions used for this run
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] IlluminaHumanMethylation450kmanifest_0.4.0
## [2] IlluminaHumanMethylationEPICmanifest_0.3.0
## [3] FlowSorted.BloodExtended.EPIC_1.1.1
## [4] FlowSorted.Blood.EPIC_1.12.1
## [5] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [6] nlme_3.1-162
## [7] quadprog_1.5-8
## [8] genefilter_1.76.0
## [9] ExperimentHub_2.2.1
## [10] AnnotationHub_3.2.2
## [11] BiocFileCache_2.2.1
## [12] dbplyr_2.2.1
## [13] MuSiC_0.2.0
## [14] nnls_1.4
## [15] lubridate_1.9.2
## [16] DNAmArray_2.0.0
## [17] pls_2.8-2
## [18] FDb.InfiniumMethylation.hg19_2.2.0
## [19] org.Hs.eg.db_3.14.0
## [20] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [21] GenomicFeatures_1.46.5
## [22] AnnotationDbi_1.56.2
## [23] minfi_1.40.0
## [24] bumphunter_1.36.0
## [25] locfit_1.5-9.8
## [26] iterators_1.0.14
## [27] foreach_1.5.2
## [28] Biostrings_2.62.0
## [29] XVector_0.34.0
## [30] SummarizedExperiment_1.24.0
## [31] Biobase_2.58.0
## [32] MatrixGenerics_1.10.0
## [33] matrixStats_1.0.0
## [34] GenomicRanges_1.46.1
## [35] GenomeInfoDb_1.34.9
## [36] IRanges_2.32.0
## [37] S4Vectors_0.36.2
## [38] BiocGenerics_0.44.0
## [39] forcats_0.5.2
## [40] stringr_1.5.0
## [41] dplyr_1.1.3
## [42] purrr_0.3.4
## [43] readr_2.1.2
## [44] tidyr_1.2.1
## [45] tibble_3.2.1
## [46] ggplot2_3.4.3
## [47] tidyverse_1.3.2
## [48] cli_3.6.1
## [49] htmltools_0.5.5
## [50] rlang_1.1.1
## [51] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 tidyselect_1.2.0
## [3] RSQLite_2.2.17 grid_4.2.2
## [5] BiocParallel_1.32.6 cinaR_0.2.3
## [7] munsell_0.5.0 codetools_0.2-19
## [9] preprocessCore_1.60.2 withr_2.5.0
## [11] colorspace_2.1-0 filelock_1.0.2
## [13] highr_0.10 knitr_1.43
## [15] rstudioapi_0.14 GenomeInfoDbData_1.2.9
## [17] MCMCpack_1.6-3 bit64_4.0.5
## [19] rhdf5_2.42.1 coda_0.19-4
## [21] vctrs_0.6.3 generics_0.1.3
## [23] xfun_0.39 timechange_0.2.0
## [25] R6_2.5.1 illuminaio_0.40.0
## [27] bitops_1.0-7 rhdf5filters_1.10.1
## [29] cachem_1.0.8 reshape_0.8.9
## [31] DelayedArray_0.24.0 assertthat_0.2.1
## [33] vroom_1.5.7 promises_1.2.0.1
## [35] BiocIO_1.8.0 scales_1.2.1
## [37] googlesheets4_1.0.1 gtable_0.3.3
## [39] mcmc_0.9-7 MatrixModels_0.5-1
## [41] splines_4.2.2 rtracklayer_1.54.0
## [43] gargle_1.5.0 GEOquery_2.62.2
## [45] htm2txt_2.2.2 broom_1.0.1
## [47] BiocManager_1.30.21 yaml_2.3.7
## [49] reshape2_1.4.4 modelr_0.1.9
## [51] backports_1.4.1 httpuv_1.6.11
## [53] tools_4.2.2 nor1mix_1.3-0
## [55] ellipsis_0.3.2 jquerylib_0.1.4
## [57] RColorBrewer_1.1-3 siggenes_1.68.0
## [59] Rcpp_1.0.10 plyr_1.8.8
## [61] sparseMatrixStats_1.10.0 progress_1.2.2
## [63] zlibbioc_1.44.0 RCurl_1.98-1.12
## [65] prettyunits_1.1.1 openssl_2.0.6
## [67] haven_2.5.1 fs_1.6.2
## [69] magrittr_2.0.3 data.table_1.14.8
## [71] SparseM_1.81 reprex_2.0.2
## [73] googledrive_2.0.0 mime_0.12
## [75] hms_1.1.2 evaluate_0.21
## [77] xtable_1.8-4 XML_3.99-0.14
## [79] mclust_6.0.0 readxl_1.4.1
## [81] compiler_4.2.2 biomaRt_2.50.3
## [83] crayon_1.5.2 later_1.3.1
## [85] tzdb_0.4.0 DBI_1.1.3
## [87] MASS_7.3-60 rappdirs_0.3.3
## [89] Matrix_1.5-4.1 pkgconfig_2.0.3
## [91] GenomicAlignments_1.30.0 xml2_1.3.4
## [93] annotate_1.72.0 bslib_0.5.0
## [95] rngtools_1.5.2 multtest_2.50.0
## [97] beanplot_1.3.1 rvest_1.0.3
## [99] doRNG_1.8.6 scrime_1.3.5
## [101] digest_0.6.31 base64_2.0.1
## [103] cellranger_1.1.0 edgeR_3.40.2
## [105] DelayedMatrixStats_1.16.0 restfulr_0.0.15
## [107] curl_5.0.1 shiny_1.7.2
## [109] Rsamtools_2.10.0 quantreg_5.94
## [111] rjson_0.2.21 lifecycle_1.0.3
## [113] jsonlite_1.8.5 Rhdf5lib_1.20.0
## [115] askpass_1.1 limma_3.54.2
## [117] fansi_1.0.4 pillar_1.9.0
## [119] lattice_0.21-8 KEGGREST_1.34.0
## [121] fastmap_1.1.1 httr_1.4.6
## [123] survival_3.5-5 interactiveDisplayBase_1.32.0
## [125] glue_1.6.2 png_0.1-8
## [127] BiocVersion_3.16.0 bit_4.0.5
## [129] stringi_1.7.12 sass_0.4.6
## [131] HDF5Array_1.22.1 blob_1.2.4
## [133] memoise_2.0.1
Clear the workspace
# Remove every object from the current environment.
# NOTE(review): in a shared session this also deletes objects that were not
# created by this script.
rm(list=ls())