This script uses the MuSiC algorithm to predict muscle nuclei type proportions from bulk RNAseq and a single nuclei reference.

Setup

Load required packages

library(tidyverse)
library(Biobase)
library(MuSiC)
library(SummarizedExperiment)
library(GEOquery)

Load data

load('../GOTO_Data/Cell_Counts/Muscle/snCounts.Rdata')
counts[is.na(counts)] <- 0

Save genes

gene_names <- rownames(counts)

Bulk RNAseq

Load functions

source("../GOTO_Data/RNAseq/goto.rnaseq.functions.R")

# Paths
pathIN_dat <- "../GOTO_Data/RNAseq/merge.gene.counts_biopet_13052016.RData"
pathIN_cov <- "../GOTO_Data/RNAseq/muscle_QC_covariates_filesv2.csv"

# Muscle complete pairs
filt.samp <- "tissue_muscle|qc_sexswitch|qc_multdim2|qc_rep1|complete_pairs"

# Read in
dat1 <- read.gotornaseq(pathIN_dat = pathIN_dat, 
                        pathIN_cov, 
                        filt.samp = filt.samp, 
                        type="counts",
                        quiet = FALSE)

## ||| PREPARING GOTO RNASEQ DATA 
## || READING DATA 
## | Loading RNASEQ .. OK! 
##    [555 samples x 56520 features] 
## | Reading COVARIATES .. OK! 
##    [maintaining 174 samples x 117 features] 
## | Merging data .. OK! 
##    [555 samples x 56637 features] 
## || SUBSETTING SAMPLES 
## | Subsetting SAMPLES on ['tissue_muscle']; PASS: 174 out of 555
## | Subsetting SAMPLES on ['qc_sexswitch']; PASS: 172 out of 174
## | Subsetting SAMPLES on ['qc_multdim2']; PASS: 168 out of 172
## | Subsetting SAMPLES on ['qc_rep1']; PASS: 168 out of 168
## | Subsetting SAMPLES on ['complete_pairs']; PASS: 162 out of 168
## | DONE!

# Format
goto_counts <- dat1[[1]][,grep('ENS',colnames(dat1[[1]]))]
goto_counts <- as.data.frame(t(goto_counts))
colnames(goto_counts) <- str_c(dat1[[1]]$sampID2,
                               "_",
                               dat1[[1]]$visitnr)

# Map ensembl ID to name
ens2gene <- cinaR::grch37
m <- match(rownames(goto_counts), ens2gene$ensgene)
mapped.genes <- ens2gene$symbol[m]

Subsetting

Remove genes

removed.genes <- duplicated(mapped.genes) | is.na(mapped.genes) | grepl("^MT", mapped.genes)
goto_counts <- goto_counts[!removed.genes,]
rownames(goto_counts) <- mapped.genes[!removed.genes]

Subset snRNA genes

goto_counts <- goto_counts[gene_names,]
goto_counts <- goto_counts[!rowSums(is.na(goto_counts)) == 162,]

Save goto genes

goto_genes <- rownames(goto_counts)

Save overlapping genes

bin <- gene_names %in% goto_genes
xtabs(~bin)

## bin
## FALSE  TRUE 
##  4858 16310

new_names <- gene_names[bin]
counts <- counts[bin,]
goto_counts <- goto_counts[new_names,]

Annotate to Clusters

Load clusters

load('../GOTO_Data/Cell_Counts/Muscle/snClusters.Rdata')

Annotate clusters

cluster_key <- data.frame(
  seurat_clusters = c(0,1,2,3,4,5,6,7),
  cell_type = c('Fast Skeletal Muscle',
                'Slow Skeletal Muscle',
                'FAPs',
                'Endothelial cell',
                'T/NK/Macrophages',
                'SATs',
                'Smooth Muscle Cell',
                'Slow Skeletal Muscle')
)

nuclei_clusters <- nuclei_clusters %>% 
  mutate(
    cell_type = case_when(
      seurat_clusters == 0 ~ "Fast Skeletal Muscle",
      seurat_clusters == 1 ~ "Slow Skeletal Muscle",
      seurat_clusters == 2 ~ "FAPs",
      seurat_clusters == 3 ~ "Endothelial Cells",
      seurat_clusters == 4 ~ "T/NK/Macrophages",
      seurat_clusters == 5 ~ "SATs",
      seurat_clusters == 6 ~ "Smooth Muscle",
      seurat_clusters == 7 ~ 'Slow Skeletal Muscle'
    )
  )

nuclei_clusters <- nuclei_clusters %>% 
  dplyr::select(cell_type)

head(nuclei_clusters)

##                                 cell_type
## HM1_AAACCTGAGACTACAA Slow Skeletal Muscle
## HM1_AAACCTGAGCATGGCA                 FAPs
## HM1_AAACCTGAGCTAACTC Slow Skeletal Muscle
## HM1_AAACCTGAGGGATCTG Fast Skeletal Muscle
## HM1_AAACCTGCAACTTGAC Fast Skeletal Muscle
## HM1_AAACCTGCACAAGACG Slow Skeletal Muscle

cell_types <- nuclei_clusters[colnames(counts),]
cell_types[1:5]

## [1] "Slow Skeletal Muscle" "FAPs"                 "Slow Skeletal Muscle"
## [4] "Fast Skeletal Muscle" "Fast Skeletal Muscle"

Transform and label samples

meta <- as.data.frame(cell_types)
colnames(meta) <-'Cell' 
meta$Sample <- colnames(counts)
rownames(meta) <- colnames(counts)

Prepare data

Remove non-zero genes

nz.gene = rownames(counts)[(rowSums(counts) != 0)]
counts <- counts[nz.gene,]
dim(counts)

## [1]  16310 140000

Convert counts to matrix

dGC_to_matrix <- function(x,ncol_break = 49999){
  if(length(colnames(x))>(ncol_break+1)){
    total_cols = length(colnames(x)) 
    the_seq <- c(seq(1,total_cols,ncol_break), total_cols)  
    the_seq <- unique(the_seq) 
  }
  matrix_list <- list() 
  total_parts <- length(the_seq)-1 
  for(i in 1:total_parts){
    start_no = ifelse(i==1,1,the_seq[i]+1)  
    print(paste0(i, " is i"))
    print(paste0("start_no is ", start_no))
    end_no = the_seq[i+1] 
    print(paste0("part_number: ", i, ";cols-",start_no,":",end_no))
    matrix_list[[i]] <- as.matrix(x[,start_no:end_no,drop = F])
  }
  return(do.call(cbind, matrix_list)) 
}
full_mtx <- dGC_to_matrix(counts, 50000)

## [1] "1 is i"
## [1] "start_no is 1"
## [1] "part_number: 1;cols-1:50001"

## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 6.1 GiB

## [1] "2 is i"
## [1] "start_no is 50002"
## [1] "part_number: 2;cols-50002:100001"

## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 6.1 GiB

## [1] "3 is i"
## [1] "start_no is 100002"
## [1] "part_number: 3;cols-100002:140000"

## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 4.9 GiB

Create expression sets

C.eset <- Biobase::ExpressionSet(
  assayData = full_mtx, 
  phenoData = Biobase::AnnotatedDataFrame(meta))
C.eset

## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 16310 features, 140000 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: HM1_AAACCTGAGACTACAA HM1_AAACCTGAGCATGGCA ...
##     HM23_TCTCTAATCATGTCTT (140000 total)
##   varLabels: Cell Sample
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

T.eset <- Biobase::ExpressionSet(assayData = as.matrix(goto_counts))
T.eset

## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 16310 features, 162 samples 
##   element names: exprs 
## protocolData: none
## phenoData: none
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

Deconvolute!

deconv <- music_prop(
  bulk.eset = T.eset, 
  sc.eset = C.eset, 
  clusters = 'Cell',
  markers = NULL, 
  normalize = FALSE, 
  samples = 'Sample', 
  verbose = F)$Est.prop.weighted

Summarize

names <- colnames(deconv)
summary(deconv)

##  Slow Skeletal Muscle      FAPs           Fast Skeletal Muscle
##  Min.   :0.0000       Min.   :0.0000000   Min.   :0.001605    
##  1st Qu.:0.1848       1st Qu.:0.0000000   1st Qu.:0.105158    
##  Median :0.3735       Median :0.0000000   Median :0.219360    
##  Mean   :0.3353       Mean   :0.0007975   Mean   :0.278944    
##  3rd Qu.:0.4749       3rd Qu.:0.0000000   3rd Qu.:0.436309    
##  Max.   :0.6957       Max.   :0.0208963   Max.   :0.786071    
##       SATs           T/NK/Macrophages    Endothelial Cells Smooth Muscle      
##  Min.   :0.0000000   Min.   :0.0000000   Min.   :0.2093    Min.   :0.0000000  
##  1st Qu.:0.0000000   1st Qu.:0.0000000   1st Qu.:0.3262    1st Qu.:0.0000000  
##  Median :0.0000000   Median :0.0007591   Median :0.3754    Median :0.0000000  
##  Mean   :0.0003633   Mean   :0.0071134   Mean   :0.3774    Mean   :0.0001438  
##  3rd Qu.:0.0000000   3rd Qu.:0.0088130   3rd Qu.:0.4291    3rd Qu.:0.0000000  
##  Max.   :0.0216730   Max.   :0.0909209   Max.   :0.5930    Max.   :0.0090539

heatmap(deconv, margins=c(12,8))

Save

save(deconv, file='../GOTO_Data/Cell_Counts/Muscle/Music_PredictedMuscleCells.Rdata')

Session Info

sessionInfo()

## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] GEOquery_2.62.2                                    
##  [2] MuSiC_0.2.0                                        
##  [3] nnls_1.4                                           
##  [4] gplots_3.1.3                                       
##  [5] plotly_4.10.1                                      
##  [6] SeuratObject_4.1.3                                 
##  [7] Seurat_4.3.0                                       
##  [8] gridExtra_2.3                                      
##  [9] lattice_0.21-8                                     
## [10] bacon_1.22.0                                       
## [11] ellipse_0.4.5                                      
## [12] methylGSA_1.12.0                                   
## [13] sva_3.42.0                                         
## [14] genefilter_1.76.0                                  
## [15] mgcv_1.8-42                                        
## [16] nlme_3.1-162                                       
## [17] limma_3.54.2                                       
## [18] lmerTest_3.1-3                                     
## [19] lme4_1.1-30                                        
## [20] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [21] snpStats_1.44.0                                    
## [22] survival_3.5-5                                     
## [23] ggrepel_0.9.1                                      
## [24] ggfortify_0.4.14                                   
## [25] irlba_2.3.5.1                                      
## [26] Matrix_1.5-4.1                                     
## [27] omicsPrint_1.14.0                                  
## [28] MASS_7.3-60                                        
## [29] DNAmArray_2.0.0                                    
## [30] pls_2.8-2                                          
## [31] FDb.InfiniumMethylation.hg19_2.2.0                 
## [32] org.Hs.eg.db_3.14.0                                
## [33] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [34] GenomicFeatures_1.46.5                             
## [35] AnnotationDbi_1.56.2                               
## [36] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [37] minfi_1.40.0                                       
## [38] bumphunter_1.36.0                                  
## [39] locfit_1.5-9.8                                     
## [40] iterators_1.0.14                                   
## [41] foreach_1.5.2                                      
## [42] Biostrings_2.62.0                                  
## [43] XVector_0.34.0                                     
## [44] SummarizedExperiment_1.24.0                        
## [45] Biobase_2.58.0                                     
## [46] MatrixGenerics_1.10.0                              
## [47] matrixStats_1.0.0                                  
## [48] GenomicRanges_1.46.1                               
## [49] GenomeInfoDb_1.34.9                                
## [50] IRanges_2.32.0                                     
## [51] S4Vectors_0.36.2                                   
## [52] BiocGenerics_0.44.0                                
## [53] BiocParallel_1.32.6                                
## [54] MethylAid_1.28.0                                   
## [55] forcats_0.5.2                                      
## [56] stringr_1.5.0                                      
## [57] dplyr_1.1.3                                        
## [58] purrr_0.3.4                                        
## [59] readr_2.1.2                                        
## [60] tidyr_1.2.1                                        
## [61] tibble_3.2.1                                       
## [62] ggplot2_3.4.3                                      
## [63] tidyverse_1.3.2                                    
## [64] rmarkdown_2.16                                     
## 
## loaded via a namespace (and not attached):
##   [1] ica_1.0-3                                         
##   [2] Rsamtools_2.10.0                                  
##   [3] cinaR_0.2.3                                       
##   [4] lmtest_0.9-40                                     
##   [5] crayon_1.5.2                                      
##   [6] rhdf5filters_1.10.1                               
##   [7] backports_1.4.1                                   
##   [8] reprex_2.0.2                                      
##   [9] GOSemSim_2.20.0                                   
##  [10] rlang_1.1.1                                       
##  [11] ROCR_1.0-11                                       
##  [12] readxl_1.4.1                                      
##  [13] SparseM_1.81                                      
##  [14] nloptr_2.0.3                                      
##  [15] filelock_1.0.2                                    
##  [16] rjson_0.2.21                                      
##  [17] bit64_4.0.5                                       
##  [18] glue_1.6.2                                        
##  [19] sctransform_0.3.5                                 
##  [20] rngtools_1.5.2                                    
##  [21] spatstat.sparse_3.0-1                             
##  [22] mcmc_0.9-7                                        
##  [23] spatstat.geom_3.2-1                               
##  [24] DOSE_3.20.1                                       
##  [25] haven_2.5.1                                       
##  [26] tidyselect_1.2.0                                  
##  [27] fitdistrplus_1.1-11                               
##  [28] XML_3.99-0.14                                     
##  [29] zoo_1.8-12                                        
##  [30] GenomicAlignments_1.30.0                          
##  [31] MatrixModels_0.5-1                                
##  [32] xtable_1.8-4                                      
##  [33] magrittr_2.0.3                                    
##  [34] evaluate_0.21                                     
##  [35] cli_3.6.1                                         
##  [36] zlibbioc_1.44.0                                   
##  [37] miniUI_0.1.1.1                                    
##  [38] rstudioapi_0.14                                   
##  [39] doRNG_1.8.6                                       
##  [40] sp_1.6-1                                          
##  [41] MultiAssayExperiment_1.20.0                       
##  [42] bslib_0.5.0                                       
##  [43] fastmatch_1.1-3                                   
##  [44] treeio_1.18.1                                     
##  [45] shiny_1.7.2                                       
##  [46] xfun_0.39                                         
##  [47] askpass_1.1                                       
##  [48] multtest_2.50.0                                   
##  [49] cluster_2.1.4                                     
##  [50] caTools_1.18.2                                    
##  [51] tidygraph_1.2.2                                   
##  [52] KEGGREST_1.34.0                                   
##  [53] quantreg_5.94                                     
##  [54] base64_2.0.1                                      
##  [55] ape_5.7-1                                         
##  [56] scrime_1.3.5                                      
##  [57] listenv_0.9.0                                     
##  [58] png_0.1-8                                         
##  [59] reshape_0.8.9                                     
##  [60] future_1.32.0                                     
##  [61] withr_2.5.0                                       
##  [62] bitops_1.0-7                                      
##  [63] ggforce_0.3.4                                     
##  [64] plyr_1.8.8                                        
##  [65] cellranger_1.1.0                                  
##  [66] coda_0.19-4                                       
##  [67] pillar_1.9.0                                      
##  [68] cachem_1.0.8                                      
##  [69] fs_1.6.2                                          
##  [70] clusterProfiler_4.2.2                             
##  [71] DelayedMatrixStats_1.16.0                         
##  [72] vctrs_0.6.3                                       
##  [73] ellipsis_0.3.2                                    
##  [74] generics_0.1.3                                    
##  [75] tools_4.2.2                                       
##  [76] munsell_0.5.0                                     
##  [77] tweenr_2.0.2                                      
##  [78] fgsea_1.20.0                                      
##  [79] DelayedArray_0.24.0                               
##  [80] abind_1.4-5                                       
##  [81] fastmap_1.1.1                                     
##  [82] compiler_4.2.2                                    
##  [83] httpuv_1.6.11                                     
##  [84] rtracklayer_1.54.0                                
##  [85] beanplot_1.3.1                                    
##  [86] MCMCpack_1.6-3                                    
##  [87] GenomeInfoDbData_1.2.9                            
##  [88] edgeR_3.40.2                                      
##  [89] deldir_1.0-9                                      
##  [90] utf8_1.2.3                                        
##  [91] later_1.3.1                                       
##  [92] RobustRankAggreg_1.2.1                            
##  [93] BiocFileCache_2.2.1                               
##  [94] jsonlite_1.8.5                                    
##  [95] scales_1.2.1                                      
##  [96] pbapply_1.7-0                                     
##  [97] tidytree_0.4.0                                    
##  [98] sparseMatrixStats_1.10.0                          
##  [99] lazyeval_0.2.2                                    
## [100] promises_1.2.0.1                                  
## [101] goftest_1.2-3                                     
## [102] spatstat.utils_3.0-3                              
## [103] reticulate_1.30                                   
## [104] htm2txt_2.2.2                                     
## [105] nor1mix_1.3-0                                     
## [106] cowplot_1.1.1                                     
## [107] statmod_1.5.0                                     
## [108] siggenes_1.68.0                                   
## [109] Rtsne_0.16                                        
## [110] downloader_0.4                                    
## [111] uwot_0.1.14                                       
## [112] igraph_1.4.3                                      
## [113] HDF5Array_1.22.1                                  
## [114] numDeriv_2016.8-1.1                               
## [115] yaml_2.3.7                                        
## [116] htmltools_0.5.5                                   
## [117] memoise_2.0.1                                     
## [118] BiocIO_1.8.0                                      
## [119] graphlayouts_0.8.1                                
## [120] quadprog_1.5-8                                    
## [121] viridisLite_0.4.2                                 
## [122] digest_0.6.31                                     
## [123] assertthat_0.2.1                                  
## [124] mime_0.12                                         
## [125] rappdirs_0.3.3                                    
## [126] RSQLite_2.2.17                                    
## [127] yulab.utils_0.0.6                                 
## [128] future.apply_1.11.0                               
## [129] data.table_1.14.8                                 
## [130] blob_1.2.4                                        
## [131] preprocessCore_1.60.2                             
## [132] splines_4.2.2                                     
## [133] labeling_0.4.2                                    
## [134] Rhdf5lib_1.20.0                                   
## [135] illuminaio_0.40.0                                 
## [136] googledrive_2.0.0                                 
## [137] RaggedExperiment_1.18.0                           
## [138] RCurl_1.98-1.12                                   
## [139] broom_1.0.1                                       
## [140] hms_1.1.2                                         
## [141] modelr_0.1.9                                      
## [142] rhdf5_2.42.1                                      
## [143] colorspace_2.1-0                                  
## [144] aplot_0.1.7                                       
## [145] sass_0.4.6                                        
## [146] Rcpp_1.0.10                                       
## [147] mclust_6.0.0                                      
## [148] RANN_2.6.1                                        
## [149] enrichplot_1.14.2                                 
## [150] fansi_1.0.4                                       
## [151] tzdb_0.4.0                                        
## [152] parallelly_1.36.0                                 
## [153] R6_2.5.1                                          
## [154] grid_4.2.2                                        
## [155] ggridges_0.5.4                                    
## [156] lifecycle_1.0.3                                   
## [157] curl_5.0.1                                        
## [158] googlesheets4_1.0.1                               
## [159] minqa_1.2.5                                       
## [160] leiden_0.4.3                                      
## [161] jquerylib_0.1.4                                   
## [162] DO.db_2.9                                         
## [163] qvalue_2.26.0                                     
## [164] RcppAnnoy_0.0.20                                  
## [165] RColorBrewer_1.1-3                                
## [166] spatstat.explore_3.1-0                            
## [167] htmlwidgets_1.5.4                                 
## [168] polyclip_1.10-4                                   
## [169] biomaRt_2.50.3                                    
## [170] missMethyl_1.28.0                                 
## [171] shadowtext_0.1.2                                  
## [172] timechange_0.2.0                                  
## [173] gridGraphics_0.5-1                                
## [174] reactome.db_1.77.0                                
## [175] rvest_1.0.3                                       
## [176] globals_0.16.2                                    
## [177] openssl_2.0.6                                     
## [178] spatstat.random_3.1-5                             
## [179] patchwork_1.1.2                                   
## [180] progressr_0.13.0                                  
## [181] codetools_0.2-19                                  
## [182] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [183] lubridate_1.9.2                                   
## [184] GO.db_3.14.0                                      
## [185] gtools_3.9.4                                      
## [186] prettyunits_1.1.1                                 
## [187] dbplyr_2.2.1                                      
## [188] gridBase_0.4-7                                    
## [189] gtable_0.3.3                                      
## [190] DBI_1.1.3                                         
## [191] tensor_1.5                                        
## [192] ggfun_0.0.7                                       
## [193] httr_1.4.6                                        
## [194] highr_0.10                                        
## [195] KernSmooth_2.23-21                                
## [196] stringi_1.7.12                                    
## [197] vroom_1.5.7                                       
## [198] progress_1.2.2                                    
## [199] reshape2_1.4.4                                    
## [200] farver_2.1.1                                      
## [201] annotate_1.72.0                                   
## [202] viridis_0.6.2                                     
## [203] hexbin_1.28.3                                     
## [204] ggtree_3.2.1                                      
## [205] xml2_1.3.4                                        
## [206] boot_1.3-28.1                                     
## [207] restfulr_0.0.15                                   
## [208] scattermore_0.8                                   
## [209] ggplotify_0.1.0                                   
## [210] bit_4.0.5                                         
## [211] spatstat.data_3.0-1                               
## [212] scatterpie_0.1.8                                  
## [213] ggraph_2.0.6                                      
## [214] pkgconfig_2.0.3                                   
## [215] gargle_1.5.0                                      
## [216] knitr_1.43

Clear

rm(list=ls())

Script 14: Predict nuclei types in bulk samples

Compiled: June 26, 2024