This script loads in differential expression results and tests if genes within 100kb of muscle CpGs are differentially expressed in muscle.


Setup

Load packages

library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)

Load data

load("../GOTO_Data/GOTO_results-top-muscle-adj.Rdata")

Genes within 100kb

Save significant CpGs

print(paste0("There are ", 
             nrow(top_cpgs), 
             " CpGs significant at 5% FDR level"))
## [1] "There are 162 CpGs significant at 5% FDR level"
cpg_top <- top_cpgs$cpg

Save gene locations (saved previously)

txdb <- makeTxDbFromEnsembl(organism = "Homo sapiens",
                            release = 75,
                            circ_seqs = NULL,
                            server = "ensembldb.ensembl.org",
                            username = "anonymous", password = NULL, port = 0L,
                            tx_attrib = NULL)

gene_range <- unlist(cdsBy(txdb, by = "gene"))
gene_range <- unique(gene_range)

save(gene_range, file='/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata)

Load gene_range object

load('/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata')
gene_range
## GRanges object with 302587 ranges and 2 metadata columns:
##                   seqnames            ranges strand |    cds_id        cds_name
##                      <Rle>         <IRanges>  <Rle> | <integer>     <character>
##   ENSG00000000003        X 99885795-99885863      - |    717267 ENSP00000362111
##   ENSG00000000003        X 99887482-99887565      - |    717268 ENSP00000362111
##   ENSG00000000003        X 99888402-99888536      - |    717269 ENSP00000362111
##   ENSG00000000003        X 99888928-99889026      - |    717270 ENSP00000362111
##   ENSG00000000003        X 99890175-99890249      - |    717271 ENSP00000362111
##               ...      ...               ...    ... .       ...             ...
##            LRG_97   LRG_97       17272-17334      + |    799400        LRG_97p1
##            LRG_97   LRG_97       17876-18035      + |    799401        LRG_97p1
##            LRG_97   LRG_97       22463-22593      + |    799402        LRG_97p1
##            LRG_98   LRG_98       10293-13424      + |    799403        LRG_98p1
##            LRG_99   LRG_99        9069-10652      + |    799404        LRG_99p1
##   -------
##   seqinfo: 722 sequences (1 circular) from an unspecified genome

Make a GenomicRanges object for the top CpGs

cpg_range <- GRanges(seqnames = top_cpgs$cpg_chr,
        IRanges(
          start = top_cpgs$cpg_start,
          end = top_cpgs$cpg_end,
          names = top_cpgs$cpg
        ))

cpg_range
## GRanges object with 162 ranges and 0 metadata columns:
##              seqnames              ranges strand
##                 <Rle>           <IRanges>  <Rle>
##   cg09563940        7 130873557-130873559      *
##   cg26998535       10 103699236-103699238      *
##   cg03045139        5   77826273-77826275      *
##   cg19003556        6 136399169-136399171      *
##   cg00353432        7 139372971-139372973      *
##          ...      ...                 ...    ...
##   cg21342383       10 127661991-127661993      *
##   cg01668986        8   21541541-21541543      *
##   cg05008948        7   99160713-99160715      *
##   cg24161080       18   33889786-33889788      *
##   cg12394201       11   43942417-43942419      *
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths

Initialize a distance matrix

distance.matrix <- matrix(
  NaN, 
  nrow = length(gene_range), 
  ncol = length(cpg_range), 
  dimnames = list(names(gene_range), 
                  names(cpg_range)))

Loop through CpGs

for(i in 1:nrow(top_cpgs)){
    cpg <- cpg_range[i]
    calc.distance <- distance(
      cpg, 
      gene_range, 
      ignore.strand = T)
    distance.matrix[,i] <- calc.distance
}

dim(distance.matrix)
## [1] 302587    162
distance.matrix <- as.data.frame(distance.matrix)
colnames(distance.matrix) <- names(cpg_range)
distance.matrix$gene <- names(gene_range)

Make a list of genes within 100kb of each cpg

gene_list <- lapply(names(cpg_range), function(x){
  df <- (distance.matrix %>% dplyr::select(gene, all_of(x)))
  colnames(df) <- c('gene', 'cpg')
  df <- df %>% filter(cpg <= 100000)
  if (nrow(df) > 0) {
    return(data.frame(cpg = x,
                      gene_ens = unique(df$gene)))
  }
})

Save gene symbols

ens2gene <- cinaR::grch37

Bind

df <- data.frame()

for(i in 1:length(gene_list)){
  gene_df <- gene_list[[i]]
  
  cpg <- gene_df$cpg[1]
  genes <- unique(gene_df$gene_ens)
  
  df <- rbind(df,
              data.frame(cpg = cpg,
                         gene_ens = genes))
}

sym <- ens2gene$symbol[match(df$gene_ens,
                             ens2gene$ensgene)]

df$gene <- sym

Inspect

head(df)
##          cpg        gene_ens     gene
## 1 cg26998535 ENSG00000120029 C10orf76
## 2 cg26998535 ENSG00000120049   KCNIP2
## 3 cg03045139 ENSG00000085365   SCAMP1
## 4 cg03045139 ENSG00000145685   LHFPL2
## 5 cg19003556 ENSG00000171408    PDE7B
## 6 cg00353432 ENSG00000064393    HIPK2
dim(df)
## [1] 474   3

Save

save(df, file='../GOTO_Data/DEGs/Muscle-100kb.Rdata')

DEGs

print(paste0("There are ", length(unique(df$gene_ens)), " unique genes within 100kb of ",
             length(unique(df$cpg)), " CpGs."))
## [1] "There are 454 unique genes within 100kb of 149 CpGs."

Read in DEG results

deg <- read.table('../GOTO_Data/DEGs/DEGs_muscle_postprandial_combined_new.txt')

Filter for those within 100kb

deg <- deg %>% 
  filter(GENE.SYMBOL %in% df$gene) %>% 
  select(logFC, AveExpr, pvalue, gene = GENE.SYMBOL)

Adjust for multiple testing

deg$padj <- p.adjust(deg$pvalue, method='fdr')

Inspect top genes

deg <- deg %>% filter(padj <= 0.05)
head(deg)
##                      logFC   AveExpr       pvalue    gene         padj
## ENSG00000117594 -0.4646230 0.9101785 2.070661e-09 HSD11B1 3.788328e-07
## ENSG00000127527 -0.2020087 5.2722313 3.077723e-09 EPS15L1 3.788328e-07
## ENSG00000169692 -0.2709130 3.5211115 3.284677e-09  AGPAT2 3.788328e-07
## ENSG00000055208 -0.1768428 6.3665159 2.927471e-08    TAB2 2.070958e-06
## ENSG00000104221 -0.2097496 3.1564529 2.992713e-08    BRF2 2.070958e-06
## ENSG00000126091 -0.2469463 5.9603147 3.682312e-08 ST3GAL3 2.123467e-06
unique(deg$gene)
##   [1] "HSD11B1"   "EPS15L1"   "AGPAT2"    "TAB2"      "BRF2"      "ST3GAL3"  
##   [7] "LRRC20"    "FLII"      "FYN"       "INPP5A"    "LHFPL2"    "RUNX1"    
##  [13] "DPP9"      "MCFD2"     "TMOD3"     "RBMS1"     "CRKL"      "NCOR2"    
##  [19] "FDFT1"     "FSCN1"     "FAM96B"    "UBQLN4"    "PSMD3"     "KIAA0895L"
##  [25] "THAP7"     "ATP1A1"    "FAM220A"   "FHOD1"     "C10orf76"  "SRL"      
##  [31] "PMF1"      "RNF216"    "CAPRIN2"   "PROSC"     "ADAM12"    "PLXND1"   
##  [37] "RAC1"      "MDH2"      "GRB10"     "RPRD1B"    "DHX32"     "TMOD2"    
##  [43] "TMEM120A"  "SLC35E4"   "LEO1"      "EXOC3L1"   "ZFP36L1"   "PPIF"     
##  [49] "DLL1"      "PCDHGC3"   "MRPS34"    "ZNF143"    "PES1"      "CHRND"    
##  [55] "CD59"      "SRC"       "GSDMB"     "ARHGEF17"  "SERBP1"    "CDH24"    
##  [61] "ALKBH3"    "ZNF219"    "ZNF703"    "EXTL3"     "HAGH"      "ARHGEF40" 
##  [67] "PSMB5"     "ALKBH5"    "PLEKHG4"   "ZNF394"    "DRG2"      "HEG1"     
##  [73] "GGA1"      "RAPGEF1"   "GINM1"     "THBS2"     "TOP2A"     "FBXO3"    
##  [79] "MAPK8IP3"  "PCDHGA12"  "PSMA6"     "ZMIZ1"     "TTC7A"     "LINC01119"
##  [85] "FPR3"      "SLFN11"    "TRAF6"     "FAHD1"     "FKBP5"     "UCK2"     
##  [91] "EME2"      "ZNF789"    "NUBP2"     "PCDHGA2"   "PCDHGC5"   "TAF2"     
##  [97] "HRH1"      "JMJD1C"    "CHSY1"     "SWAP70"    "ATP5J2"    "E2F4"     
## [103] "CEP68"     "C11orf96"  "SPSB3"     "GID4"      "UNC45B"    "ZNF655"   
## [109] "LRRK1"     "KIFC3"     "EDEM3"     "SPCS2"     "CHRNG"     "NET1"     
## [115] "C14orf159" "PPID"      "SEMA6B"    "FAM129A"   "FPR2"      "DEPTOR"   
## [121] "CPQ"       "RPS6KA5"   "AP1M1"     "ADAM19"    "PPARGC1B"  "CAPN2"    
## [127] "ID4"       "FAM193B"   "DDC"       "CLEC3B"    "P2RY6"     "DBN1"     
## [133] "CAMK2D"    "LCOR"      "RPL26L1"   "TMCC1"     "ATP6V0E1"  "LZTR1"    
## [139] "LRG1"      "SRP9"      "MED24"     "GAL3ST1"   "LRRC55"    "SCAMP1"   
## [145] "C5orf66"   "WSCD1"     "THRB"      "REV3L"     "FPR1"      "PAMR1"    
## [151] "GPR68"     "EEPD1"     "KATNB1"    "FAM120B"   "PTCD1"     "SLC9A5"   
## [157] "C9orf3"    "ZKSCAN5"   "ABCA8"     "DDX41"     "INTS9"     "RPN2"     
## [163] "H2AFY"     "TRAF3IP2"  "GLIS2"     "FAM213A"

Save

save(deg, file='../GOTO_Data/DEGs/Muscle-DEG.Rdata')

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0                                       
##  [2] GEOquery_2.62.2                                    
##  [3] MuSiC_0.2.0                                        
##  [4] nnls_1.4                                           
##  [5] gplots_3.1.3                                       
##  [6] plotly_4.10.1                                      
##  [7] SeuratObject_4.1.3                                 
##  [8] Seurat_4.3.0                                       
##  [9] gridExtra_2.3                                      
## [10] lattice_0.21-8                                     
## [11] bacon_1.22.0                                       
## [12] ellipse_0.4.5                                      
## [13] methylGSA_1.12.0                                   
## [14] sva_3.42.0                                         
## [15] genefilter_1.76.0                                  
## [16] mgcv_1.8-42                                        
## [17] nlme_3.1-162                                       
## [18] limma_3.54.2                                       
## [19] lmerTest_3.1-3                                     
## [20] lme4_1.1-30                                        
## [21] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [22] snpStats_1.44.0                                    
## [23] survival_3.5-5                                     
## [24] ggrepel_0.9.1                                      
## [25] ggfortify_0.4.14                                   
## [26] irlba_2.3.5.1                                      
## [27] Matrix_1.5-4.1                                     
## [28] omicsPrint_1.14.0                                  
## [29] MASS_7.3-60                                        
## [30] DNAmArray_2.0.0                                    
## [31] pls_2.8-2                                          
## [32] FDb.InfiniumMethylation.hg19_2.2.0                 
## [33] org.Hs.eg.db_3.14.0                                
## [34] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [35] GenomicFeatures_1.46.5                             
## [36] AnnotationDbi_1.56.2                               
## [37] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [38] minfi_1.40.0                                       
## [39] bumphunter_1.36.0                                  
## [40] locfit_1.5-9.8                                     
## [41] iterators_1.0.14                                   
## [42] foreach_1.5.2                                      
## [43] Biostrings_2.62.0                                  
## [44] XVector_0.34.0                                     
## [45] SummarizedExperiment_1.24.0                        
## [46] Biobase_2.58.0                                     
## [47] MatrixGenerics_1.10.0                              
## [48] matrixStats_1.0.0                                  
## [49] GenomicRanges_1.46.1                               
## [50] GenomeInfoDb_1.34.9                                
## [51] IRanges_2.32.0                                     
## [52] S4Vectors_0.36.2                                   
## [53] BiocGenerics_0.44.0                                
## [54] BiocParallel_1.32.6                                
## [55] MethylAid_1.28.0                                   
## [56] forcats_0.5.2                                      
## [57] stringr_1.5.0                                      
## [58] dplyr_1.1.3                                        
## [59] purrr_0.3.4                                        
## [60] readr_2.1.2                                        
## [61] tidyr_1.2.1                                        
## [62] tibble_3.2.1                                       
## [63] ggplot2_3.4.3                                      
## [64] tidyverse_1.3.2                                    
## [65] rmarkdown_2.16                                     
## 
## loaded via a namespace (and not attached):
##   [1] ica_1.0-3                                         
##   [2] Rsamtools_2.10.0                                  
##   [3] cinaR_0.2.3                                       
##   [4] lmtest_0.9-40                                     
##   [5] crayon_1.5.2                                      
##   [6] rhdf5filters_1.10.1                               
##   [7] backports_1.4.1                                   
##   [8] reprex_2.0.2                                      
##   [9] GOSemSim_2.20.0                                   
##  [10] rlang_1.1.1                                       
##  [11] ROCR_1.0-11                                       
##  [12] readxl_1.4.1                                      
##  [13] SparseM_1.81                                      
##  [14] nloptr_2.0.3                                      
##  [15] filelock_1.0.2                                    
##  [16] rjson_0.2.21                                      
##  [17] bit64_4.0.5                                       
##  [18] glue_1.6.2                                        
##  [19] sctransform_0.3.5                                 
##  [20] rngtools_1.5.2                                    
##  [21] spatstat.sparse_3.0-1                             
##  [22] mcmc_0.9-7                                        
##  [23] spatstat.geom_3.2-1                               
##  [24] DOSE_3.20.1                                       
##  [25] haven_2.5.1                                       
##  [26] tidyselect_1.2.0                                  
##  [27] fitdistrplus_1.1-11                               
##  [28] XML_3.99-0.14                                     
##  [29] zoo_1.8-12                                        
##  [30] GenomicAlignments_1.30.0                          
##  [31] MatrixModels_0.5-1                                
##  [32] xtable_1.8-4                                      
##  [33] magrittr_2.0.3                                    
##  [34] evaluate_0.21                                     
##  [35] cli_3.6.1                                         
##  [36] zlibbioc_1.44.0                                   
##  [37] miniUI_0.1.1.1                                    
##  [38] rstudioapi_0.14                                   
##  [39] doRNG_1.8.6                                       
##  [40] sp_1.6-1                                          
##  [41] MultiAssayExperiment_1.20.0                       
##  [42] bslib_0.5.0                                       
##  [43] fastmatch_1.1-3                                   
##  [44] treeio_1.18.1                                     
##  [45] shiny_1.7.2                                       
##  [46] xfun_0.39                                         
##  [47] askpass_1.1                                       
##  [48] multtest_2.50.0                                   
##  [49] cluster_2.1.4                                     
##  [50] caTools_1.18.2                                    
##  [51] tidygraph_1.2.2                                   
##  [52] KEGGREST_1.34.0                                   
##  [53] quantreg_5.94                                     
##  [54] base64_2.0.1                                      
##  [55] ape_5.7-1                                         
##  [56] scrime_1.3.5                                      
##  [57] listenv_0.9.0                                     
##  [58] png_0.1-8                                         
##  [59] reshape_0.8.9                                     
##  [60] future_1.32.0                                     
##  [61] withr_2.5.0                                       
##  [62] bitops_1.0-7                                      
##  [63] ggforce_0.3.4                                     
##  [64] plyr_1.8.8                                        
##  [65] cellranger_1.1.0                                  
##  [66] coda_0.19-4                                       
##  [67] pillar_1.9.0                                      
##  [68] cachem_1.0.8                                      
##  [69] fs_1.6.2                                          
##  [70] clusterProfiler_4.2.2                             
##  [71] DelayedMatrixStats_1.16.0                         
##  [72] vctrs_0.6.3                                       
##  [73] ellipsis_0.3.2                                    
##  [74] generics_0.1.3                                    
##  [75] tools_4.2.2                                       
##  [76] munsell_0.5.0                                     
##  [77] tweenr_2.0.2                                      
##  [78] fgsea_1.20.0                                      
##  [79] DelayedArray_0.24.0                               
##  [80] abind_1.4-5                                       
##  [81] fastmap_1.1.1                                     
##  [82] compiler_4.2.2                                    
##  [83] httpuv_1.6.11                                     
##  [84] rtracklayer_1.54.0                                
##  [85] beanplot_1.3.1                                    
##  [86] MCMCpack_1.6-3                                    
##  [87] GenomeInfoDbData_1.2.9                            
##  [88] edgeR_3.40.2                                      
##  [89] deldir_1.0-9                                      
##  [90] utf8_1.2.3                                        
##  [91] later_1.3.1                                       
##  [92] RobustRankAggreg_1.2.1                            
##  [93] BiocFileCache_2.2.1                               
##  [94] jsonlite_1.8.5                                    
##  [95] scales_1.2.1                                      
##  [96] carData_3.0-5                                     
##  [97] pbapply_1.7-0                                     
##  [98] tidytree_0.4.0                                    
##  [99] sparseMatrixStats_1.10.0                          
## [100] lazyeval_0.2.2                                    
## [101] promises_1.2.0.1                                  
## [102] car_3.1-0                                         
## [103] goftest_1.2-3                                     
## [104] spatstat.utils_3.0-3                              
## [105] reticulate_1.30                                   
## [106] htm2txt_2.2.2                                     
## [107] nor1mix_1.3-0                                     
## [108] cowplot_1.1.1                                     
## [109] statmod_1.5.0                                     
## [110] siggenes_1.68.0                                   
## [111] Rtsne_0.16                                        
## [112] downloader_0.4                                    
## [113] uwot_0.1.14                                       
## [114] igraph_1.4.3                                      
## [115] HDF5Array_1.22.1                                  
## [116] numDeriv_2016.8-1.1                               
## [117] yaml_2.3.7                                        
## [118] htmltools_0.5.5                                   
## [119] memoise_2.0.1                                     
## [120] BiocIO_1.8.0                                      
## [121] graphlayouts_0.8.1                                
## [122] quadprog_1.5-8                                    
## [123] viridisLite_0.4.2                                 
## [124] digest_0.6.31                                     
## [125] assertthat_0.2.1                                  
## [126] mime_0.12                                         
## [127] rappdirs_0.3.3                                    
## [128] RSQLite_2.2.17                                    
## [129] yulab.utils_0.0.6                                 
## [130] future.apply_1.11.0                               
## [131] data.table_1.14.8                                 
## [132] blob_1.2.4                                        
## [133] preprocessCore_1.60.2                             
## [134] splines_4.2.2                                     
## [135] labeling_0.4.2                                    
## [136] Rhdf5lib_1.20.0                                   
## [137] illuminaio_0.40.0                                 
## [138] googledrive_2.0.0                                 
## [139] RaggedExperiment_1.18.0                           
## [140] RCurl_1.98-1.12                                   
## [141] broom_1.0.1                                       
## [142] hms_1.1.2                                         
## [143] modelr_0.1.9                                      
## [144] rhdf5_2.42.1                                      
## [145] colorspace_2.1-0                                  
## [146] aplot_0.1.7                                       
## [147] sass_0.4.6                                        
## [148] Rcpp_1.0.10                                       
## [149] mclust_6.0.0                                      
## [150] RANN_2.6.1                                        
## [151] enrichplot_1.14.2                                 
## [152] fansi_1.0.4                                       
## [153] tzdb_0.4.0                                        
## [154] parallelly_1.36.0                                 
## [155] R6_2.5.1                                          
## [156] grid_4.2.2                                        
## [157] ggridges_0.5.4                                    
## [158] lifecycle_1.0.3                                   
## [159] ggsignif_0.6.3                                    
## [160] curl_5.0.1                                        
## [161] googlesheets4_1.0.1                               
## [162] minqa_1.2.5                                       
## [163] leiden_0.4.3                                      
## [164] jquerylib_0.1.4                                   
## [165] DO.db_2.9                                         
## [166] qvalue_2.26.0                                     
## [167] RcppAnnoy_0.0.20                                  
## [168] RColorBrewer_1.1-3                                
## [169] spatstat.explore_3.1-0                            
## [170] htmlwidgets_1.5.4                                 
## [171] polyclip_1.10-4                                   
## [172] biomaRt_2.50.3                                    
## [173] missMethyl_1.28.0                                 
## [174] shadowtext_0.1.2                                  
## [175] timechange_0.2.0                                  
## [176] gridGraphics_0.5-1                                
## [177] reactome.db_1.77.0                                
## [178] rvest_1.0.3                                       
## [179] globals_0.16.2                                    
## [180] openssl_2.0.6                                     
## [181] spatstat.random_3.1-5                             
## [182] patchwork_1.1.2                                   
## [183] progressr_0.13.0                                  
## [184] codetools_0.2-19                                  
## [185] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [186] lubridate_1.9.2                                   
## [187] GO.db_3.14.0                                      
## [188] gtools_3.9.4                                      
## [189] prettyunits_1.1.1                                 
## [190] dbplyr_2.2.1                                      
## [191] gridBase_0.4-7                                    
## [192] gtable_0.3.3                                      
## [193] DBI_1.1.3                                         
## [194] tensor_1.5                                        
## [195] ggfun_0.0.7                                       
## [196] httr_1.4.6                                        
## [197] highr_0.10                                        
## [198] KernSmooth_2.23-21                                
## [199] stringi_1.7.12                                    
## [200] vroom_1.5.7                                       
## [201] progress_1.2.2                                    
## [202] reshape2_1.4.4                                    
## [203] farver_2.1.1                                      
## [204] annotate_1.72.0                                   
## [205] viridis_0.6.2                                     
## [206] hexbin_1.28.3                                     
## [207] ggtree_3.2.1                                      
## [208] xml2_1.3.4                                        
## [209] boot_1.3-28.1                                     
## [210] restfulr_0.0.15                                   
## [211] scattermore_0.8                                   
## [212] ggplotify_0.1.0                                   
## [213] bit_4.0.5                                         
## [214] spatstat.data_3.0-1                               
## [215] scatterpie_0.1.8                                  
## [216] ggraph_2.0.6                                      
## [217] pkgconfig_2.0.3                                   
## [218] gargle_1.5.0                                      
## [219] rstatix_0.7.0                                     
## [220] knitr_1.43

Clear

rm(list=ls())