This script loads in differential expression results and tests if genes within 100kb of adipose CpGs are differentially expressed in adipose.


Setup

Load packages

library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)

Load data

load("../GOTO_Data/GOTO_results-top-fat.Rdata")

Genes within 100kb

Save significant CpGs

print(paste0("There are ", 
             nrow(top_cpgs), 
             " CpGs significant at 5% FDR level"))
## [1] "There are 230 CpGs significant at 5% FDR level"
cpg_top <- top_cpgs$cpg

Save gene locations (saved previously)

txdb <- makeTxDbFromEnsembl(organism = "Homo sapiens",
                            release = 75,
                            circ_seqs = NULL,
                            server = "ensembldb.ensembl.org",
                            username = "anonymous", password = NULL, port = 0L,
                            tx_attrib = NULL)

gene_range <- unlist(cdsBy(txdb, by = "gene"))
gene_range <- unique(gene_range)

save(gene_range, file='/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata)

Load gene_range object

load('/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata')
gene_range
## GRanges object with 302587 ranges and 2 metadata columns:
##                   seqnames            ranges strand |    cds_id        cds_name
##                      <Rle>         <IRanges>  <Rle> | <integer>     <character>
##   ENSG00000000003        X 99885795-99885863      - |    717267 ENSP00000362111
##   ENSG00000000003        X 99887482-99887565      - |    717268 ENSP00000362111
##   ENSG00000000003        X 99888402-99888536      - |    717269 ENSP00000362111
##   ENSG00000000003        X 99888928-99889026      - |    717270 ENSP00000362111
##   ENSG00000000003        X 99890175-99890249      - |    717271 ENSP00000362111
##               ...      ...               ...    ... .       ...             ...
##            LRG_97   LRG_97       17272-17334      + |    799400        LRG_97p1
##            LRG_97   LRG_97       17876-18035      + |    799401        LRG_97p1
##            LRG_97   LRG_97       22463-22593      + |    799402        LRG_97p1
##            LRG_98   LRG_98       10293-13424      + |    799403        LRG_98p1
##            LRG_99   LRG_99        9069-10652      + |    799404        LRG_99p1
##   -------
##   seqinfo: 722 sequences (1 circular) from an unspecified genome

Make a GenomicRanges object for the top CpGs

cpg_range <- GRanges(seqnames = top_cpgs$cpg_chr,
        IRanges(
          start = top_cpgs$cpg_start,
          end = top_cpgs$cpg_end,
          names = top_cpgs$cpg
        ))

cpg_range
## GRanges object with 230 ranges and 0 metadata columns:
##              seqnames              ranges strand
##                 <Rle>           <IRanges>  <Rle>
##   cg02282833        5   32726188-32726190      *
##   cg03689324       15   74426924-74426926      *
##   cg02468230       15   79405155-79405157      *
##   cg02848074       13   31306177-31306179      *
##   cg25662390       17   26850032-26850034      *
##          ...      ...                 ...    ...
##   cg08156066       20   21695333-21695335      *
##   cg12544951       20   21695342-21695344      *
##   cg03475429       10   94982392-94982394      *
##   cg01733176        4 111561069-111561071      *
##   cg06524692        1   28864470-28864472      *
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths

Initialize a distance matrix

distance.matrix <- matrix(
  NaN, 
  nrow = length(gene_range), 
  ncol = length(cpg_range), 
  dimnames = list(names(gene_range), 
                  names(cpg_range)))

Loop through CpGs

for(i in 1:nrow(top_cpgs)){
    cpg <- cpg_range[i]
    calc.distance <- distance(
      cpg, 
      gene_range, 
      ignore.strand = T)
    distance.matrix[,i] <- calc.distance
}

dim(distance.matrix)
## [1] 302587    230
distance.matrix <- as.data.frame(distance.matrix)
colnames(distance.matrix) <- names(cpg_range)
distance.matrix$gene <- names(gene_range)

Make a list of genes within 100kb of each cpg

gene_list <- lapply(names(cpg_range), function(x){
  df <- (distance.matrix %>% dplyr::select(gene, all_of(x)))
  colnames(df) <- c('gene', 'cpg')
  df <- df %>% filter(cpg <= 100000)
  if (nrow(df) > 0) {
    return(data.frame(cpg = x,
                      gene_ens = unique(df$gene)))
  }
})
gene_list[[1]]
##          cpg        gene_ens
## 1 cg02282833 ENSG00000113389
## 2 cg02282833 ENSG00000181495

Save gene symbols

ens2gene <- cinaR::grch37

Bind

df <- data.frame()

for(i in 1:length(gene_list)){
  gene_df <- gene_list[[i]]
  
  cpg <- gene_df$cpg[1]
  genes <- unique(gene_df$gene_ens)
  
  df <- rbind(df,
              data.frame(cpg = cpg,
                         gene_ens = genes))
}

sym <- ens2gene$symbol[match(df$gene_ens,
                             ens2gene$ensgene)]

df$gene <- sym

Inspect

head(df)
##          cpg        gene_ens       gene
## 1 cg02282833 ENSG00000113389       NPR3
## 2 cg02282833 ENSG00000181495 AC026703.1
## 3 cg03689324 ENSG00000129009       ISLR
## 4 cg03689324 ENSG00000137868      STRA6
## 5 cg03689324 ENSG00000140464        PML
## 6 cg03689324 ENSG00000140481     CCDC33
dim(df)
## [1] 550   3

Save

save(df, file='../GOTO_Data/DEGs/Fat-100kb.Rdata')

DEGs

print(paste0("There are ", length(unique(df$gene_ens)), " unique genes within 100kb of ",
             length(unique(df$cpg)), " CpGs."))
## [1] "There are 412 unique genes within 100kb of 201 CpGs."

Read in DEG results

deg <- read.table('../GOTO_Data/DEGs/DEGs_fat_postprandial_combined_new.txt')

Filter for those within 100kb

deg <- deg %>% 
  filter(GENE.SYMBOL %in% df$gene) %>% 
  dplyr::select(logFC, AveExpr, pvalue, gene = GENE.SYMBOL)

Adjust for multiple testing

deg$padj <- p.adjust(deg$pvalue, method='fdr')

Inspect top genes

deg <- deg %>% filter(padj <= 0.05)
head(deg)
##                      logFC  AveExpr       pvalue   gene         padj
## ENSG00000130203  0.4693372 9.239312 1.952693e-09   APOE 5.233216e-07
## ENSG00000099204 -0.1595299 7.502941 1.058650e-08 ABLIM1 1.418591e-06
## ENSG00000189337 -0.1865120 5.786929 7.980329e-08   KAZN 7.129094e-06
## ENSG00000130208  0.5193805 3.689601 2.371871e-07  APOC1 1.589154e-05
## ENSG00000109107 -0.3280902 7.380156 1.188488e-06  ALDOC 6.370293e-05
## ENSG00000171444  0.1420484 5.145873 4.298163e-06    MCC 1.919846e-04
unique(deg$gene)
##  [1] "APOE"    "ABLIM1"  "KAZN"    "APOC1"   "ALDOC"   "MCC"     "PLEKHA1"
##  [8] "RBM20"   "MAP2K6"  "UNC119"  "TRDC"    "NR2F1"   "DMRT3"   "HOXB8"  
## [15] "GPR155"  "DHX35"   "OLA1"    "STK3"    "TOMM40"  "KCNJ2"   "ZNF780B"
## [22] "MED29"   "EN1"     "PAN2"    "DSTYK"   "MRPL35"  "SLC15A4" "CS"     
## [29] "ARID1A"  "TTC39C"  "PAF1"    "ACR"     "RPS16"   "RCC1"    "ALX1"   
## [36] "PITX2"   "PLEKHH2" "TREM1"   "SAP30L"  "IFNAR1"  "HOXB3"   "TGS1"   
## [43] "NPR3"    "PML"     "HOXB4"   "ZW10"    "TRDJ1"   "ARMC6"   "LRIT3"  
## [50] "AMN1"    "WARS2"   "OSBPL1A" "ABCA5"   "IMMT"    "HOXB2"   "LRP12"  
## [57] "ID2"

Save

save(deg, file='../GOTO_Data/DEGs/Fat-DEG.Rdata')

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] circlize_0.4.15                                    
##  [2] ComplexHeatmap_2.14.0                              
##  [3] RColorBrewer_1.1-3                                 
##  [4] pheatmap_1.0.12                                    
##  [5] clusterProfiler_4.2.2                              
##  [6] AnnotationHub_3.2.2                                
##  [7] BiocFileCache_2.2.1                                
##  [8] dbplyr_2.2.1                                       
##  [9] cinaR_0.2.3                                        
## [10] edgeR_3.40.2                                       
## [11] ggpubr_0.4.0                                       
## [12] GEOquery_2.62.2                                    
## [13] MuSiC_0.2.0                                        
## [14] nnls_1.4                                           
## [15] gplots_3.1.3                                       
## [16] plotly_4.10.1                                      
## [17] SeuratObject_4.1.3                                 
## [18] Seurat_4.3.0                                       
## [19] gridExtra_2.3                                      
## [20] lattice_0.21-8                                     
## [21] bacon_1.22.0                                       
## [22] ellipse_0.4.5                                      
## [23] methylGSA_1.12.0                                   
## [24] sva_3.42.0                                         
## [25] genefilter_1.76.0                                  
## [26] mgcv_1.8-42                                        
## [27] nlme_3.1-162                                       
## [28] limma_3.54.2                                       
## [29] lmerTest_3.1-3                                     
## [30] lme4_1.1-30                                        
## [31] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [32] snpStats_1.44.0                                    
## [33] survival_3.5-5                                     
## [34] ggrepel_0.9.1                                      
## [35] ggfortify_0.4.14                                   
## [36] irlba_2.3.5.1                                      
## [37] Matrix_1.5-4.1                                     
## [38] omicsPrint_1.14.0                                  
## [39] MASS_7.3-60                                        
## [40] DNAmArray_2.0.0                                    
## [41] pls_2.8-2                                          
## [42] FDb.InfiniumMethylation.hg19_2.2.0                 
## [43] org.Hs.eg.db_3.14.0                                
## [44] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [45] GenomicFeatures_1.46.5                             
## [46] AnnotationDbi_1.56.2                               
## [47] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [48] minfi_1.40.0                                       
## [49] bumphunter_1.36.0                                  
## [50] locfit_1.5-9.8                                     
## [51] iterators_1.0.14                                   
## [52] foreach_1.5.2                                      
## [53] Biostrings_2.62.0                                  
## [54] XVector_0.34.0                                     
## [55] SummarizedExperiment_1.24.0                        
## [56] Biobase_2.58.0                                     
## [57] MatrixGenerics_1.10.0                              
## [58] matrixStats_1.0.0                                  
## [59] GenomicRanges_1.46.1                               
## [60] GenomeInfoDb_1.34.9                                
## [61] IRanges_2.32.0                                     
## [62] S4Vectors_0.36.2                                   
## [63] BiocGenerics_0.44.0                                
## [64] BiocParallel_1.32.6                                
## [65] MethylAid_1.28.0                                   
## [66] forcats_0.5.2                                      
## [67] stringr_1.5.0                                      
## [68] dplyr_1.1.3                                        
## [69] purrr_0.3.4                                        
## [70] readr_2.1.2                                        
## [71] tidyr_1.2.1                                        
## [72] tibble_3.2.1                                       
## [73] ggplot2_3.4.3                                      
## [74] tidyverse_1.3.2                                    
## [75] rmarkdown_2.16                                     
## 
## loaded via a namespace (and not attached):
##   [1] graphlayouts_0.8.1                                
##   [2] pbapply_1.7-0                                     
##   [3] haven_2.5.1                                       
##   [4] vctrs_0.6.3                                       
##   [5] beanplot_1.3.1                                    
##   [6] blob_1.2.4                                        
##   [7] spatstat.data_3.0-1                               
##   [8] later_1.3.1                                       
##   [9] nloptr_2.0.3                                      
##  [10] DBI_1.1.3                                         
##  [11] rappdirs_0.3.3                                    
##  [12] uwot_0.1.14                                       
##  [13] zlibbioc_1.44.0                                   
##  [14] MatrixModels_0.5-1                                
##  [15] GlobalOptions_0.1.2                               
##  [16] htmlwidgets_1.5.4                                 
##  [17] future_1.32.0                                     
##  [18] leiden_0.4.3                                      
##  [19] illuminaio_0.40.0                                 
##  [20] tidygraph_1.2.2                                   
##  [21] Rcpp_1.0.10                                       
##  [22] KernSmooth_2.23-21                                
##  [23] promises_1.2.0.1                                  
##  [24] DelayedArray_0.24.0                               
##  [25] magick_2.7.4                                      
##  [26] fs_1.6.2                                          
##  [27] fastmatch_1.1-3                                   
##  [28] digest_0.6.31                                     
##  [29] png_0.1-8                                         
##  [30] nor1mix_1.3-0                                     
##  [31] sctransform_0.3.5                                 
##  [32] scatterpie_0.1.8                                  
##  [33] cowplot_1.1.1                                     
##  [34] DOSE_3.20.1                                       
##  [35] ggraph_2.0.6                                      
##  [36] pkgconfig_2.0.3                                   
##  [37] GO.db_3.14.0                                      
##  [38] gridBase_0.4-7                                    
##  [39] spatstat.random_3.1-5                             
##  [40] DelayedMatrixStats_1.16.0                         
##  [41] minqa_1.2.5                                       
##  [42] reticulate_1.30                                   
##  [43] GetoptLong_1.0.5                                  
##  [44] xfun_0.39                                         
##  [45] bslib_0.5.0                                       
##  [46] zoo_1.8-12                                        
##  [47] tidyselect_1.2.0                                  
##  [48] reshape2_1.4.4                                    
##  [49] ica_1.0-3                                         
##  [50] viridisLite_0.4.2                                 
##  [51] rtracklayer_1.54.0                                
##  [52] rlang_1.1.1                                       
##  [53] hexbin_1.28.3                                     
##  [54] jquerylib_0.1.4                                   
##  [55] glue_1.6.2                                        
##  [56] modelr_0.1.9                                      
##  [57] ggsignif_0.6.3                                    
##  [58] labeling_0.4.2                                    
##  [59] SparseM_1.81                                      
##  [60] httpuv_1.6.11                                     
##  [61] preprocessCore_1.60.2                             
##  [62] reactome.db_1.77.0                                
##  [63] DO.db_2.9                                         
##  [64] annotate_1.72.0                                   
##  [65] jsonlite_1.8.5                                    
##  [66] bit_4.0.5                                         
##  [67] mime_0.12                                         
##  [68] Rsamtools_2.10.0                                  
##  [69] stringi_1.7.12                                    
##  [70] spatstat.sparse_3.0-1                             
##  [71] scattermore_0.8                                   
##  [72] spatstat.explore_3.1-0                            
##  [73] yulab.utils_0.0.6                                 
##  [74] quadprog_1.5-8                                    
##  [75] bitops_1.0-7                                      
##  [76] cli_3.6.1                                         
##  [77] rhdf5filters_1.10.1                               
##  [78] RSQLite_2.2.17                                    
##  [79] data.table_1.14.8                                 
##  [80] timechange_0.2.0                                  
##  [81] rstudioapi_0.14                                   
##  [82] GenomicAlignments_1.30.0                          
##  [83] qvalue_2.26.0                                     
##  [84] listenv_0.9.0                                     
##  [85] miniUI_0.1.1.1                                    
##  [86] gridGraphics_0.5-1                                
##  [87] readxl_1.4.1                                      
##  [88] lifecycle_1.0.3                                   
##  [89] htm2txt_2.2.2                                     
##  [90] munsell_0.5.0                                     
##  [91] cellranger_1.1.0                                  
##  [92] caTools_1.18.2                                    
##  [93] codetools_0.2-19                                  
##  [94] coda_0.19-4                                       
##  [95] MultiAssayExperiment_1.20.0                       
##  [96] lmtest_0.9-40                                     
##  [97] missMethyl_1.28.0                                 
##  [98] xtable_1.8-4                                      
##  [99] ROCR_1.0-11                                       
## [100] googlesheets4_1.0.1                               
## [101] BiocManager_1.30.21                               
## [102] abind_1.4-5                                       
## [103] farver_2.1.1                                      
## [104] parallelly_1.36.0                                 
## [105] RANN_2.6.1                                        
## [106] aplot_0.1.7                                       
## [107] askpass_1.1                                       
## [108] ggtree_3.2.1                                      
## [109] BiocIO_1.8.0                                      
## [110] RcppAnnoy_0.0.20                                  
## [111] goftest_1.2-3                                     
## [112] patchwork_1.1.2                                   
## [113] cluster_2.1.4                                     
## [114] future.apply_1.11.0                               
## [115] tidytree_0.4.0                                    
## [116] ellipsis_0.3.2                                    
## [117] prettyunits_1.1.1                                 
## [118] lubridate_1.9.2                                   
## [119] ggridges_0.5.4                                    
## [120] googledrive_2.0.0                                 
## [121] reprex_2.0.2                                      
## [122] mclust_6.0.0                                      
## [123] igraph_1.4.3                                      
## [124] multtest_2.50.0                                   
## [125] fgsea_1.20.0                                      
## [126] gargle_1.5.0                                      
## [127] spatstat.utils_3.0-3                              
## [128] htmltools_0.5.5                                   
## [129] yaml_2.3.7                                        
## [130] utf8_1.2.3                                        
## [131] MCMCpack_1.6-3                                    
## [132] interactiveDisplayBase_1.32.0                     
## [133] XML_3.99-0.14                                     
## [134] withr_2.5.0                                       
## [135] fitdistrplus_1.1-11                               
## [136] bit64_4.0.5                                       
## [137] rngtools_1.5.2                                    
## [138] doRNG_1.8.6                                       
## [139] progressr_0.13.0                                  
## [140] GOSemSim_2.20.0                                   
## [141] memoise_2.0.1                                     
## [142] evaluate_0.21                                     
## [143] tzdb_0.4.0                                        
## [144] curl_5.0.1                                        
## [145] fansi_1.0.4                                       
## [146] highr_0.10                                        
## [147] tensor_1.5                                        
## [148] cachem_1.0.8                                      
## [149] deldir_1.0-9                                      
## [150] rjson_0.2.21                                      
## [151] rstatix_0.7.0                                     
## [152] clue_0.3-64                                       
## [153] tools_4.2.2                                       
## [154] sass_0.4.6                                        
## [155] magrittr_2.0.3                                    
## [156] RCurl_1.98-1.12                                   
## [157] car_3.1-0                                         
## [158] ape_5.7-1                                         
## [159] ggplotify_0.1.0                                   
## [160] xml2_1.3.4                                        
## [161] httr_1.4.6                                        
## [162] assertthat_0.2.1                                  
## [163] boot_1.3-28.1                                     
## [164] globals_0.16.2                                    
## [165] R6_2.5.1                                          
## [166] Rhdf5lib_1.20.0                                   
## [167] progress_1.2.2                                    
## [168] KEGGREST_1.34.0                                   
## [169] treeio_1.18.1                                     
## [170] shape_1.4.6                                       
## [171] gtools_3.9.4                                      
## [172] statmod_1.5.0                                     
## [173] BiocVersion_3.16.0                                
## [174] HDF5Array_1.22.1                                  
## [175] rhdf5_2.42.1                                      
## [176] splines_4.2.2                                     
## [177] carData_3.0-5                                     
## [178] ggfun_0.0.7                                       
## [179] colorspace_2.1-0                                  
## [180] generics_0.1.3                                    
## [181] RobustRankAggreg_1.2.1                            
## [182] pillar_1.9.0                                      
## [183] tweenr_2.0.2                                      
## [184] sp_1.6-1                                          
## [185] GenomeInfoDbData_1.2.9                            
## [186] plyr_1.8.8                                        
## [187] gtable_0.3.3                                      
## [188] rvest_1.0.3                                       
## [189] restfulr_0.0.15                                   
## [190] knitr_1.43                                        
## [191] shadowtext_0.1.2                                  
## [192] biomaRt_2.50.3                                    
## [193] fastmap_1.1.1                                     
## [194] Cairo_1.6-0                                       
## [195] doParallel_1.0.17                                 
## [196] quantreg_5.94                                     
## [197] broom_1.0.1                                       
## [198] openssl_2.0.6                                     
## [199] scales_1.2.1                                      
## [200] filelock_1.0.2                                    
## [201] backports_1.4.1                                   
## [202] RaggedExperiment_1.18.0                           
## [203] base64_2.0.1                                      
## [204] vroom_1.5.7                                       
## [205] enrichplot_1.14.2                                 
## [206] mcmc_0.9-7                                        
## [207] hms_1.1.2                                         
## [208] ggforce_0.3.4                                     
## [209] scrime_1.3.5                                      
## [210] Rtsne_0.16                                        
## [211] shiny_1.7.2                                       
## [212] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [213] polyclip_1.10-4                                   
## [214] numDeriv_2016.8-1.1                               
## [215] siggenes_1.68.0                                   
## [216] lazyeval_0.2.2                                    
## [217] crayon_1.5.2                                      
## [218] downloader_0.4                                    
## [219] sparseMatrixStats_1.10.0                          
## [220] viridis_0.6.2                                     
## [221] reshape_0.8.9                                     
## [222] compiler_4.2.2                                    
## [223] spatstat.geom_3.2-1

Clear

rm(list=ls())