This script loads in differential expression results and tests if genes within 100kb of adipose CpGs are differentially expressed in adipose.
Load packages
library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)
Load data
load("../GOTO_Data/GOTO_results-top-fat.Rdata")
Save significant CpGs
print(paste0("There are ",
nrow(top_cpgs),
" CpGs significant at 5% FDR level"))
## [1] "There are 230 CpGs significant at 5% FDR level"
cpg_top <- top_cpgs$cpg
Save gene locations (saved previously)
txdb <- makeTxDbFromEnsembl(organism = "Homo sapiens",
release = 75,
circ_seqs = NULL,
server = "ensembldb.ensembl.org",
username = "anonymous", password = NULL, port = 0L,
tx_attrib = NULL)
gene_range <- unlist(cdsBy(txdb, by = "gene"))
gene_range <- unique(gene_range)
save(gene_range, file='/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata)
Load gene_range
object
load('/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata')
gene_range
## GRanges object with 302587 ranges and 2 metadata columns:
## seqnames ranges strand | cds_id cds_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## ENSG00000000003 X 99885795-99885863 - | 717267 ENSP00000362111
## ENSG00000000003 X 99887482-99887565 - | 717268 ENSP00000362111
## ENSG00000000003 X 99888402-99888536 - | 717269 ENSP00000362111
## ENSG00000000003 X 99888928-99889026 - | 717270 ENSP00000362111
## ENSG00000000003 X 99890175-99890249 - | 717271 ENSP00000362111
## ... ... ... ... . ... ...
## LRG_97 LRG_97 17272-17334 + | 799400 LRG_97p1
## LRG_97 LRG_97 17876-18035 + | 799401 LRG_97p1
## LRG_97 LRG_97 22463-22593 + | 799402 LRG_97p1
## LRG_98 LRG_98 10293-13424 + | 799403 LRG_98p1
## LRG_99 LRG_99 9069-10652 + | 799404 LRG_99p1
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome
Make a GenomicRanges
object for the top CpGs
cpg_range <- GRanges(seqnames = top_cpgs$cpg_chr,
IRanges(
start = top_cpgs$cpg_start,
end = top_cpgs$cpg_end,
names = top_cpgs$cpg
))
cpg_range
## GRanges object with 230 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## cg02282833 5 32726188-32726190 *
## cg03689324 15 74426924-74426926 *
## cg02468230 15 79405155-79405157 *
## cg02848074 13 31306177-31306179 *
## cg25662390 17 26850032-26850034 *
## ... ... ... ...
## cg08156066 20 21695333-21695335 *
## cg12544951 20 21695342-21695344 *
## cg03475429 10 94982392-94982394 *
## cg01733176 4 111561069-111561071 *
## cg06524692 1 28864470-28864472 *
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
Initialize a distance matrix
distance.matrix <- matrix(
NaN,
nrow = length(gene_range),
ncol = length(cpg_range),
dimnames = list(names(gene_range),
names(cpg_range)))
Loop through CpGs
for(i in 1:nrow(top_cpgs)){
cpg <- cpg_range[i]
calc.distance <- distance(
cpg,
gene_range,
ignore.strand = T)
distance.matrix[,i] <- calc.distance
}
dim(distance.matrix)
## [1] 302587 230
distance.matrix <- as.data.frame(distance.matrix)
colnames(distance.matrix) <- names(cpg_range)
distance.matrix$gene <- names(gene_range)
Make a list of genes within 100kb of each cpg
gene_list <- lapply(names(cpg_range), function(x){
df <- (distance.matrix %>% dplyr::select(gene, all_of(x)))
colnames(df) <- c('gene', 'cpg')
df <- df %>% filter(cpg <= 100000)
if (nrow(df) > 0) {
return(data.frame(cpg = x,
gene_ens = unique(df$gene)))
}
})
gene_list[[1]]
## cpg gene_ens
## 1 cg02282833 ENSG00000113389
## 2 cg02282833 ENSG00000181495
Save gene symbols
ens2gene <- cinaR::grch37
Bind
df <- data.frame()
for(i in 1:length(gene_list)){
gene_df <- gene_list[[i]]
cpg <- gene_df$cpg[1]
genes <- unique(gene_df$gene_ens)
df <- rbind(df,
data.frame(cpg = cpg,
gene_ens = genes))
}
sym <- ens2gene$symbol[match(df$gene_ens,
ens2gene$ensgene)]
df$gene <- sym
Inspect
head(df)
## cpg gene_ens gene
## 1 cg02282833 ENSG00000113389 NPR3
## 2 cg02282833 ENSG00000181495 AC026703.1
## 3 cg03689324 ENSG00000129009 ISLR
## 4 cg03689324 ENSG00000137868 STRA6
## 5 cg03689324 ENSG00000140464 PML
## 6 cg03689324 ENSG00000140481 CCDC33
dim(df)
## [1] 550 3
Save
save(df, file='../GOTO_Data/DEGs/Fat-100kb.Rdata')
print(paste0("There are ", length(unique(df$gene_ens)), " unique genes within 100kb of ",
length(unique(df$cpg)), " CpGs."))
## [1] "There are 412 unique genes within 100kb of 201 CpGs."
Read in DEG results
deg <- read.table('../GOTO_Data/DEGs/DEGs_fat_postprandial_combined_new.txt')
Filter for those within 100kb
deg <- deg %>%
filter(GENE.SYMBOL %in% df$gene) %>%
dplyr::select(logFC, AveExpr, pvalue, gene = GENE.SYMBOL)
Adjust for multiple testing
deg$padj <- p.adjust(deg$pvalue, method='fdr')
Inspect top genes
deg <- deg %>% filter(padj <= 0.05)
head(deg)
## logFC AveExpr pvalue gene padj
## ENSG00000130203 0.4693372 9.239312 1.952693e-09 APOE 5.233216e-07
## ENSG00000099204 -0.1595299 7.502941 1.058650e-08 ABLIM1 1.418591e-06
## ENSG00000189337 -0.1865120 5.786929 7.980329e-08 KAZN 7.129094e-06
## ENSG00000130208 0.5193805 3.689601 2.371871e-07 APOC1 1.589154e-05
## ENSG00000109107 -0.3280902 7.380156 1.188488e-06 ALDOC 6.370293e-05
## ENSG00000171444 0.1420484 5.145873 4.298163e-06 MCC 1.919846e-04
unique(deg$gene)
## [1] "APOE" "ABLIM1" "KAZN" "APOC1" "ALDOC" "MCC" "PLEKHA1"
## [8] "RBM20" "MAP2K6" "UNC119" "TRDC" "NR2F1" "DMRT3" "HOXB8"
## [15] "GPR155" "DHX35" "OLA1" "STK3" "TOMM40" "KCNJ2" "ZNF780B"
## [22] "MED29" "EN1" "PAN2" "DSTYK" "MRPL35" "SLC15A4" "CS"
## [29] "ARID1A" "TTC39C" "PAF1" "ACR" "RPS16" "RCC1" "ALX1"
## [36] "PITX2" "PLEKHH2" "TREM1" "SAP30L" "IFNAR1" "HOXB3" "TGS1"
## [43] "NPR3" "PML" "HOXB4" "ZW10" "TRDJ1" "ARMC6" "LRIT3"
## [50] "AMN1" "WARS2" "OSBPL1A" "ABCA5" "IMMT" "HOXB2" "LRP12"
## [57] "ID2"
save(deg, file='../GOTO_Data/DEGs/Fat-DEG.Rdata')
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] circlize_0.4.15
## [2] ComplexHeatmap_2.14.0
## [3] RColorBrewer_1.1-3
## [4] pheatmap_1.0.12
## [5] clusterProfiler_4.2.2
## [6] AnnotationHub_3.2.2
## [7] BiocFileCache_2.2.1
## [8] dbplyr_2.2.1
## [9] cinaR_0.2.3
## [10] edgeR_3.40.2
## [11] ggpubr_0.4.0
## [12] GEOquery_2.62.2
## [13] MuSiC_0.2.0
## [14] nnls_1.4
## [15] gplots_3.1.3
## [16] plotly_4.10.1
## [17] SeuratObject_4.1.3
## [18] Seurat_4.3.0
## [19] gridExtra_2.3
## [20] lattice_0.21-8
## [21] bacon_1.22.0
## [22] ellipse_0.4.5
## [23] methylGSA_1.12.0
## [24] sva_3.42.0
## [25] genefilter_1.76.0
## [26] mgcv_1.8-42
## [27] nlme_3.1-162
## [28] limma_3.54.2
## [29] lmerTest_3.1-3
## [30] lme4_1.1-30
## [31] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [32] snpStats_1.44.0
## [33] survival_3.5-5
## [34] ggrepel_0.9.1
## [35] ggfortify_0.4.14
## [36] irlba_2.3.5.1
## [37] Matrix_1.5-4.1
## [38] omicsPrint_1.14.0
## [39] MASS_7.3-60
## [40] DNAmArray_2.0.0
## [41] pls_2.8-2
## [42] FDb.InfiniumMethylation.hg19_2.2.0
## [43] org.Hs.eg.db_3.14.0
## [44] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [45] GenomicFeatures_1.46.5
## [46] AnnotationDbi_1.56.2
## [47] IlluminaHumanMethylationEPICmanifest_0.3.0
## [48] minfi_1.40.0
## [49] bumphunter_1.36.0
## [50] locfit_1.5-9.8
## [51] iterators_1.0.14
## [52] foreach_1.5.2
## [53] Biostrings_2.62.0
## [54] XVector_0.34.0
## [55] SummarizedExperiment_1.24.0
## [56] Biobase_2.58.0
## [57] MatrixGenerics_1.10.0
## [58] matrixStats_1.0.0
## [59] GenomicRanges_1.46.1
## [60] GenomeInfoDb_1.34.9
## [61] IRanges_2.32.0
## [62] S4Vectors_0.36.2
## [63] BiocGenerics_0.44.0
## [64] BiocParallel_1.32.6
## [65] MethylAid_1.28.0
## [66] forcats_0.5.2
## [67] stringr_1.5.0
## [68] dplyr_1.1.3
## [69] purrr_0.3.4
## [70] readr_2.1.2
## [71] tidyr_1.2.1
## [72] tibble_3.2.1
## [73] ggplot2_3.4.3
## [74] tidyverse_1.3.2
## [75] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] graphlayouts_0.8.1
## [2] pbapply_1.7-0
## [3] haven_2.5.1
## [4] vctrs_0.6.3
## [5] beanplot_1.3.1
## [6] blob_1.2.4
## [7] spatstat.data_3.0-1
## [8] later_1.3.1
## [9] nloptr_2.0.3
## [10] DBI_1.1.3
## [11] rappdirs_0.3.3
## [12] uwot_0.1.14
## [13] zlibbioc_1.44.0
## [14] MatrixModels_0.5-1
## [15] GlobalOptions_0.1.2
## [16] htmlwidgets_1.5.4
## [17] future_1.32.0
## [18] leiden_0.4.3
## [19] illuminaio_0.40.0
## [20] tidygraph_1.2.2
## [21] Rcpp_1.0.10
## [22] KernSmooth_2.23-21
## [23] promises_1.2.0.1
## [24] DelayedArray_0.24.0
## [25] magick_2.7.4
## [26] fs_1.6.2
## [27] fastmatch_1.1-3
## [28] digest_0.6.31
## [29] png_0.1-8
## [30] nor1mix_1.3-0
## [31] sctransform_0.3.5
## [32] scatterpie_0.1.8
## [33] cowplot_1.1.1
## [34] DOSE_3.20.1
## [35] ggraph_2.0.6
## [36] pkgconfig_2.0.3
## [37] GO.db_3.14.0
## [38] gridBase_0.4-7
## [39] spatstat.random_3.1-5
## [40] DelayedMatrixStats_1.16.0
## [41] minqa_1.2.5
## [42] reticulate_1.30
## [43] GetoptLong_1.0.5
## [44] xfun_0.39
## [45] bslib_0.5.0
## [46] zoo_1.8-12
## [47] tidyselect_1.2.0
## [48] reshape2_1.4.4
## [49] ica_1.0-3
## [50] viridisLite_0.4.2
## [51] rtracklayer_1.54.0
## [52] rlang_1.1.1
## [53] hexbin_1.28.3
## [54] jquerylib_0.1.4
## [55] glue_1.6.2
## [56] modelr_0.1.9
## [57] ggsignif_0.6.3
## [58] labeling_0.4.2
## [59] SparseM_1.81
## [60] httpuv_1.6.11
## [61] preprocessCore_1.60.2
## [62] reactome.db_1.77.0
## [63] DO.db_2.9
## [64] annotate_1.72.0
## [65] jsonlite_1.8.5
## [66] bit_4.0.5
## [67] mime_0.12
## [68] Rsamtools_2.10.0
## [69] stringi_1.7.12
## [70] spatstat.sparse_3.0-1
## [71] scattermore_0.8
## [72] spatstat.explore_3.1-0
## [73] yulab.utils_0.0.6
## [74] quadprog_1.5-8
## [75] bitops_1.0-7
## [76] cli_3.6.1
## [77] rhdf5filters_1.10.1
## [78] RSQLite_2.2.17
## [79] data.table_1.14.8
## [80] timechange_0.2.0
## [81] rstudioapi_0.14
## [82] GenomicAlignments_1.30.0
## [83] qvalue_2.26.0
## [84] listenv_0.9.0
## [85] miniUI_0.1.1.1
## [86] gridGraphics_0.5-1
## [87] readxl_1.4.1
## [88] lifecycle_1.0.3
## [89] htm2txt_2.2.2
## [90] munsell_0.5.0
## [91] cellranger_1.1.0
## [92] caTools_1.18.2
## [93] codetools_0.2-19
## [94] coda_0.19-4
## [95] MultiAssayExperiment_1.20.0
## [96] lmtest_0.9-40
## [97] missMethyl_1.28.0
## [98] xtable_1.8-4
## [99] ROCR_1.0-11
## [100] googlesheets4_1.0.1
## [101] BiocManager_1.30.21
## [102] abind_1.4-5
## [103] farver_2.1.1
## [104] parallelly_1.36.0
## [105] RANN_2.6.1
## [106] aplot_0.1.7
## [107] askpass_1.1
## [108] ggtree_3.2.1
## [109] BiocIO_1.8.0
## [110] RcppAnnoy_0.0.20
## [111] goftest_1.2-3
## [112] patchwork_1.1.2
## [113] cluster_2.1.4
## [114] future.apply_1.11.0
## [115] tidytree_0.4.0
## [116] ellipsis_0.3.2
## [117] prettyunits_1.1.1
## [118] lubridate_1.9.2
## [119] ggridges_0.5.4
## [120] googledrive_2.0.0
## [121] reprex_2.0.2
## [122] mclust_6.0.0
## [123] igraph_1.4.3
## [124] multtest_2.50.0
## [125] fgsea_1.20.0
## [126] gargle_1.5.0
## [127] spatstat.utils_3.0-3
## [128] htmltools_0.5.5
## [129] yaml_2.3.7
## [130] utf8_1.2.3
## [131] MCMCpack_1.6-3
## [132] interactiveDisplayBase_1.32.0
## [133] XML_3.99-0.14
## [134] withr_2.5.0
## [135] fitdistrplus_1.1-11
## [136] bit64_4.0.5
## [137] rngtools_1.5.2
## [138] doRNG_1.8.6
## [139] progressr_0.13.0
## [140] GOSemSim_2.20.0
## [141] memoise_2.0.1
## [142] evaluate_0.21
## [143] tzdb_0.4.0
## [144] curl_5.0.1
## [145] fansi_1.0.4
## [146] highr_0.10
## [147] tensor_1.5
## [148] cachem_1.0.8
## [149] deldir_1.0-9
## [150] rjson_0.2.21
## [151] rstatix_0.7.0
## [152] clue_0.3-64
## [153] tools_4.2.2
## [154] sass_0.4.6
## [155] magrittr_2.0.3
## [156] RCurl_1.98-1.12
## [157] car_3.1-0
## [158] ape_5.7-1
## [159] ggplotify_0.1.0
## [160] xml2_1.3.4
## [161] httr_1.4.6
## [162] assertthat_0.2.1
## [163] boot_1.3-28.1
## [164] globals_0.16.2
## [165] R6_2.5.1
## [166] Rhdf5lib_1.20.0
## [167] progress_1.2.2
## [168] KEGGREST_1.34.0
## [169] treeio_1.18.1
## [170] shape_1.4.6
## [171] gtools_3.9.4
## [172] statmod_1.5.0
## [173] BiocVersion_3.16.0
## [174] HDF5Array_1.22.1
## [175] rhdf5_2.42.1
## [176] splines_4.2.2
## [177] carData_3.0-5
## [178] ggfun_0.0.7
## [179] colorspace_2.1-0
## [180] generics_0.1.3
## [181] RobustRankAggreg_1.2.1
## [182] pillar_1.9.0
## [183] tweenr_2.0.2
## [184] sp_1.6-1
## [185] GenomeInfoDbData_1.2.9
## [186] plyr_1.8.8
## [187] gtable_0.3.3
## [188] rvest_1.0.3
## [189] restfulr_0.0.15
## [190] knitr_1.43
## [191] shadowtext_0.1.2
## [192] biomaRt_2.50.3
## [193] fastmap_1.1.1
## [194] Cairo_1.6-0
## [195] doParallel_1.0.17
## [196] quantreg_5.94
## [197] broom_1.0.1
## [198] openssl_2.0.6
## [199] scales_1.2.1
## [200] filelock_1.0.2
## [201] backports_1.4.1
## [202] RaggedExperiment_1.18.0
## [203] base64_2.0.1
## [204] vroom_1.5.7
## [205] enrichplot_1.14.2
## [206] mcmc_0.9-7
## [207] hms_1.1.2
## [208] ggforce_0.3.4
## [209] scrime_1.3.5
## [210] Rtsne_0.16
## [211] shiny_1.7.2
## [212] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [213] polyclip_1.10-4
## [214] numDeriv_2016.8-1.1
## [215] siggenes_1.68.0
## [216] lazyeval_0.2.2
## [217] crayon_1.5.2
## [218] downloader_0.4
## [219] sparseMatrixStats_1.10.0
## [220] viridis_0.6.2
## [221] reshape_0.8.9
## [222] compiler_4.2.2
## [223] spatstat.geom_3.2-1
Clear
rm(list=ls())