This script loads in differential expression results and tests if genes within 100kb of muscle CpGs are differentially expressed in muscle.
Load packages
library(tidyverse)
library(ggrepel)
library(GenomicRanges)
library(ggpubr)
library(DNAmArray)
library(MASS)
Load data
load("../GOTO_Data/GOTO_results-top-muscle-adj.Rdata")
Save significant CpGs
print(paste0("There are ",
nrow(top_cpgs),
" CpGs significant at 5% FDR level"))
## [1] "There are 162 CpGs significant at 5% FDR level"
cpg_top <- top_cpgs$cpg
Save gene locations (saved previously)
txdb <- makeTxDbFromEnsembl(organism = "Homo sapiens",
release = 75,
circ_seqs = NULL,
server = "ensembldb.ensembl.org",
username = "anonymous", password = NULL, port = 0L,
tx_attrib = NULL)
gene_range <- unlist(cdsBy(txdb, by = "gene"))
gene_range <- unique(gene_range)
save(gene_range, file='/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata)
Load gene_range
object
load('/exports/molepi/users/ljsinke/LLS/Shared_Data/allGeneRanges.Rdata')
gene_range
## GRanges object with 302587 ranges and 2 metadata columns:
## seqnames ranges strand | cds_id cds_name
## <Rle> <IRanges> <Rle> | <integer> <character>
## ENSG00000000003 X 99885795-99885863 - | 717267 ENSP00000362111
## ENSG00000000003 X 99887482-99887565 - | 717268 ENSP00000362111
## ENSG00000000003 X 99888402-99888536 - | 717269 ENSP00000362111
## ENSG00000000003 X 99888928-99889026 - | 717270 ENSP00000362111
## ENSG00000000003 X 99890175-99890249 - | 717271 ENSP00000362111
## ... ... ... ... . ... ...
## LRG_97 LRG_97 17272-17334 + | 799400 LRG_97p1
## LRG_97 LRG_97 17876-18035 + | 799401 LRG_97p1
## LRG_97 LRG_97 22463-22593 + | 799402 LRG_97p1
## LRG_98 LRG_98 10293-13424 + | 799403 LRG_98p1
## LRG_99 LRG_99 9069-10652 + | 799404 LRG_99p1
## -------
## seqinfo: 722 sequences (1 circular) from an unspecified genome
Make a GenomicRanges
object for the top CpGs
cpg_range <- GRanges(seqnames = top_cpgs$cpg_chr,
IRanges(
start = top_cpgs$cpg_start,
end = top_cpgs$cpg_end,
names = top_cpgs$cpg
))
cpg_range
## GRanges object with 162 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## cg09563940 7 130873557-130873559 *
## cg26998535 10 103699236-103699238 *
## cg03045139 5 77826273-77826275 *
## cg19003556 6 136399169-136399171 *
## cg00353432 7 139372971-139372973 *
## ... ... ... ...
## cg21342383 10 127661991-127661993 *
## cg01668986 8 21541541-21541543 *
## cg05008948 7 99160713-99160715 *
## cg24161080 18 33889786-33889788 *
## cg12394201 11 43942417-43942419 *
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
Initialize a distance matrix
distance.matrix <- matrix(
NaN,
nrow = length(gene_range),
ncol = length(cpg_range),
dimnames = list(names(gene_range),
names(cpg_range)))
Loop through CpGs
for(i in 1:nrow(top_cpgs)){
cpg <- cpg_range[i]
calc.distance <- distance(
cpg,
gene_range,
ignore.strand = T)
distance.matrix[,i] <- calc.distance
}
dim(distance.matrix)
## [1] 302587 162
distance.matrix <- as.data.frame(distance.matrix)
colnames(distance.matrix) <- names(cpg_range)
distance.matrix$gene <- names(gene_range)
Make a list of genes within 100kb of each cpg
gene_list <- lapply(names(cpg_range), function(x){
df <- (distance.matrix %>% dplyr::select(gene, all_of(x)))
colnames(df) <- c('gene', 'cpg')
df <- df %>% filter(cpg <= 100000)
if (nrow(df) > 0) {
return(data.frame(cpg = x,
gene_ens = unique(df$gene)))
}
})
Save gene symbols
ens2gene <- cinaR::grch37
Bind
df <- data.frame()
for(i in 1:length(gene_list)){
gene_df <- gene_list[[i]]
cpg <- gene_df$cpg[1]
genes <- unique(gene_df$gene_ens)
df <- rbind(df,
data.frame(cpg = cpg,
gene_ens = genes))
}
sym <- ens2gene$symbol[match(df$gene_ens,
ens2gene$ensgene)]
df$gene <- sym
Inspect
head(df)
## cpg gene_ens gene
## 1 cg26998535 ENSG00000120029 C10orf76
## 2 cg26998535 ENSG00000120049 KCNIP2
## 3 cg03045139 ENSG00000085365 SCAMP1
## 4 cg03045139 ENSG00000145685 LHFPL2
## 5 cg19003556 ENSG00000171408 PDE7B
## 6 cg00353432 ENSG00000064393 HIPK2
dim(df)
## [1] 474 3
Save
save(df, file='../GOTO_Data/DEGs/Muscle-100kb.Rdata')
print(paste0("There are ", length(unique(df$gene_ens)), " unique genes within 100kb of ",
length(unique(df$cpg)), " CpGs."))
## [1] "There are 454 unique genes within 100kb of 149 CpGs."
Read in DEG results
deg <- read.table('../GOTO_Data/DEGs/DEGs_muscle_postprandial_combined_new.txt')
Filter for those within 100kb
deg <- deg %>%
filter(GENE.SYMBOL %in% df$gene) %>%
select(logFC, AveExpr, pvalue, gene = GENE.SYMBOL)
Adjust for multiple testing
deg$padj <- p.adjust(deg$pvalue, method='fdr')
Inspect top genes
deg <- deg %>% filter(padj <= 0.05)
head(deg)
## logFC AveExpr pvalue gene padj
## ENSG00000117594 -0.4646230 0.9101785 2.070661e-09 HSD11B1 3.788328e-07
## ENSG00000127527 -0.2020087 5.2722313 3.077723e-09 EPS15L1 3.788328e-07
## ENSG00000169692 -0.2709130 3.5211115 3.284677e-09 AGPAT2 3.788328e-07
## ENSG00000055208 -0.1768428 6.3665159 2.927471e-08 TAB2 2.070958e-06
## ENSG00000104221 -0.2097496 3.1564529 2.992713e-08 BRF2 2.070958e-06
## ENSG00000126091 -0.2469463 5.9603147 3.682312e-08 ST3GAL3 2.123467e-06
unique(deg$gene)
## [1] "HSD11B1" "EPS15L1" "AGPAT2" "TAB2" "BRF2" "ST3GAL3"
## [7] "LRRC20" "FLII" "FYN" "INPP5A" "LHFPL2" "RUNX1"
## [13] "DPP9" "MCFD2" "TMOD3" "RBMS1" "CRKL" "NCOR2"
## [19] "FDFT1" "FSCN1" "FAM96B" "UBQLN4" "PSMD3" "KIAA0895L"
## [25] "THAP7" "ATP1A1" "FAM220A" "FHOD1" "C10orf76" "SRL"
## [31] "PMF1" "RNF216" "CAPRIN2" "PROSC" "ADAM12" "PLXND1"
## [37] "RAC1" "MDH2" "GRB10" "RPRD1B" "DHX32" "TMOD2"
## [43] "TMEM120A" "SLC35E4" "LEO1" "EXOC3L1" "ZFP36L1" "PPIF"
## [49] "DLL1" "PCDHGC3" "MRPS34" "ZNF143" "PES1" "CHRND"
## [55] "CD59" "SRC" "GSDMB" "ARHGEF17" "SERBP1" "CDH24"
## [61] "ALKBH3" "ZNF219" "ZNF703" "EXTL3" "HAGH" "ARHGEF40"
## [67] "PSMB5" "ALKBH5" "PLEKHG4" "ZNF394" "DRG2" "HEG1"
## [73] "GGA1" "RAPGEF1" "GINM1" "THBS2" "TOP2A" "FBXO3"
## [79] "MAPK8IP3" "PCDHGA12" "PSMA6" "ZMIZ1" "TTC7A" "LINC01119"
## [85] "FPR3" "SLFN11" "TRAF6" "FAHD1" "FKBP5" "UCK2"
## [91] "EME2" "ZNF789" "NUBP2" "PCDHGA2" "PCDHGC5" "TAF2"
## [97] "HRH1" "JMJD1C" "CHSY1" "SWAP70" "ATP5J2" "E2F4"
## [103] "CEP68" "C11orf96" "SPSB3" "GID4" "UNC45B" "ZNF655"
## [109] "LRRK1" "KIFC3" "EDEM3" "SPCS2" "CHRNG" "NET1"
## [115] "C14orf159" "PPID" "SEMA6B" "FAM129A" "FPR2" "DEPTOR"
## [121] "CPQ" "RPS6KA5" "AP1M1" "ADAM19" "PPARGC1B" "CAPN2"
## [127] "ID4" "FAM193B" "DDC" "CLEC3B" "P2RY6" "DBN1"
## [133] "CAMK2D" "LCOR" "RPL26L1" "TMCC1" "ATP6V0E1" "LZTR1"
## [139] "LRG1" "SRP9" "MED24" "GAL3ST1" "LRRC55" "SCAMP1"
## [145] "C5orf66" "WSCD1" "THRB" "REV3L" "FPR1" "PAMR1"
## [151] "GPR68" "EEPD1" "KATNB1" "FAM120B" "PTCD1" "SLC9A5"
## [157] "C9orf3" "ZKSCAN5" "ABCA8" "DDX41" "INTS9" "RPN2"
## [163] "H2AFY" "TRAF3IP2" "GLIS2" "FAM213A"
save(deg, file='../GOTO_Data/DEGs/Muscle-DEG.Rdata')
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.10 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggpubr_0.4.0
## [2] GEOquery_2.62.2
## [3] MuSiC_0.2.0
## [4] nnls_1.4
## [5] gplots_3.1.3
## [6] plotly_4.10.1
## [7] SeuratObject_4.1.3
## [8] Seurat_4.3.0
## [9] gridExtra_2.3
## [10] lattice_0.21-8
## [11] bacon_1.22.0
## [12] ellipse_0.4.5
## [13] methylGSA_1.12.0
## [14] sva_3.42.0
## [15] genefilter_1.76.0
## [16] mgcv_1.8-42
## [17] nlme_3.1-162
## [18] limma_3.54.2
## [19] lmerTest_3.1-3
## [20] lme4_1.1-30
## [21] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [22] snpStats_1.44.0
## [23] survival_3.5-5
## [24] ggrepel_0.9.1
## [25] ggfortify_0.4.14
## [26] irlba_2.3.5.1
## [27] Matrix_1.5-4.1
## [28] omicsPrint_1.14.0
## [29] MASS_7.3-60
## [30] DNAmArray_2.0.0
## [31] pls_2.8-2
## [32] FDb.InfiniumMethylation.hg19_2.2.0
## [33] org.Hs.eg.db_3.14.0
## [34] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [35] GenomicFeatures_1.46.5
## [36] AnnotationDbi_1.56.2
## [37] IlluminaHumanMethylationEPICmanifest_0.3.0
## [38] minfi_1.40.0
## [39] bumphunter_1.36.0
## [40] locfit_1.5-9.8
## [41] iterators_1.0.14
## [42] foreach_1.5.2
## [43] Biostrings_2.62.0
## [44] XVector_0.34.0
## [45] SummarizedExperiment_1.24.0
## [46] Biobase_2.58.0
## [47] MatrixGenerics_1.10.0
## [48] matrixStats_1.0.0
## [49] GenomicRanges_1.46.1
## [50] GenomeInfoDb_1.34.9
## [51] IRanges_2.32.0
## [52] S4Vectors_0.36.2
## [53] BiocGenerics_0.44.0
## [54] BiocParallel_1.32.6
## [55] MethylAid_1.28.0
## [56] forcats_0.5.2
## [57] stringr_1.5.0
## [58] dplyr_1.1.3
## [59] purrr_0.3.4
## [60] readr_2.1.2
## [61] tidyr_1.2.1
## [62] tibble_3.2.1
## [63] ggplot2_3.4.3
## [64] tidyverse_1.3.2
## [65] rmarkdown_2.16
##
## loaded via a namespace (and not attached):
## [1] ica_1.0-3
## [2] Rsamtools_2.10.0
## [3] cinaR_0.2.3
## [4] lmtest_0.9-40
## [5] crayon_1.5.2
## [6] rhdf5filters_1.10.1
## [7] backports_1.4.1
## [8] reprex_2.0.2
## [9] GOSemSim_2.20.0
## [10] rlang_1.1.1
## [11] ROCR_1.0-11
## [12] readxl_1.4.1
## [13] SparseM_1.81
## [14] nloptr_2.0.3
## [15] filelock_1.0.2
## [16] rjson_0.2.21
## [17] bit64_4.0.5
## [18] glue_1.6.2
## [19] sctransform_0.3.5
## [20] rngtools_1.5.2
## [21] spatstat.sparse_3.0-1
## [22] mcmc_0.9-7
## [23] spatstat.geom_3.2-1
## [24] DOSE_3.20.1
## [25] haven_2.5.1
## [26] tidyselect_1.2.0
## [27] fitdistrplus_1.1-11
## [28] XML_3.99-0.14
## [29] zoo_1.8-12
## [30] GenomicAlignments_1.30.0
## [31] MatrixModels_0.5-1
## [32] xtable_1.8-4
## [33] magrittr_2.0.3
## [34] evaluate_0.21
## [35] cli_3.6.1
## [36] zlibbioc_1.44.0
## [37] miniUI_0.1.1.1
## [38] rstudioapi_0.14
## [39] doRNG_1.8.6
## [40] sp_1.6-1
## [41] MultiAssayExperiment_1.20.0
## [42] bslib_0.5.0
## [43] fastmatch_1.1-3
## [44] treeio_1.18.1
## [45] shiny_1.7.2
## [46] xfun_0.39
## [47] askpass_1.1
## [48] multtest_2.50.0
## [49] cluster_2.1.4
## [50] caTools_1.18.2
## [51] tidygraph_1.2.2
## [52] KEGGREST_1.34.0
## [53] quantreg_5.94
## [54] base64_2.0.1
## [55] ape_5.7-1
## [56] scrime_1.3.5
## [57] listenv_0.9.0
## [58] png_0.1-8
## [59] reshape_0.8.9
## [60] future_1.32.0
## [61] withr_2.5.0
## [62] bitops_1.0-7
## [63] ggforce_0.3.4
## [64] plyr_1.8.8
## [65] cellranger_1.1.0
## [66] coda_0.19-4
## [67] pillar_1.9.0
## [68] cachem_1.0.8
## [69] fs_1.6.2
## [70] clusterProfiler_4.2.2
## [71] DelayedMatrixStats_1.16.0
## [72] vctrs_0.6.3
## [73] ellipsis_0.3.2
## [74] generics_0.1.3
## [75] tools_4.2.2
## [76] munsell_0.5.0
## [77] tweenr_2.0.2
## [78] fgsea_1.20.0
## [79] DelayedArray_0.24.0
## [80] abind_1.4-5
## [81] fastmap_1.1.1
## [82] compiler_4.2.2
## [83] httpuv_1.6.11
## [84] rtracklayer_1.54.0
## [85] beanplot_1.3.1
## [86] MCMCpack_1.6-3
## [87] GenomeInfoDbData_1.2.9
## [88] edgeR_3.40.2
## [89] deldir_1.0-9
## [90] utf8_1.2.3
## [91] later_1.3.1
## [92] RobustRankAggreg_1.2.1
## [93] BiocFileCache_2.2.1
## [94] jsonlite_1.8.5
## [95] scales_1.2.1
## [96] carData_3.0-5
## [97] pbapply_1.7-0
## [98] tidytree_0.4.0
## [99] sparseMatrixStats_1.10.0
## [100] lazyeval_0.2.2
## [101] promises_1.2.0.1
## [102] car_3.1-0
## [103] goftest_1.2-3
## [104] spatstat.utils_3.0-3
## [105] reticulate_1.30
## [106] htm2txt_2.2.2
## [107] nor1mix_1.3-0
## [108] cowplot_1.1.1
## [109] statmod_1.5.0
## [110] siggenes_1.68.0
## [111] Rtsne_0.16
## [112] downloader_0.4
## [113] uwot_0.1.14
## [114] igraph_1.4.3
## [115] HDF5Array_1.22.1
## [116] numDeriv_2016.8-1.1
## [117] yaml_2.3.7
## [118] htmltools_0.5.5
## [119] memoise_2.0.1
## [120] BiocIO_1.8.0
## [121] graphlayouts_0.8.1
## [122] quadprog_1.5-8
## [123] viridisLite_0.4.2
## [124] digest_0.6.31
## [125] assertthat_0.2.1
## [126] mime_0.12
## [127] rappdirs_0.3.3
## [128] RSQLite_2.2.17
## [129] yulab.utils_0.0.6
## [130] future.apply_1.11.0
## [131] data.table_1.14.8
## [132] blob_1.2.4
## [133] preprocessCore_1.60.2
## [134] splines_4.2.2
## [135] labeling_0.4.2
## [136] Rhdf5lib_1.20.0
## [137] illuminaio_0.40.0
## [138] googledrive_2.0.0
## [139] RaggedExperiment_1.18.0
## [140] RCurl_1.98-1.12
## [141] broom_1.0.1
## [142] hms_1.1.2
## [143] modelr_0.1.9
## [144] rhdf5_2.42.1
## [145] colorspace_2.1-0
## [146] aplot_0.1.7
## [147] sass_0.4.6
## [148] Rcpp_1.0.10
## [149] mclust_6.0.0
## [150] RANN_2.6.1
## [151] enrichplot_1.14.2
## [152] fansi_1.0.4
## [153] tzdb_0.4.0
## [154] parallelly_1.36.0
## [155] R6_2.5.1
## [156] grid_4.2.2
## [157] ggridges_0.5.4
## [158] lifecycle_1.0.3
## [159] ggsignif_0.6.3
## [160] curl_5.0.1
## [161] googlesheets4_1.0.1
## [162] minqa_1.2.5
## [163] leiden_0.4.3
## [164] jquerylib_0.1.4
## [165] DO.db_2.9
## [166] qvalue_2.26.0
## [167] RcppAnnoy_0.0.20
## [168] RColorBrewer_1.1-3
## [169] spatstat.explore_3.1-0
## [170] htmlwidgets_1.5.4
## [171] polyclip_1.10-4
## [172] biomaRt_2.50.3
## [173] missMethyl_1.28.0
## [174] shadowtext_0.1.2
## [175] timechange_0.2.0
## [176] gridGraphics_0.5-1
## [177] reactome.db_1.77.0
## [178] rvest_1.0.3
## [179] globals_0.16.2
## [180] openssl_2.0.6
## [181] spatstat.random_3.1-5
## [182] patchwork_1.1.2
## [183] progressr_0.13.0
## [184] codetools_0.2-19
## [185] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [186] lubridate_1.9.2
## [187] GO.db_3.14.0
## [188] gtools_3.9.4
## [189] prettyunits_1.1.1
## [190] dbplyr_2.2.1
## [191] gridBase_0.4-7
## [192] gtable_0.3.3
## [193] DBI_1.1.3
## [194] tensor_1.5
## [195] ggfun_0.0.7
## [196] httr_1.4.6
## [197] highr_0.10
## [198] KernSmooth_2.23-21
## [199] stringi_1.7.12
## [200] vroom_1.5.7
## [201] progress_1.2.2
## [202] reshape2_1.4.4
## [203] farver_2.1.1
## [204] annotate_1.72.0
## [205] viridis_0.6.2
## [206] hexbin_1.28.3
## [207] ggtree_3.2.1
## [208] xml2_1.3.4
## [209] boot_1.3-28.1
## [210] restfulr_0.0.15
## [211] scattermore_0.8
## [212] ggplotify_0.1.0
## [213] bit_4.0.5
## [214] spatstat.data_3.0-1
## [215] scatterpie_0.1.8
## [216] ggraph_2.0.6
## [217] pkgconfig_2.0.3
## [218] gargle_1.5.0
## [219] rstatix_0.7.0
## [220] knitr_1.43
Clear
rm(list=ls())