vignettes/SegmentMovements.Rmd
SegmentMovements.Rmd
See the
vignette("OikScrambling", package = "OikScrambling")
for
general details on package and data load.
See
vignette("LoadGenomicBreaks", package = "OikScrambling")
for how the different GBreaks
objects are prepared.
suppressPackageStartupMessages({
library('GenomicBreaks')
library('ggplot2')
library("BreakpointsData")
})
load("BreakPoints.Rdata")
Whenever a collinear segment moves between species, if the homologous segment is located on an equivalent chromosome, a distance can be calculated.
# Find distance between two equivalent GRanges.
# Input: a GBreaks object
# Output: Absolute distance between the target and query ranges based on some metric ("start", "end", or "midpoint")
GBreaksDist <- function(gb, distMetric=c("midpoint", "start", "end")) {
distMetric <- match.arg(distMetric)
distFun <- switch(distMetric,
start = start,
end = end,
midpoint = \(gr) gr |> resize(1, fix = "center") |> start() )
abs(distFun(gb) - distFun(gb$query))
}
filterSyntenicPairs <- function(gb, chromosomePairing) {
syntenic_pairs <- paste(chromosomePairing[["sp1"]], chromosomePairing[["sp2"]], sep = "_")
is_syntenic <- paste(seqnames(gb), seqnames(gb$query), sep = "_") %in% syntenic_pairs
gb[is_syntenic]
}
chromDist <- function(gb, chromosomePairing=NULL, distMetric="midpoint"){
# Subset to include only ranges in which all chrom in sp1 are paired to their equivalent in sp2
# Note that a similar function could be used to compare the identity of chromosome arms
gb <- filterSyntenicPairs(gb, chromosomePairing)
# Now that the GRanges are only on equivalent chromosomes, calculate the distances between them.
# This can be "start", "end", or "midpoint" distance.
gb$distance <- GBreaksDist(gb, distMetric=distMetric)
gb
}
# Unfortunately, the use of upper and lower case in chromosome names was not
# standardised when assembling the genomes.
synPairs <- SimpleList()
synPairs$chr_Chr <- list("sp1"=c("chr1", "chr2", "PAR", "XSR", "YSR"),
"sp2"=c("Chr1", "Chr2", "PAR", "XSR", "YSR"))
synPairs$Chr_Chr <- list("sp1"=c("Chr1", "Chr2", "PAR", "XSR", "YSR"),
"sp2"=c("Chr1", "Chr2", "PAR", "XSR", "YSR"))
segDists <- SimpleList()
segDists$Oki_Osa <- chromDist(gbs$Oki_Osa, synPairs$chr_Chr)
segDists$Oki_Bar <- chromDist(gbs$Oki_Bar, synPairs$chr_Chr)
segDists$Osa_Bar <- chromDist(gbs$Osa_Bar, synPairs$Chr_Chr)
plotSegDists <- function(segDist, facet.by='seqnames~Arm', binWidth=100000) {
ggplot(as.data.frame(segDist)) +
aes(x=distance, fill = Arm) +
geom_histogram(binwidth = binWidth) +
facet_wrap(facet.by, scales="free_x", ncol=2)
}
plotSegDists(segDists$Oki_Osa, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Osa: segment movement distances (uncoalesced)")
plotSegDists(segDists$Oki_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Bar: segment movement distances (uncoalesced)")
plotSegDists(segDists$Osa_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Osa-Bar: segment movement distances (uncoalesced)")
segDistsCoa <- SimpleList()
segDistsCoa$Oki_Osa <- chromDist(coa$Oki_Osa, synPairs$chr_Chr)
segDistsCoa$Oki_Bar <- chromDist(coa$Oki_Bar, synPairs$chr_Chr)
segDistsCoa$Osa_Bar <- chromDist(coa$Osa_Bar, synPairs$Chr_Chr)
plotSegDists(segDistsCoa$Oki_Osa, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Osa: segment movement distances (coalesced)")
plotSegDists(segDistsCoa$Oki_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Bar: segment movement distances (coalesced)")
plotSegDists(segDistsCoa$Osa_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Osa-Bar: segment movement distances (coalesced)")
Uses, start coordinate as a proxy for midpoint, but the difference should be hardly perceptible.
plot_movement_distance_on_coords <- \(gb) gb |>
as.data.frame() |>
ggplot() +
aes(start, distance, col=Arm) +
geom_point() +
xlab("Start on target") +
ylab("Distance between midpoints on query and target") +
facet_wrap(~seqnames)
segDists$Oki_Osa |> plot_movement_distance_on_coords() + ggtitle("Oki - Osa")
segDists$Oki_Bar |> plot_movement_distance_on_coords() + ggtitle("Oki - Bar")
segDists$Osa_Bar |> plot_movement_distance_on_coords() + ggtitle("Osa - Bar")
This assumes that the center an unaligned region would be plotted on the same pixel as the center of its flanking aligned regions.
plot_distance_to_next_query_on_coords <- \(gb) gb |>
dist2next() |>
as.data.frame() |>
ggplot() +
aes(end + 1, qdist, col=Arm) +
geom_point() +
xlab("Start unaligned region on target") +
ylab("Distance between flanking regions on query") +
facet_wrap(~seqnames)
segDists$Oki_Osa |> plot_distance_to_next_query_on_coords() + ggtitle("Oki - Osa")
## Warning: Removed 4 rows containing missing values (`geom_point()`).
segDists$Oki_Bar |> plot_distance_to_next_query_on_coords() + ggtitle("Oki - Bar")
## Warning: Removed 4 rows containing missing values (`geom_point()`).
segDists$Osa_Bar |> plot_distance_to_next_query_on_coords() + ggtitle("Osa - Bar")
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 12 (bookworm)
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.11.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] BreakpointsData_3.10.1 ggplot2_3.4.3 GenomicBreaks_0.14.2
## [4] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1 IRanges_2.34.1
## [7] S4Vectors_0.38.1 BiocGenerics_0.46.0
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.1 BiocIO_1.10.0
## [3] bitops_1.0-7 tibble_3.2.1
## [5] R.oo_1.25.0 XML_3.99-0.14
## [7] rpart_4.1.19 lifecycle_1.0.3
## [9] rprojroot_2.0.3 lattice_0.20-45
## [11] MASS_7.3-58.2 backports_1.4.1
## [13] magrittr_2.0.3 Hmisc_5.1-0
## [15] sass_0.4.7 rmarkdown_2.23
## [17] jquerylib_0.1.4 yaml_2.3.7
## [19] plotrix_3.8-2 DBI_1.1.3
## [21] CNEr_1.36.0 minqa_1.2.5
## [23] RColorBrewer_1.1-3 ade4_1.7-22
## [25] abind_1.4-5 zlibbioc_1.46.0
## [27] purrr_1.0.2 R.utils_2.12.2
## [29] RCurl_1.98-1.12 nnet_7.3-18
## [31] pracma_2.4.2 GenomeInfoDbData_1.2.10
## [33] gdata_2.19.0 annotate_1.78.0
## [35] pkgdown_2.0.7 codetools_0.2-19
## [37] DelayedArray_0.26.7 tidyselect_1.2.0
## [39] shape_1.4.6 farver_2.1.1
## [41] lme4_1.1-34 matrixStats_1.0.0
## [43] base64enc_0.1-3 GenomicAlignments_1.36.0
## [45] jsonlite_1.8.7 mitml_0.4-5
## [47] Formula_1.2-5 survival_3.5-3
## [49] iterators_1.0.14 systemfonts_1.0.5
## [51] foreach_1.5.2 tools_4.3.1
## [53] ragg_1.2.5 Rcpp_1.0.11
## [55] glue_1.6.2 gridExtra_2.3
## [57] pan_1.8 xfun_0.40
## [59] MatrixGenerics_1.12.2 EBImage_4.42.0
## [61] dplyr_1.1.3 withr_2.5.1
## [63] fastmap_1.1.1 boot_1.3-28.1
## [65] fansi_1.0.5 digest_0.6.33
## [67] R6_2.5.1 mice_3.16.0
## [69] textshaping_0.3.7 colorspace_2.1-0
## [71] GO.db_3.17.0 gtools_3.9.4
## [73] poweRlaw_0.70.6 jpeg_0.1-10
## [75] RSQLite_2.3.1 weights_1.0.4
## [77] R.methodsS3_1.8.2 utf8_1.2.3
## [79] tidyr_1.3.0 generics_0.1.3
## [81] data.table_1.14.8 rtracklayer_1.60.0
## [83] httr_1.4.7 htmlwidgets_1.6.2
## [85] S4Arrays_1.0.5 pkgconfig_2.0.3
## [87] gtable_0.3.4 blob_1.2.4
## [89] XVector_0.40.0 htmltools_0.5.6.1
## [91] fftwtools_0.9-11 scales_1.2.1
## [93] Biobase_2.60.0 png_0.1-8
## [95] knitr_1.44 heatmaps_1.24.0
## [97] rstudioapi_0.15.0 tzdb_0.4.0
## [99] reshape2_1.4.4 rjson_0.2.21
## [101] checkmate_2.2.0 nlme_3.1-162
## [103] nloptr_2.0.3 cachem_1.0.8
## [105] stringr_1.5.0 KernSmooth_2.23-20
## [107] parallel_4.3.1 genoPlotR_0.8.11
## [109] foreign_0.8-84 AnnotationDbi_1.62.2
## [111] restfulr_0.0.15 desc_1.4.2
## [113] pillar_1.9.0 grid_4.3.1
## [115] vctrs_0.6.3 jomo_2.7-6
## [117] xtable_1.8-4 cluster_2.1.4
## [119] htmlTable_2.4.1 evaluate_0.22
## [121] readr_2.1.4 cli_3.6.1
## [123] locfit_1.5-9.8 compiler_4.3.1
## [125] Rsamtools_2.16.0 rlang_1.1.1
## [127] crayon_1.5.2 labeling_0.4.3
## [129] plyr_1.8.8 fs_1.6.3
## [131] stringi_1.7.12 BiocParallel_1.34.2
## [133] munsell_0.5.0 Biostrings_2.68.1
## [135] tiff_0.1-11 glmnet_4.1-7
## [137] Matrix_1.5-3 BSgenome_1.68.0
## [139] hms_1.1.3 bit64_4.0.5
## [141] KEGGREST_1.40.0 SummarizedExperiment_1.30.2
## [143] broom_1.0.5 memoise_2.0.1
## [145] bslib_0.5.1 bit_4.0.5