Load packages and data

See the vignette("OikScrambling", package = "OikScrambling") for general details on package and data load.

See vignette("LoadGenomicBreaks", package = "OikScrambling") for how the different GBreaks objects are prepared.

suppressPackageStartupMessages({
  library('GenomicBreaks')
  library('ggplot2')
  library("BreakpointsData")
})
load("BreakPoints.Rdata")

Segment movement distances

Whenever a collinear segment moves between species, if the homologous segment is located on an equivalent chromosome, a distance can be calculated.

# Find distance between two equivalent GRanges.
# Input: a GBreaks object
# Output: Absolute distance between the target and query ranges based on some metric ("start", "end", or "midpoint")
GBreaksDist <- function(gb, distMetric=c("midpoint", "start", "end")) {
  distMetric <- match.arg(distMetric)
  distFun <- switch(distMetric,
                    start    = start,
                    end      = end,
                    midpoint = \(gr) gr |> resize(1, fix = "center") |> start() )
  abs(distFun(gb) - distFun(gb$query))
}

filterSyntenicPairs <- function(gb, chromosomePairing) {
  syntenic_pairs <- paste(chromosomePairing[["sp1"]], chromosomePairing[["sp2"]], sep = "_")
  is_syntenic    <- paste(seqnames(gb),               seqnames(gb$query),         sep = "_") %in% syntenic_pairs
  gb[is_syntenic]
}

chromDist <- function(gb, chromosomePairing=NULL, distMetric="midpoint"){
  # Subset to include only ranges in which all chrom in sp1 are paired to their equivalent in sp2
  # Note that a similar function could be used to compare the identity of chromosome arms
  gb <- filterSyntenicPairs(gb, chromosomePairing)
  
  # Now that the GRanges are only on equivalent chromosomes, calculate the distances between them.
  # This can be "start", "end", or "midpoint" distance.
  gb$distance <- GBreaksDist(gb, distMetric=distMetric)
  gb
}

# Unfortunately, the use of upper and lower case in chromosome names was not
# standardised when assembling the genomes.
synPairs <- SimpleList()
synPairs$chr_Chr <- list("sp1"=c("chr1", "chr2", "PAR", "XSR", "YSR"),
                         "sp2"=c("Chr1", "Chr2", "PAR", "XSR", "YSR"))
synPairs$Chr_Chr <- list("sp1"=c("Chr1", "Chr2", "PAR", "XSR", "YSR"),
                         "sp2"=c("Chr1", "Chr2", "PAR", "XSR", "YSR"))

segDists <- SimpleList()
segDists$Oki_Osa <- chromDist(gbs$Oki_Osa, synPairs$chr_Chr)
segDists$Oki_Bar <- chromDist(gbs$Oki_Bar, synPairs$chr_Chr)
segDists$Osa_Bar <- chromDist(gbs$Osa_Bar, synPairs$Chr_Chr)

Plotting segment movement distances

plotSegDists <- function(segDist, facet.by='seqnames~Arm', binWidth=100000) {
  ggplot(as.data.frame(segDist)) +
    aes(x=distance, fill = Arm) +
    geom_histogram(binwidth = binWidth) +
    facet_wrap(facet.by, scales="free_x", ncol=2)
}
plotSegDists(segDists$Oki_Osa, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Osa: segment movement distances (uncoalesced)")

plotSegDists(segDists$Oki_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Bar: segment movement distances (uncoalesced)")

plotSegDists(segDists$Osa_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Osa-Bar: segment movement distances (uncoalesced)")

Segment movement distances (coalesced)

segDistsCoa <- SimpleList()
segDistsCoa$Oki_Osa <- chromDist(coa$Oki_Osa, synPairs$chr_Chr)
segDistsCoa$Oki_Bar <- chromDist(coa$Oki_Bar, synPairs$chr_Chr)
segDistsCoa$Osa_Bar <- chromDist(coa$Osa_Bar, synPairs$Chr_Chr)
plotSegDists(segDistsCoa$Oki_Osa, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Osa: segment movement distances (coalesced)")

plotSegDists(segDistsCoa$Oki_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Oki-Bar: segment movement distances (coalesced)")

plotSegDists(segDistsCoa$Osa_Bar, facet.by='seqnames~Arm', binWidth=100000) + ggtitle("Osa-Bar: segment movement distances (coalesced)")

Plotting segment movements relative to coordinates

Uses, start coordinate as a proxy for midpoint, but the difference should be hardly perceptible.

plot_movement_distance_on_coords <- \(gb) gb |>
  as.data.frame() |>
  ggplot() +
    aes(start, distance, col=Arm) +
    geom_point() +
    xlab("Start on target") +
    ylab("Distance between midpoints on query and target") +
    facet_wrap(~seqnames)
segDists$Oki_Osa |> plot_movement_distance_on_coords() + ggtitle("Oki - Osa")

segDists$Oki_Bar |> plot_movement_distance_on_coords() + ggtitle("Oki - Bar")

segDists$Osa_Bar |> plot_movement_distance_on_coords() + ggtitle("Osa - Bar")

Plotting distance between query ranges relative to target coordinates

This assumes that the center an unaligned region would be plotted on the same pixel as the center of its flanking aligned regions.

plot_distance_to_next_query_on_coords <- \(gb) gb |>
  dist2next() |>
  as.data.frame() |>
  ggplot() +
    aes(end + 1, qdist, col=Arm) +
    geom_point() +
    xlab("Start unaligned region on target") +
    ylab("Distance between flanking regions on query") +
    facet_wrap(~seqnames)
segDists$Oki_Osa |> plot_distance_to_next_query_on_coords() + ggtitle("Oki - Osa")
## Warning: Removed 4 rows containing missing values (`geom_point()`).

segDists$Oki_Bar |> plot_distance_to_next_query_on_coords() + ggtitle("Oki - Bar")
## Warning: Removed 4 rows containing missing values (`geom_point()`).

segDists$Osa_Bar |> plot_distance_to_next_query_on_coords() + ggtitle("Osa - Bar")
## Warning: Removed 4 rows containing missing values (`geom_point()`).

Session information

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 12 (bookworm)
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.11.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] BreakpointsData_3.10.1 ggplot2_3.4.3          GenomicBreaks_0.14.2  
## [4] GenomicRanges_1.52.0   GenomeInfoDb_1.36.1    IRanges_2.34.1        
## [7] S4Vectors_0.38.1       BiocGenerics_0.46.0   
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1               BiocIO_1.10.0              
##   [3] bitops_1.0-7                tibble_3.2.1               
##   [5] R.oo_1.25.0                 XML_3.99-0.14              
##   [7] rpart_4.1.19                lifecycle_1.0.3            
##   [9] rprojroot_2.0.3             lattice_0.20-45            
##  [11] MASS_7.3-58.2               backports_1.4.1            
##  [13] magrittr_2.0.3              Hmisc_5.1-0                
##  [15] sass_0.4.7                  rmarkdown_2.23             
##  [17] jquerylib_0.1.4             yaml_2.3.7                 
##  [19] plotrix_3.8-2               DBI_1.1.3                  
##  [21] CNEr_1.36.0                 minqa_1.2.5                
##  [23] RColorBrewer_1.1-3          ade4_1.7-22                
##  [25] abind_1.4-5                 zlibbioc_1.46.0            
##  [27] purrr_1.0.2                 R.utils_2.12.2             
##  [29] RCurl_1.98-1.12             nnet_7.3-18                
##  [31] pracma_2.4.2                GenomeInfoDbData_1.2.10    
##  [33] gdata_2.19.0                annotate_1.78.0            
##  [35] pkgdown_2.0.7               codetools_0.2-19           
##  [37] DelayedArray_0.26.7         tidyselect_1.2.0           
##  [39] shape_1.4.6                 farver_2.1.1               
##  [41] lme4_1.1-34                 matrixStats_1.0.0          
##  [43] base64enc_0.1-3             GenomicAlignments_1.36.0   
##  [45] jsonlite_1.8.7              mitml_0.4-5                
##  [47] Formula_1.2-5               survival_3.5-3             
##  [49] iterators_1.0.14            systemfonts_1.0.5          
##  [51] foreach_1.5.2               tools_4.3.1                
##  [53] ragg_1.2.5                  Rcpp_1.0.11                
##  [55] glue_1.6.2                  gridExtra_2.3              
##  [57] pan_1.8                     xfun_0.40                  
##  [59] MatrixGenerics_1.12.2       EBImage_4.42.0             
##  [61] dplyr_1.1.3                 withr_2.5.1                
##  [63] fastmap_1.1.1               boot_1.3-28.1              
##  [65] fansi_1.0.5                 digest_0.6.33              
##  [67] R6_2.5.1                    mice_3.16.0                
##  [69] textshaping_0.3.7           colorspace_2.1-0           
##  [71] GO.db_3.17.0                gtools_3.9.4               
##  [73] poweRlaw_0.70.6             jpeg_0.1-10                
##  [75] RSQLite_2.3.1               weights_1.0.4              
##  [77] R.methodsS3_1.8.2           utf8_1.2.3                 
##  [79] tidyr_1.3.0                 generics_0.1.3             
##  [81] data.table_1.14.8           rtracklayer_1.60.0         
##  [83] httr_1.4.7                  htmlwidgets_1.6.2          
##  [85] S4Arrays_1.0.5              pkgconfig_2.0.3            
##  [87] gtable_0.3.4                blob_1.2.4                 
##  [89] XVector_0.40.0              htmltools_0.5.6.1          
##  [91] fftwtools_0.9-11            scales_1.2.1               
##  [93] Biobase_2.60.0              png_0.1-8                  
##  [95] knitr_1.44                  heatmaps_1.24.0            
##  [97] rstudioapi_0.15.0           tzdb_0.4.0                 
##  [99] reshape2_1.4.4              rjson_0.2.21               
## [101] checkmate_2.2.0             nlme_3.1-162               
## [103] nloptr_2.0.3                cachem_1.0.8               
## [105] stringr_1.5.0               KernSmooth_2.23-20         
## [107] parallel_4.3.1              genoPlotR_0.8.11           
## [109] foreign_0.8-84              AnnotationDbi_1.62.2       
## [111] restfulr_0.0.15             desc_1.4.2                 
## [113] pillar_1.9.0                grid_4.3.1                 
## [115] vctrs_0.6.3                 jomo_2.7-6                 
## [117] xtable_1.8-4                cluster_2.1.4              
## [119] htmlTable_2.4.1             evaluate_0.22              
## [121] readr_2.1.4                 cli_3.6.1                  
## [123] locfit_1.5-9.8              compiler_4.3.1             
## [125] Rsamtools_2.16.0            rlang_1.1.1                
## [127] crayon_1.5.2                labeling_0.4.3             
## [129] plyr_1.8.8                  fs_1.6.3                   
## [131] stringi_1.7.12              BiocParallel_1.34.2        
## [133] munsell_0.5.0               Biostrings_2.68.1          
## [135] tiff_0.1-11                 glmnet_4.1-7               
## [137] Matrix_1.5-3                BSgenome_1.68.0            
## [139] hms_1.1.3                   bit64_4.0.5                
## [141] KEGGREST_1.40.0             SummarizedExperiment_1.30.2
## [143] broom_1.0.5                 memoise_2.0.1              
## [145] bslib_0.5.1                 bit_4.0.5