knitr::opts_chunk$set(cache = TRUE, cache.lazy = FALSE)
options(width = 200)

Load R pacakges and data

## Warning in runHook(".onLoad", env, package.lib, package): input string 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8

## Warning in runHook(".onLoad", env, package.lib, package): input string 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
load("BreakPoints.Rdata")
ces <- readRDS("CEs.Rds")

See vignette("LoadGenomicBreaks", package = "OikScrambling") for how the different GBreaks objects are prepared.

Precompute widths

widths <- SimpleList()
makeWidthsDF <- function (objList) {
  lapply(names(objList), \(name) {
    DF <- mcols(objList[[name]])
    if (nrow(DF) == 0) return(DataFrame())
    if (is.null(DF$Arm)   ) DF$Arm     <- NA
    if (is.null(DF$flag))   DF$flag    <- NA
    if (is.null(DF$nonCoa)) DF$nonCoa  <- NA
    DF$name  <- name
    DF$width <- width(objList[[name]])
    DF$chr   <- seqnames(objList[[name]]) |>
      sub(pat = "Chr",    rep = "chr")    |>
      sub(pat = "BNJZ.*", rep = "Ply")    |>
      sub(pat = "BNKA.*", rep = "Ros")    |>
      sub(pat = "BJTB.*", rep = "Rob")    |>
      sub(pat = "^R.*",   rep = "Sav")    |>
      sub(pat = "^2L$",   rep = "Dro")    |>
      sub(pat = "^2R$",   rep = "Dro")    |>
      sub(pat = "^3L$",   rep = "Dro")    |>
      sub(pat = "^3R$",   rep = "Dro")    |>
      sub(pat = "^4$",    rep = "Dro")    |>
      sub(pat = "^X$",    rep = "Dro")    |>
      sub(pat = "^Y$",    rep = "Dro")   

    DF$query <- DF$score <- DF$rep <- DF$repOvlp <- DF$transcripts <- NULL
    DF$dist  <- OikScrambling:::compDistance(DF$name)
    DF$distS <- OikScrambling:::compDistance(DF$name, short = TRUE)
    DF$genus <- OikScrambling:::compGenus(DF$name)
    DF$class <- OikScrambling:::compDistClass(DF$name)
    DF
  }) |> do.call(what=rbind)
}

isol <- sapply(gbs, \(gb) gb[gb$nonCoa]) |> SimpleList()

widths$gbs     <- makeWidthsDF(gbs)     |> tibble::as_tibble()
widths$isol    <- makeWidthsDF(isol)    |> tibble::as_tibble()
widths$unal    <- makeWidthsDF(unal)    |> tibble::as_tibble()
widths$coa     <- makeWidthsDF(coa)     |> tibble::as_tibble()
widths$unmap   <- makeWidthsDF(unmap)   |> tibble::as_tibble()
widths$bri     <- makeWidthsDF(bri)     |> tibble::as_tibble()
widths$tra     <- makeWidthsDF(tra)     |> tibble::as_tibble()
widths$tra2    <- makeWidthsDF(tra2)    |> tibble::as_tibble()
widths$coa2    <- makeWidthsDF(coa2)    |> tibble::as_tibble()
names_table <- tibble::tibble(
  pairnames = levels(factor((widths$gbs$name))),
  class     = OikScrambling:::compDistClass(pairnames),
  distS     = OikScrambling:::compDistance(pairnames, short = TRUE),
  target    = pairnames |> sub(pat="_.*", rep="") |> factor(),
  query     = pairnames |> sub(pat=".*_", rep="") |> factor()
)

Plot all width distributions

This is useful to verify by eye that collections of distributions in the same distance group resemble each other.

gg_freq_poly <- function(tibble, bins = 30, binwidth = 30, scale = "log") {
  p <-
    ggplot(tibble) +
    aes(width)
  if(scale == "log") {
    p <- p + 
      geom_freqpoly(bins = bins) +
      scale_x_log10()
  } else {
    p <- p +
      geom_freqpoly(binwidth = binwidth)
  }
  p
}

w_df <- rbind(widths$gbs     |> as.data.frame()|> dplyr::mutate(what = "aligned"),
              widths$isol    |> as.data.frame()|> dplyr::mutate(what = "isolated"),
              widths$unal    |> as.data.frame()|> dplyr::mutate(what = "unaligned"),
              widths$coa     |> as.data.frame()|> dplyr::mutate(what = "mapped"),
              widths$bri     |> as.data.frame()|> dplyr::mutate(what = "bridge"),
              widths$unmap   |> as.data.frame()|> dplyr::mutate(what = "unmaped"),
              widths$coa2    |> as.data.frame()|> dplyr::mutate(what = "doublecoalesced"))


widths_all_tibble <- rbind(widths$gbs     |> dplyr::mutate(what = "aligned"),
                           widths$isol    |> dplyr::mutate(what = "isolated"),
                           widths$unal    |> dplyr::mutate(what = "unaligned"),
                           widths$coa     |> dplyr::mutate(what = "mapped"),
                           widths$bri     |> dplyr::mutate(what = "bridge"),
                           widths$unmap   |> dplyr::mutate(what = "unmaped"),
                           widths$coa2    |> dplyr::mutate(what = "doublecoalesced"))

p <-gg_freq_poly(widths_all_tibble) +
  aes(col = what) +
  ggtitle("Distribution of alignment width before and after collapsing")

# Showing all individual distributions, to demonstrate we can pool some.
p + facet_wrap(~class + genus + distS + name, scales = "free_y")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 22514 rows containing non-finite values (`stat_bin()`).

# Aggregating by distance class
p_droso <- gg_freq_poly(widths_all_tibble |> dplyr::filter(genus == "Drosophila")) + aes(col = what) + facet_wrap(~genus + class, nrow = 1, scales = "free_y")
p_ciona <- gg_freq_poly(widths_all_tibble |> dplyr::filter(genus == "Ciona")) + aes(col = what)      + facet_wrap(~genus + class, nrow = 1, scales = "free_y")
p_oikop <- gg_freq_poly(widths_all_tibble |> dplyr::filter(genus == "Oikopleura")) + aes(col = what) + facet_wrap(~genus + class, nrow = 1, scales = "free_y")

layout <- "
AAAA
BBB#
CCC#
"

library("patchwork")
(p_droso / p_ciona / p_oikop) +   plot_layout(design = layout, guides = 'collect')
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 914 rows containing non-finite values (`stat_bin()`).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 10307 rows containing non-finite values (`stat_bin()`).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 7561 rows containing non-finite values (`stat_bin()`).

Saving width distributions and some statistics

Below, I take the widths_all_tibble and perform some statistical operations on it for later. Click below to view.

Details

Click here to view the full table with many summary statistics for width distributions.
library(dplyr) |> suppressPackageStartupMessages()

t_CI <- function(vector, conf.level){
  if(length(vector) < 3)
    return(NA_character_)
  conf_test <- t.test(vector, conf.level = conf.level)
  conf_str <- paste(round(conf_test$conf.int,2), collapse=',')
  conf_str
}

# Note that because the Wilcox test involves ranking (and therefore sorting),
# this test is computationally demanding for large data frames.
# It works the same as the function above - just slower.
wilcox_CI <- function(vector, conf.level){
  if(length(vector) < 3)
     return(NA_character_)
  conf_test <- wilcox.test(vector, conf.level = conf.level, conf.int = TRUE)
  conf_str <- paste(round(conf_test$conf.int, 2), collapse=',')
  conf_str
}

stats_to_compute <- list(n         = ~n(),
                         t_CI      = ~t_CI(., conf.level=0.95),
                         wilcox_CI = ~wilcox_CI(., conf.level=0.95),
                         min       = min,
                         q25       = ~quantile(., 0.25),
                         median    = median,
                         q75       = ~quantile(., 0.75),
                         max       = max,
                         mean      = mean,
                         sd        = sd)

old.o <- options(max.print = 2000)

widths_all_tibble |>
    group_by(name, class, what) |>
    summarise(across(width, stats_to_compute)) |> as.data.frame()
## `summarise()` has grouped output by 'name', 'class'. You can override using the `.groups` argument.
##        name          class            what width_n          width_t_CI width_wilcox_CI width_min width_q25 width_median width_q75 width_max  width_mean    width_sd
## 1   Bar_Aom   intermediate         aligned   24717     1538.64,1610.24       844,872.5        51    368.00        634.0   1447.00     54452   1574.4417   2871.3143
## 2   Bar_Aom   intermediate          bridge   20520        302.15,327.1         191,197         0     38.00        178.0    309.00     38380    314.6256    911.6882
## 3   Bar_Aom   intermediate doublecoalesced    3452    11606.52,14764.2       3836,4392        58    562.75       2251.5   8361.50   1199960  13185.3589  47312.1501
## 4   Bar_Aom   intermediate        isolated    1853      862.03,1000.66       559.5,621        58    271.00        481.0    960.00     19742    931.3475   1521.3645
## 5   Bar_Aom   intermediate          mapped    4197    9887.49,11733.47   3636.5,4119.5        58    547.00       2104.0   7894.00    677793  10810.4818  30499.4833
## 6   Bar_Aom   intermediate       unaligned   24170       611.96,681.74       210.5,217         1     48.00        194.0    351.00     79613    646.8544   2767.3868
## 7   Bar_Aom   intermediate         unmaped    4129     2035.51,2410.29       526.5,629         1     82.00        272.0   1330.00     79613   2222.9002   6141.9161
## 8   Bar_Kum        distant         aligned   31557       957.75,991.34         617,635        51    250.00        469.0   1052.00     43546    974.5433   1522.0555
## 9   Bar_Kum        distant          bridge   23028       408.46,440.56     289.5,296.5         0    144.00        258.0    441.00    135099    424.5125   1242.4483
## 10  Bar_Kum        distant doublecoalesced    7849     4903.46,5419.47   2151.5,2342.5        51    316.00       1354.0   4436.00    216498   5161.4630  11660.5964
## 11  Bar_Kum        distant        isolated    4346       653.68,717.84       408,449.5        51    177.00        302.0    764.75     16862    685.7561   1078.6911
## 12  Bar_Kum        distant          mapped    8529      4530.09,4973.8   2005.5,2181.5        51    296.00       1227.0   4188.00    138044   4751.9448  10452.2655
## 13  Bar_Kum        distant       unaligned   31373        704.7,796.64         323,331         1    147.00        273.0    521.00    273024    750.6715   4154.0813
## 14  Bar_Kum        distant         unmaped    8515     1455.44,1780.07         502,540         1    144.00        351.0    975.00    273024   1617.7504   7640.8206
## 15  Bar_Nor same_or_sister         aligned   15184     2880.98,3022.93   1814.5,1908.5        40    526.00       1305.5   3380.25     55611   2951.9559   4462.0282
## 16  Bar_Nor same_or_sister          bridge    8996        378.79,417.1         274,297         0      2.00        124.0    406.00     17680    397.9464    926.7815
## 17  Bar_Nor same_or_sister doublecoalesced    4319    10271.06,12273.5       2512,2848        40    529.00       1517.0   5440.00    569531  11272.2790  33562.3130
## 18  Bar_Nor same_or_sister        isolated    3882     1262.66,1415.56         780,845        40    312.00        633.0   1366.50     44180   1339.1121   2429.5923
## 19  Bar_Nor same_or_sister          mapped    6188     7355.49,8288.48     2419,2720.5        40    488.00       1373.5   5444.75    240933   7821.9819  18719.2179
## 20  Bar_Nor same_or_sister       unaligned   12425       755.51,822.08       359.5,383         1     24.00        225.0    734.00     56097    788.7979   1892.7856
## 21  Bar_Nor same_or_sister         unmaped    5214      1122.1,1264.13       557,629.5         1     16.00        325.0   1250.00     56097   1193.1124   2615.7709
## 22  Bar_Oki        distant         aligned   32265       924.88,956.96     597.5,614.5        51    245.00        454.0   1014.00     43420    940.9199   1469.8924
## 23  Bar_Oki        distant          bridge   23772       413.37,438.69     288.5,295.5         0    142.00        256.0    443.00     59129    426.0277    995.8294
## 24  Bar_Oki        distant doublecoalesced    7849     4896.93,5417.11     2145.5,2338        54    306.00       1347.0   4450.00    218004   5157.0210  11754.6679
## 25  Bar_Oki        distant        isolated    4321       645.66,714.44       390,427.5        54    172.00        296.0    714.00     21112    680.0470   1153.0032
## 26  Bar_Oki        distant          mapped    8493     4540.11,4993.94     2005.5,2179        54    291.00       1218.0   4177.00    218004   4767.0211  10667.9952
## 27  Bar_Oki        distant       unaligned   32076       703.97,795.69     322.5,330.5         1    146.00        272.0    520.00    269200    749.8278   4190.4617
## 28  Bar_Oki        distant         unmaped    8484       1472.9,1809.5     503.5,539.5         1    148.00        358.0    968.00    269200   1641.2007   7908.2528
## 29  Bar_Osa   intermediate         aligned   24851      1523.3,1593.59       844.5,873        52    367.00        634.0   1449.00     49546   1558.4455   2826.8737
## 30  Bar_Osa   intermediate          bridge   20778        303.39,328.1     188.5,194.5         0     37.00        174.0    305.00     32212    315.7473    908.4853
## 31  Bar_Osa   intermediate doublecoalesced    3373   12040.47,14823.71     3990.5,4583        59    585.00       2310.0   8781.00    714822  13432.0910  41221.5092
## 32  Bar_Osa   intermediate        isolated    1732       814.98,929.84         571,634        59    279.75        478.0    968.25     16143    872.4094   1218.6145
## 33  Bar_Osa   intermediate          mapped    4073   10180.46,12058.44       3738,4238        59    572.00       2153.0   8140.00    562164  11119.4513  30566.1990
## 34  Bar_Osa   intermediate       unaligned   24266       616.12,690.07       208,214.5         1     47.00        189.0    346.00    106962    653.0948   2938.6433
## 35  Bar_Osa   intermediate         unmaped    4008     2110.15,2524.29       516.5,607         1     83.00        277.0   1249.50    106962   2317.2161   6686.5554
## 36  Cbr_Cel        distant         aligned   47012       523.92,534.98       413,419.5        53    210.00        363.0    622.00     23152    529.4498    611.7579
## 37  Cbr_Cel        distant          bridge   43326     1114.48,1175.78       623.5,639         0    160.00        494.0   1077.00    248393   1145.1281   3254.9048
## 38  Cbr_Cel        distant doublecoalesced    3546   19066.68,23080.29      9597,11187        70    953.00       4744.0  22150.50   2037825  21073.4873  60950.6693
## 39  Cbr_Cel        distant        isolated     993       520.53,582.69       428,476.5        70    238.00        401.0    658.00      4860    551.6113    499.0966
## 40  Cbr_Cel        distant          mapped    3686    18298.4,22127.16      9262,10799        70    910.50       4530.5  21520.00   2037825  20212.7811  59280.8720
## 41  Cbr_Cel        distant       unaligned   46608     1674.24,1800.27       708,726.5         1    189.00        555.0   1257.00    540635   1737.2543   6941.2578
## 42  Cbr_Cel        distant         unmaped    3675     7856.75,9207.81     4321.5,4771         1   1059.00       3163.0   8405.00    540635   8532.2808  20887.2596
## 43  Cbr_Cni          close         aligned   51726     1418.23,1450.18     1037,1055.5        48    451.00        885.0   1639.00     42453   1434.2038   1854.0210
## 44  Cbr_Cni          close          bridge   41711       527.58,550.53     386.5,394.5         0    116.00        324.0    621.00     74381    539.0541   1195.3952
## 45  Cbr_Cni          close doublecoalesced    6677   13734.59,15563.37     3214.5,3861        52    376.00       1146.0   9736.00    619682  14648.9759  38114.9360
## 46  Cbr_Cni          close        isolated    5576       579.87,620.17       456.5,480        52    236.00        390.0    713.00     30395    600.0187    767.5146
## 47  Cbr_Cni          close          mapped   10015     9185.2,10119.87   2706.5,3085.5        52    350.50       1051.0   7105.00    527175   9652.5324  23859.0372
## 48  Cbr_Cni          close       unaligned   49877        622.3,650.97       398.5,407         1    120.00        336.0    674.00     95699    636.6340   1633.0832
## 49  Cbr_Cni          close         unmaped    9394      930.55,1042.82         489,525         1     52.00        341.0    974.75     95699    986.6839   2775.6234
## 50  Cbr_Cre   intermediate         aligned   44819       724.89,741.84         545,554        54    267.00        477.0    829.00     25357    733.3664    915.6811
## 51  Cbr_Cre   intermediate          bridge   41373     1145.76,1206.59     697.5,711.5         0    290.00        605.0   1106.00    248371   1176.1738   3156.3155
## 52  Cbr_Cre   intermediate doublecoalesced    3274   22486.72,27520.38   10373,12213.5        70   1006.50       5313.0  24194.00   1794548  25003.5464  73448.7989
## 53  Cbr_Cre   intermediate        isolated    1013       644.12,727.14         540,605        70    266.00        476.0    879.00      7030    685.6328    673.3018
## 54  Cbr_Cre   intermediate          mapped    3446   21350.32,25968.65   10091,11802.5        70    988.00       5078.5  23458.25   1794548  23659.4855  69137.1135
## 55  Cbr_Cre   intermediate       unaligned   44593     1585.87,1691.14       763.5,780         1    308.00        648.0   1239.00    285240   1638.5042   5670.9042
## 56  Cbr_Cre   intermediate         unmaped    3434     6561.56,7651.59       3659,4063         1    916.25       2630.0   7107.25    285240   7106.5754  16289.4619
## 57  Cni_Cbr          close         aligned   51324      1431.4,1463.42   1048.5,1067.5        43    456.00        891.0   1656.00     39397   1447.4074   1850.4720
## 58  Cni_Cbr          close          bridge   41487       623.34,656.79       385.5,394         0    101.00        323.0    618.00     85250    640.0672   1738.2862
## 59  Cni_Cbr          close doublecoalesced    6479   15081.78,17064.17   3471.5,4222.5        43    380.00       1125.0  10826.50    525245  16072.9728  40698.9749
## 60  Cni_Cbr          close        isolated    5492       571.77,605.96       457.5,481        43    234.00        394.0    713.00     12227    588.8616    646.2795
## 61  Cni_Cbr          close          mapped    9837     9757.63,10744.8   2882.5,3301.5        43    354.00       1040.0   7658.00    487247  10251.2150  24974.2632
## 62  Cni_Cbr          close       unaligned   49573     1047.92,1112.46       442,452.5         1    136.00        362.0    760.00    165349   1080.1929   3665.8063
## 63  Cni_Cbr          close         unmaped    9504      2693.7,2986.84       1136,1240         1    153.00        684.0   2449.50    165349   2840.2713   7289.6177
## 64  Cni_Cin        distant         aligned   41207       501.66,512.98       390.5,397        52    190.00        334.0    600.00     16541    507.3222    586.1596
## 65  Cni_Cin        distant          bridge   38347     1444.19,1565.89       669,687.5         0    190.00        532.0   1161.00    522515   1505.0371   6079.4902
## 66  Cni_Cin        distant doublecoalesced    2811   25278.56,30673.32   12221,14210.5        55   1198.00       6294.0  27444.00   1433591  27975.9431  72935.1950
## 67  Cni_Cin        distant        isolated     638        472.9,548.19     404.5,466.5        55    211.00        346.5    675.25      4665    510.5439    484.1822
## 68  Cni_Cin        distant          mapped    2860    24839.14,30139.1     12026,13958        55   1163.00       6199.0  26778.50   1433591  27489.1199  72275.8818
## 69  Cni_Cin        distant       unaligned   40942     2451.15,2746.36         764,787         1    213.00        592.0   1379.00   1062215   2598.7560  15238.1718
## 70  Cni_Cin        distant         unmaped    2853   15188.36,18940.35     6952.5,7869         1   1555.00       5117.0  13941.00   1062215  17064.3575  51103.4333
## 71  Cni_Cre   intermediate         aligned   44877        718.3,735.08         542,551        54    267.00        476.0    823.00     26910    726.6916    907.0093
## 72  Cni_Cre   intermediate          bridge   41389     1281.45,1383.01     694.5,708.5         0    286.00        610.0   1091.00    456231   1332.2298   5271.0991
## 73  Cni_Cre   intermediate doublecoalesced    3292   23984.77,29573.74 10398.5,12151.5        66    905.50       4619.5  24288.25   2187342  26779.2527  81775.6460
## 74  Cni_Cre   intermediate        isolated    1093        652.41,732.4       548,610.5        66    278.00        503.0    872.00      7539    692.4071    673.9110
## 75  Cni_Cre   intermediate          mapped    3488   22598.78,27717.39    9891.5,11617        66    879.75       4410.5  23256.25   2187342  25158.0849  77092.2783
## 76  Cni_Cre   intermediate       unaligned   44650     2013.55,2226.93         762,778         1    306.00        654.0   1228.00   1210598   2120.2391  11502.1956
## 77  Cni_Cre   intermediate         unmaped    3480   10172.19,12545.63     4457.5,4991         1    959.00       2977.5   8936.25   1210598  11358.9124  35705.8170
## 78  Dme_Dbu        distant         aligned   47893        610.2,623.09       463.5,472        48    205.00        397.0    731.00     16014    616.6455    719.1531
## 79  Dme_Dbu        distant          bridge   41934     1570.08,1628.57    999.5,1023.5         0    395.00        783.0   1684.00    174913   1599.3246   3055.7262
## 80  Dme_Dbu        distant doublecoalesced    5877    15727.3,17388.08     7514,8553.5        54    716.00       3233.0  17797.00    615658  16557.6888  32472.8670
## 81  Dme_Dbu        distant        isolated    2086       638.88,695.17         519,565        54    245.00        462.0    849.75      5460    667.0240    655.5648
## 82  Dme_Dbu        distant          mapped    5959   15413.96,17007.28     7325.5,8328        54    701.00       3134.0  17379.50    532037  16210.6196  31370.6132
## 83  Dme_Dbu        distant       unaligned   47867     2115.49,2325.33     1100,1125.5         1    424.00        853.0   1868.00   1031382   2220.4080  11711.7224
## 84  Dme_Dbu        distant         unmaped    5951     5780.24,7400.13     2069.5,2221         1    772.50       1602.0   3679.50   1031382   6590.1850  31872.2350
## 85  Dme_Dma same_or_sister         aligned    9732   10954.29,11804.24       5294,5971        49    404.00       1953.0  13037.50    291858  11379.2660  21387.8094
## 86  Dme_Dma same_or_sister          bridge    7026      884.17,1011.76       270.5,296         0     25.00        183.0    479.00     67561    947.9668   2727.8052
## 87  Dme_Dma same_or_sister doublecoalesced    2272   29884.44,73666.11         623,729        56    240.00        458.5   1274.25  15422642  51775.2720 532092.2108
## 88  Dme_Dma same_or_sister        isolated    2008        492.3,638.59       369,398.5        56    199.00        333.5    574.25     60697    565.4417   1671.3122
## 89  Dme_Dma same_or_sister          mapped    2706   32527.05,54245.63       667,793.5        56    237.25        465.0   1472.75   8110313  43386.3385 288086.3839
## 90  Dme_Dma same_or_sister       unaligned    9138     2583.15,3115.67       521.5,592         1     71.00        276.0   1299.00    545408   2849.4082  12984.5839
## 91  Dme_Dma same_or_sister         unmaped    2616     6516.25,8298.34     2456,2975.5         1    266.75       1311.0   5929.50    545408   7407.2924  23241.7218
## 92  Dme_Dsu   intermediate         aligned   68750        1168.64,1187     931.5,942.5        50    553.00        843.0   1333.00     33370   1177.8198   1227.9791
## 93  Dme_Dsu   intermediate          bridge   67758       327.89,343.23     221.5,223.5         0    148.00        207.0    298.00     76314    335.5584   1018.9091
## 94  Dme_Dsu   intermediate doublecoalesced     878 103687.77,132644.64   45163.5,67214        77    665.75       7701.0 134819.75   1457965 118166.2050 218585.3124
## 95  Dme_Dsu   intermediate        isolated     379       639.68,784.97       522.5,637        77    264.50        498.0    895.50      7094    712.3245    719.2297
## 96  Dme_Dsu   intermediate          mapped     992  92498.83,116597.69   39379,60320.5        77    661.50       7251.5 122432.00   1338161 104548.2621 193394.2339
## 97  Dme_Dsu   intermediate       unaligned   68626       665.01,916.59     223.5,225.5         1    149.00        209.0    303.00   2615430    790.8018  16812.8419
## 98  Dme_Dsu   intermediate         unmaped     984   23497.61,40593.44     1173,1914.5         1    313.75        693.5   4526.00   2615430  32045.5264 136638.9172
## 99  Dme_Dya          close         aligned   23059     4475.07,4642.76     3052,3167.5        60    919.00       2264.0   5468.00     92398   4558.9142   6495.6166
## 100 Dme_Dya          close          bridge   21243        480.45,527.1         213,218         0    123.00        191.0    309.00     75233    503.7723   1734.2930
## 101 Dme_Dya          close doublecoalesced    1582   56273.14,90367.16      764.5,1000        63    243.25        500.0   1919.25   6070020  73320.1492 345677.1415
## 102 Dme_Dya          close        isolated    1314       536.32,649.82       390.5,436        63    203.00        335.0    646.00     25871    593.0723   1048.5917
## 103 Dme_Dya          close          mapped    1816    51540.18,76021.1    913.5,1292.5        63    247.75        531.0   2852.50   4893795  63780.6382 265960.8921
## 104 Dme_Dya          close       unaligned   22743     1251.61,1518.29       232,238.5         1    129.00        202.0    360.00    859148   1384.9524  10259.1500
## 105 Dme_Dya          close         unmaped    1776   10099.58,13319.72       4087,5246         1    407.50       2005.5  10593.00    859148  11709.6492  34595.6295
## 106 Oki_Aom        distant         aligned   32398      970.18,1004.75         599,616        55    257.25        459.0   1014.00     38135    987.4651   1587.6967
## 107 Oki_Aom        distant          bridge   23534       556.31,599.03       329.5,340         0     91.00        278.0    548.00     81689    577.6661   1671.8453
## 108 Oki_Aom        distant doublecoalesced    8176     5354.77,5893.45     2325,2534.5        64    324.75       1434.0   4868.50    216565   5624.1087  12423.8043
## 109 Oki_Aom        distant        isolated    4586         723.3,801.8       432.5,474        55    184.00        322.0    803.75     23204    762.5497   1355.7858
## 110 Oki_Aom        distant          mapped    8864     4909.98,5375.83     2150.5,2340        55    310.75       1291.5   4535.00    216565   5142.9025  11187.1566
## 111 Oki_Aom        distant       unaligned   32175      925.18,1015.58         400,413         1    129.00        313.0    710.00    189661    970.3809   4136.8088
## 112 Oki_Aom        distant         unmaped    8848     1839.99,2144.46     786.5,848.5         1    176.00        497.0   1577.50    189661   1992.2255   7305.3661
## 113 Oki_Bar        distant         aligned   30841     1005.87,1041.09       652,671.5        51    261.00        494.0   1118.00     43759   1023.4778   1577.7563
## 114 Oki_Bar        distant          bridge   22308        589.8,629.34       359.5,370         0    165.00        302.0    583.00     51342    609.5723   1506.3415
## 115 Oki_Bar        distant doublecoalesced    7876     5494.42,6064.46   2353.5,2569.5        54    316.00       1438.0   4926.75    216563   5779.4397  12903.7318
## 116 Oki_Bar        distant        isolated    4435       691.58,762.42       423,463.5        54    184.50        316.0    775.00     21231    726.9998   1203.0890
## 117 Oki_Bar        distant          mapped    8533     5044.56,5541.03       2182,2374        54    302.00       1312.0   4580.00    168062   5292.7949  11697.6288
## 118 Oki_Bar        distant       unaligned   30684      969.67,1068.66       426,439.5         1    172.00        333.0    744.00    283076   1019.1656   4423.5389
## 119 Oki_Bar        distant         unmaped    8515     1906.94,2244.26       780,845.5         1    182.00        503.0   1572.50    283076   2075.6006   7939.4455
## 120 Oki_Kum same_or_sister         aligned   15086        3444.82,3615     2263.5,2378        44    601.00       1598.0   4234.00     72046   3529.9073   5331.9172
## 121 Oki_Kum same_or_sister          bridge    9662       554.25,637.24       311.5,332         0      7.00        180.0    497.00     49765    595.7452   2080.8174
## 122 Oki_Kum same_or_sister doublecoalesced    1625   32119.88,41848.39  8992.5,12752.5        48    427.00       2193.0  27502.00   1365817  36984.1342  99970.3102
## 123 Oki_Kum same_or_sister        isolated    2993     1640.94,1904.58       726.5,808        44    321.00        597.0   1305.00     58461   1772.7618   3677.9126
## 124 Oki_Kum same_or_sister          mapped    5424   10370.48,11387.73     5736,6459.5        44    542.75       2506.0  12995.25    262072  10879.1062  19107.8473
## 125 Oki_Kum same_or_sister       unaligned   12386        790.2,882.38         309,329         1     25.00        211.0    614.00     53783    836.2917   2616.6886
## 126 Oki_Kum same_or_sister         unmaped    4119      1018.53,1216.1     305.5,364.5         1      8.00        121.0    794.00     53783   1117.3146   3233.7276
## 127 Oki_Nor        distant         aligned   30055      974.05,1006.36     656.5,675.5        52    261.00        502.0   1122.00     43752    990.2015   1428.7816
## 128 Oki_Nor        distant          bridge   21180       616.65,658.38     369.5,381.5         0    171.00        305.0    607.00     83492    637.5155   1549.4677
## 129 Oki_Nor        distant doublecoalesced    8280     5027.56,5504.15   2359.5,2552.5        54    365.75       1530.0   4800.25    216563   5265.8556  11061.6823
## 130 Oki_Nor        distant        isolated    4578       769.55,842.95         498,544        54    190.00        357.0    926.25     17200    806.2532   1266.6301
## 131 Oki_Nor        distant          mapped    8875     4662.56,5086.86     2202.5,2381        54    340.50       1408.0   4490.50    216563   4874.7136  10195.7615
## 132 Oki_Nor        distant       unaligned   29897     1065.36,1166.48     462.5,478.5         1    178.00        348.0    824.00    280268   1115.9183   4460.4825
## 133 Oki_Nor        distant         unmaped    8854     2082.17,2403.95    951.5,1028.5         1    194.00        596.0   1929.75    280268   2243.0576   7723.0016
## 134 Oki_Osa        distant         aligned   30852     1042.47,1080.36       642.5,662        56    273.00        487.0   1098.25     38135   1061.4169   1697.9866
## 135 Oki_Osa        distant          bridge   22009       554.32,592.64     333.5,344.5         0     75.00        284.0    559.00     47969    573.4841   1450.1810
## 136 Oki_Osa        distant doublecoalesced    8132     5358.65,5881.01   2385.5,2593.5        67    338.75       1489.0   4944.25    164824   5619.8308  12014.8889
## 137 Oki_Osa        distant        isolated    4646        780.7,861.56       471.5,518        61    195.00        345.5    888.75     24353    821.1334   1405.6672
## 138 Oki_Osa        distant          mapped    8843     4903.95,5356.97     2195,2380.5        61    324.00       1358.0   4561.00    164824   5130.4588  10866.2362
## 139 Oki_Osa        distant       unaligned   30636      937.76,1034.33       403,416.5         1    114.00        318.0    722.00    211398    986.0471   4311.6897
## 140 Oki_Osa        distant         unmaped    8821     1834.91,2152.56     757.5,819.5         1    167.00        475.0   1543.00    211398   1993.7338   7609.5710
## 141 Osa_Aom same_or_sister         aligned   15261     3005.27,3189.26       1625,1709        44    501.00       1142.0   3015.00    112674   3097.2689   5797.9303
## 142 Osa_Aom same_or_sister          bridge   10311       327.61,359.27       263,276.5         0     12.00        186.0    392.00     31147    343.4384    820.0688
## 143 Osa_Aom same_or_sister doublecoalesced    2256   18388.65,27235.22   3626.5,4766.5        46    467.75       1569.0  10310.00   2627520  22811.9331 107135.4024
## 144 Osa_Aom same_or_sister        isolated    2791     1152.21,1353.59         619,670        46    297.50        546.0   1015.50     37928   1252.9022   2712.9315
## 145 Osa_Aom same_or_sister          mapped    4950    9581.21,10947.52       3444,4108        46    487.25       1441.0   8766.75    394294  10264.3665  24517.1044
## 146 Osa_Aom same_or_sister       unaligned   13147       536.81,588.57       281.5,297         1     34.00        219.0    539.50     37950    562.6859   1513.9013
## 147 Osa_Aom same_or_sister         unmaped    4063       876.8,1021.52         375,431         1     10.00        200.0    868.00     37950    949.1605   2352.5788
## 148 Osa_Bar   intermediate         aligned   24430     1555.18,1626.72     870.5,900.5        50    372.00        651.0   1500.00     49309   1590.9494   2852.6515
## 149 Osa_Bar   intermediate          bridge   20349       374.78,396.54       276.5,283         0    121.00        257.0    400.00     34004    385.6573    791.8057
## 150 Osa_Bar   intermediate doublecoalesced    3358   12551.79,15470.77     4230,4939.5        59    596.25       2474.0   9656.25    735649  14011.2802  43135.7029
## 151 Osa_Bar   intermediate        isolated    1757       837.18,973.65       570.5,635        59    282.00        487.0    962.00     19831    905.4104   1458.2906
## 152 Osa_Bar   intermediate          mapped    4081    10494.2,12399.52     3967,4547.5        59    588.00       2321.0   8849.00    541207  11446.8598  31041.5565
## 153 Osa_Bar   intermediate       unaligned   24028          628,681.41       293,300.5         1    128.00        267.0    437.25     79670    654.7069   2111.8623
##  [ reached 'max' / getOption("max.print") -- omitted 78 rows ]
widths_all_tibble |>
    group_by(what, class, genus) |>
    summarise(across(width, stats_to_compute)) |> as.data.frame()
## `summarise()` has grouped output by 'what', 'class'. You can override using the `.groups` argument.
##               what           class          genus width_n          width_t_CI width_wilcox_CI width_min width_q25 width_median width_q75 width_max  width_mean    width_sd
## 1          aligned  same_or_sister     Drosophila    9732   10954.29,11804.24       5294,5971        49    404.00       1953.0  13037.50    291858  11379.2660  21387.8094
## 2          aligned  same_or_sister          Ciona   40659     2534.44,2612.26     1669.5,1716        43    498.00       1251.0   2974.00     97390   2573.3486   4003.2243
## 3          aligned  same_or_sister     Oikopleura   45531     3144.09,3240.23     1908,1965.5        40    536.00       1328.0   3549.50    112674   3192.1570   5233.1119
## 4          aligned           close Caenorhabditis  103050     1429.47,1452.09   1045.5,1058.5        43    453.00        888.0   1649.00     42453   1440.7798   1852.2570
## 5          aligned           close     Drosophila   23059     4475.07,4642.76     3052,3167.5        60    919.00       2264.0   5468.00     92398   4558.9142   6495.6166
## 6          aligned           close          Ciona  176589     1497.75,1517.72     1055,1067.5        47    366.00        823.0   1784.00     83419   1507.7380   2140.7792
## 7          aligned    intermediate Caenorhabditis   89696       724.06,735.99       545,551.5        54    267.00        477.0    826.00     26910    730.0269    911.3537
## 8          aligned    intermediate     Drosophila   68750        1168.64,1187     931.5,942.5        50    553.00        843.0   1333.00     33370   1177.8198   1227.9791
## 9          aligned    intermediate     Oikopleura   98495     1547.95,1582.22     876.5,891.5        50    371.00        650.0   1497.50     54452   1565.0848   2743.5154
## 10         aligned         distant Caenorhabditis   88219       515.15,523.07       403.5,408        52    201.00        349.0    611.50     23152    519.1140    600.0351
## 11         aligned         distant     Drosophila   47893        610.2,623.09       463.5,472        48    205.00        397.0    731.00     16014    616.6455    719.1531
## 12         aligned         distant          Ciona  138664        243.89,247.2       179,179.5        60    137.00        170.0    221.00     25919    245.5469    314.0168
## 13         aligned         distant     Oikopleura  250472      995.75,1008.06       630,636.5        51    259.00        474.0   1065.00     43759   1001.9047   1572.4288
## 14         aligned different_genus           <NA>    4474       320.97,361.86         227,243        78    156.00        193.0    356.75     30669    341.4119    697.5420
## 15          bridge  same_or_sister     Drosophila    7026      884.17,1011.76       270.5,296         0     25.00        183.0    479.00     67561    947.9668   2727.8052
## 16          bridge  same_or_sister          Ciona   32958       364.42,391.57     231.5,237.5         0     30.00        195.5    340.00     97102    377.9965   1257.6133
## 17          bridge  same_or_sister     Oikopleura   28969       428.39,460.65     284.5,295.5         0      6.00        169.0    431.00     49765    444.5169   1400.7196
## 18          bridge           close Caenorhabditis   83198       579.29,599.56         387,393         0    109.00        324.0    619.00     85250    589.4247   1491.8728
## 19          bridge           close     Drosophila   21243        480.45,527.1         213,218         0    123.00        191.0    309.00     75233    503.7723   1734.2930
## 20          bridge           close          Ciona  162242       378.06,390.49       226.5,229         0     51.00        201.0    343.00     90560    384.2791   1277.3452
## 21          bridge    intermediate Caenorhabditis   82762     1224.61,1283.82         698,708         0    288.00        608.0   1097.75    456231   1254.2169   4345.2261
## 22          bridge    intermediate     Drosophila   67758       327.89,343.23     221.5,223.5         0    148.00        207.0    298.00     76314    335.5584   1018.9091
## 23          bridge    intermediate     Oikopleura   80917       352.82,364.58       236,239.5         0     58.00        222.0    369.00     38380    358.6999    853.0667
## 24          bridge         distant Caenorhabditis   81673     1281.22,1347.01       647,658.5         0    176.00        511.0   1114.00    522515   1314.1121   4796.4155
## 25          bridge         distant     Drosophila   41934     1570.08,1628.57    999.5,1023.5         0    395.00        783.0   1684.00    174913   1599.3246   3055.7262
## 26          bridge         distant          Ciona  130475      1026.7,1052.15         599,607         0    293.00        465.0    979.00    175994   1039.4208   2345.1046
## 27          bridge         distant     Oikopleura  180723       512.66,524.69         329,332         0    146.00        286.0    511.00    135099    518.6746   1304.3395
## 28          bridge different_genus           <NA>    1766     1652.73,2053.32        992,1121         0    372.00        775.0   1817.00     81648   1853.0238   4291.6322
## 29 doublecoalesced  same_or_sister     Drosophila    2272   29884.44,73666.11         623,729        56    240.00        458.5   1274.25  15422642  51775.2720 532092.2108
## 30 doublecoalesced  same_or_sister          Ciona    4224   25242.83,30881.69   1803.5,2091.5        50    491.00       1135.0   4150.50   1322492  28062.2607  93465.4230
## 31 doublecoalesced  same_or_sister     Oikopleura    8200   17889.71,21195.14     3340,3847.5        40    496.75       1606.0   8502.50   2627520  19542.4283  76347.1262
## 32 doublecoalesced           close Caenorhabditis   13156    14676.7,16023.81     3427,3922.5        43    378.00       1134.0  10147.50    619682  15350.2587  39413.6205
## 33 doublecoalesced           close     Drosophila    1582   56273.14,90367.16      764.5,1000        63    243.25        500.0   1919.25   6070020  73320.1492 345677.1415
## 34 doublecoalesced           close          Ciona    9517   32836.42,36892.14     3058,3590.5        52    466.00       1304.0  10497.00   1394106  34864.2812 100921.6066
## 35 doublecoalesced    intermediate Caenorhabditis    6566   24013.26,27774.41 10631.5,11907.5        66    960.25       4979.0  24232.25   2187342  25893.8335  77734.3638
## 36 doublecoalesced    intermediate     Drosophila     878 103687.77,132644.64   45163.5,67214        77    665.75       7701.0 134819.75   1457965 118166.2050 218585.3124
## 37 doublecoalesced    intermediate     Oikopleura   14683   11896.19,13162.07     4135,4423.5        50    612.00       2400.0   8719.50   1199960  12529.1297  39127.8040
## 38 doublecoalesced         distant Caenorhabditis    6357    22488.2,25763.17   11019.5,12195        55   1051.00       5394.0  24273.00   2037825  24125.6822  66599.7874
## 39 doublecoalesced         distant     Drosophila    5877    15727.3,17388.08     7514,8553.5        54    716.00       3233.0  17797.00    615658  16557.6888  32472.8670
## 40 doublecoalesced         distant          Ciona    8179    20048.5,21449.54 12801.5,13788.5        61   2215.00       8371.0  25468.50    526016  20749.0219  32319.1280
## 41 doublecoalesced         distant     Oikopleura   64372     5287.56,5469.02   2349.5,2420.5        51    327.00       1439.0   4730.00    218004   5378.2904  11744.8510
## 42 doublecoalesced different_genus           <NA>    2704     1584.88,1965.09     654.5,733.5        78    189.00        469.5   1252.00     81983   1774.9852   5041.5504
## 43        isolated  same_or_sister     Drosophila    2008        492.3,638.59       369,398.5        56    199.00        333.5    574.25     60697    565.4417   1671.3122
## 44        isolated  same_or_sister          Ciona    4830     1135.75,1228.97       830.5,883        45    350.00        697.0   1381.00     35993   1182.3617   1652.3764
## 45        isolated  same_or_sister     Oikopleura    9666     1389.58,1507.41       725,763.5        40    312.00        594.0   1239.50     58461   1448.4957   2954.8151
## 46        isolated           close Caenorhabditis   11068       581.25,607.71       460.5,477        43    235.00        392.0    713.00     30395    594.4825    709.9395
## 47        isolated           close     Drosophila    1314       536.32,649.82       390.5,436        63    203.00        335.0    646.00     25871    593.0723   1048.5917
## 48        isolated           close          Ciona    7994       789.28,830.11     601.5,630.5        50    259.00        512.0    967.75     14541    809.6955    931.1006
## 49        isolated    intermediate Caenorhabditis    2106       660.37,717.93     553.5,598.5        66    271.25        488.5    874.00      7539    689.1486    673.4665
## 50        isolated    intermediate     Drosophila     379       639.68,784.97       522.5,637        77    264.50        498.0    895.50      7094    712.3245    719.2297
## 51        isolated    intermediate     Oikopleura    7663       930.69,996.17       625,658.5        50    289.00        511.0   1041.00     21037    963.4338   1462.0853
## 52        isolated         distant Caenorhabditis    1631       511.58,559.52       426,464.5        55    226.00        384.0    667.50      4860    535.5469    493.5742
## 53        isolated         distant     Drosophila    2086       638.88,695.17         519,565        54    245.00        462.0    849.75      5460    667.0240    655.5648
## 54        isolated         distant          Ciona    1386       582.66,684.67         400,451        61    173.00        275.5    708.25     12900    633.6652    967.9353
## 55        isolated         distant     Oikopleura   36058       742.77,768.87     454.5,469.5        51    185.00        324.0    822.00     24353    755.8199   1264.6923
## 56        isolated different_genus           <NA>    1933       402.17,472.71       330.5,361        78    169.00        251.0    550.00     30669    437.4397    790.7655
## 57          mapped  same_or_sister     Drosophila    2706   32527.05,54245.63       667,793.5        56    237.25        465.0   1472.75   8110313  43386.3385 288086.3839
## 58          mapped  same_or_sister          Ciona    7701   14397.07,16011.39     3100,3677.5        45    532.00       1461.0   9053.00    443768  15204.2322  36134.0852
## 59          mapped  same_or_sister     Oikopleura   16562      9236.61,9869.7     3755.5,4081        40    502.00       1591.0   8854.25    394294   9553.1524  20783.1448
## 60          mapped           close Caenorhabditis   19852     9609.48,10288.9     2849,3126.5        43    352.00       1044.0   7384.25    527175   9949.1897  24419.2368
## 61          mapped           close     Drosophila    1816    51540.18,76021.1    913.5,1292.5        63    247.75        531.0   2852.50   4893795  63780.6382 265960.8921
## 62          mapped           close          Ciona   14347   21986.59,23820.35       3930,4585        50    446.00       1392.0  13495.50    730624  22903.4749  56028.4109
## 63          mapped    intermediate Caenorhabditis    6934   22689.03,26137.62   10252,11447.5        66    937.00       4731.5  23335.75   2187342  24413.3238  73245.4207
## 64          mapped    intermediate     Drosophila     992  92498.83,116597.69   39379,60320.5        77    661.50       7251.5 122432.00   1338161 104548.2621 193394.2339
## 65          mapped    intermediate     Oikopleura   17578   10009.24,10832.49       3847,4098        50    595.00       2231.5   8118.75    677793  10420.8637  27842.3288
## 66          mapped         distant Caenorhabditis    6546   21807.96,24975.79 10728.5,11877.5        55   1026.25       5201.5  23638.50   2037825  23391.8720  65371.9749
## 67          mapped         distant     Drosophila    5959   15413.96,17007.28     7325.5,8328        54    701.00       3134.0  17379.50    532037  16210.6196  31370.6132
## 68          mapped         distant          Ciona    8189   20019.71,21418.06 12785.5,13770.5        61   2203.00       8358.0  25455.00    526016  20718.8846  32276.7284
## 69          mapped         distant     Oikopleura   69749     4862.84,5020.76   2181.5,2245.5        51    311.00       1311.0   4411.00    218004   4941.7983  10639.7762
## 70          mapped different_genus           <NA>    2708     1582.66,1962.34         653,731        78    189.00        469.5   1243.00     81983   1772.4952   5038.1306
## 71       unaligned  same_or_sister     Drosophila    9138     2583.15,3115.67       521.5,592         1     71.00        276.0   1299.00    545408   2849.4082  12984.5839
## 72       unaligned  same_or_sister          Ciona   36133       640.21,686.43         262,269         1     71.00        220.0    444.00    169778    663.3193   2240.8260
## 73       unaligned  same_or_sister     Oikopleura   37958       705.33,746.63       316.5,328         1     27.00        220.5    622.75     56097    725.9802   2053.0616
## 74       unaligned           close Caenorhabditis   99450       840.06,875.41       420,426.5         1    128.00        349.0    711.00    165349    857.7356   2843.4470
## 75       unaligned           close     Drosophila   22743     1251.61,1518.29       232,238.5         1    129.00        202.0    360.00    859148   1384.9524  10259.1500
## 76       unaligned           close          Ciona  168424       592.73,619.35       239.5,242         1     75.00        212.0    382.00    222284    606.0407   2786.3012
## 77       unaligned    intermediate Caenorhabditis   89243        1820,1939.05       765,776.5         1    307.00        651.0   1234.00   1210598   1879.5255   9072.9767
## 78       unaligned    intermediate     Drosophila   68626       665.01,916.59     223.5,225.5         1    149.00        209.0    303.00   2615430    790.8018  16812.8419
## 79       unaligned    intermediate     Oikopleura   96501       649.84,681.14     257.5,261.5         1     72.00        233.0    410.00    106962    665.4904   2480.1362
## 80       unaligned         distant Caenorhabditis   87550     2063.33,2216.93       736,750.5         1    201.00        571.0   1305.00   1062215   2140.1281  11593.9599
## 81       unaligned         distant     Drosophila   47867     2115.49,2325.33     1100,1125.5         1    424.00        853.0   1868.00   1031382   2220.4080  11711.7224
## 82       unaligned         distant          Ciona  138550     1507.32,1563.34         673,683         1    301.00        497.0   1147.00    340903   1535.3326   5319.4596
## 83       unaligned         distant     Oikopleura  248924       864.59,895.56     380.5,384.5         1    154.00        309.0    639.00    283076    880.0764   3941.8615
## 84       unaligned different_genus           <NA>    4443    51033.47,56289.1 31315.5,35664.5         1   1059.00      12496.0  72354.50   1318064  53661.2845  89344.3051
## 85         unmaped  same_or_sister     Drosophila    2616     6516.25,8298.34     2456,2975.5         1    266.75       1311.0   5929.50    545408   7407.2924  23241.7218
## 86         unmaped  same_or_sister          Ciona    6304     1718.19,1933.18    905.5,1002.5         1     24.00        521.0   1977.50    169778   1825.6851   4353.8685
## 87         unmaped  same_or_sister     Oikopleura   13396      1049.23,1142.4     434.5,471.5         1      9.00        209.0    989.25     56097   1095.8156   2750.6950
## 88         unmaped           close Caenorhabditis   18898     1838.96,1998.78     735.5,779.5         1     89.00        473.0   1517.00    165349   1918.8722   5604.5425
## 89         unmaped           close     Drosophila    1776   10099.58,13319.72       4087,5246         1    407.50       2005.5  10593.00    859148  11709.6492  34595.6295
## 90         unmaped           close          Ciona   13298     2842.81,3131.98       1138,1230         1     82.00        595.0   2508.75    222284   2987.3928   8505.9641
## 91         unmaped    intermediate Caenorhabditis    6914     8589.35,9904.43       4090,4416         1    934.25       2774.5   7995.25   1210598   9246.8896  27890.7597
## 92         unmaped    intermediate     Drosophila     984   23497.61,40593.44     1173,1914.5         1    313.75        693.5   4526.00   2615430  32045.5264 136638.9172
## 93         unmaped    intermediate     Oikopleura   17332     1951.06,2110.28       647.5,699         1     92.00        321.0   1489.00    106962   2030.6699   5347.2715
## 94         unmaped         distant Caenorhabditis    6528   11351.82,13170.47   5358.5,5799.5         1   1237.75       3847.0  10231.50   1062215  12261.1434  37478.3028
## 95         unmaped         distant     Drosophila    5951     5780.24,7400.13     2069.5,2221         1    772.50       1602.0   3679.50   1031382   6590.1850  31872.2350
## 96         unmaped         distant          Ciona    8137     9082.69,9868.25       5618,5949         1   1916.00       4319.0   9995.00    340903   9475.4708  18074.6835
## 97         unmaped         distant     Oikopleura   69607      1748.1,1853.14     675.5,694.5         1    160.00        430.0   1319.00    283076   1800.6193   7069.6154
## 98         unmaped different_genus           <NA>    2679   83929.15,91617.44 66459.5,73084.5         1  19851.50      55753.0 119267.50   1318064  87773.2912 101470.9185
options(old.o)
detach('package:dplyr')

Width distribution of aligned and mapped regions

Aligned regions

Whole genomes

Aligned means “before coalescing” and mapped means “after coalescing”.

gg_freq_poly(widths$gbs) +
  aes(width, after_stat(density), col=name) +
  facet_wrap(~genus + class, scales = "free_y") +
  ggtitle("Distribution of aligned region widths") +
  scale_color_manual(values = names_table$class)

gg_freq_poly(widths$gbs |> dplyr::filter(genus == "Oikopleura")) +
  aes(col=name) +
  facet_wrap(~dist, scales = "free_y", nrow = 1) +
  ggtitle("Distribution of aligned region widths") +
  scale_color_manual(values = names_table$query)

gg_freq_poly(widths$gbs) +
  aes(width, after_stat(density), col=class) +
  facet_wrap(~genus, scales = "free_y", ncol = 1) +
  ggtitle("Distribution of aligned region widths")

Faceted by arms

The distribution of widths is not the same on long and short arms.

Note that for this reason, it would be unwise to filter by width on some operations such as coalescing.

gg_freq_poly(widths$gbs) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of aligned region widths")

gg_freq_poly(widths$gbs |> subset(chr == "chr1")) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of aligned region widths on chr1")

Width distribution per arm follows similar pattern on chr2 (first graph). Pairs at a similar evolutionary distance show similar profile, with the exception of Osa_Aom, where the number of alignments on short arms is higher. Nevertheless, the trend of widths being longer on long arms stay true, so we can average them in the plot below.

gg_freq_poly(widths$gbs |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of alignment widths before collapsing, split by distance group")

Unaligned regions

Unaligned regions show no difference of width between arms, except perhaps at the “same population” level.

gg_freq_poly(widths$unal) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of unaligned region widths")

gg_freq_poly(widths$unal |> subset(chr == "chr1")) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of unaligned region widths")

gg_freq_poly(widths$unal |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", ncol=1) +
  ggtitle("Distribution of unaligned region widths") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Interestingly, only in Oikopleura do the width distributions from pairs of different distance peak at the same size. Could that be because the genome is very compact?

gg_freq_poly(widths$unal |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=name) +
  facet_wrap(~dist, scales = "free_y", ncol=1) +
  ggtitle("Distribution of unaligned region widths") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")  +
  scale_color_manual(values = names_table$query)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

gg_freq_poly(widths$unal) +
  aes(col=class) +
  facet_wrap(~genus, scales = "free_y", ncol=1) +
  ggtitle("Distribution of unaligned region widths") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Mapped (coalesced) regions

After coalescing, some width distribution become clearly bimodal. The distributions are hard to compare between genera and between evolutionary distance groups, and even within group for the “same population” Oikopleura.

The plots below display the data in details for the reader to make their opinion about the above statement.

Whole data

Width distributions.
# Using after_stat(density) to make more meaningful comparisons between genus
gg_freq_poly(widths$coa) +
  aes(width, after_stat(density), col=name) +
  facet_wrap(~genus + class, scales = "free_y") +
  ggtitle("Distribution of mapped region widths") +
  scale_color_manual(values = names_table$query)

Faceting by genus only. Why is North / North (“intermediate”) not bimodal?

gg_freq_poly(widths$coa) +
  aes(width, after_stat(density), col=class) +
  facet_wrap(~genus, scales = "free_y", ncol = 1) +
  ggtitle("Distribution of mapped region widths")

What is in the first peak of the bimodal distributions ?

Empirical cumulative distributions
ggplot(widths$coa) + aes(width, col = genus) + stat_ecdf() + scale_x_log10(limits = c(50,5e6)) + facet_wrap(~class)
## Warning: Removed 15 rows containing non-finite values (`stat_ecdf()`).

ggplot(widths$coa) + aes(width, col = class) + stat_ecdf() + scale_x_log10(limits = c(50,5e6)) + facet_wrap(~genus)
## Warning: Removed 15 rows containing non-finite values (`stat_ecdf()`).

Cumulative sums.

Note: there is a bug: the lines jump between replicates instead of plotting them separately

widths$coa |>
  dplyr::arrange(name, width) |>
  dplyr::group_by(name) |>
  dplyr::mutate(cumsum = cumsum(width)) |>
  ggplot() + facet_wrap(~class) + aes(x = width, y = cumsum, col = genus) + geom_line() + scale_x_log10() + scale_y_log10()

widths$coa |>
  dplyr::arrange(name, width) |>
  dplyr::group_by(name) |>
  dplyr::mutate(cumsum = cumsum(width)) |>
  ggplot() + facet_wrap(~genus) + aes(x = width, y = cumsum, col = class) + geom_line() + scale_x_log10() + scale_y_log10()

By arm

When faceted by chromosome and arm type, bimodality is also observed. There is a small peak under 500 bp, found on all arms except YSR. The second peak shows two different distributions for long and short arms.

gg_freq_poly(widths$coa) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of alignment widths after collapsing, split by arm.")

gg_freq_poly(widths$coa) +
  aes(col=flag) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of alignment widths after collapsing, split by flag.")

gg_freq_poly(widths$coa ) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of alignment widths on chr1 after collapsing, split by genome pair")+
  geom_vline(xintercept = 500)

gg_freq_poly(widths$coa |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", ncol=1) +
  ggtitle("Distribution of alignment widths after collapsing, split by distance group")

gg_freq_poly(widths$coa |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=flag) +
  facet_wrap(~class + flag, scales = "free_y") +
  ggtitle("Distribution of alignment widths after collapsing, split by distance group")

Therefore, if the first peak represented events that we do not want to consider breaking synteny (either alignment artefacts or insertion/transpositions of short mobile elements), we could re-coalesce once after removing the shortest intervals. We tried some width cutoffs on the coalescing step, but at 500 bp, the coverage of the genome by the resulting mapped regions is already decreasing (vignette("CoalescingCutoffs", package = "OikScrambling")).

Non-coalesced mapped regions

The mapped regions that are found identical in the aligned set are those that were not coalesced… Are they the small peak ?

widths$coa$nonCoa <- sapply(coa, \(x) x$nonCoa) |> unlist()

Focus on Oki – North comparisons

p_all <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "Oki – North")) +
  aes(col=name) +
  facet_wrap(~dist, scales = "free_y", nrow = 1) +
  ggtitle("Distribution of mapped region widths (vline at 600)") + theme_bw() +
  geom_vline(xintercept = 600)

p_coa_map <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "Oki – North", Arm %in% c("short", "long"))) +
  aes(col=name) +
  ggtitle("FALSE = collapsed, TRUE = non-collapsed") + theme_bw() +
  geom_vline(xintercept = 600) +
  facet_wrap(~nonCoa + Arm)

p_all + p_coa_map + plot_layout(guides = 'collect') + theme_bw()

The short-width mapped peak is made of aligned regions that can not be coalesced.

We know from vignette("RepeatRegions", package = "OikScrambling") that these short mapped regions are not repeats.

Unmapped regions

Unmapped regions are a bit longer on the short arm in Oki – North comparisons…

gg_freq_poly(widths$unmap) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of unmapped regions width after collapsing, split by arm.")

gg_freq_poly(widths$unmap) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of unmapped regions width after collapsing, split by genome pair")

gg_freq_poly(widths$unmap |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of unmapped region widths after collapsing, split by distance group") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Bridge regions

Note the short peak when comparing pairs within the same population.

gg_freq_poly(widths$bri) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of widths of bridge regions, split by arm.")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 22514 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(widths$bri) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of widths of bridge regions, split by genome pair")
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 22514 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(widths$bri |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of widths of bridge regions, split by distance group") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 5425 rows containing non-finite values (`stat_bin()`).

Translocated regions

Whole region

Reminder: here we measure the distance between the two colinear pairs that are separated by the translocated pair.

gg_freq_poly(widths$tra) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of widths of translocated regions, split by arm.")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 216 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(widths$tra) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of widths of translocated regions, split by genome pair")
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 216 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(widths$tra |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of widths of translocated regions, split by distance group") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 125 rows containing non-finite values (`stat_bin()`).

Whole region, after removal

Small peak in “same pop” disapears by design. Total number in the main peak also decreases, because they may pass the size filter on the query genome.

gg_freq_poly(widths$tra2) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of widths of translocated regions (after removal), split by arm.")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 10 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(widths$tra2) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of widths of translocated regions (after removal), split by genome pair")
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 10 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(widths$tra2 |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of widths of translocated regions (after removal), split by distance group") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2 rows containing non-finite values (`stat_bin()`).

Aligned, before removal

widths$tra_coa_aln   <- makeWidthsDF(sapply(coa, \(gb) gb |> flagTranslocations() |> filterTranslocations())) |> tibble::as_tibble()

gg_freq_poly(widths$tra_coa_aln) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of widths of translocated regions (aligned), split by arm.")

gg_freq_poly(widths$tra_coa_aln) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of widths of translocated regions (aligned), split by genome pair")

gg_freq_poly(widths$tra_coa_aln |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of widths of translocated regions (aligned), split by distance group") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Aligned, after removal

widths$tra_coa2_aln   <- makeWidthsDF(sapply(coa2, \(gb) gb |> flagTranslocations() |> filterTranslocations())) |> tibble::as_tibble()

gg_freq_poly(widths$tra_coa2_aln) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of widths of translocated regions (aligned, after removal), split by arm.")

gg_freq_poly(widths$tra_coa2_aln) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of widths of translocated regions (aligned, after removal), split by genome pair")

gg_freq_poly(widths$tra_coa2_aln |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of widths of translocated regions (aligned, after removal), split by distance group") +
  geom_vline(xintercept = 300) +
  scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Double-coalesced regions

gg_freq_poly(widths$coa2) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of alignment widths after double-collapsing, split by arm.")

gg_freq_poly(widths$coa2) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of widthsof alignment widths after double-collapsing, split by genome pair")

gg_freq_poly(widths$coa2 |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of alignment widths after double-collapsing, split by distance group") +
  geom_vline(xintercept = 500) +
  scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Compound pictures

“aligned”, “unaligned” and “mapped”

Facet by genus

p_ali <- gg_freq_poly(widths$gbs |> dplyr::filter(! is.na(genus))) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  facet_wrap(~genus, scales = "free_y", ncol = 1) +
  ggtitle("Aligned")

p_una <- gg_freq_poly(widths$unal |> dplyr::filter(! is.na(genus))) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  facet_wrap(~genus, scales = "free_y", ncol = 1) +
  ggtitle("Not aligned")

p_map <- gg_freq_poly(widths$coa |> dplyr::filter(! is.na(genus))) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  facet_wrap(~genus, scales = "free_y", ncol = 1) +
  ggtitle("Mapped")

# Patchwork
(p_ali | p_una | p_map) + plot_layout(ncol = 3, guides = 'collect') + theme_bw()

Oik only, color by distance

p_isol <- gg_freq_poly(widths$isol |> dplyr::filter(genus == "Oikopleura")) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  ggtitle("Isolated alignments")

p_unma <- gg_freq_poly(widths$unmap |> dplyr::filter(genus == "Oikopleura")) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  ggtitle("Unmapped")

p_coar <- gg_freq_poly(widths$coa |> dplyr::filter(genus == "Oikopleura", !nonCoa)) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  ggtitle("Colinear regions")

p_brid <- gg_freq_poly(widths$bri |> dplyr::filter(genus == "Oikopleura")) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  ggtitle("Bridge regions")

p_coal <- gg_freq_poly(widths$gbs |> dplyr::filter(genus == "Oikopleura", !nonCoa)) +
  aes(width, after_stat(density), col=class) + theme_bw() +
  ggtitle("Colinear alignments")

# Patchwork
(p_isol | p_unma | p_coar | p_brid | p_coal) + plot_layout(nrow = 1, guides = 'collect') + theme_bw()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 7561 rows containing non-finite values (`stat_bin()`).

Oik only, color by alignment class

w <- rbind(
  widths$isol  |> dplyr::filter(genus == "Oikopleura")           |> dplyr::mutate(aliclass = "Isol"),
  widths$unmap |> dplyr::filter(genus == "Oikopleura")           |> dplyr::mutate(aliclass = "Unmap"),
  widths$coa   |> dplyr::filter(genus == "Oikopleura", !nonCoa)  |> dplyr::mutate(aliclass = "Col reg"),
  widths$bri   |> dplyr::filter(genus == "Oikopleura")           |> dplyr::mutate(aliclass = "Bridge"),
  widths$gbs   |> dplyr::filter(genus == "Oikopleura", !nonCoa)  |> dplyr::mutate(aliclass = "Col aln")
)

gg_freq_poly(w) +
  aes(width, after_stat(density), col=aliclass) + theme_bw()+
  facet_wrap(~class, ncol = 1)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 7561 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(w |> dplyr::filter(class == "distant")) +
  aes(width, after_stat(density), col=aliclass) + theme_bw()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1406 rows containing non-finite values (`stat_bin()`).

gg_freq_poly(w |> dplyr::filter(class == "distant"), scale = "natural") +
  aes(width, after_stat(density), col=aliclass) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3))
## Warning: Removed 49725 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 10 rows containing missing values (`geom_path()`).

For figure 2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
## 
##     collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:XVector':
## 
##     slice
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Frequency table is pre-computed so that ggplot2 can smooth over the different alignment pairs

wfreq <- w |>
  filter(class == "distant") |>
  count(aliclass, name, width) |>
  group_by(aliclass, name) |>          
  mutate(prop = prop.table(n))

# Smoothing with loess to see the 3 peaks in bridge regions

wfreq |>
  filter(width <= 500, aliclass != "Col reg") |>
  ggplot() +
  aes(width, prop, col = aliclass) +
  geom_smooth(span = .15, method = "loess", fill = NA) +
  scale_x_continuous(limits = c(0, 500)) + theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

# Plotting the collinear regions separately because their length distribution is
# on a different scale.

wfreq |>
  filter(width >= 250, aliclass == "Col reg") |>
  ggplot() +
  aes(width, prop, col = aliclass) +
  geom_smooth(span = .15, method = "loess", fill = NA) +
  scale_x_log10(limits = c(250, 1e5)) + theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 77 rows containing non-finite values (`stat_smooth()`).

“isolated”, “unmapped”, “colinear regions”, “bridge” and “colinear alingments”

Oki – North

p_ali2 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "Oki – North", nonCoa), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Isolated alignments")

p_unmap2 <- gg_freq_poly(widths$unmap |> dplyr::filter(dist == "Oki – North"), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Unmapped")

p_syn2 <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "Oki – North", !nonCoa), binwidth = 600, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e4)) +
  ggtitle("Colinear region")

p_map2 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "Oki – North", !nonCoa), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Colinear alignment")

p_bridge <- gg_freq_poly(widths$bri |> dplyr::filter(dist == "Oki – North"), binwidth = 20, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Bridge")

# Patchwork
p_oki_north <-(p_ali2 | p_unmap2 | p_syn2 | p_bridge | p_map2 ) + plot_layout(ncol = 5, guides = 'collect') + theme_bw() + plot_annotation(title = "Okinawa vs North")
p_oki_north
## Warning: Removed 1485 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 8089 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 2326 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 3190 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 15668 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).

North – North

p_ali3 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "North – North", nonCoa), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Isolated alignments")

p_unmap3 <- gg_freq_poly(widths$unmap |> dplyr::filter(dist == "North – North"), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Unmapped")

p_syn3 <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "North – North", !nonCoa), binwidth = 600, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e4)) +
  ggtitle("Colinear region")

p_map3 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "North – North", !nonCoa), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Colinear alignment")

p_bridge3 <- gg_freq_poly(widths$bri |> dplyr::filter(dist == "North – North"), binwidth = 20, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Bridge")

# Patchwork
p_north_north <- (p_ali3 | p_unmap3 | p_syn3 | p_bridge3 | p_map3 ) + plot_layout(ncol = 5, guides = 'collect') + theme_bw() + plot_annotation(title = "North vs North")
p_ali4 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "In same pop", nonCoa), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Isolated alignments")

p_unmap4 <- gg_freq_poly(widths$unmap |> dplyr::filter(dist == "In same pop"), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Unmapped")

p_syn4 <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "In same pop", !nonCoa), binwidth = 600, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e4)) +
  ggtitle("Colinear region")

p_map4 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "In same pop", !nonCoa), binwidth = 60, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Colinear alignment")

p_bridge4 <- gg_freq_poly(widths$bri |> dplyr::filter(dist == "In same pop"), binwidth = 20, scale = "natural") +
  aes(width, after_stat(density), col=name) + theme_bw() +
  scale_x_continuous(limits = c(0, 3e3)) +
  ggtitle("Bridge")

# Patchwork
p_same_pop <- (p_ali4 | p_unmap4 | p_syn4 | p_bridge4 | p_map4 ) + plot_layout(ncol = 5, guides = 'collect') + theme_bw() + plot_annotation(title = "Same population")
p_oki_north    /
p_north_north  /
p_same_pop
## Warning: Removed 1485 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 8089 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 2326 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 3190 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 15668 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 431 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 2700 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 1402 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 876 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 11985 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 1031 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 1229 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 1482 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 614 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 12048 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).

Paired comparison of widths in target and query genomes.

GBreaks2widthTibble <- function(gb, pair = NULL) {
  df <- as.data.frame(gb)
  if(is.null(df$Arm))       df$Arm        <- NA
  if(is.null(df$rep))       df$rep        <- NA
  if(is.null(df$query.rep)) df$query.rep  <- NA
  tibble::tibble(target     = df$width,        query     = df$query.width,
                 target.rep = ! is.na(df$rep), query.rep = ! is.na(df$query.rep),  # TRUE if overlaps a repeat
                 pair = pair, Arm = df$Arm,
                 dist = OikScrambling:::compDistance(pair))
}

gbs.w2w     <- do.call(rbind, lapply(names(gbs),      \(name) GBreaks2widthTibble(gbs[[name]],      pair = name)))
bri.w2s <- do.call(rbind, lapply(names(bri),  \(name) GBreaks2widthTibble(bri[[name]],  pair = name)))
tra.w2s     <- do.call(rbind, lapply(names(tra),      \(name) GBreaks2widthTibble(tra[[name]],      pair = name)))
tra2.w2s    <- do.call(rbind, lapply(names(tra2),     \(name) GBreaks2widthTibble(tra2[[name]],     pair = name)))

rbind (
  aligned = table(paste(    gbs.w2w$target.rep,     gbs.w2w$query.rep)),
  bri = table(paste(bri.w2s$target.rep, bri.w2s$query.rep)),
  transpo = table(paste(    tra.w2s$target.rep,     tra.w2s$query.rep))
)
##         FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE
## aligned     1049613      41624      41943     52103
## bri          801374      84573      82538     35159
## transpo       10377       1252       3043      2782
ggPlotW2W <- function(tibble) {
  ggplot(tibble) +
    aes(target, query) +
    geom_point(alpha = 0.05) +
    geom_density_2d() +
    scale_x_log10() +
    scale_y_log10()
}

ggPlotW2W(gbs.w2w |> dplyr::filter(!is.na(Arm))) + facet_wrap(~Arm) +
  ggtitle("Width of the target and query aligned regions")

ggPlotW2W(bri.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~Arm) +
  ggtitle("Width of the target and query bridge regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 14293 rows containing non-finite values (`stat_density2d()`).

ggPlotW2W(gbs.w2w) + facet_wrap(~pair) +
  ggtitle("Width of the target and query aligned regions")
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

ggPlotW2W(gbs.w2w) + facet_wrap(~dist) +
  ggtitle("Width of the target and query aligned regions")
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

ggPlotW2W(bri.w2s) + facet_wrap(~pair) +
  ggtitle("Width of the target and query bridge regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 43487 rows containing non-finite values (`stat_density2d()`).
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

ggPlotW2W(tra.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~pair) +
  ggtitle("Width of the target and query translocated regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 893 rows containing non-finite values (`stat_density2d()`).

ggPlotW2W(tra.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~dist) +
  ggtitle("Width of the target and query translocated regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 893 rows containing non-finite values (`stat_density2d()`).

ggPlotW2W(tra2.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~pair) +
  ggtitle("Width of the target and query translocated regions (after removal)")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 62 rows containing non-finite values (`stat_density2d()`).

ggPlotW2W(tra2.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~dist) +
  ggtitle("Width of the target and query translocated regions (after removal)")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 62 rows containing non-finite values (`stat_density2d()`).

ggPlotW2W(gbs.w2w) +
  facet_wrap(~query.rep + target.rep) +
  ggtitle("Width of the target and query aligned regions, matching or not matching repeats")

ggPlotW2W(bri.w2s) +
  facet_wrap(~query.rep + target.rep) +
  ggtitle("Width of the target and query bridge regions, matching or not matching repeats")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 43487 rows containing non-finite values (`stat_density2d()`).

ggPlotW2W(tra.w2s) +
  facet_wrap(~dist + query.rep + target.rep, nrow = 3) +
  ggtitle("Width of the target and query translocated regions, matching or not matching repeats")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1298 rows containing non-finite values (`stat_density2d()`).

ggplot(bri.w2s) +
  aes(target / query, col = paste(target.rep, query.rep) ) +
  scale_x_log10() +
  geom_freqpoly() +
  ggtitle("In presence of a repeat, target width tends to be longer than query") +
  geom_vline(xintercept = 1)
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 43487 rows containing non-finite values (`stat_bin()`).

Structural variants

Width of inverted regions

We know that we can detect more inversions after coalescing and removing translocations (vignette("Inversions", package = "OikScrambling")).

Many inversions are shorter than 1 kbp. Size may differ slightly between long and short arms.

invs.gbs  <- sapply(gbs,  function(gb) filterInversions(flagInversions(gb))) |> SimpleList()
invs.coa  <- sapply(coa,  function(gb) filterInversions(flagInversions(gb))) |> SimpleList()
invs.coa2 <- sapply(coa2, function(gb) filterInversions(flagInversions(gb))) |> SimpleList()

sapply(invs.gbs,  length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu 
##     614     604      89     608     541     613     241     620      56     212     597     237     608     237      19      42      58      16       0      50      58      14       0     697      20 
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel 
##      13      12     221      91      40     236      92      64
sapply(invs.coa,  length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu 
##     670     675     105     665     591     671     321     678      73     275     667     319     686     319      24      51      71      92       0      65      70      92       0    1003      44 
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel 
##      21      16     300     212     132     318     200     166
sapply(invs.coa2, length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu 
##     704     705     122     695     611     706     330     708      79     279     695     327     711     335      27      55      76      92       0      71      77      92       0    1003      45 
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel 
##      23      16     331     216     134     358     207     166
# Sanity check that we detect inversions on both strands
sapply(invs.coa2, \(x) {summary(decode(strand(x)))})
##   Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu
## +     381     365      33     369     317     382     186     367      38     147     354     182     365     164       7       2       6      51       0       5       5      52       0     470
## -     323     340      89     326     294     324     144     341      41     132     341     145     346     171      20      53      70      41       0      66      72      40       0     533
## *       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
##   Dme_Dsu Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel
## +      29      11       3      53      84      73      60      88      69
## -      16      12      13     278     132      61     298     119      97
## *       0       0       0       0       0       0       0       0       0
widths$invs.gbs  <- makeWidthsDF(invs.gbs)  |> tibble::as_tibble()
widths$invs.coa  <- makeWidthsDF(invs.coa)  |> tibble::as_tibble()
widths$invs.coa2 <- makeWidthsDF(invs.coa2) |> tibble::as_tibble()
gg_freq_poly(widths$invs.gbs) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of alignment widths for inverted regions, using aligned regions, split by arm.")

gg_freq_poly(widths$invs.gbs) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of alignment widths for inverted regions, using aligned regions, split by genome pair")

gg_freq_poly(widths$invs.gbs |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of alignment widths for inverted regions, using aligned regions, split by distance group") +
  geom_vline(xintercept = 500) +
  scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

gg_freq_poly(widths$invs.coa) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of alignment widths for inverted regions, using mapped regions, split by arm.")

gg_freq_poly(widths$invs.coa) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of alignment widths for inverted regions, using mapped regions, split by genome pair")

gg_freq_poly(widths$invs.coa |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of alignment widths for inverted regions, using mapped regions, split by distance group") +
  geom_vline(xintercept = 500) +
  scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

gg_freq_poly(widths$invs.coa2) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution of alignment widths for inverted regions, using doublemapped regions, split by arm.")

gg_freq_poly(widths$invs.coa2) +
  aes(col=Arm) +
  facet_wrap(~name, scales = "free_y", ncol=5) +
  ggtitle("Distribution of alignment widths for inverted regions, using doublemapped regions, split by genome pair")

gg_freq_poly(widths$invs.coa2 |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
  aes(col=Arm) +
  facet_wrap(~dist, scales = "free_y", nrow=1) +
  ggtitle("Distribution of alignment widths for inverted regions, using doublemapped regions, split by distance group") +
  geom_vline(xintercept = 500) +
  scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

Width of gaps next to the inverted regions

inv.lgaps <- sapply(coa, leftInversionGaps) |> SimpleList()
inv.lgaps[ 1:5]  <- sapply(inv.lgaps[ 1:5],  flagLongShort, longShort$OKI2018.I69)
inv.lgaps[ 6:10] <- sapply(inv.lgaps[ 6:10], flagLongShort, longShort$OSKA2016v1.9)
inv.lgaps[11:15] <- sapply(inv.lgaps[11:15], flagLongShort, longShort$Bar2.p4)
sapply(inv.lgaps, length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu 
##     669     675      96     665     591     669     319     678      69     274     666     315     685     315      22      46      65      92       0      61      67      92       0    1003      44 
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel 
##      21      14     294     212     132     310     200     166
widths$invs.lgaps <- makeWidthsDF(inv.lgaps) |> tibble::as_tibble()

gg_freq_poly(widths$invs.lgaps) +
  aes(col=Arm) +
  facet_wrap(~chr, scales = "free_y") +
  ggtitle("Distribution widths of left gaps next to inverted regions, split by arm.") +
  geom_vline(xintercept = 1000)