vignettes/RegionWidths.Rmd
RegionWidths.Rmd
knitr::opts_chunk$set(cache = TRUE, cache.lazy = FALSE)
options(width = 200)
library('OikScrambling') |> suppressPackageStartupMessages()
library('ggplot2')
requireNamespace('patchwork') |> suppressPackageStartupMessages()
genomes <- OikScrambling:::loadAllGenomes(compat = F)
## Warning in runHook(".onLoad", env, package.lib, package): input string 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
## Warning in runHook(".onLoad", env, package.lib, package): input string 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
See
vignette("LoadGenomicBreaks", package = "OikScrambling")
for how the different GBreaks objects are prepared.
widths <- SimpleList()
makeWidthsDF <- function (objList) {
lapply(names(objList), \(name) {
DF <- mcols(objList[[name]])
if (nrow(DF) == 0) return(DataFrame())
if (is.null(DF$Arm) ) DF$Arm <- NA
if (is.null(DF$flag)) DF$flag <- NA
if (is.null(DF$nonCoa)) DF$nonCoa <- NA
DF$name <- name
DF$width <- width(objList[[name]])
DF$chr <- seqnames(objList[[name]]) |>
sub(pat = "Chr", rep = "chr") |>
sub(pat = "BNJZ.*", rep = "Ply") |>
sub(pat = "BNKA.*", rep = "Ros") |>
sub(pat = "BJTB.*", rep = "Rob") |>
sub(pat = "^R.*", rep = "Sav") |>
sub(pat = "^2L$", rep = "Dro") |>
sub(pat = "^2R$", rep = "Dro") |>
sub(pat = "^3L$", rep = "Dro") |>
sub(pat = "^3R$", rep = "Dro") |>
sub(pat = "^4$", rep = "Dro") |>
sub(pat = "^X$", rep = "Dro") |>
sub(pat = "^Y$", rep = "Dro")
DF$query <- DF$score <- DF$rep <- DF$repOvlp <- DF$transcripts <- NULL
DF$dist <- OikScrambling:::compDistance(DF$name)
DF$distS <- OikScrambling:::compDistance(DF$name, short = TRUE)
DF$genus <- OikScrambling:::compGenus(DF$name)
DF$class <- OikScrambling:::compDistClass(DF$name)
DF
}) |> do.call(what=rbind)
}
isol <- sapply(gbs, \(gb) gb[gb$nonCoa]) |> SimpleList()
widths$gbs <- makeWidthsDF(gbs) |> tibble::as_tibble()
widths$isol <- makeWidthsDF(isol) |> tibble::as_tibble()
widths$unal <- makeWidthsDF(unal) |> tibble::as_tibble()
widths$coa <- makeWidthsDF(coa) |> tibble::as_tibble()
widths$unmap <- makeWidthsDF(unmap) |> tibble::as_tibble()
widths$bri <- makeWidthsDF(bri) |> tibble::as_tibble()
widths$tra <- makeWidthsDF(tra) |> tibble::as_tibble()
widths$tra2 <- makeWidthsDF(tra2) |> tibble::as_tibble()
widths$coa2 <- makeWidthsDF(coa2) |> tibble::as_tibble()
names_table <- tibble::tibble(
pairnames = levels(factor((widths$gbs$name))),
class = OikScrambling:::compDistClass(pairnames),
distS = OikScrambling:::compDistance(pairnames, short = TRUE),
target = pairnames |> sub(pat="_.*", rep="") |> factor(),
query = pairnames |> sub(pat=".*_", rep="") |> factor()
)
This is useful to verify by eye that collections of distributions in the same distance group resemble each other.
gg_freq_poly <- function(tibble, bins = 30, binwidth = 30, scale = "log") {
p <-
ggplot(tibble) +
aes(width)
if(scale == "log") {
p <- p +
geom_freqpoly(bins = bins) +
scale_x_log10()
} else {
p <- p +
geom_freqpoly(binwidth = binwidth)
}
p
}
w_df <- rbind(widths$gbs |> as.data.frame()|> dplyr::mutate(what = "aligned"),
widths$isol |> as.data.frame()|> dplyr::mutate(what = "isolated"),
widths$unal |> as.data.frame()|> dplyr::mutate(what = "unaligned"),
widths$coa |> as.data.frame()|> dplyr::mutate(what = "mapped"),
widths$bri |> as.data.frame()|> dplyr::mutate(what = "bridge"),
widths$unmap |> as.data.frame()|> dplyr::mutate(what = "unmaped"),
widths$coa2 |> as.data.frame()|> dplyr::mutate(what = "doublecoalesced"))
widths_all_tibble <- rbind(widths$gbs |> dplyr::mutate(what = "aligned"),
widths$isol |> dplyr::mutate(what = "isolated"),
widths$unal |> dplyr::mutate(what = "unaligned"),
widths$coa |> dplyr::mutate(what = "mapped"),
widths$bri |> dplyr::mutate(what = "bridge"),
widths$unmap |> dplyr::mutate(what = "unmaped"),
widths$coa2 |> dplyr::mutate(what = "doublecoalesced"))
p <-gg_freq_poly(widths_all_tibble) +
aes(col = what) +
ggtitle("Distribution of alignment width before and after collapsing")
# Showing all individual distributions, to demonstrate we can pool some.
p + facet_wrap(~class + genus + distS + name, scales = "free_y")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 22514 rows containing non-finite values (`stat_bin()`).
# Aggregating by distance class
p_droso <- gg_freq_poly(widths_all_tibble |> dplyr::filter(genus == "Drosophila")) + aes(col = what) + facet_wrap(~genus + class, nrow = 1, scales = "free_y")
p_ciona <- gg_freq_poly(widths_all_tibble |> dplyr::filter(genus == "Ciona")) + aes(col = what) + facet_wrap(~genus + class, nrow = 1, scales = "free_y")
p_oikop <- gg_freq_poly(widths_all_tibble |> dplyr::filter(genus == "Oikopleura")) + aes(col = what) + facet_wrap(~genus + class, nrow = 1, scales = "free_y")
layout <- "
AAAA
BBB#
CCC#
"
library("patchwork")
(p_droso / p_ciona / p_oikop) + plot_layout(design = layout, guides = 'collect')
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 914 rows containing non-finite values (`stat_bin()`).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 10307 rows containing non-finite values (`stat_bin()`).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 7561 rows containing non-finite values (`stat_bin()`).
Below, I take the widths_all_tibble
and perform some
statistical operations on it for later. Click below to view.
library(dplyr) |> suppressPackageStartupMessages()
t_CI <- function(vector, conf.level){
if(length(vector) < 3)
return(NA_character_)
conf_test <- t.test(vector, conf.level = conf.level)
conf_str <- paste(round(conf_test$conf.int,2), collapse=',')
conf_str
}
# Note that because the Wilcox test involves ranking (and therefore sorting),
# this test is computationally demanding for large data frames.
# It works the same as the function above - just slower.
wilcox_CI <- function(vector, conf.level){
if(length(vector) < 3)
return(NA_character_)
conf_test <- wilcox.test(vector, conf.level = conf.level, conf.int = TRUE)
conf_str <- paste(round(conf_test$conf.int, 2), collapse=',')
conf_str
}
stats_to_compute <- list(n = ~n(),
t_CI = ~t_CI(., conf.level=0.95),
wilcox_CI = ~wilcox_CI(., conf.level=0.95),
min = min,
q25 = ~quantile(., 0.25),
median = median,
q75 = ~quantile(., 0.75),
max = max,
mean = mean,
sd = sd)
old.o <- options(max.print = 2000)
widths_all_tibble |>
group_by(name, class, what) |>
summarise(across(width, stats_to_compute)) |> as.data.frame()
## `summarise()` has grouped output by 'name', 'class'. You can override using the `.groups` argument.
## name class what width_n width_t_CI width_wilcox_CI width_min width_q25 width_median width_q75 width_max width_mean width_sd
## 1 Bar_Aom intermediate aligned 24717 1538.64,1610.24 844,872.5 51 368.00 634.0 1447.00 54452 1574.4417 2871.3143
## 2 Bar_Aom intermediate bridge 20520 302.15,327.1 191,197 0 38.00 178.0 309.00 38380 314.6256 911.6882
## 3 Bar_Aom intermediate doublecoalesced 3452 11606.52,14764.2 3836,4392 58 562.75 2251.5 8361.50 1199960 13185.3589 47312.1501
## 4 Bar_Aom intermediate isolated 1853 862.03,1000.66 559.5,621 58 271.00 481.0 960.00 19742 931.3475 1521.3645
## 5 Bar_Aom intermediate mapped 4197 9887.49,11733.47 3636.5,4119.5 58 547.00 2104.0 7894.00 677793 10810.4818 30499.4833
## 6 Bar_Aom intermediate unaligned 24170 611.96,681.74 210.5,217 1 48.00 194.0 351.00 79613 646.8544 2767.3868
## 7 Bar_Aom intermediate unmaped 4129 2035.51,2410.29 526.5,629 1 82.00 272.0 1330.00 79613 2222.9002 6141.9161
## 8 Bar_Kum distant aligned 31557 957.75,991.34 617,635 51 250.00 469.0 1052.00 43546 974.5433 1522.0555
## 9 Bar_Kum distant bridge 23028 408.46,440.56 289.5,296.5 0 144.00 258.0 441.00 135099 424.5125 1242.4483
## 10 Bar_Kum distant doublecoalesced 7849 4903.46,5419.47 2151.5,2342.5 51 316.00 1354.0 4436.00 216498 5161.4630 11660.5964
## 11 Bar_Kum distant isolated 4346 653.68,717.84 408,449.5 51 177.00 302.0 764.75 16862 685.7561 1078.6911
## 12 Bar_Kum distant mapped 8529 4530.09,4973.8 2005.5,2181.5 51 296.00 1227.0 4188.00 138044 4751.9448 10452.2655
## 13 Bar_Kum distant unaligned 31373 704.7,796.64 323,331 1 147.00 273.0 521.00 273024 750.6715 4154.0813
## 14 Bar_Kum distant unmaped 8515 1455.44,1780.07 502,540 1 144.00 351.0 975.00 273024 1617.7504 7640.8206
## 15 Bar_Nor same_or_sister aligned 15184 2880.98,3022.93 1814.5,1908.5 40 526.00 1305.5 3380.25 55611 2951.9559 4462.0282
## 16 Bar_Nor same_or_sister bridge 8996 378.79,417.1 274,297 0 2.00 124.0 406.00 17680 397.9464 926.7815
## 17 Bar_Nor same_or_sister doublecoalesced 4319 10271.06,12273.5 2512,2848 40 529.00 1517.0 5440.00 569531 11272.2790 33562.3130
## 18 Bar_Nor same_or_sister isolated 3882 1262.66,1415.56 780,845 40 312.00 633.0 1366.50 44180 1339.1121 2429.5923
## 19 Bar_Nor same_or_sister mapped 6188 7355.49,8288.48 2419,2720.5 40 488.00 1373.5 5444.75 240933 7821.9819 18719.2179
## 20 Bar_Nor same_or_sister unaligned 12425 755.51,822.08 359.5,383 1 24.00 225.0 734.00 56097 788.7979 1892.7856
## 21 Bar_Nor same_or_sister unmaped 5214 1122.1,1264.13 557,629.5 1 16.00 325.0 1250.00 56097 1193.1124 2615.7709
## 22 Bar_Oki distant aligned 32265 924.88,956.96 597.5,614.5 51 245.00 454.0 1014.00 43420 940.9199 1469.8924
## 23 Bar_Oki distant bridge 23772 413.37,438.69 288.5,295.5 0 142.00 256.0 443.00 59129 426.0277 995.8294
## 24 Bar_Oki distant doublecoalesced 7849 4896.93,5417.11 2145.5,2338 54 306.00 1347.0 4450.00 218004 5157.0210 11754.6679
## 25 Bar_Oki distant isolated 4321 645.66,714.44 390,427.5 54 172.00 296.0 714.00 21112 680.0470 1153.0032
## 26 Bar_Oki distant mapped 8493 4540.11,4993.94 2005.5,2179 54 291.00 1218.0 4177.00 218004 4767.0211 10667.9952
## 27 Bar_Oki distant unaligned 32076 703.97,795.69 322.5,330.5 1 146.00 272.0 520.00 269200 749.8278 4190.4617
## 28 Bar_Oki distant unmaped 8484 1472.9,1809.5 503.5,539.5 1 148.00 358.0 968.00 269200 1641.2007 7908.2528
## 29 Bar_Osa intermediate aligned 24851 1523.3,1593.59 844.5,873 52 367.00 634.0 1449.00 49546 1558.4455 2826.8737
## 30 Bar_Osa intermediate bridge 20778 303.39,328.1 188.5,194.5 0 37.00 174.0 305.00 32212 315.7473 908.4853
## 31 Bar_Osa intermediate doublecoalesced 3373 12040.47,14823.71 3990.5,4583 59 585.00 2310.0 8781.00 714822 13432.0910 41221.5092
## 32 Bar_Osa intermediate isolated 1732 814.98,929.84 571,634 59 279.75 478.0 968.25 16143 872.4094 1218.6145
## 33 Bar_Osa intermediate mapped 4073 10180.46,12058.44 3738,4238 59 572.00 2153.0 8140.00 562164 11119.4513 30566.1990
## 34 Bar_Osa intermediate unaligned 24266 616.12,690.07 208,214.5 1 47.00 189.0 346.00 106962 653.0948 2938.6433
## 35 Bar_Osa intermediate unmaped 4008 2110.15,2524.29 516.5,607 1 83.00 277.0 1249.50 106962 2317.2161 6686.5554
## 36 Cbr_Cel distant aligned 47012 523.92,534.98 413,419.5 53 210.00 363.0 622.00 23152 529.4498 611.7579
## 37 Cbr_Cel distant bridge 43326 1114.48,1175.78 623.5,639 0 160.00 494.0 1077.00 248393 1145.1281 3254.9048
## 38 Cbr_Cel distant doublecoalesced 3546 19066.68,23080.29 9597,11187 70 953.00 4744.0 22150.50 2037825 21073.4873 60950.6693
## 39 Cbr_Cel distant isolated 993 520.53,582.69 428,476.5 70 238.00 401.0 658.00 4860 551.6113 499.0966
## 40 Cbr_Cel distant mapped 3686 18298.4,22127.16 9262,10799 70 910.50 4530.5 21520.00 2037825 20212.7811 59280.8720
## 41 Cbr_Cel distant unaligned 46608 1674.24,1800.27 708,726.5 1 189.00 555.0 1257.00 540635 1737.2543 6941.2578
## 42 Cbr_Cel distant unmaped 3675 7856.75,9207.81 4321.5,4771 1 1059.00 3163.0 8405.00 540635 8532.2808 20887.2596
## 43 Cbr_Cni close aligned 51726 1418.23,1450.18 1037,1055.5 48 451.00 885.0 1639.00 42453 1434.2038 1854.0210
## 44 Cbr_Cni close bridge 41711 527.58,550.53 386.5,394.5 0 116.00 324.0 621.00 74381 539.0541 1195.3952
## 45 Cbr_Cni close doublecoalesced 6677 13734.59,15563.37 3214.5,3861 52 376.00 1146.0 9736.00 619682 14648.9759 38114.9360
## 46 Cbr_Cni close isolated 5576 579.87,620.17 456.5,480 52 236.00 390.0 713.00 30395 600.0187 767.5146
## 47 Cbr_Cni close mapped 10015 9185.2,10119.87 2706.5,3085.5 52 350.50 1051.0 7105.00 527175 9652.5324 23859.0372
## 48 Cbr_Cni close unaligned 49877 622.3,650.97 398.5,407 1 120.00 336.0 674.00 95699 636.6340 1633.0832
## 49 Cbr_Cni close unmaped 9394 930.55,1042.82 489,525 1 52.00 341.0 974.75 95699 986.6839 2775.6234
## 50 Cbr_Cre intermediate aligned 44819 724.89,741.84 545,554 54 267.00 477.0 829.00 25357 733.3664 915.6811
## 51 Cbr_Cre intermediate bridge 41373 1145.76,1206.59 697.5,711.5 0 290.00 605.0 1106.00 248371 1176.1738 3156.3155
## 52 Cbr_Cre intermediate doublecoalesced 3274 22486.72,27520.38 10373,12213.5 70 1006.50 5313.0 24194.00 1794548 25003.5464 73448.7989
## 53 Cbr_Cre intermediate isolated 1013 644.12,727.14 540,605 70 266.00 476.0 879.00 7030 685.6328 673.3018
## 54 Cbr_Cre intermediate mapped 3446 21350.32,25968.65 10091,11802.5 70 988.00 5078.5 23458.25 1794548 23659.4855 69137.1135
## 55 Cbr_Cre intermediate unaligned 44593 1585.87,1691.14 763.5,780 1 308.00 648.0 1239.00 285240 1638.5042 5670.9042
## 56 Cbr_Cre intermediate unmaped 3434 6561.56,7651.59 3659,4063 1 916.25 2630.0 7107.25 285240 7106.5754 16289.4619
## 57 Cni_Cbr close aligned 51324 1431.4,1463.42 1048.5,1067.5 43 456.00 891.0 1656.00 39397 1447.4074 1850.4720
## 58 Cni_Cbr close bridge 41487 623.34,656.79 385.5,394 0 101.00 323.0 618.00 85250 640.0672 1738.2862
## 59 Cni_Cbr close doublecoalesced 6479 15081.78,17064.17 3471.5,4222.5 43 380.00 1125.0 10826.50 525245 16072.9728 40698.9749
## 60 Cni_Cbr close isolated 5492 571.77,605.96 457.5,481 43 234.00 394.0 713.00 12227 588.8616 646.2795
## 61 Cni_Cbr close mapped 9837 9757.63,10744.8 2882.5,3301.5 43 354.00 1040.0 7658.00 487247 10251.2150 24974.2632
## 62 Cni_Cbr close unaligned 49573 1047.92,1112.46 442,452.5 1 136.00 362.0 760.00 165349 1080.1929 3665.8063
## 63 Cni_Cbr close unmaped 9504 2693.7,2986.84 1136,1240 1 153.00 684.0 2449.50 165349 2840.2713 7289.6177
## 64 Cni_Cin distant aligned 41207 501.66,512.98 390.5,397 52 190.00 334.0 600.00 16541 507.3222 586.1596
## 65 Cni_Cin distant bridge 38347 1444.19,1565.89 669,687.5 0 190.00 532.0 1161.00 522515 1505.0371 6079.4902
## 66 Cni_Cin distant doublecoalesced 2811 25278.56,30673.32 12221,14210.5 55 1198.00 6294.0 27444.00 1433591 27975.9431 72935.1950
## 67 Cni_Cin distant isolated 638 472.9,548.19 404.5,466.5 55 211.00 346.5 675.25 4665 510.5439 484.1822
## 68 Cni_Cin distant mapped 2860 24839.14,30139.1 12026,13958 55 1163.00 6199.0 26778.50 1433591 27489.1199 72275.8818
## 69 Cni_Cin distant unaligned 40942 2451.15,2746.36 764,787 1 213.00 592.0 1379.00 1062215 2598.7560 15238.1718
## 70 Cni_Cin distant unmaped 2853 15188.36,18940.35 6952.5,7869 1 1555.00 5117.0 13941.00 1062215 17064.3575 51103.4333
## 71 Cni_Cre intermediate aligned 44877 718.3,735.08 542,551 54 267.00 476.0 823.00 26910 726.6916 907.0093
## 72 Cni_Cre intermediate bridge 41389 1281.45,1383.01 694.5,708.5 0 286.00 610.0 1091.00 456231 1332.2298 5271.0991
## 73 Cni_Cre intermediate doublecoalesced 3292 23984.77,29573.74 10398.5,12151.5 66 905.50 4619.5 24288.25 2187342 26779.2527 81775.6460
## 74 Cni_Cre intermediate isolated 1093 652.41,732.4 548,610.5 66 278.00 503.0 872.00 7539 692.4071 673.9110
## 75 Cni_Cre intermediate mapped 3488 22598.78,27717.39 9891.5,11617 66 879.75 4410.5 23256.25 2187342 25158.0849 77092.2783
## 76 Cni_Cre intermediate unaligned 44650 2013.55,2226.93 762,778 1 306.00 654.0 1228.00 1210598 2120.2391 11502.1956
## 77 Cni_Cre intermediate unmaped 3480 10172.19,12545.63 4457.5,4991 1 959.00 2977.5 8936.25 1210598 11358.9124 35705.8170
## 78 Dme_Dbu distant aligned 47893 610.2,623.09 463.5,472 48 205.00 397.0 731.00 16014 616.6455 719.1531
## 79 Dme_Dbu distant bridge 41934 1570.08,1628.57 999.5,1023.5 0 395.00 783.0 1684.00 174913 1599.3246 3055.7262
## 80 Dme_Dbu distant doublecoalesced 5877 15727.3,17388.08 7514,8553.5 54 716.00 3233.0 17797.00 615658 16557.6888 32472.8670
## 81 Dme_Dbu distant isolated 2086 638.88,695.17 519,565 54 245.00 462.0 849.75 5460 667.0240 655.5648
## 82 Dme_Dbu distant mapped 5959 15413.96,17007.28 7325.5,8328 54 701.00 3134.0 17379.50 532037 16210.6196 31370.6132
## 83 Dme_Dbu distant unaligned 47867 2115.49,2325.33 1100,1125.5 1 424.00 853.0 1868.00 1031382 2220.4080 11711.7224
## 84 Dme_Dbu distant unmaped 5951 5780.24,7400.13 2069.5,2221 1 772.50 1602.0 3679.50 1031382 6590.1850 31872.2350
## 85 Dme_Dma same_or_sister aligned 9732 10954.29,11804.24 5294,5971 49 404.00 1953.0 13037.50 291858 11379.2660 21387.8094
## 86 Dme_Dma same_or_sister bridge 7026 884.17,1011.76 270.5,296 0 25.00 183.0 479.00 67561 947.9668 2727.8052
## 87 Dme_Dma same_or_sister doublecoalesced 2272 29884.44,73666.11 623,729 56 240.00 458.5 1274.25 15422642 51775.2720 532092.2108
## 88 Dme_Dma same_or_sister isolated 2008 492.3,638.59 369,398.5 56 199.00 333.5 574.25 60697 565.4417 1671.3122
## 89 Dme_Dma same_or_sister mapped 2706 32527.05,54245.63 667,793.5 56 237.25 465.0 1472.75 8110313 43386.3385 288086.3839
## 90 Dme_Dma same_or_sister unaligned 9138 2583.15,3115.67 521.5,592 1 71.00 276.0 1299.00 545408 2849.4082 12984.5839
## 91 Dme_Dma same_or_sister unmaped 2616 6516.25,8298.34 2456,2975.5 1 266.75 1311.0 5929.50 545408 7407.2924 23241.7218
## 92 Dme_Dsu intermediate aligned 68750 1168.64,1187 931.5,942.5 50 553.00 843.0 1333.00 33370 1177.8198 1227.9791
## 93 Dme_Dsu intermediate bridge 67758 327.89,343.23 221.5,223.5 0 148.00 207.0 298.00 76314 335.5584 1018.9091
## 94 Dme_Dsu intermediate doublecoalesced 878 103687.77,132644.64 45163.5,67214 77 665.75 7701.0 134819.75 1457965 118166.2050 218585.3124
## 95 Dme_Dsu intermediate isolated 379 639.68,784.97 522.5,637 77 264.50 498.0 895.50 7094 712.3245 719.2297
## 96 Dme_Dsu intermediate mapped 992 92498.83,116597.69 39379,60320.5 77 661.50 7251.5 122432.00 1338161 104548.2621 193394.2339
## 97 Dme_Dsu intermediate unaligned 68626 665.01,916.59 223.5,225.5 1 149.00 209.0 303.00 2615430 790.8018 16812.8419
## 98 Dme_Dsu intermediate unmaped 984 23497.61,40593.44 1173,1914.5 1 313.75 693.5 4526.00 2615430 32045.5264 136638.9172
## 99 Dme_Dya close aligned 23059 4475.07,4642.76 3052,3167.5 60 919.00 2264.0 5468.00 92398 4558.9142 6495.6166
## 100 Dme_Dya close bridge 21243 480.45,527.1 213,218 0 123.00 191.0 309.00 75233 503.7723 1734.2930
## 101 Dme_Dya close doublecoalesced 1582 56273.14,90367.16 764.5,1000 63 243.25 500.0 1919.25 6070020 73320.1492 345677.1415
## 102 Dme_Dya close isolated 1314 536.32,649.82 390.5,436 63 203.00 335.0 646.00 25871 593.0723 1048.5917
## 103 Dme_Dya close mapped 1816 51540.18,76021.1 913.5,1292.5 63 247.75 531.0 2852.50 4893795 63780.6382 265960.8921
## 104 Dme_Dya close unaligned 22743 1251.61,1518.29 232,238.5 1 129.00 202.0 360.00 859148 1384.9524 10259.1500
## 105 Dme_Dya close unmaped 1776 10099.58,13319.72 4087,5246 1 407.50 2005.5 10593.00 859148 11709.6492 34595.6295
## 106 Oki_Aom distant aligned 32398 970.18,1004.75 599,616 55 257.25 459.0 1014.00 38135 987.4651 1587.6967
## 107 Oki_Aom distant bridge 23534 556.31,599.03 329.5,340 0 91.00 278.0 548.00 81689 577.6661 1671.8453
## 108 Oki_Aom distant doublecoalesced 8176 5354.77,5893.45 2325,2534.5 64 324.75 1434.0 4868.50 216565 5624.1087 12423.8043
## 109 Oki_Aom distant isolated 4586 723.3,801.8 432.5,474 55 184.00 322.0 803.75 23204 762.5497 1355.7858
## 110 Oki_Aom distant mapped 8864 4909.98,5375.83 2150.5,2340 55 310.75 1291.5 4535.00 216565 5142.9025 11187.1566
## 111 Oki_Aom distant unaligned 32175 925.18,1015.58 400,413 1 129.00 313.0 710.00 189661 970.3809 4136.8088
## 112 Oki_Aom distant unmaped 8848 1839.99,2144.46 786.5,848.5 1 176.00 497.0 1577.50 189661 1992.2255 7305.3661
## 113 Oki_Bar distant aligned 30841 1005.87,1041.09 652,671.5 51 261.00 494.0 1118.00 43759 1023.4778 1577.7563
## 114 Oki_Bar distant bridge 22308 589.8,629.34 359.5,370 0 165.00 302.0 583.00 51342 609.5723 1506.3415
## 115 Oki_Bar distant doublecoalesced 7876 5494.42,6064.46 2353.5,2569.5 54 316.00 1438.0 4926.75 216563 5779.4397 12903.7318
## 116 Oki_Bar distant isolated 4435 691.58,762.42 423,463.5 54 184.50 316.0 775.00 21231 726.9998 1203.0890
## 117 Oki_Bar distant mapped 8533 5044.56,5541.03 2182,2374 54 302.00 1312.0 4580.00 168062 5292.7949 11697.6288
## 118 Oki_Bar distant unaligned 30684 969.67,1068.66 426,439.5 1 172.00 333.0 744.00 283076 1019.1656 4423.5389
## 119 Oki_Bar distant unmaped 8515 1906.94,2244.26 780,845.5 1 182.00 503.0 1572.50 283076 2075.6006 7939.4455
## 120 Oki_Kum same_or_sister aligned 15086 3444.82,3615 2263.5,2378 44 601.00 1598.0 4234.00 72046 3529.9073 5331.9172
## 121 Oki_Kum same_or_sister bridge 9662 554.25,637.24 311.5,332 0 7.00 180.0 497.00 49765 595.7452 2080.8174
## 122 Oki_Kum same_or_sister doublecoalesced 1625 32119.88,41848.39 8992.5,12752.5 48 427.00 2193.0 27502.00 1365817 36984.1342 99970.3102
## 123 Oki_Kum same_or_sister isolated 2993 1640.94,1904.58 726.5,808 44 321.00 597.0 1305.00 58461 1772.7618 3677.9126
## 124 Oki_Kum same_or_sister mapped 5424 10370.48,11387.73 5736,6459.5 44 542.75 2506.0 12995.25 262072 10879.1062 19107.8473
## 125 Oki_Kum same_or_sister unaligned 12386 790.2,882.38 309,329 1 25.00 211.0 614.00 53783 836.2917 2616.6886
## 126 Oki_Kum same_or_sister unmaped 4119 1018.53,1216.1 305.5,364.5 1 8.00 121.0 794.00 53783 1117.3146 3233.7276
## 127 Oki_Nor distant aligned 30055 974.05,1006.36 656.5,675.5 52 261.00 502.0 1122.00 43752 990.2015 1428.7816
## 128 Oki_Nor distant bridge 21180 616.65,658.38 369.5,381.5 0 171.00 305.0 607.00 83492 637.5155 1549.4677
## 129 Oki_Nor distant doublecoalesced 8280 5027.56,5504.15 2359.5,2552.5 54 365.75 1530.0 4800.25 216563 5265.8556 11061.6823
## 130 Oki_Nor distant isolated 4578 769.55,842.95 498,544 54 190.00 357.0 926.25 17200 806.2532 1266.6301
## 131 Oki_Nor distant mapped 8875 4662.56,5086.86 2202.5,2381 54 340.50 1408.0 4490.50 216563 4874.7136 10195.7615
## 132 Oki_Nor distant unaligned 29897 1065.36,1166.48 462.5,478.5 1 178.00 348.0 824.00 280268 1115.9183 4460.4825
## 133 Oki_Nor distant unmaped 8854 2082.17,2403.95 951.5,1028.5 1 194.00 596.0 1929.75 280268 2243.0576 7723.0016
## 134 Oki_Osa distant aligned 30852 1042.47,1080.36 642.5,662 56 273.00 487.0 1098.25 38135 1061.4169 1697.9866
## 135 Oki_Osa distant bridge 22009 554.32,592.64 333.5,344.5 0 75.00 284.0 559.00 47969 573.4841 1450.1810
## 136 Oki_Osa distant doublecoalesced 8132 5358.65,5881.01 2385.5,2593.5 67 338.75 1489.0 4944.25 164824 5619.8308 12014.8889
## 137 Oki_Osa distant isolated 4646 780.7,861.56 471.5,518 61 195.00 345.5 888.75 24353 821.1334 1405.6672
## 138 Oki_Osa distant mapped 8843 4903.95,5356.97 2195,2380.5 61 324.00 1358.0 4561.00 164824 5130.4588 10866.2362
## 139 Oki_Osa distant unaligned 30636 937.76,1034.33 403,416.5 1 114.00 318.0 722.00 211398 986.0471 4311.6897
## 140 Oki_Osa distant unmaped 8821 1834.91,2152.56 757.5,819.5 1 167.00 475.0 1543.00 211398 1993.7338 7609.5710
## 141 Osa_Aom same_or_sister aligned 15261 3005.27,3189.26 1625,1709 44 501.00 1142.0 3015.00 112674 3097.2689 5797.9303
## 142 Osa_Aom same_or_sister bridge 10311 327.61,359.27 263,276.5 0 12.00 186.0 392.00 31147 343.4384 820.0688
## 143 Osa_Aom same_or_sister doublecoalesced 2256 18388.65,27235.22 3626.5,4766.5 46 467.75 1569.0 10310.00 2627520 22811.9331 107135.4024
## 144 Osa_Aom same_or_sister isolated 2791 1152.21,1353.59 619,670 46 297.50 546.0 1015.50 37928 1252.9022 2712.9315
## 145 Osa_Aom same_or_sister mapped 4950 9581.21,10947.52 3444,4108 46 487.25 1441.0 8766.75 394294 10264.3665 24517.1044
## 146 Osa_Aom same_or_sister unaligned 13147 536.81,588.57 281.5,297 1 34.00 219.0 539.50 37950 562.6859 1513.9013
## 147 Osa_Aom same_or_sister unmaped 4063 876.8,1021.52 375,431 1 10.00 200.0 868.00 37950 949.1605 2352.5788
## 148 Osa_Bar intermediate aligned 24430 1555.18,1626.72 870.5,900.5 50 372.00 651.0 1500.00 49309 1590.9494 2852.6515
## 149 Osa_Bar intermediate bridge 20349 374.78,396.54 276.5,283 0 121.00 257.0 400.00 34004 385.6573 791.8057
## 150 Osa_Bar intermediate doublecoalesced 3358 12551.79,15470.77 4230,4939.5 59 596.25 2474.0 9656.25 735649 14011.2802 43135.7029
## 151 Osa_Bar intermediate isolated 1757 837.18,973.65 570.5,635 59 282.00 487.0 962.00 19831 905.4104 1458.2906
## 152 Osa_Bar intermediate mapped 4081 10494.2,12399.52 3967,4547.5 59 588.00 2321.0 8849.00 541207 11446.8598 31041.5565
## 153 Osa_Bar intermediate unaligned 24028 628,681.41 293,300.5 1 128.00 267.0 437.25 79670 654.7069 2111.8623
## [ reached 'max' / getOption("max.print") -- omitted 78 rows ]
widths_all_tibble |>
group_by(what, class, genus) |>
summarise(across(width, stats_to_compute)) |> as.data.frame()
## `summarise()` has grouped output by 'what', 'class'. You can override using the `.groups` argument.
## what class genus width_n width_t_CI width_wilcox_CI width_min width_q25 width_median width_q75 width_max width_mean width_sd
## 1 aligned same_or_sister Drosophila 9732 10954.29,11804.24 5294,5971 49 404.00 1953.0 13037.50 291858 11379.2660 21387.8094
## 2 aligned same_or_sister Ciona 40659 2534.44,2612.26 1669.5,1716 43 498.00 1251.0 2974.00 97390 2573.3486 4003.2243
## 3 aligned same_or_sister Oikopleura 45531 3144.09,3240.23 1908,1965.5 40 536.00 1328.0 3549.50 112674 3192.1570 5233.1119
## 4 aligned close Caenorhabditis 103050 1429.47,1452.09 1045.5,1058.5 43 453.00 888.0 1649.00 42453 1440.7798 1852.2570
## 5 aligned close Drosophila 23059 4475.07,4642.76 3052,3167.5 60 919.00 2264.0 5468.00 92398 4558.9142 6495.6166
## 6 aligned close Ciona 176589 1497.75,1517.72 1055,1067.5 47 366.00 823.0 1784.00 83419 1507.7380 2140.7792
## 7 aligned intermediate Caenorhabditis 89696 724.06,735.99 545,551.5 54 267.00 477.0 826.00 26910 730.0269 911.3537
## 8 aligned intermediate Drosophila 68750 1168.64,1187 931.5,942.5 50 553.00 843.0 1333.00 33370 1177.8198 1227.9791
## 9 aligned intermediate Oikopleura 98495 1547.95,1582.22 876.5,891.5 50 371.00 650.0 1497.50 54452 1565.0848 2743.5154
## 10 aligned distant Caenorhabditis 88219 515.15,523.07 403.5,408 52 201.00 349.0 611.50 23152 519.1140 600.0351
## 11 aligned distant Drosophila 47893 610.2,623.09 463.5,472 48 205.00 397.0 731.00 16014 616.6455 719.1531
## 12 aligned distant Ciona 138664 243.89,247.2 179,179.5 60 137.00 170.0 221.00 25919 245.5469 314.0168
## 13 aligned distant Oikopleura 250472 995.75,1008.06 630,636.5 51 259.00 474.0 1065.00 43759 1001.9047 1572.4288
## 14 aligned different_genus <NA> 4474 320.97,361.86 227,243 78 156.00 193.0 356.75 30669 341.4119 697.5420
## 15 bridge same_or_sister Drosophila 7026 884.17,1011.76 270.5,296 0 25.00 183.0 479.00 67561 947.9668 2727.8052
## 16 bridge same_or_sister Ciona 32958 364.42,391.57 231.5,237.5 0 30.00 195.5 340.00 97102 377.9965 1257.6133
## 17 bridge same_or_sister Oikopleura 28969 428.39,460.65 284.5,295.5 0 6.00 169.0 431.00 49765 444.5169 1400.7196
## 18 bridge close Caenorhabditis 83198 579.29,599.56 387,393 0 109.00 324.0 619.00 85250 589.4247 1491.8728
## 19 bridge close Drosophila 21243 480.45,527.1 213,218 0 123.00 191.0 309.00 75233 503.7723 1734.2930
## 20 bridge close Ciona 162242 378.06,390.49 226.5,229 0 51.00 201.0 343.00 90560 384.2791 1277.3452
## 21 bridge intermediate Caenorhabditis 82762 1224.61,1283.82 698,708 0 288.00 608.0 1097.75 456231 1254.2169 4345.2261
## 22 bridge intermediate Drosophila 67758 327.89,343.23 221.5,223.5 0 148.00 207.0 298.00 76314 335.5584 1018.9091
## 23 bridge intermediate Oikopleura 80917 352.82,364.58 236,239.5 0 58.00 222.0 369.00 38380 358.6999 853.0667
## 24 bridge distant Caenorhabditis 81673 1281.22,1347.01 647,658.5 0 176.00 511.0 1114.00 522515 1314.1121 4796.4155
## 25 bridge distant Drosophila 41934 1570.08,1628.57 999.5,1023.5 0 395.00 783.0 1684.00 174913 1599.3246 3055.7262
## 26 bridge distant Ciona 130475 1026.7,1052.15 599,607 0 293.00 465.0 979.00 175994 1039.4208 2345.1046
## 27 bridge distant Oikopleura 180723 512.66,524.69 329,332 0 146.00 286.0 511.00 135099 518.6746 1304.3395
## 28 bridge different_genus <NA> 1766 1652.73,2053.32 992,1121 0 372.00 775.0 1817.00 81648 1853.0238 4291.6322
## 29 doublecoalesced same_or_sister Drosophila 2272 29884.44,73666.11 623,729 56 240.00 458.5 1274.25 15422642 51775.2720 532092.2108
## 30 doublecoalesced same_or_sister Ciona 4224 25242.83,30881.69 1803.5,2091.5 50 491.00 1135.0 4150.50 1322492 28062.2607 93465.4230
## 31 doublecoalesced same_or_sister Oikopleura 8200 17889.71,21195.14 3340,3847.5 40 496.75 1606.0 8502.50 2627520 19542.4283 76347.1262
## 32 doublecoalesced close Caenorhabditis 13156 14676.7,16023.81 3427,3922.5 43 378.00 1134.0 10147.50 619682 15350.2587 39413.6205
## 33 doublecoalesced close Drosophila 1582 56273.14,90367.16 764.5,1000 63 243.25 500.0 1919.25 6070020 73320.1492 345677.1415
## 34 doublecoalesced close Ciona 9517 32836.42,36892.14 3058,3590.5 52 466.00 1304.0 10497.00 1394106 34864.2812 100921.6066
## 35 doublecoalesced intermediate Caenorhabditis 6566 24013.26,27774.41 10631.5,11907.5 66 960.25 4979.0 24232.25 2187342 25893.8335 77734.3638
## 36 doublecoalesced intermediate Drosophila 878 103687.77,132644.64 45163.5,67214 77 665.75 7701.0 134819.75 1457965 118166.2050 218585.3124
## 37 doublecoalesced intermediate Oikopleura 14683 11896.19,13162.07 4135,4423.5 50 612.00 2400.0 8719.50 1199960 12529.1297 39127.8040
## 38 doublecoalesced distant Caenorhabditis 6357 22488.2,25763.17 11019.5,12195 55 1051.00 5394.0 24273.00 2037825 24125.6822 66599.7874
## 39 doublecoalesced distant Drosophila 5877 15727.3,17388.08 7514,8553.5 54 716.00 3233.0 17797.00 615658 16557.6888 32472.8670
## 40 doublecoalesced distant Ciona 8179 20048.5,21449.54 12801.5,13788.5 61 2215.00 8371.0 25468.50 526016 20749.0219 32319.1280
## 41 doublecoalesced distant Oikopleura 64372 5287.56,5469.02 2349.5,2420.5 51 327.00 1439.0 4730.00 218004 5378.2904 11744.8510
## 42 doublecoalesced different_genus <NA> 2704 1584.88,1965.09 654.5,733.5 78 189.00 469.5 1252.00 81983 1774.9852 5041.5504
## 43 isolated same_or_sister Drosophila 2008 492.3,638.59 369,398.5 56 199.00 333.5 574.25 60697 565.4417 1671.3122
## 44 isolated same_or_sister Ciona 4830 1135.75,1228.97 830.5,883 45 350.00 697.0 1381.00 35993 1182.3617 1652.3764
## 45 isolated same_or_sister Oikopleura 9666 1389.58,1507.41 725,763.5 40 312.00 594.0 1239.50 58461 1448.4957 2954.8151
## 46 isolated close Caenorhabditis 11068 581.25,607.71 460.5,477 43 235.00 392.0 713.00 30395 594.4825 709.9395
## 47 isolated close Drosophila 1314 536.32,649.82 390.5,436 63 203.00 335.0 646.00 25871 593.0723 1048.5917
## 48 isolated close Ciona 7994 789.28,830.11 601.5,630.5 50 259.00 512.0 967.75 14541 809.6955 931.1006
## 49 isolated intermediate Caenorhabditis 2106 660.37,717.93 553.5,598.5 66 271.25 488.5 874.00 7539 689.1486 673.4665
## 50 isolated intermediate Drosophila 379 639.68,784.97 522.5,637 77 264.50 498.0 895.50 7094 712.3245 719.2297
## 51 isolated intermediate Oikopleura 7663 930.69,996.17 625,658.5 50 289.00 511.0 1041.00 21037 963.4338 1462.0853
## 52 isolated distant Caenorhabditis 1631 511.58,559.52 426,464.5 55 226.00 384.0 667.50 4860 535.5469 493.5742
## 53 isolated distant Drosophila 2086 638.88,695.17 519,565 54 245.00 462.0 849.75 5460 667.0240 655.5648
## 54 isolated distant Ciona 1386 582.66,684.67 400,451 61 173.00 275.5 708.25 12900 633.6652 967.9353
## 55 isolated distant Oikopleura 36058 742.77,768.87 454.5,469.5 51 185.00 324.0 822.00 24353 755.8199 1264.6923
## 56 isolated different_genus <NA> 1933 402.17,472.71 330.5,361 78 169.00 251.0 550.00 30669 437.4397 790.7655
## 57 mapped same_or_sister Drosophila 2706 32527.05,54245.63 667,793.5 56 237.25 465.0 1472.75 8110313 43386.3385 288086.3839
## 58 mapped same_or_sister Ciona 7701 14397.07,16011.39 3100,3677.5 45 532.00 1461.0 9053.00 443768 15204.2322 36134.0852
## 59 mapped same_or_sister Oikopleura 16562 9236.61,9869.7 3755.5,4081 40 502.00 1591.0 8854.25 394294 9553.1524 20783.1448
## 60 mapped close Caenorhabditis 19852 9609.48,10288.9 2849,3126.5 43 352.00 1044.0 7384.25 527175 9949.1897 24419.2368
## 61 mapped close Drosophila 1816 51540.18,76021.1 913.5,1292.5 63 247.75 531.0 2852.50 4893795 63780.6382 265960.8921
## 62 mapped close Ciona 14347 21986.59,23820.35 3930,4585 50 446.00 1392.0 13495.50 730624 22903.4749 56028.4109
## 63 mapped intermediate Caenorhabditis 6934 22689.03,26137.62 10252,11447.5 66 937.00 4731.5 23335.75 2187342 24413.3238 73245.4207
## 64 mapped intermediate Drosophila 992 92498.83,116597.69 39379,60320.5 77 661.50 7251.5 122432.00 1338161 104548.2621 193394.2339
## 65 mapped intermediate Oikopleura 17578 10009.24,10832.49 3847,4098 50 595.00 2231.5 8118.75 677793 10420.8637 27842.3288
## 66 mapped distant Caenorhabditis 6546 21807.96,24975.79 10728.5,11877.5 55 1026.25 5201.5 23638.50 2037825 23391.8720 65371.9749
## 67 mapped distant Drosophila 5959 15413.96,17007.28 7325.5,8328 54 701.00 3134.0 17379.50 532037 16210.6196 31370.6132
## 68 mapped distant Ciona 8189 20019.71,21418.06 12785.5,13770.5 61 2203.00 8358.0 25455.00 526016 20718.8846 32276.7284
## 69 mapped distant Oikopleura 69749 4862.84,5020.76 2181.5,2245.5 51 311.00 1311.0 4411.00 218004 4941.7983 10639.7762
## 70 mapped different_genus <NA> 2708 1582.66,1962.34 653,731 78 189.00 469.5 1243.00 81983 1772.4952 5038.1306
## 71 unaligned same_or_sister Drosophila 9138 2583.15,3115.67 521.5,592 1 71.00 276.0 1299.00 545408 2849.4082 12984.5839
## 72 unaligned same_or_sister Ciona 36133 640.21,686.43 262,269 1 71.00 220.0 444.00 169778 663.3193 2240.8260
## 73 unaligned same_or_sister Oikopleura 37958 705.33,746.63 316.5,328 1 27.00 220.5 622.75 56097 725.9802 2053.0616
## 74 unaligned close Caenorhabditis 99450 840.06,875.41 420,426.5 1 128.00 349.0 711.00 165349 857.7356 2843.4470
## 75 unaligned close Drosophila 22743 1251.61,1518.29 232,238.5 1 129.00 202.0 360.00 859148 1384.9524 10259.1500
## 76 unaligned close Ciona 168424 592.73,619.35 239.5,242 1 75.00 212.0 382.00 222284 606.0407 2786.3012
## 77 unaligned intermediate Caenorhabditis 89243 1820,1939.05 765,776.5 1 307.00 651.0 1234.00 1210598 1879.5255 9072.9767
## 78 unaligned intermediate Drosophila 68626 665.01,916.59 223.5,225.5 1 149.00 209.0 303.00 2615430 790.8018 16812.8419
## 79 unaligned intermediate Oikopleura 96501 649.84,681.14 257.5,261.5 1 72.00 233.0 410.00 106962 665.4904 2480.1362
## 80 unaligned distant Caenorhabditis 87550 2063.33,2216.93 736,750.5 1 201.00 571.0 1305.00 1062215 2140.1281 11593.9599
## 81 unaligned distant Drosophila 47867 2115.49,2325.33 1100,1125.5 1 424.00 853.0 1868.00 1031382 2220.4080 11711.7224
## 82 unaligned distant Ciona 138550 1507.32,1563.34 673,683 1 301.00 497.0 1147.00 340903 1535.3326 5319.4596
## 83 unaligned distant Oikopleura 248924 864.59,895.56 380.5,384.5 1 154.00 309.0 639.00 283076 880.0764 3941.8615
## 84 unaligned different_genus <NA> 4443 51033.47,56289.1 31315.5,35664.5 1 1059.00 12496.0 72354.50 1318064 53661.2845 89344.3051
## 85 unmaped same_or_sister Drosophila 2616 6516.25,8298.34 2456,2975.5 1 266.75 1311.0 5929.50 545408 7407.2924 23241.7218
## 86 unmaped same_or_sister Ciona 6304 1718.19,1933.18 905.5,1002.5 1 24.00 521.0 1977.50 169778 1825.6851 4353.8685
## 87 unmaped same_or_sister Oikopleura 13396 1049.23,1142.4 434.5,471.5 1 9.00 209.0 989.25 56097 1095.8156 2750.6950
## 88 unmaped close Caenorhabditis 18898 1838.96,1998.78 735.5,779.5 1 89.00 473.0 1517.00 165349 1918.8722 5604.5425
## 89 unmaped close Drosophila 1776 10099.58,13319.72 4087,5246 1 407.50 2005.5 10593.00 859148 11709.6492 34595.6295
## 90 unmaped close Ciona 13298 2842.81,3131.98 1138,1230 1 82.00 595.0 2508.75 222284 2987.3928 8505.9641
## 91 unmaped intermediate Caenorhabditis 6914 8589.35,9904.43 4090,4416 1 934.25 2774.5 7995.25 1210598 9246.8896 27890.7597
## 92 unmaped intermediate Drosophila 984 23497.61,40593.44 1173,1914.5 1 313.75 693.5 4526.00 2615430 32045.5264 136638.9172
## 93 unmaped intermediate Oikopleura 17332 1951.06,2110.28 647.5,699 1 92.00 321.0 1489.00 106962 2030.6699 5347.2715
## 94 unmaped distant Caenorhabditis 6528 11351.82,13170.47 5358.5,5799.5 1 1237.75 3847.0 10231.50 1062215 12261.1434 37478.3028
## 95 unmaped distant Drosophila 5951 5780.24,7400.13 2069.5,2221 1 772.50 1602.0 3679.50 1031382 6590.1850 31872.2350
## 96 unmaped distant Ciona 8137 9082.69,9868.25 5618,5949 1 1916.00 4319.0 9995.00 340903 9475.4708 18074.6835
## 97 unmaped distant Oikopleura 69607 1748.1,1853.14 675.5,694.5 1 160.00 430.0 1319.00 283076 1800.6193 7069.6154
## 98 unmaped different_genus <NA> 2679 83929.15,91617.44 66459.5,73084.5 1 19851.50 55753.0 119267.50 1318064 87773.2912 101470.9185
Aligned means “before coalescing” and mapped means “after coalescing”.
gg_freq_poly(widths$gbs) +
aes(width, after_stat(density), col=name) +
facet_wrap(~genus + class, scales = "free_y") +
ggtitle("Distribution of aligned region widths") +
scale_color_manual(values = names_table$class)
gg_freq_poly(widths$gbs |> dplyr::filter(genus == "Oikopleura")) +
aes(col=name) +
facet_wrap(~dist, scales = "free_y", nrow = 1) +
ggtitle("Distribution of aligned region widths") +
scale_color_manual(values = names_table$query)
gg_freq_poly(widths$gbs) +
aes(width, after_stat(density), col=class) +
facet_wrap(~genus, scales = "free_y", ncol = 1) +
ggtitle("Distribution of aligned region widths")
The distribution of widths is not the same on long and short arms.
Note that for this reason, it would be unwise to filter by width on some operations such as coalescing.
gg_freq_poly(widths$gbs) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of aligned region widths")
gg_freq_poly(widths$gbs |> subset(chr == "chr1")) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of aligned region widths on chr1")
Width distribution per arm follows similar pattern on chr2 (first graph). Pairs at a similar evolutionary distance show similar profile, with the exception of Osa_Aom, where the number of alignments on short arms is higher. Nevertheless, the trend of widths being longer on long arms stay true, so we can average them in the plot below.
Unaligned regions show no difference of width between arms, except perhaps at the “same population” level.
gg_freq_poly(widths$unal) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of unaligned region widths")
gg_freq_poly(widths$unal |> subset(chr == "chr1")) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of unaligned region widths")
gg_freq_poly(widths$unal |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", ncol=1) +
ggtitle("Distribution of unaligned region widths") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
Interestingly, only in Oikopleura do the width distributions from pairs of different distance peak at the same size. Could that be because the genome is very compact?
gg_freq_poly(widths$unal |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=name) +
facet_wrap(~dist, scales = "free_y", ncol=1) +
ggtitle("Distribution of unaligned region widths") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)") +
scale_color_manual(values = names_table$query)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
gg_freq_poly(widths$unal) +
aes(col=class) +
facet_wrap(~genus, scales = "free_y", ncol=1) +
ggtitle("Distribution of unaligned region widths") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
After coalescing, some width distribution become clearly bimodal. The distributions are hard to compare between genera and between evolutionary distance groups, and even within group for the “same population” Oikopleura.
The plots below display the data in details for the reader to make their opinion about the above statement.
# Using after_stat(density) to make more meaningful comparisons between genus
gg_freq_poly(widths$coa) +
aes(width, after_stat(density), col=name) +
facet_wrap(~genus + class, scales = "free_y") +
ggtitle("Distribution of mapped region widths") +
scale_color_manual(values = names_table$query)
Faceting by genus only. Why is North / North (“intermediate”) not bimodal?
gg_freq_poly(widths$coa) +
aes(width, after_stat(density), col=class) +
facet_wrap(~genus, scales = "free_y", ncol = 1) +
ggtitle("Distribution of mapped region widths")
What is in the first peak of the bimodal distributions ?
ggplot(widths$coa) + aes(width, col = genus) + stat_ecdf() + scale_x_log10(limits = c(50,5e6)) + facet_wrap(~class)
## Warning: Removed 15 rows containing non-finite values (`stat_ecdf()`).
ggplot(widths$coa) + aes(width, col = class) + stat_ecdf() + scale_x_log10(limits = c(50,5e6)) + facet_wrap(~genus)
## Warning: Removed 15 rows containing non-finite values (`stat_ecdf()`).
Note: there is a bug: the lines jump between replicates instead of plotting them separately
widths$coa |>
dplyr::arrange(name, width) |>
dplyr::group_by(name) |>
dplyr::mutate(cumsum = cumsum(width)) |>
ggplot() + facet_wrap(~class) + aes(x = width, y = cumsum, col = genus) + geom_line() + scale_x_log10() + scale_y_log10()
widths$coa |>
dplyr::arrange(name, width) |>
dplyr::group_by(name) |>
dplyr::mutate(cumsum = cumsum(width)) |>
ggplot() + facet_wrap(~genus) + aes(x = width, y = cumsum, col = class) + geom_line() + scale_x_log10() + scale_y_log10()
When faceted by chromosome and arm type, bimodality is also observed. There is a small peak under 500 bp, found on all arms except YSR. The second peak shows two different distributions for long and short arms.
gg_freq_poly(widths$coa) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of alignment widths after collapsing, split by arm.")
gg_freq_poly(widths$coa) +
aes(col=flag) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of alignment widths after collapsing, split by flag.")
gg_freq_poly(widths$coa ) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of alignment widths on chr1 after collapsing, split by genome pair")+
geom_vline(xintercept = 500)
gg_freq_poly(widths$coa |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", ncol=1) +
ggtitle("Distribution of alignment widths after collapsing, split by distance group")
gg_freq_poly(widths$coa |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=flag) +
facet_wrap(~class + flag, scales = "free_y") +
ggtitle("Distribution of alignment widths after collapsing, split by distance group")
Therefore, if the first peak represented events that we do not want
to consider breaking synteny (either alignment artefacts or
insertion/transpositions of short mobile elements), we could re-coalesce
once after removing the shortest intervals. We tried some width cutoffs
on the coalescing step, but at 500 bp, the coverage of the genome by the
resulting mapped regions is already decreasing
(vignette("CoalescingCutoffs", package = "OikScrambling")
).
The mapped regions that are found identical in the aligned set are those that were not coalesced… Are they the small peak ?
p_all <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "Oki – North")) +
aes(col=name) +
facet_wrap(~dist, scales = "free_y", nrow = 1) +
ggtitle("Distribution of mapped region widths (vline at 600)") + theme_bw() +
geom_vline(xintercept = 600)
p_coa_map <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "Oki – North", Arm %in% c("short", "long"))) +
aes(col=name) +
ggtitle("FALSE = collapsed, TRUE = non-collapsed") + theme_bw() +
geom_vline(xintercept = 600) +
facet_wrap(~nonCoa + Arm)
p_all + p_coa_map + plot_layout(guides = 'collect') + theme_bw()
The short-width mapped peak is made of aligned regions that can not be coalesced.
We know from
vignette("RepeatRegions", package = "OikScrambling")
that
these short mapped regions are not repeats.
Unmapped regions are a bit longer on the short arm in Oki – North comparisons…
gg_freq_poly(widths$unmap) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of unmapped regions width after collapsing, split by arm.")
gg_freq_poly(widths$unmap) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of unmapped regions width after collapsing, split by genome pair")
gg_freq_poly(widths$unmap |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of unmapped region widths after collapsing, split by distance group") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
Note the short peak when comparing pairs within the same population.
gg_freq_poly(widths$bri) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of widths of bridge regions, split by arm.")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 22514 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(widths$bri) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of widths of bridge regions, split by genome pair")
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 22514 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(widths$bri |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of widths of bridge regions, split by distance group") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 5425 rows containing non-finite values (`stat_bin()`).
Reminder: here we measure the distance between the two colinear pairs that are separated by the translocated pair.
gg_freq_poly(widths$tra) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of widths of translocated regions, split by arm.")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 216 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(widths$tra) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of widths of translocated regions, split by genome pair")
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 216 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(widths$tra |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of widths of translocated regions, split by distance group") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 125 rows containing non-finite values (`stat_bin()`).
Small peak in “same pop” disapears by design. Total number in the main peak also decreases, because they may pass the size filter on the query genome.
gg_freq_poly(widths$tra2) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of widths of translocated regions (after removal), split by arm.")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 10 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(widths$tra2) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of widths of translocated regions (after removal), split by genome pair")
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 10 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(widths$tra2 |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of widths of translocated regions (after removal), split by distance group") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2 rows containing non-finite values (`stat_bin()`).
widths$tra_coa_aln <- makeWidthsDF(sapply(coa, \(gb) gb |> flagTranslocations() |> filterTranslocations())) |> tibble::as_tibble()
gg_freq_poly(widths$tra_coa_aln) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of widths of translocated regions (aligned), split by arm.")
gg_freq_poly(widths$tra_coa_aln) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of widths of translocated regions (aligned), split by genome pair")
gg_freq_poly(widths$tra_coa_aln |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of widths of translocated regions (aligned), split by distance group") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
widths$tra_coa2_aln <- makeWidthsDF(sapply(coa2, \(gb) gb |> flagTranslocations() |> filterTranslocations())) |> tibble::as_tibble()
gg_freq_poly(widths$tra_coa2_aln) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of widths of translocated regions (aligned, after removal), split by arm.")
gg_freq_poly(widths$tra_coa2_aln) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of widths of translocated regions (aligned, after removal), split by genome pair")
gg_freq_poly(widths$tra_coa2_aln |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of widths of translocated regions (aligned, after removal), split by distance group") +
geom_vline(xintercept = 300) +
scale_x_log10("Width (vertical bar at 300 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
gg_freq_poly(widths$coa2) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of alignment widths after double-collapsing, split by arm.")
gg_freq_poly(widths$coa2) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of widthsof alignment widths after double-collapsing, split by genome pair")
gg_freq_poly(widths$coa2 |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of alignment widths after double-collapsing, split by distance group") +
geom_vline(xintercept = 500) +
scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
p_ali <- gg_freq_poly(widths$gbs |> dplyr::filter(! is.na(genus))) +
aes(width, after_stat(density), col=class) + theme_bw() +
facet_wrap(~genus, scales = "free_y", ncol = 1) +
ggtitle("Aligned")
p_una <- gg_freq_poly(widths$unal |> dplyr::filter(! is.na(genus))) +
aes(width, after_stat(density), col=class) + theme_bw() +
facet_wrap(~genus, scales = "free_y", ncol = 1) +
ggtitle("Not aligned")
p_map <- gg_freq_poly(widths$coa |> dplyr::filter(! is.na(genus))) +
aes(width, after_stat(density), col=class) + theme_bw() +
facet_wrap(~genus, scales = "free_y", ncol = 1) +
ggtitle("Mapped")
# Patchwork
(p_ali | p_una | p_map) + plot_layout(ncol = 3, guides = 'collect') + theme_bw()
p_isol <- gg_freq_poly(widths$isol |> dplyr::filter(genus == "Oikopleura")) +
aes(width, after_stat(density), col=class) + theme_bw() +
ggtitle("Isolated alignments")
p_unma <- gg_freq_poly(widths$unmap |> dplyr::filter(genus == "Oikopleura")) +
aes(width, after_stat(density), col=class) + theme_bw() +
ggtitle("Unmapped")
p_coar <- gg_freq_poly(widths$coa |> dplyr::filter(genus == "Oikopleura", !nonCoa)) +
aes(width, after_stat(density), col=class) + theme_bw() +
ggtitle("Colinear regions")
p_brid <- gg_freq_poly(widths$bri |> dplyr::filter(genus == "Oikopleura")) +
aes(width, after_stat(density), col=class) + theme_bw() +
ggtitle("Bridge regions")
p_coal <- gg_freq_poly(widths$gbs |> dplyr::filter(genus == "Oikopleura", !nonCoa)) +
aes(width, after_stat(density), col=class) + theme_bw() +
ggtitle("Colinear alignments")
# Patchwork
(p_isol | p_unma | p_coar | p_brid | p_coal) + plot_layout(nrow = 1, guides = 'collect') + theme_bw()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 7561 rows containing non-finite values (`stat_bin()`).
w <- rbind(
widths$isol |> dplyr::filter(genus == "Oikopleura") |> dplyr::mutate(aliclass = "Isol"),
widths$unmap |> dplyr::filter(genus == "Oikopleura") |> dplyr::mutate(aliclass = "Unmap"),
widths$coa |> dplyr::filter(genus == "Oikopleura", !nonCoa) |> dplyr::mutate(aliclass = "Col reg"),
widths$bri |> dplyr::filter(genus == "Oikopleura") |> dplyr::mutate(aliclass = "Bridge"),
widths$gbs |> dplyr::filter(genus == "Oikopleura", !nonCoa) |> dplyr::mutate(aliclass = "Col aln")
)
gg_freq_poly(w) +
aes(width, after_stat(density), col=aliclass) + theme_bw()+
facet_wrap(~class, ncol = 1)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 7561 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(w |> dplyr::filter(class == "distant")) +
aes(width, after_stat(density), col=aliclass) + theme_bw()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1406 rows containing non-finite values (`stat_bin()`).
gg_freq_poly(w |> dplyr::filter(class == "distant"), scale = "natural") +
aes(width, after_stat(density), col=aliclass) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3))
## Warning: Removed 49725 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 10 rows containing missing values (`geom_path()`).
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
##
## collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:XVector':
##
## slice
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Frequency table is pre-computed so that ggplot2 can smooth over the different alignment pairs
wfreq <- w |>
filter(class == "distant") |>
count(aliclass, name, width) |>
group_by(aliclass, name) |>
mutate(prop = prop.table(n))
# Smoothing with loess to see the 3 peaks in bridge regions
wfreq |>
filter(width <= 500, aliclass != "Col reg") |>
ggplot() +
aes(width, prop, col = aliclass) +
geom_smooth(span = .15, method = "loess", fill = NA) +
scale_x_continuous(limits = c(0, 500)) + theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
# Plotting the collinear regions separately because their length distribution is
# on a different scale.
wfreq |>
filter(width >= 250, aliclass == "Col reg") |>
ggplot() +
aes(width, prop, col = aliclass) +
geom_smooth(span = .15, method = "loess", fill = NA) +
scale_x_log10(limits = c(250, 1e5)) + theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 77 rows containing non-finite values (`stat_smooth()`).
p_ali2 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "Oki – North", nonCoa), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Isolated alignments")
p_unmap2 <- gg_freq_poly(widths$unmap |> dplyr::filter(dist == "Oki – North"), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Unmapped")
p_syn2 <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "Oki – North", !nonCoa), binwidth = 600, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e4)) +
ggtitle("Colinear region")
p_map2 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "Oki – North", !nonCoa), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Colinear alignment")
p_bridge <- gg_freq_poly(widths$bri |> dplyr::filter(dist == "Oki – North"), binwidth = 20, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Bridge")
# Patchwork
p_oki_north <-(p_ali2 | p_unmap2 | p_syn2 | p_bridge | p_map2 ) + plot_layout(ncol = 5, guides = 'collect') + theme_bw() + plot_annotation(title = "Okinawa vs North")
p_oki_north
## Warning: Removed 1485 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 8089 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 2326 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 3190 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 15668 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
p_ali3 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "North – North", nonCoa), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Isolated alignments")
p_unmap3 <- gg_freq_poly(widths$unmap |> dplyr::filter(dist == "North – North"), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Unmapped")
p_syn3 <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "North – North", !nonCoa), binwidth = 600, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e4)) +
ggtitle("Colinear region")
p_map3 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "North – North", !nonCoa), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Colinear alignment")
p_bridge3 <- gg_freq_poly(widths$bri |> dplyr::filter(dist == "North – North"), binwidth = 20, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Bridge")
# Patchwork
p_north_north <- (p_ali3 | p_unmap3 | p_syn3 | p_bridge3 | p_map3 ) + plot_layout(ncol = 5, guides = 'collect') + theme_bw() + plot_annotation(title = "North vs North")
p_ali4 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "In same pop", nonCoa), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Isolated alignments")
p_unmap4 <- gg_freq_poly(widths$unmap |> dplyr::filter(dist == "In same pop"), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Unmapped")
p_syn4 <- gg_freq_poly(widths$coa |> dplyr::filter(dist == "In same pop", !nonCoa), binwidth = 600, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e4)) +
ggtitle("Colinear region")
p_map4 <- gg_freq_poly(widths$gbs |> dplyr::filter(dist == "In same pop", !nonCoa), binwidth = 60, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Colinear alignment")
p_bridge4 <- gg_freq_poly(widths$bri |> dplyr::filter(dist == "In same pop"), binwidth = 20, scale = "natural") +
aes(width, after_stat(density), col=name) + theme_bw() +
scale_x_continuous(limits = c(0, 3e3)) +
ggtitle("Bridge")
# Patchwork
p_same_pop <- (p_ali4 | p_unmap4 | p_syn4 | p_bridge4 | p_map4 ) + plot_layout(ncol = 5, guides = 'collect') + theme_bw() + plot_annotation(title = "Same population")
p_oki_north /
p_north_north /
p_same_pop
## Warning: Removed 1485 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 8089 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 2326 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 3190 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 15668 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 16 rows containing missing values (`geom_path()`).
## Warning: Removed 431 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 2700 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 1402 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 876 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 11985 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 8 rows containing missing values (`geom_path()`).
## Warning: Removed 1031 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 1229 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 1482 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 614 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
## Warning: Removed 12048 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 6 rows containing missing values (`geom_path()`).
GBreaks2widthTibble <- function(gb, pair = NULL) {
df <- as.data.frame(gb)
if(is.null(df$Arm)) df$Arm <- NA
if(is.null(df$rep)) df$rep <- NA
if(is.null(df$query.rep)) df$query.rep <- NA
tibble::tibble(target = df$width, query = df$query.width,
target.rep = ! is.na(df$rep), query.rep = ! is.na(df$query.rep), # TRUE if overlaps a repeat
pair = pair, Arm = df$Arm,
dist = OikScrambling:::compDistance(pair))
}
gbs.w2w <- do.call(rbind, lapply(names(gbs), \(name) GBreaks2widthTibble(gbs[[name]], pair = name)))
bri.w2s <- do.call(rbind, lapply(names(bri), \(name) GBreaks2widthTibble(bri[[name]], pair = name)))
tra.w2s <- do.call(rbind, lapply(names(tra), \(name) GBreaks2widthTibble(tra[[name]], pair = name)))
tra2.w2s <- do.call(rbind, lapply(names(tra2), \(name) GBreaks2widthTibble(tra2[[name]], pair = name)))
rbind (
aligned = table(paste( gbs.w2w$target.rep, gbs.w2w$query.rep)),
bri = table(paste(bri.w2s$target.rep, bri.w2s$query.rep)),
transpo = table(paste( tra.w2s$target.rep, tra.w2s$query.rep))
)
## FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE
## aligned 1049613 41624 41943 52103
## bri 801374 84573 82538 35159
## transpo 10377 1252 3043 2782
ggPlotW2W <- function(tibble) {
ggplot(tibble) +
aes(target, query) +
geom_point(alpha = 0.05) +
geom_density_2d() +
scale_x_log10() +
scale_y_log10()
}
ggPlotW2W(gbs.w2w |> dplyr::filter(!is.na(Arm))) + facet_wrap(~Arm) +
ggtitle("Width of the target and query aligned regions")
ggPlotW2W(bri.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~Arm) +
ggtitle("Width of the target and query bridge regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 14293 rows containing non-finite values (`stat_density2d()`).
ggPlotW2W(gbs.w2w) + facet_wrap(~pair) +
ggtitle("Width of the target and query aligned regions")
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
ggPlotW2W(gbs.w2w) + facet_wrap(~dist) +
ggtitle("Width of the target and query aligned regions")
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
ggPlotW2W(bri.w2s) + facet_wrap(~pair) +
ggtitle("Width of the target and query bridge regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 43487 rows containing non-finite values (`stat_density2d()`).
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
ggPlotW2W(tra.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~pair) +
ggtitle("Width of the target and query translocated regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 893 rows containing non-finite values (`stat_density2d()`).
ggPlotW2W(tra.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~dist) +
ggtitle("Width of the target and query translocated regions")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 893 rows containing non-finite values (`stat_density2d()`).
ggPlotW2W(tra2.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~pair) +
ggtitle("Width of the target and query translocated regions (after removal)")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 62 rows containing non-finite values (`stat_density2d()`).
ggPlotW2W(tra2.w2s |> dplyr::filter(!is.na(Arm))) + facet_wrap(~dist) +
ggtitle("Width of the target and query translocated regions (after removal)")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 62 rows containing non-finite values (`stat_density2d()`).
ggPlotW2W(gbs.w2w) +
facet_wrap(~query.rep + target.rep) +
ggtitle("Width of the target and query aligned regions, matching or not matching repeats")
ggPlotW2W(bri.w2s) +
facet_wrap(~query.rep + target.rep) +
ggtitle("Width of the target and query bridge regions, matching or not matching repeats")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 43487 rows containing non-finite values (`stat_density2d()`).
ggPlotW2W(tra.w2s) +
facet_wrap(~dist + query.rep + target.rep, nrow = 3) +
ggtitle("Width of the target and query translocated regions, matching or not matching repeats")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1298 rows containing non-finite values (`stat_density2d()`).
ggplot(bri.w2s) +
aes(target / query, col = paste(target.rep, query.rep) ) +
scale_x_log10() +
geom_freqpoly() +
ggtitle("In presence of a repeat, target width tends to be longer than query") +
geom_vline(xintercept = 1)
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 43487 rows containing non-finite values (`stat_bin()`).
We know that we can detect more inversions after coalescing and
removing translocations
(vignette("Inversions", package = "OikScrambling")
).
Many inversions are shorter than 1 kbp. Size may differ slightly between long and short arms.
invs.gbs <- sapply(gbs, function(gb) filterInversions(flagInversions(gb))) |> SimpleList()
invs.coa <- sapply(coa, function(gb) filterInversions(flagInversions(gb))) |> SimpleList()
invs.coa2 <- sapply(coa2, function(gb) filterInversions(flagInversions(gb))) |> SimpleList()
sapply(invs.gbs, length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu
## 614 604 89 608 541 613 241 620 56 212 597 237 608 237 19 42 58 16 0 50 58 14 0 697 20
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel
## 13 12 221 91 40 236 92 64
sapply(invs.coa, length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu
## 670 675 105 665 591 671 321 678 73 275 667 319 686 319 24 51 71 92 0 65 70 92 0 1003 44
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel
## 21 16 300 212 132 318 200 166
sapply(invs.coa2, length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu
## 704 705 122 695 611 706 330 708 79 279 695 327 711 335 27 55 76 92 0 71 77 92 0 1003 45
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel
## 23 16 331 216 134 358 207 166
# Sanity check that we detect inversions on both strands
sapply(invs.coa2, \(x) {summary(decode(strand(x)))})
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu
## + 381 365 33 369 317 382 186 367 38 147 354 182 365 164 7 2 6 51 0 5 5 52 0 470
## - 323 340 89 326 294 324 144 341 41 132 341 145 346 171 20 53 70 41 0 66 72 40 0 533
## * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Dme_Dsu Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel
## + 29 11 3 53 84 73 60 88 69
## - 16 12 13 278 132 61 298 119 97
## * 0 0 0 0 0 0 0 0 0
widths$invs.gbs <- makeWidthsDF(invs.gbs) |> tibble::as_tibble()
widths$invs.coa <- makeWidthsDF(invs.coa) |> tibble::as_tibble()
widths$invs.coa2 <- makeWidthsDF(invs.coa2) |> tibble::as_tibble()
gg_freq_poly(widths$invs.gbs) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of alignment widths for inverted regions, using aligned regions, split by arm.")
gg_freq_poly(widths$invs.gbs) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of alignment widths for inverted regions, using aligned regions, split by genome pair")
gg_freq_poly(widths$invs.gbs |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of alignment widths for inverted regions, using aligned regions, split by distance group") +
geom_vline(xintercept = 500) +
scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
gg_freq_poly(widths$invs.coa) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of alignment widths for inverted regions, using mapped regions, split by arm.")
gg_freq_poly(widths$invs.coa) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of alignment widths for inverted regions, using mapped regions, split by genome pair")
gg_freq_poly(widths$invs.coa |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of alignment widths for inverted regions, using mapped regions, split by distance group") +
geom_vline(xintercept = 500) +
scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
gg_freq_poly(widths$invs.coa2) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution of alignment widths for inverted regions, using doublemapped regions, split by arm.")
gg_freq_poly(widths$invs.coa2) +
aes(col=Arm) +
facet_wrap(~name, scales = "free_y", ncol=5) +
ggtitle("Distribution of alignment widths for inverted regions, using doublemapped regions, split by genome pair")
gg_freq_poly(widths$invs.coa2 |> subset(chr %in% c("chr1", "chr2", "PAR"))) +
aes(col=Arm) +
facet_wrap(~dist, scales = "free_y", nrow=1) +
ggtitle("Distribution of alignment widths for inverted regions, using doublemapped regions, split by distance group") +
geom_vline(xintercept = 500) +
scale_x_log10("Width (vertical bar at 500 bp)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
inv.lgaps <- sapply(coa, leftInversionGaps) |> SimpleList()
inv.lgaps[ 1:5] <- sapply(inv.lgaps[ 1:5], flagLongShort, longShort$OKI2018.I69)
inv.lgaps[ 6:10] <- sapply(inv.lgaps[ 6:10], flagLongShort, longShort$OSKA2016v1.9)
inv.lgaps[11:15] <- sapply(inv.lgaps[11:15], flagLongShort, longShort$Bar2.p4)
sapply(inv.lgaps, length)
## Oki_Osa Oki_Bar Oki_Kum Oki_Aom Oki_Nor Osa_Oki Osa_Bar Osa_Kum Osa_Aom Osa_Nor Bar_Oki Bar_Osa Bar_Kum Bar_Aom Bar_Nor Ply_Ros Ply_Rob Ply_Sav Ply_Oki Rob_Ros Rob_Ply Rob_Sav Rob_Oki Dme_Dbu Dme_Dsu
## 669 675 96 665 591 669 319 678 69 274 666 315 685 315 22 46 65 92 0 61 67 92 0 1003 44
## Dme_Dya Dme_Dma Cni_Cbr Cni_Cre Cni_Cin Cbr_Cni Cbr_Cre Cbr_Cel
## 21 14 294 212 132 310 200 166
widths$invs.lgaps <- makeWidthsDF(inv.lgaps) |> tibble::as_tibble()
gg_freq_poly(widths$invs.lgaps) +
aes(col=Arm) +
facet_wrap(~chr, scales = "free_y") +
ggtitle("Distribution widths of left gaps next to inverted regions, split by arm.") +
geom_vline(xintercept = 1000)