vignettes/BergenScaffolding.Rmd
BergenScaffolding.Rmd
# Chunk defaults: cache chunk results and render every figure with both the
# png and svg devices at a 10 x 10 size.
knitr::opts_chunk$set(
  cache      = TRUE,
  dev        = c("png", "svg"),
  fig.ext    = c("png", "svg"),
  fig.width  = 10,
  fig.height = 10
)
This vignette attempts to produce a scaffolding guide to be used with the OdB3 (Nor) genome for plotting purposes.
# Attach the packages used below without printing their startup messages.
suppressPackageStartupMessages(library("OikScrambling"))
suppressPackageStartupMessages(library("patchwork"))
# Make the black-and-white theme the default for all ggplot2 plots.
ggplot2::theme_set(theme_bw())
# Load the genome objects through a non-exported OikScrambling helper.
genomes <- OikScrambling:::loadAllGenomes()
## Warning in runHook(".onLoad", env, package.lib, package): input string
## 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid
## UTF-8
## Warning in runHook(".onLoad", env, package.lib, package): input string
## 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid
## UTF-8
# Restore the objects saved in BreakPoints.Rdata into the workspace.
# NOTE(review): the `gbs` list used below is presumably among them -- confirm.
load("BreakPoints.Rdata")
# Let's remove the contigs that have their main match elsewhere.
# For each query sequence level, sum the widths of the query ranges found in
# `gb` and divide by the length of that query sequence.
#
# `gb` must have exactly one sequence level on its own (target) side, and all
# query sequence lengths must be known; otherwise the function stops.
# Returns a numeric vector with one value per query sequence level.
QTcoverage <- function(gb) {
  # Not ready for full objects: exactly one target sequence level is expected.
  stopifnot(length(seqlevels(gb)) == 1)
  # A missing query length would make the ratio below NA.
  stopifnot(!any(is.na(seqlengths(gb$query))))
  by_query <- split(gb, seqnames(gb$query))
  aligned_width <- unlist(lapply(by_query, function(piece) sum(width(piece$query))))
  aligned_width / seqlengths(gb$query)
}
# Start from the scaffolding guides stored in the package and reset the
# Nor_Bar entry, which is rebuilt below.
scafs <- OikScrambling::scafs
scafs$Nor_Bar <- list()
# Copy as we will modify it later.
Bar_Nor <- gbs$Bar_Nor
# Keep only the alignments on Chr2 and drop the sequence levels that are no
# longer in use, on both the target and the query side.
Bar_Nor_Chr2 <- plyranges::filter(Bar_Nor, seqnames == "Chr2")
seqlevels(Bar_Nor_Chr2) <- seqlevelsInUse(Bar_Nor_Chr2)
seqlevels(Bar_Nor_Chr2$query) <- seqlevelsInUse(Bar_Nor_Chr2$query)
makeOxfordPlots(Bar_Nor_Chr2, col = "strand") +
  ggtitle("Everything from OdB3 that matches Barcelona's Chr2")
We will need a piece of scaffold_3 between scaffolds 5 and 1, and a piece of scaffold_8 between scaffolds 60 and 15.
# Plot every alignment involving scaffold_3, whatever the target chromosome.
Bar_Nor |>
  plyranges::filter(seqnames(query) == "scaffold_3") |>
  makeOxfordPlots()
# List the coalesced (>= 1 kbp) Chr2 matches of scaffold_3 with the scaffold
# on the target side.  The argument is spelled out as `sort = TRUE` instead of
# relying on partial matching (`s`) and on the reassignable shorthand `T`.
Bar_Nor_Chr2 |>
  plyranges::filter(seqnames(query) == "scaffold_3") |>
  coalesce_contigs(minwidth = 1e3) |>
  swap(sort = TRUE)
## GBreaks object with 3 ranges and 1 metadata column:
## seqnames ranges strand | query
## <Rle> <IRanges> <Rle> | <GRanges>
## [1] scaffold_3 5183-652224 - | Chr2:8280598-8805630
## [2] scaffold_3 681583-700924 - | Chr2:8257632-8279108
## [3] scaffold_3 1353447-1354577 + | Chr2:8279468-8280597
## -------
## seqinfo: 360 sequences from OdB3 genome
# Split scaffold_3 at position 700924, the end of the second range listed
# above.  splitSeqLevel() is applied to the swapped object and the result is
# swapped back.
Bar_Nor.1 <- Bar_Nor |>
  swap() |>
  splitSeqLevel("scaffold_3", 700924) |>
  swap()
# Plot every alignment involving scaffold_8, whatever the target chromosome.
Bar_Nor |>
  plyranges::filter(seqnames(query) == "scaffold_8") |>
  makeOxfordPlots()
# List the coalesced (>= 1 kbp) Chr2 matches of scaffold_8 with the scaffold
# on the target side.  The argument is spelled out as `sort = TRUE` instead of
# relying on partial matching (`s`) and on the reassignable shorthand `T`.
Bar_Nor_Chr2 |>
  plyranges::filter(seqnames(query) == "scaffold_8") |>
  coalesce_contigs(minwidth = 1e3) |>
  swap(sort = TRUE)
## GBreaks object with 7 ranges and 1 metadata column:
## seqnames ranges strand | query
## <Rle> <IRanges> <Rle> | <GRanges>
## [1] scaffold_8 137585-138688 + | Chr2:11731462-11732777
## [2] scaffold_8 187827-189405 - | Chr2:13225993-13227552
## [3] scaffold_8 817748-820365 - | Chr2:2349749-2352438
## [4] scaffold_8 820525-825496 - | Chr2:2572041-2577605
## [5] scaffold_8 923200-940893 - | Chr2:12007039-12020611
## [6] scaffold_8 1110681-1112993 - | Chr2:3811254-3813535
## [7] scaffold_8 1146316-1380562 + | Chr2:301552-633552
## -------
## seqinfo: 360 sequences from OdB3 genome
# Split scaffold_8 at position 1110680, just before the range that starts at
# 1110681 in the listing above.  The split is done on the swapped object and
# the result is swapped back.
Bar_Nor.2 <- Bar_Nor.1 |>
  swap() |>
  splitSeqLevel("scaffold_8", 1110680) |>
  swap()
Let’s visualise without the contigs that have their main match elsewhere.
# Restrict the split alignments to Chr2 and drop unused sequence levels.
Bar_Nor_Chr2 <- plyranges::filter(Bar_Nor.2, seqnames == "Chr2")
seqlevels(Bar_Nor_Chr2) <- seqlevelsInUse(Bar_Nor_Chr2)
seqlevels(Bar_Nor_Chr2$query) <- seqlevelsInUse(Bar_Nor_Chr2$query)
# Remove the alignments whose query sequence has a QTcoverage() below 0.5.
Bar_Nor_Chr2[
  seqnames(Bar_Nor_Chr2$query) %in%
    seqlevels(Bar_Nor_Chr2$query)[QTcoverage(Bar_Nor_Chr2) < 0.5]
] <- NULL
# The removal may have left some query sequence levels unused; drop them too.
seqlevels(Bar_Nor_Chr2$query) <- seqlevelsInUse(Bar_Nor_Chr2$query)
makeOxfordPlots(Bar_Nor_Chr2, col = "strand") +
  ggtitle("Main hits from OdB3 to Barcelona's Chr2")
# Scaffolding guide for Chr2: one row per contig, in the listed order, with an
# orientation of 1 or -1.  The two vectors are laid out in matching groups so
# that each orientation lines up with its contig.
# NOTE(review): -1 presumably means the contig is flipped by
# scaffoldByFlipAndMerge() -- confirm against that function's documentation.
scafs$Nor_Bar[["Chr2"]] <- data.frame(
  contig = c(
    "scaffold_60",
    "scaffold_8_2",
    "scaffold_15", "scaffold_39", "scaffold_27", "scaffold_7",
    "scaffold_109", "scaffold_98", "scaffold_43",
    "scaffold_81", "scaffold_102", "scaffold_72", "scaffold_71",
    "scaffold_9", "scaffold_59", "scaffold_5",
    "scaffold_3_1",
    "scaffold_1", "scaffold_110", "scaffold_53", "scaffold_132",
    "scaffold_225", "scaffold_131", "scaffold_70"
  ),
  orientation = c(
    1,
    1,
    1, 1, -1, -1,
    -1, 1, 1,
    1, 1, -1, 1,
    -1, 1, 1,
    -1,
    1, 1, 1, 1,
    1, -1, -1
  )
)
# Apply the guide to the swapped object, coalesce the result and plot the
# pair of chromosomes for Chr2.
Bar_Nor.2 |>
  swap() |>
  scaffoldByFlipAndMerge(scafs$Nor_Bar) |>
  coalesce_contigs() |>
  plotApairOfChrs(chrT = "Chr2")
# Store the results in the package for re-use in other vignettes
# usethis::use_data(scafs, overwrite = TRUE)
# Check that the guide built here is the same as the one stored in the package.
identical(scafs$Nor_Bar, OikScrambling::scafs$Nor_Bar)
## [1] TRUE
# Step-by-step form: split scaffold_3, then scaffold_8, swapping before each
# split and swapping back afterwards.
Bar_Nor.1 <- Bar_Nor |>
  swap() |>
  splitSeqLevel("scaffold_3", 700924) |>
  swap()
Bar_Nor.2 <- Bar_Nor.1 |>
  swap() |>
  splitSeqLevel("scaffold_8", 1110680) |>
  swap()
# Single-pipeline form: swap once, apply both splits and the scaffolding
# guide, then plot.
gbs$Bar_Nor |>
  swap() |>
  splitSeqLevel("scaffold_3", 700924) |>
  splitSeqLevel("scaffold_8", 1110680) |>
  scaffoldByFlipAndMerge(scafs$Nor_Bar) |>
  makeOxfordPlots()
# Original plot for comparison
gbs$Bar_Nor |>
  makeOxfordPlots()