# Global chunk options: cache chunk results and render every figure as both
# PNG and SVG with a 10 x 10 size (knitr measures figure size in inches).
knitr::opts_chunk$set(
  cache      = TRUE,
  dev        = c("png", "svg"),
  fig.ext    = c("png", "svg"),
  fig.width  = 10,
  fig.height = 10
)

Introduction

This vignette attempts to produce a scaffolding guide to be used with the OdB3 (Nor) genome for plotting purposes.

Load R packages and data

## Warning in runHook(".onLoad", env, package.lib, package): input string
## 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid
## UTF-8

## Warning in runHook(".onLoad", env, package.lib, package): input string
## 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid
## UTF-8
# Restore the objects saved in BreakPoints.Rdata into the current environment.
# NOTE(review): presumably this is where `gbs` (used below) comes from — confirm.
load("BreakPoints.Rdata")

Scaffolding

Helper functions

# Helper used to remove the contigs that have their main match elsewhere.
# For every query sequence of `gb`, sums the widths of the query ranges and
# divides that sum by the length of the query sequence, i.e. the fraction of
# each query sequence covered by the alignments in `gb`.
# `gb` must have exactly one target seqlevel and no missing query seqlengths.
QTcoverage <- function(gb) {
  # Not ready for full objects: a single target sequence is expected.
  stopifnot(length(seqlevels(gb)) == 1)
  # Every query sequence length is needed as a denominator below.
  stopifnot(!any(is.na(seqlengths(gb$query))))
  per_query <- split(gb, seqnames(gb$query))
  aligned <- unlist(lapply(per_query, function(part) sum(width(part$query))))
  aligned / seqlengths(gb$query)
}

# Start from the scaffolding guides shipped with OikScrambling and reset the
# Nor_Bar entry, which is rebuilt below.
scafs <- OikScrambling::scafs
scafs[["Nor_Bar"]] <- list()

Chr2

Visual inspection

All contigs

# Work on a copy of gbs$Bar_Nor, as it is modified later.
Bar_Nor <- gbs$Bar_Nor
# Keep only the alignments located on Chr2 and drop the sequence levels that
# are no longer used, on both the target and the query side.
Bar_Nor_Chr2 <- plyranges::filter(Bar_Nor, seqnames == "Chr2")
seqlevels(Bar_Nor_Chr2) <- seqlevelsInUse(Bar_Nor_Chr2)
seqlevels(Bar_Nor_Chr2$query) <- seqlevelsInUse(Bar_Nor_Chr2$query)

makeOxfordPlots(Bar_Nor_Chr2, col = "strand") +
  ggtitle("Everything from OdB3 that matches Barcelona's Chr2")

We will need a piece of scaffold_3 between scaffolds 5 and 1, and a piece of scaffold_8 between scaffolds 60 and 15.

Split scaffold 3

# Plot every alignment whose query side lies on scaffold_3.
Bar_Nor |>
  plyranges::filter(seqnames(query) == "scaffold_3") |>
  makeOxfordPlots()

# List the scaffold_3 alignments that match Chr2, coalesced with
# minwidth = 1e3 and swapped so that scaffold_3 coordinates come first.
# The printed ranges are used to choose where to split the scaffold.
# `sort = TRUE` is spelled out: `T` can be reassigned and `s =` relied on
# partial argument matching.
Bar_Nor_Chr2 |>
  plyranges::filter(seqnames(query) == "scaffold_3") |>
  coalesce_contigs(minwidth = 1e3) |>
  swap(sort = TRUE)
## GBreaks object with 3 ranges and 1 metadata column:
##         seqnames          ranges strand |                query
##            <Rle>       <IRanges>  <Rle> |            <GRanges>
##   [1] scaffold_3     5183-652224      - | Chr2:8280598-8805630
##   [2] scaffold_3   681583-700924      - | Chr2:8257632-8279108
##   [3] scaffold_3 1353447-1354577      + | Chr2:8279468-8280597
##   -------
##   seqinfo: 360 sequences from OdB3 genome
# Split scaffold_3 at position 700924, the end of range [2] printed above.
# The split is applied to the swapped object, which is then swapped back.
Bar_Nor.1 <- Bar_Nor |>
  swap() |>
  splitSeqLevel("scaffold_3", 700924) |>
  swap()

Split scaffold 8

# Plot every alignment whose query side lies on scaffold_8.
Bar_Nor |>
  plyranges::filter(seqnames(query) == "scaffold_8") |>
  makeOxfordPlots()

# List the scaffold_8 alignments that match Chr2, coalesced with
# minwidth = 1e3 and swapped so that scaffold_8 coordinates come first.
# The printed ranges are used to choose where to split the scaffold.
# `sort = TRUE` is spelled out: `T` can be reassigned and `s =` relied on
# partial argument matching.
Bar_Nor_Chr2 |>
  plyranges::filter(seqnames(query) == "scaffold_8") |>
  coalesce_contigs(minwidth = 1e3) |>
  swap(sort = TRUE)
## GBreaks object with 7 ranges and 1 metadata column:
##         seqnames          ranges strand |                  query
##            <Rle>       <IRanges>  <Rle> |              <GRanges>
##   [1] scaffold_8   137585-138688      + | Chr2:11731462-11732777
##   [2] scaffold_8   187827-189405      - | Chr2:13225993-13227552
##   [3] scaffold_8   817748-820365      - |   Chr2:2349749-2352438
##   [4] scaffold_8   820525-825496      - |   Chr2:2572041-2577605
##   [5] scaffold_8   923200-940893      - | Chr2:12007039-12020611
##   [6] scaffold_8 1110681-1112993      - |   Chr2:3811254-3813535
##   [7] scaffold_8 1146316-1380562      + |     Chr2:301552-633552
##   -------
##   seqinfo: 360 sequences from OdB3 genome
# Split scaffold_8 at position 1110680, one base before the start of
# range [6] printed above.  The split is applied to the swapped object, which
# is then swapped back.
Bar_Nor.2 <- Bar_Nor.1 |>
  swap() |>
  splitSeqLevel("scaffold_8", 1110680) |>
  swap()

Best contigs

Let’s visualise without the contigs that have their main match elsewhere.

# Rebuild the Chr2 subset from Bar_Nor.2 and drop unused sequence levels on
# both the target and the query side.
Bar_Nor_Chr2 <- plyranges::filter(Bar_Nor.2, seqnames == "Chr2")
seqlevels(Bar_Nor_Chr2) <- seqlevelsInUse(Bar_Nor_Chr2)
seqlevels(Bar_Nor_Chr2$query) <- seqlevelsInUse(Bar_Nor_Chr2$query)
# Remove the alignments of every query sequence whose QTcoverage() on Chr2 is
# below 0.5, then drop the query levels that became unused.
Bar_Nor_Chr2[
  seqnames(Bar_Nor_Chr2$query) %in%
    seqlevels(Bar_Nor_Chr2$query)[QTcoverage(Bar_Nor_Chr2) < 0.5]
] <- NULL
seqlevels(Bar_Nor_Chr2$query) <- seqlevelsInUse(Bar_Nor_Chr2$query)
makeOxfordPlots(Bar_Nor_Chr2, col = "strand") +
  ggtitle("Main hits from OdB3 to Barcelona's Chr2")

Assembly

# Scaffolding guide for Chr2: contigs in assembly order, each paired with its
# orientation (1 or -1; presumably -1 means the contig is flipped — confirm
# against scaffoldByFlipAndMerge()).
# scaffold_8_2 and scaffold_3_1 are the pieces of the split scaffolds.
scafs$Nor_Bar[["Chr2"]] <- local({
  layout <- c(
    scaffold_60  =  1,
    scaffold_8_2 =  1,
    scaffold_15  =  1,
    scaffold_39  =  1,
    scaffold_27  = -1,
    scaffold_7   = -1,
    scaffold_109 = -1,
    scaffold_98  =  1,
    scaffold_43  =  1,
    scaffold_81  =  1,
    scaffold_102 =  1,
    scaffold_72  = -1,
    scaffold_71  =  1,
    scaffold_9   = -1,
    scaffold_59  =  1,
    scaffold_5   =  1,
    scaffold_3_1 = -1,
    scaffold_1   =  1,
    scaffold_110 =  1,
    scaffold_53  =  1,
    scaffold_132 =  1,
    scaffold_225 =  1,
    scaffold_131 = -1,
    scaffold_70  = -1
  )
  data.frame(contig = names(layout), orientation = unname(layout))
})

# Apply the scaffolding guide to the swapped object, coalesce the contigs and
# plot the result with Chr2 as the target chromosome.
Bar_Nor.2 |>
  swap() |>
  scaffoldByFlipAndMerge(scafs$Nor_Bar) |>
  coalesce_contigs() |>
  plotApairOfChrs(chrT = "Chr2")

Bringing it together

# To store the results in the package for re-use in other vignettes, run:
# usethis::use_data(scafs, overwrite = TRUE)
# Check that the guide built here matches the copy shipped with the package.
identical(scafs[["Nor_Bar"]], OikScrambling::scafs[["Nor_Bar"]])
## [1] TRUE
# Same two splits as earlier in this vignette: scaffold_3 at 700924, then
# scaffold_8 at 1110680, each applied to the swapped object and swapped back.
Bar_Nor.1 <- Bar_Nor |>
  swap() |>
  splitSeqLevel("scaffold_3", 700924) |>
  swap()
Bar_Nor.2 <- Bar_Nor.1 |>
  swap() |>
  splitSeqLevel("scaffold_8", 1110680) |>
  swap()

# Whole procedure in one pipeline: swap the original object, split
# scaffold_3 and scaffold_8, apply the scaffolding guide and plot.
swap(gbs$Bar_Nor) |>
  splitSeqLevel("scaffold_3", 700924) |>
  splitSeqLevel("scaffold_8", 1110680) |>
  scaffoldByFlipAndMerge(scafs$Nor_Bar) |>
  makeOxfordPlots()

# Original plot for comparison
makeOxfordPlots(gbs$Bar_Nor)