We skip all computations if pwmMatchesOki.Rds, because it is heavy.

## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## 
library(TFBSTools)
library(JASPAR2020)
genomes <- OikScrambling:::loadAllGenomes()
## Warning in runHook(".onLoad", env, package.lib, package): input string
## 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid
## UTF-8

## Warning in runHook(".onLoad", env, package.lib, package): input string
## 'Génoscope' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid
## UTF-8
searchSeq2GB <- function(pwm, genomeseqs, mc.cores=1L) {
  siteSet <- TFBSTools::searchSeq(pwm, genomeseqs, mc.cores = mc.cores) |> suppressWarnings()
  gr <- as(siteSet, "GRanges")
  # Slim the result object
  gr$siteSeqs <- NULL
  # Most PWMs are palindromic, so strand information has little relevance but
  # bloats the object with redundant copies
  strand(gr) <- '*'
  unique(sort(gr, ignore.strand=TRUE))
}

Select all vertebrate entries; convert to a list of position-weight matrices.

if (! file.exists("pwmMatchesOki.Rds")) {
PFMatrixList <- getMatrixSet(JASPAR2020, list(tax_group="vertebrates"))
PFMatrixList
PWMatrixList <- toPWM(PFMatrixList)
}

Scan the Okinawa genome, save the results and save a reduced version for the Gen

if (! file.exists("pwmMatchesOki.Rds")) {

Oki_DNAStringSet <- lapply(seqnames(genomes$Oki), \(name) genomes$Oki[[name]]) |> as("DNAStringSet")
names(Oki_DNAStringSet) <- seqnames(genomes$Oki)

pwmMatchesOnOki <- lapply(PWMatrixList |> as.list(), searchSeq2GB, Oki_DNAStringSet)
# x_15_98 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.98 & gr$absScore > 15]) |> as("GRangesList") |> unlist() |> reduce()
# x_10_95 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.95 & gr$absScore > 10]) |> as("GRangesList") |> unlist() |> reduce()
# x_15_95 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.95 & gr$absScore > 15]) |> as("GRangesList") |> unlist() |> reduce()
# x_12_90 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.90 & gr$absScore > 12]) |> as("GRangesList") |> unlist() |> reduce()
# x_15_90 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.90 & gr$absScore > 15]) |> as("GRangesList") |> unlist() |> reduce()
# Selected this one as a compromise between lower baseline and visible peaks.
pwmMatchesOnOki_12_95 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.95 & gr$absScore > 12]) |> as("GRangesList") |> unlist() |> reduce()
# x_10_99 <- lapply(pwmMatchesOnOki, \(gr) gr[gr$relScore > 0.99 & gr$absScore > 10]) |> as("GRangesList") |> unlist() |> reduce()

saveRDS(pwmMatchesOnOki,       "pwmMatchesOki.Rds")
saveRDS(pwmMatchesOnOki_12_95, "pwmMatchesOki_12_95.Rds")
}
if (! file.exists("pwmMatchesOsa.Rds")) {
Osa_DNAStringSet <- lapply(seqnames(genomes$Osa), \(name) genomes$Osa[[name]]) |> as("DNAStringSet")
names(Osa_DNAStringSet) <- seqnames(genomes$Osa)

pwmMatchesOnOsa <- lapply(PWMatrixList |> as.list(), searchSeq2GB, Osa_DNAStringSet)
# x_15_98 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.98 & gr$absScore > 15]) |> as("GRangesList") |> unlist() |> reduce()
# x_10_95 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.95 & gr$absScore > 10]) |> as("GRangesList") |> unlist() |> reduce()
# x_15_95 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.95 & gr$absScore > 15]) |> as("GRangesList") |> unlist() |> reduce()
# x_12_90 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.90 & gr$absScore > 12]) |> as("GRangesList") |> unlist() |> reduce()
# x_15_90 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.90 & gr$absScore > 15]) |> as("GRangesList") |> unlist() |> reduce()
# Selected this one as a compromise between lower baseline and visible peaks.
pwmMatchesOnOsa_12_95 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.95 & gr$absScore > 12]) |> as("GRangesList") |> unlist() |> reduce()
# x_10_99 <- lapply(pwmMatchesOnOsa, \(gr) gr[gr$relScore > 0.99 & gr$absScore > 10]) |> as("GRangesList") |> unlist() |> reduce()

saveRDS(pwmMatchesOnOsa,       "pwmMatchesOsa.Rds")
saveRDS(pwmMatchesOnOsa_12_95, "pwmMatchesOsa_12_95.Rds")
}