port more code

This commit is contained in:
Jacob Lifshay 2026-01-02 04:50:51 -08:00
parent 7ecdbc0239
commit 9e090a66a3
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ

View file

@ -1,10 +1,11 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::quad_tree::QuadTree;
use non_nan_float::NonNaNF32;
use std::{
borrow::Borrow,
collections::{HashMap, HashSet},
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
fmt,
sync::OnceLock,
};
@ -1435,4 +1436,279 @@ enum InsnParseSection {
Desc,
}
/// One item stored in a page's quad tree (see `Page::qt`, which holds
/// `QuadTree<PageItem>`): either a `Char` or a `LineOrRect`.
#[derive(Clone, Debug)]
enum PageItem {
/// A `Char` item.
/// NOTE(review): presumably a single text character extracted from the page — confirm against `Char`'s definition.
Char(Char),
/// A non-text item wrapping a `LineOrRect`.
LineOrRect(LineOrRect),
}
/// A non-text page item: either a line or a rectangle.
///
/// Both payloads are currently the unit type `()`, i.e. placeholders that
/// carry no data yet.
///
/// The comparison traits are derived because `Page::unprocessed_non_text`
/// stores these values in a `BTreeSet`, which needs `Ord` on its element type
/// for insertion and lookup. The derived ordering places every `Line` before
/// every `Rect` (variant declaration order).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum LineOrRect {
    /// A line (payload not yet defined).
    Line(()),
    /// A rectangle (payload not yet defined).
    Rect(()),
}
/// Per-page state: the page number, a quad tree of items for each
/// `TextSection`, and the sets of characters and non-text items that are
/// labelled "unprocessed".
/// NOTE(review): "unprocessed" presumably means not yet consumed by a later
/// parsing step — confirm against the code that fills and drains these sets.
#[derive(Debug)]
struct Page {
/// Number of this page.
/// NOTE(review): presumably the same numbering as `TextSection::page_num` — confirm.
page_num: u32,
/// One quad tree of `PageItem`s per text section.
qt: BTreeMap<TextSection, QuadTree<PageItem>>,
/// Unprocessed characters, grouped first by text section and then by font.
unprocessed_chars: BTreeMap<TextSection, BTreeMap<Font, BTreeSet<Char>>>,
/// Unprocessed lines and rectangles.
unprocessed_non_text: BTreeSet<LineOrRect>,
}
/// A rectangular region of one page, given by a page number and min/max
/// x and y bounds.
///
/// The derived `Ord` compares fields in declaration order: `page_num`, then
/// `min_x`, `min_y`, `max_x`, `max_y`. The bounds are stored as `NonNaNF32`
/// rather than `f32`; `TextSection::new` rejects values that `NonNaNF32::new`
/// does not accept.
/// NOTE(review): coordinate units/origin are not visible here (presumably PDF
/// page coordinates) — confirm.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct TextSection {
/// Page this region belongs to.
page_num: u32,
/// Lower x bound.
min_x: NonNaNF32,
/// Lower y bound.
min_y: NonNaNF32,
/// Upper x bound.
max_x: NonNaNF32,
/// Upper y bound.
max_y: NonNaNF32,
}
/// Hand-maintained layout tables, keyed by page number, that tell
/// `TextSection::page_sections_helper` how to divide a page into sections.
///
/// Map-valued tables store a `split_y` coordinate for the page; set-valued
/// tables only record that the page uses the named layout. Each field name
/// lists the kinds of sections in the order the helper emits them.
struct TextSectionPagesData {
    /// page -> `split_y`: two columns above `split_y`, then one full-width section below it.
    columns_then_full_page: BTreeMap<u32, f32>,
    /// page -> `split_y`: one full-width section above `split_y`, then two columns below it.
    full_page_then_columns: BTreeMap<u32, f32>,
    /// page -> `split_y`: one-line title, columns down to `split_y`, then a full-width section.
    one_title_line_then_columns_then_full_page: BTreeMap<u32, f32>,
    /// page -> `split_y`: two-line title, columns down to `split_y`, then a full-width section.
    two_title_lines_then_columns_then_full_page: BTreeMap<u32, f32>,
    /// page -> `split_y`: a pair of columns above `split_y`, then another pair below it.
    columns_then_columns: BTreeMap<u32, f32>,
    /// page -> `split_y`: one-line title, then two stacked pairs of columns divided at `split_y`.
    one_title_line_then_columns_then_columns: BTreeMap<u32, f32>,
    /// Pages with a one-line title followed by a single pair of columns.
    one_title_line_then_columns: BTreeSet<u32>,
    /// Pages with a two-line title followed by a single pair of columns.
    two_title_lines_then_columns: BTreeSet<u32>,
    /// Pages made of a single full-width section.
    full_page: BTreeSet<u32>,
}

impl TextSectionPagesData {
    /// Returns the process-wide tables, building them on the first call and
    /// handing back the same `'static` instance afterwards.
    fn get() -> &'static Self {
        /// Constructs the tables; invoked at most once through `DATA`.
        fn build() -> TextSectionPagesData {
            TextSectionPagesData {
                columns_then_full_page: BTreeMap::from([
                    (129, 438.992),
                    (241, 512.419),
                    (242, 408.077),
                    (243, 488.509),
                    (244, 437.518),
                    (245, 444.522),
                    (247, 352.082),
                    (248, 356.723),
                    (249, 365.944),
                    (251, 334.553),
                    (264, 184.67),
                    (296, 267.29),
                    (297, 200.043),
                    (298, 440.64),
                    (299, 197.356),
                    (300, 160.076),
                    (301, 364.924),
                    (303, 330.055),
                    (305, 344.867),
                    (306, 335.403),
                    (307, 336.897),
                    (308, 365.233),
                    (309, 364.735),
                ]),
                full_page_then_columns: BTreeMap::from([
                    (246, 689.039),
                    (250, 615.315),
                    (266, 678.088),
                ]),
                one_title_line_then_columns_then_full_page: BTreeMap::from([(128, 301.55)]),
                two_title_lines_then_columns_then_full_page: BTreeMap::from([(304, 242.732)]),
                columns_then_columns: BTreeMap::from([(79, 621.66), (126, 519.89)]),
                one_title_line_then_columns_then_columns: BTreeMap::from([
                    (130, 550.43),
                    (162, 599.247),
                    (194, 622.161),
                    (196, 682.933),
                    (204, 613.195),
                    (215, 633.12),
                ]),
                one_title_line_then_columns: BTreeSet::from([
                    103, 104, 105, 121, 139, 143, 146, 151, 153, 158, 207, 213, 218,
                ]),
                two_title_lines_then_columns: BTreeSet::from([198, 206]),
                // Individually listed pages plus the contiguous run 274..=285.
                full_page: [
                    118, 157, 252, 254, 255, 257, 259, 260, 265, 268, 270, 271, 272,
                ]
                .into_iter()
                .chain(274..286)
                .collect(),
            }
        }

        static DATA: OnceLock<TextSectionPagesData> = OnceLock::new();
        DATA.get_or_init(build)
    }
}
impl TextSection {
fn first() -> TextSection {
Self::page_sections(1)[0]
}
fn next(self) -> TextSection {
let page_sections = Self::page_sections(self.page_num);
let Some(index) = page_sections.iter().position(|v| *v == self) else {
panic!("not a known TextSection: {self:?}");
};
if let Some(&retval) = page_sections.get(index + 1) {
return retval;
}
for page_num in self.page_num + 1..self.page_num + 100000 {
let page_sections = Self::page_sections(page_num);
if let Some(&retval) = page_sections.get(0) {
return retval;
}
}
panic!("can't find next TextSection after {self:?}")
}
fn new(page_num: u32, min_x: f32, min_y: f32, max_x: f32, max_y: f32) -> Self {
Self {
page_num,
min_x: NonNaNF32::new(min_x).expect("invalid min_x"),
min_y: NonNaNF32::new(min_y).expect("invalid min_y"),
max_x: NonNaNF32::new(max_x).expect("invalid max_x"),
max_y: NonNaNF32::new(max_y).expect("invalid max_y"),
}
}
fn left_column(page_num: u32, min_y: f32, max_y: f32) -> TextSection {
Self::new(page_num, PAGE_BODY_MIN_X, min_y, COLUMN_SPLIT_X, max_y)
}
fn right_column(page_num: u32, min_y: f32, max_y: f32) -> TextSection {
Self::new(page_num, COLUMN_SPLIT_X, min_y, PAGE_BODY_MAX_X, max_y)
}
fn columns(page_num: u32, min_y: f32, max_y: f32) -> [TextSection; 2] {
[
Self::left_column(page_num, min_y, max_y),
Self::right_column(page_num, min_y, max_y),
]
}
fn full_page(page_num: u32, min_y: f32, max_y: f32) -> TextSection {
Self::new(page_num, PAGE_BODY_MIN_X, min_y, PAGE_BODY_MAX_X, max_y)
}
fn page_sections_helper(page_num: u32) -> Box<[TextSection]> {
let TextSectionPagesData {
columns_then_full_page,
full_page_then_columns,
one_title_line_then_columns_then_full_page,
two_title_lines_then_columns_then_full_page,
columns_then_columns,
one_title_line_then_columns_then_columns,
one_title_line_then_columns,
two_title_lines_then_columns,
full_page,
} = TextSectionPagesData::get();
if let Some(split_y) = columns_then_columns.get(&page_num) {
return Box::from_iter(
Self::columns(page_num, *split_y, PAGE_BODY_MAX_Y)
.into_iter()
.chain(Self::columns(page_num, PAGE_BODY_MIN_Y, *split_y)),
);
}
if one_title_line_then_columns.contains(&page_num) {
return Box::from_iter(
[Self::full_page(
page_num,
ONE_TITLE_LINE_SPLIT_Y,
PAGE_BODY_MAX_Y,
)]
.into_iter()
.chain(Self::columns(
page_num,
PAGE_BODY_MIN_Y,
ONE_TITLE_LINE_SPLIT_Y,
)),
);
}
if full_page.contains(&page_num) {
return Box::new([Self::full_page(page_num, PAGE_BODY_MIN_Y, PAGE_BODY_MAX_Y)]);
}
if let Some(split_y) = one_title_line_then_columns_then_columns.get(&page_num) {
return Box::from_iter(
[Self::full_page(
page_num,
ONE_TITLE_LINE_SPLIT_Y,
PAGE_BODY_MAX_Y,
)]
.into_iter()
.chain(Self::columns(page_num, *split_y, ONE_TITLE_LINE_SPLIT_Y))
.chain(Self::columns(page_num, PAGE_BODY_MIN_Y, *split_y)),
);
}
if two_title_lines_then_columns.contains(&page_num) {
return Box::from_iter(
[Self::full_page(
page_num,
TWO_TITLE_LINES_SPLIT_Y,
PAGE_BODY_MAX_Y,
)]
.into_iter()
.chain(Self::columns(
page_num,
PAGE_BODY_MIN_Y,
TWO_TITLE_LINES_SPLIT_Y,
)),
);
}
if let Some(split_y) = columns_then_full_page.get(&page_num) {
return Box::from_iter(
Self::columns(page_num, *split_y, PAGE_BODY_MAX_Y)
.into_iter()
.chain([Self::full_page(page_num, PAGE_BODY_MIN_Y, *split_y)]),
);
}
if let Some(split_y) = full_page_then_columns.get(&page_num) {
return Box::from_iter(
[Self::full_page(page_num, *split_y, PAGE_BODY_MAX_Y)]
.into_iter()
.chain(Self::columns(page_num, PAGE_BODY_MIN_Y, *split_y)),
);
}
if let Some(split_y) = one_title_line_then_columns_then_full_page.get(&page_num) {
return Box::from_iter(
[Self::full_page(
page_num,
ONE_TITLE_LINE_SPLIT_Y,
PAGE_BODY_MAX_Y,
)]
.into_iter()
.chain(Self::columns(page_num, *split_y, ONE_TITLE_LINE_SPLIT_Y))
.chain([Self::full_page(page_num, PAGE_BODY_MIN_Y, *split_y)]),
);
}
if let Some(split_y) = two_title_lines_then_columns_then_full_page.get(&page_num) {
return Box::from_iter(
[Self::full_page(
page_num,
TWO_TITLE_LINES_SPLIT_Y,
PAGE_BODY_MAX_Y,
)]
.into_iter()
.chain(Self::columns(page_num, *split_y, TWO_TITLE_LINES_SPLIT_Y))
.chain([Self::full_page(page_num, PAGE_BODY_MIN_Y, *split_y)]),
);
}
if page_num == 263 {
return Box::from_iter(
[Self::full_page(page_num, 699.997, PAGE_BODY_MAX_Y)]
.into_iter()
.chain(Self::columns(page_num, 366.396, 699.997))
.chain(Self::columns(page_num, 207.0, 366.396))
.chain([Self::full_page(page_num, PAGE_BODY_MIN_Y, 207.0)]),
);
}
// TODO: checked up to page 309 (page named 273)
Box::new(Self::columns(page_num, PAGE_BODY_MIN_Y, PAGE_BODY_MAX_Y))
}
fn page_sections(page_num: u32) -> &'static [TextSection] {
static CACHE: [OnceLock<Box<[TextSection]>>; 2000] = [const { OnceLock::new() }; _];
CACHE
.get(page_num as usize)
.expect("page_num out of range")
.get_or_init(|| Self::page_sections_helper(page_num))
}
}
// Program entry point; the body is empty, so running the binary does nothing.
fn main() {}