diff --git a/Cargo.lock b/Cargo.lock index 0281106..4de1b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,19 +154,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" -[[package]] -name = "mupdf" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6499267155b9ae03ff8e53c456d0bfff988b2647d62ff1df038f39ebe93a0c" -dependencies = [ - "bitflags", - "mupdf-sys", - "num_enum", - "once_cell", - "zerocopy", -] - [[package]] name = "mupdf-sys" version = "0.5.0" @@ -190,41 +177,13 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "num_enum" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" -dependencies = [ - "num_enum_derive", - "rustversion", -] - -[[package]] -name = "num_enum_derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - [[package]] name = "parse_powerisa_pdf" version = "0.1.0" dependencies = [ "indexmap", "libm", - "mupdf", + "mupdf-sys", "quick-xml", "serde", ] @@ -235,15 +194,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" -[[package]] -name = "proc-macro-crate" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" -dependencies = [ - "toml_edit", -] - 
[[package]] name = "proc-macro2" version = "1.0.104" @@ -307,12 +257,6 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - [[package]] name = "serde" version = "1.0.228" @@ -360,36 +304,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "toml_datetime" -version = "0.7.5+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_edit" -version = "0.23.10+spec-1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" -dependencies = [ - "indexmap", - "toml_datetime", - "toml_parser", - "winnow", -] - -[[package]] -name = "toml_parser" -version = "1.0.6+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" -dependencies = [ - "winnow", -] - [[package]] name = "unicode-ident" version = "1.0.22" @@ -402,15 +316,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "winnow" -version = "0.7.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" -dependencies = [ - "memchr", -] - [[package]] name = "zerocopy" version = "0.8.31" diff --git a/Cargo.toml b/Cargo.toml index 224dad3..09de0ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,6 @@ rust-version = "1.89.0" 
[dependencies] indexmap = "2.12.1" libm = "0.2.15" -mupdf = { version = "0.5.0", default-features = false } +mupdf-sys = { version = "0.5.0", default-features = false } quick-xml = { version = "0.38.4", features = ["serialize"] } serde = { version = "1.0.228", features = ["derive"] } diff --git a/parse_powerisa_pdf/parse_powerisa_pdf.py b/parse_powerisa_pdf/parse_powerisa_pdf.py index a4afd09..3c2afe5 100755 --- a/parse_powerisa_pdf/parse_powerisa_pdf.py +++ b/parse_powerisa_pdf/parse_powerisa_pdf.py @@ -765,7 +765,7 @@ class Page: unprocessed_non_text: SetById[LTLine | LTRect] @staticmethod - def from_lt_page(page_num: int, page: LTPage) -> Page: + def from_lt_page(page_num: int, page: LTPage, first_seen_fonts: defaultdict[str, set[float]]) -> Page: qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree) unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char])) unprocessed_non_text: SetById[LTLine | LTRect] = SetById() @@ -804,20 +804,25 @@ class Page: raise AssertionError( f"char not in text section: {element}\npage_num={page_num}") continue + font_size = round(element.size, 3) char = Char( text=element.get_text(), - font=Font(font_name=element.fontname, size=round(element.size, 3)), + font=Font(font_name=element.fontname, size=font_size), adv=element.adv, min_x=element.x0, min_y=element.y0, max_x=element.x1, max_y=element.y1, ) + if font_size not in first_seen_fonts[element.fontname]: + first_seen_fonts[element.fontname].add(font_size) + print(f"first seen font: {element.fontname!r} {font_size}: page {page_num} {char!r}") qt[text_section].insert(char.min_x, char.min_y, char) unprocessed_chars[text_section][char.font].add(char) - for i in unprocessed_chars.values(): - for j in i.values(): - j.sort(key=Char.top_down_left_to_right_sort_key) + for text_section, i in unprocessed_chars.items(): + for chars in i.values(): + chars.sort(key=Char.top_down_left_to_right_sort_key) + print(f"first char: {text_section!r}: 
{next(iter(chars), None)!r}") unknown_fonts=[] unknown_font_errors=[] for i in unprocessed_chars.values(): @@ -1181,13 +1186,14 @@ class Parser: def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]: if page_numbers is not None: page_numbers = sorted(i - 1 for i in page_numbers) + first_seen_fonts = defaultdict(set) for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)): if page_numbers is not None: page_num = page_numbers[i] + 1 else: page_num = i + 1 print(f"page {page_num}") - yield Page.from_lt_page(page_num=page_num, page=page) + yield Page.from_lt_page(page_num=page_num, page=page, first_seen_fonts=first_seen_fonts) def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None): self.pages = Pages(pages_gen=Parser.__pages_gen( @@ -1503,7 +1509,7 @@ class Parser: f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}") if len(v_lines) < 2: raise InsnParseError( - f"instruction bit fields box has too few vertical lines:\n{h_lines}") + f"instruction bit fields box has too few vertical lines:\n{v_lines}") bottom_line, top_line = h_lines box_min_x = v_lines[0].x0 box_max_x = v_lines[-1].x0 diff --git a/src/main.rs b/src/main.rs index d9c54ec..a6a36e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,20 +1,30 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::quad_tree::QuadTree; +use crate::{ + mupdf_ffi::{ + WriteMode, add_points, point_max_components, point_min_components, transform_vector, + }, + quad_tree::QuadTree, +}; use indexmap::IndexSet; +use mupdf_sys::{fz_matrix, fz_point, fz_text_item}; use non_nan_float::NonNaNF32; use std::{ - borrow::{Borrow, Cow}, + backtrace::Backtrace, + borrow::Cow, cell::RefCell, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + convert::Infallible, error::Error, fmt, num::NonZero, + ops::ControlFlow, rc::Rc, sync::OnceLock, }; +mod mupdf_ffi; mod quad_tree; mod xml_tree; @@ 
-41,6 +51,12 @@ mod non_nan_float { pub(crate) const fn get(self) -> f32 { self.0 } + pub(crate) const fn min(self, other: Self) -> Self { + Self(self.0.min(other.0)) + } + pub(crate) const fn max(self, other: Self) -> Self { + Self(self.0.max(other.0)) + } } impl std::hash::Hash for NonNaNF32 { @@ -68,6 +84,22 @@ mod non_nan_float { } } +const fn str_eq(a: &str, b: &str) -> bool { + let a = a.as_bytes(); + let b = b.as_bytes(); + if a.len() != b.len() { + return false; + } + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + true +} + macro_rules! make_enum_font { ( enum $Font:ident { @@ -75,7 +107,7 @@ macro_rules! make_enum_font { $Other:ident $other_body:tt, $(#[group] $KnownFontGroup:ident { - $(#[name = $known_font_name:literal, size = $known_font_size:literal] + $(#[name_with_tag = $known_font_name_with_tag:literal, size = $known_font_size:literal] $KnownFont:ident,)* },)* } @@ -101,10 +133,32 @@ macro_rules! make_enum_font { } impl $Font { + const fn extract_font_name_from_font_name_with_tag(font_name_with_tag: &str) -> &str { + if let [b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'+',_,..] 
= font_name_with_tag.as_bytes() { + font_name_with_tag.split_at(7).1 + } else { + panic!("invalid font name with id") + } + } + const fn known_from_name_with_tag(font_name_with_tag: &str, size: NonNaNF32) -> Option { + match size.get() { + $($($known_font_size if str_eq(font_name_with_tag, $known_font_name_with_tag) => Some(Self::$KnownFont),)*)* + _ => None, + } + } + const fn new_known(font_name: &str, size: NonNaNF32) -> Option { + match size.get() { + $($($known_font_size if str_eq(font_name, const { + Self::extract_font_name_from_font_name_with_tag($known_font_name_with_tag) + }) => Some(Self::$KnownFont),)*)* + _ => None, + } + } fn new(font_name: &str, size: NonNaNF32) -> Self { - match (font_name, size.get()) { - $($(($known_font_name, $known_font_size) => Self::$KnownFont,)*)* - _ => Self::Other { + if let Some(v) = Self::new_known(font_name, size) { + v + } else { + Self::Other { font_name: Box::from(font_name), size, } @@ -119,7 +173,7 @@ macro_rules! make_enum_font { const fn font_name(&self) -> &str { match self { Self::$Other { font_name, .. } => font_name, - $($(Self::$KnownFont => $known_font_name,)*)* + $($(Self::$KnownFont => const { Self::extract_font_name_from_font_name_with_tag($known_font_name_with_tag) },)*)* } } const fn known_font_group(&self) -> Option { @@ -135,6 +189,17 @@ macro_rules! make_enum_font { } } } + + const _: () = { + $($( + let (known_font_name, known_font) = const { + let known_font_name = Font::extract_font_name_from_font_name_with_tag($known_font_name_with_tag); + (known_font_name, &Font::new_known(known_font_name, NonNaNF32::new($known_font_size).unwrap()).unwrap()) + }; + assert!(str_eq(known_font_name, known_font.font_name())); + assert!(matches!(known_font, Font::$KnownFont)); + )*)* + }; }; } @@ -147,352 +212,356 @@ make_enum_font! 
{ }, #[group] InsnHeader { - #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963] + #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963] InsnHeader, }, #[group] RtlFnHeader { - #[name = "APUYSQ+zcoN-Regular", size = 9.963] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 9.963] RtlFnHeader, }, #[group] PageHeader { - #[name = "MJBFWM+DejaVuSansCondensed", size = 9.963] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 9.963] PageHeader, }, #[group] PageFooter { - #[name = "MJBFWM+DejaVuSansCondensed", size = 4.981] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.981] PageFooter, }, #[group] InsnDesc { - #[name = "MJBFWM+DejaVuSansCondensed", size = 8.966] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 8.966] InsnDesc0, - #[name = "FZTIYT+CMMI9", size = 8.966] + #[name_with_tag = "FZTIYT+CMMI9", size = 8.966] InsnDesc1, - #[name = "ONUAYC+CMSSI9", size = 8.966] + #[name_with_tag = "ONUAYC+CMSSI9", size = 8.966] InsnDesc2, - #[name = "TNGBFZ+CMSY9", size = 8.966] + #[name_with_tag = "TNGBFZ+CMSY9", size = 8.966] InsnDesc3, - #[name = "WHMZPU+CMEX9", size = 8.966] + #[name_with_tag = "WHMZPU+CMEX9", size = 8.966] InsnDesc4, - #[name = "ZJTMSG+CMSS9", size = 8.966] + #[name_with_tag = "ZJTMSG+CMSS9", size = 8.966] InsnDesc5, }, #[group] InsnDescMisc { - #[name = "MJBFWM+DejaVuSansCondensed", size = 2.377] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 2.377] InsnDescMisc0, - #[name = "MJBFWM+DejaVuSansCondensed", size = 2.561] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 2.561] InsnDescMisc1, - #[name = "MJBFWM+DejaVuSansCondensed", size = 4.492] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.492] InsnDescMisc2, - #[name = "MJBFWM+DejaVuSansCondensed", size = 4.641] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.641] InsnDescMisc3, - #[name = "MJBFWM+DejaVuSansCondensed", size = 4.772] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.772] InsnDescMisc4, 
- #[name = "MJBFWM+DejaVuSansCondensed", size = 4.864] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.864] InsnDescMisc5, - #[name = "MJBFWM+DejaVuSansCondensed", size = 4.925] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.925] InsnDescMisc6, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.097] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.097] InsnDescMisc7, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.123] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.123] InsnDescMisc8, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.131] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.131] InsnDescMisc9, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.516] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.516] InsnDescMisc10, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.604] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.604] InsnDescMisc11, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.634] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.634] InsnDescMisc12, - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.906] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.906] InsnDescMisc13, - #[name = "MJBFWM+DejaVuSansCondensed", size = 6.033] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.033] InsnDescMisc14, - #[name = "MJBFWM+DejaVuSansCondensed", size = 6.068] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.068] InsnDescMisc15, - #[name = "MJBFWM+DejaVuSansCondensed", size = 6.213] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.213] InsnDescMisc16, - #[name = "MJBFWM+DejaVuSansCondensed", size = 6.252] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.238] InsnDescMisc17, - #[name = "MJBFWM+DejaVuSansCondensed", size = 6.962] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.252] InsnDescMisc18, - #[name = "MJBFWM+DejaVuSansCondensed", size = 7.977] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 
6.962] InsnDescMisc19, + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 7.977] + InsnDescMisc20, + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 8.506] + InsnDescMisc21, }, #[group] InsnDescCode { - #[name = "APUYSQ+zcoN-Regular", size = 6.974] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 6.974] InsnDescCode, }, #[group] InsnDescCodeMisc { - #[name = "APUYSQ+zcoN-Regular", size = 3.587] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 3.587] InsnDescCodeMisc0, - #[name = "APUYSQ+zcoN-Regular", size = 4.483] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 4.483] InsnDescCodeMisc1, }, #[group] InsnDescItalic { - #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966] + #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966] InsnDescItalic, }, #[group] InsnDescBold { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966] InsnDescBold, }, #[group] InsnDescBoldItalic { - #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966] + #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966] InsnDescBoldItalic, }, #[group] InsnDescSmall { - #[name = "MJBFWM+DejaVuSansCondensed", size = 7.97] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 7.97] InsnDescSmall, }, #[group] InsnDescSmallItalic { - #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97] + #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97] InsnDescSmallItalic, }, #[group] InsnDescSmallBold { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97] InsnDescSmallBold, }, #[group] InsnDescSmallBoldItalic { - #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97] + #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97] InsnDescSmallBoldItalic, }, #[group] InsnDescBoldMisc { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.21] + #[name_with_tag = 
"NHUPPK+DejaVuSansCondensed-Bold", size = 2.21] InsnDescBoldMisc0, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399] InsnDescBoldMisc1, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.763] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.763] InsnDescBoldMisc2, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946] InsnDescBoldMisc3, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949] InsnDescBoldMisc4, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999] InsnDescBoldMisc5, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065] InsnDescBoldMisc6, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086] InsnDescBoldMisc7, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183] InsnDescBoldMisc8, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686] InsnDescBoldMisc9, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744] InsnDescBoldMisc10, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825] InsnDescBoldMisc11, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842] InsnDescBoldMisc12, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857] InsnDescBoldMisc13, - 
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979] InsnDescBoldMisc14, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032] InsnDescBoldMisc15, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112] InsnDescBoldMisc16, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161] InsnDescBoldMisc17, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206] InsnDescBoldMisc18, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353] InsnDescBoldMisc19, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378] InsnDescBoldMisc20, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434] InsnDescBoldMisc21, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595] InsnDescBoldMisc22, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619] InsnDescBoldMisc23, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647] InsnDescBoldMisc24, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68] InsnDescBoldMisc25, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693] InsnDescBoldMisc26, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736] + 
#[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736] InsnDescBoldMisc27, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.781] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.781] InsnDescBoldMisc28, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802] InsnDescBoldMisc29, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995] InsnDescBoldMisc30, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201] InsnDescBoldMisc31, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258] InsnDescBoldMisc32, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363] InsnDescBoldMisc33, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442] InsnDescBoldMisc34, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473] InsnDescBoldMisc35, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485] InsnDescBoldMisc36, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512] InsnDescBoldMisc37, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543] InsnDescBoldMisc38, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613] InsnDescBoldMisc39, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.744] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 
5.744] InsnDescBoldMisc40, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774] InsnDescBoldMisc41, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809] InsnDescBoldMisc42, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849] InsnDescBoldMisc43, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911] InsnDescBoldMisc44, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92] InsnDescBoldMisc45, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962] InsnDescBoldMisc46, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981] InsnDescBoldMisc47, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146] InsnDescBoldMisc48, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213] InsnDescBoldMisc49, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221] InsnDescBoldMisc50, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243] InsnDescBoldMisc51, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55] InsnDescBoldMisc52, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62] InsnDescBoldMisc53, - #[name = 
"NHUPPK+DejaVuSansCondensed-Bold", size = 6.699] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.699] InsnDescBoldMisc54, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725] InsnDescBoldMisc55, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751] InsnDescBoldMisc56, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856] InsnDescBoldMisc57, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029] InsnDescBoldMisc58, - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406] InsnDescBoldMisc59, }, #[group] InsnDescSubscript { - #[name = "MJBFWM+DejaVuSansCondensed", size = 5.978] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.978] InsnDescSubscript, }, #[group] InsnDescBoldSubscript { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978] InsnDescBoldSubscript, }, #[group] InsnDescItalicSubscript { - #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978] + #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978] InsnDescItalicSubscript, }, #[group] InsnDescBoldItalicSubscript { - #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978] + #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978] InsnDescBoldItalicSubscript, }, #[group] InsnExtMnemonic { - #[name = "APUYSQ+zcoN-Regular", size = 8.966] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 8.966] InsnExtMnemonic, }, #[group] InsnCode { - #[name = "APUYSQ+zcoN-Regular", size = 7.97] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 7.97] InsnCode0, - #[name = "RRFUNA+CMSY8", size = 7.97] + #[name_with_tag 
= "RRFUNA+CMSY8", size = 7.97] InsnCode1, - #[name = "HPXOZC+CMSS8", size = 7.97] + #[name_with_tag = "HPXOZC+CMSS8", size = 7.97] InsnCode2, }, #[group] InsnCodeSubscript { - #[name = "APUYSQ+zcoN-Regular", size = 5.978] + #[name_with_tag = "APUYSQ+zcoN-Regular", size = 5.978] InsnCodeSubscript0, - #[name = "DBQTKF+CMSY6", size = 5.978] + #[name_with_tag = "DBQTKF+CMSY6", size = 5.978] InsnCodeSubscript1, }, #[group] TitlePageBig { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787] TitlePageBig, }, #[group] TitlePageVersion { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963] TitlePageVersion, }, #[group] TitlePageTm { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974] TitlePageTm, }, #[group] TitlePageRev { - #[name = "MJBFWM+DejaVuSansCondensed", size = 6.974] + #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.974] TitlePageRev, }, #[group] TitlePageBook { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663] TitlePageBook, }, #[group] LegalPageItalic { - #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963] + #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963] LegalPageItalic, }, #[group] ChangeSummaryPageBold { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955] ChangeSummaryPageBold, }, #[group] ChapterTitle { - #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215] + #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215] ChapterTitle, }, #[group] MathMisc { - #[name = "AAJMKT+CMMI6", size = 5.978] + #[name_with_tag = "AAJMKT+CMMI6", size = 5.978] MathMisc0, - #[name = "CUTMFD+CMSSI8", size = 5.978] + #[name_with_tag = 
"CUTMFD+CMSSI8", size = 5.978] MathMisc1, - #[name = "CUTMFD+CMSSI8", size = 7.97] + #[name_with_tag = "CUTMFD+CMSSI8", size = 7.97] MathMisc2, - #[name = "FZTIYT+CMMI9", size = 5.734] + #[name_with_tag = "FZTIYT+CMMI9", size = 5.734] MathMisc3, - #[name = "FZTIYT+CMMI9", size = 7.168] + #[name_with_tag = "FZTIYT+CMMI9", size = 7.168] MathMisc4, - #[name = "HONFQS+CMMI8", size = 7.97] + #[name_with_tag = "HONFQS+CMMI8", size = 7.97] MathMisc5, - #[name = "HPXOZC+CMSS8", size = 5.978] + #[name_with_tag = "HPXOZC+CMSS8", size = 5.978] MathMisc6, - #[name = "LLVRDD+CMSY10", size = 11.955] + #[name_with_tag = "LLVRDD+CMSY10", size = 11.955] MathMisc7, - #[name = "ZJTMSG+CMSS9", size = 7.168] + #[name_with_tag = "ZJTMSG+CMSS9", size = 7.168] MathMisc8, }, } @@ -503,21 +572,6 @@ impl Font { self.size() * const { 3.985 / Font::InsnCode0.size() } } const fn line_height_helper(&self) -> f32 { - const fn str_eq(a: &str, b: &str) -> bool { - let a = a.as_bytes(); - let b = b.as_bytes(); - if a.len() != b.len() { - return false; - } - let mut i = 0; - while i < a.len() { - if a[i] != b[i] { - return false; - } - i += 1; - } - true - } let font_name = self.font_name(); let mut i = 0; while i < KnownFontGroup::INSN_CODE_FONT_GROUPS.len() { @@ -638,7 +692,7 @@ impl ParsedTextLine { } } fn write_xml_lines( - lines: impl IntoIterator>, + lines: impl IntoIterator>, parent: &mut xml_tree::Element, trailing_nl: bool, preceding_nl: bool, @@ -648,7 +702,7 @@ impl ParsedTextLine { } let mut first = true; for line in lines { - let line = line.borrow(); + let line = std::borrow::Borrow::borrow(&line); if first { first = false; } else { @@ -1139,8 +1193,8 @@ impl<'a> ElementBodyBuilder<'a> { } self.shrink_stack(new_len); } - fn write_text(&mut self, text: impl Borrow) { - let text = text.borrow(); + fn write_text(&mut self, text: impl std::borrow::Borrow) { + let text = std::borrow::Borrow::borrow(&text); let insert_point = self.insert_point(); if let Some(child) = 
insert_point.children.last_mut() { child.tail += text; @@ -1285,14 +1339,14 @@ impl fmt::Display for InsnBitFields { impl InsnBitFields { fn write_xml_fields( - fields: impl IntoIterator>, + fields: impl IntoIterator>, parent: &mut xml_tree::Element, ) { let fields_elm = parent.sub_element("fields".into(), []); fields_elm.text = "\n".into(); fields_elm.tail = "\n".into(); for field in fields { - field.borrow().write_xml(fields_elm); + std::borrow::Borrow::borrow(&field).write_xml(fields_elm); } } fn write_xml(&self, parent: &mut xml_tree::Element) { @@ -1455,10 +1509,95 @@ enum PageItem { LineOrRect(LineOrRect), } -#[derive(Clone, Debug)] +#[derive(Copy, Clone, Debug)] enum LineOrRect { - Line(()), - Rect(()), + Line(Line), + Rect(Rect), +} + +impl LineOrRect { + fn width(self) -> f32 { + match self { + Self::Line(v) => v.width(), + Self::Rect(v) => v.width(), + } + } + fn height(self) -> f32 { + match self { + Self::Line(v) => v.height(), + Self::Rect(v) => v.height(), + } + } + fn min_x(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.min_x(), + Self::Rect(v) => v.min_x, + } + } + fn max_x(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.max_x(), + Self::Rect(v) => v.max_x, + } + } + fn min_y(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.min_y(), + Self::Rect(v) => v.min_y, + } + } + fn max_y(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.max_y(), + Self::Rect(v) => v.max_y, + } + } +} + +#[derive(Copy, Clone, Debug)] +struct Line { + p0_x: NonNaNF32, + p0_y: NonNaNF32, + p1_x: NonNaNF32, + p1_y: NonNaNF32, +} + +impl Line { + fn width(self) -> f32 { + f32::abs(self.p0_x.get() - self.p1_x.get()) + } + fn height(self) -> f32 { + f32::abs(self.p0_y.get() - self.p1_y.get()) + } + fn min_x(self) -> NonNaNF32 { + self.p0_x.min(self.p1_x) + } + fn max_x(self) -> NonNaNF32 { + self.p0_x.max(self.p1_x) + } + fn min_y(self) -> NonNaNF32 { + self.p0_y.min(self.p1_y) + } + fn max_y(self) -> NonNaNF32 { + self.p0_y.max(self.p1_y) + } +} + 
+#[derive(Copy, Clone, Debug)] +struct Rect { + min_x: NonNaNF32, + max_x: NonNaNF32, + min_y: NonNaNF32, + max_y: NonNaNF32, +} + +impl Rect { + fn width(self) -> f32 { + self.max_x.get() - self.min_x.get() + } + fn height(self) -> f32 { + self.max_y.get() - self.min_y.get() + } } #[derive(Debug)] @@ -1470,13 +1609,13 @@ struct Page { unprocessed_non_text: Rc>>, } -struct Pages { - pages_gen: Option>>>>, +struct Pages<'ctx> { + pages_gen: Option>> + 'ctx>>, pages: BTreeMap>, max_page_num: u32, } -impl fmt::Debug for Pages { +impl<'ctx> fmt::Debug for Pages<'ctx> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Self { pages_gen, @@ -1494,8 +1633,10 @@ impl fmt::Debug for Pages { } } -impl Pages { - fn new(pages_gen: Option>>>>) -> Self { +impl<'ctx> Pages<'ctx> { + fn new( + pages_gen: Option>> + 'ctx>>, + ) -> Self { Self { pages_gen, pages: BTreeMap::new(), @@ -1870,13 +2011,52 @@ impl Insn { } #[derive(Debug)] -struct Parser { - pages: Pages, +struct Parser<'ctx> { + pages: Pages<'ctx>, text_section: TextSection, insns: Vec, } -impl Parser { +enum ExtractInsnsError { + InsnParseError(String, std::backtrace::Backtrace), + PageParseError(String, std::backtrace::Backtrace), + Other(Box), +} + +impl fmt::Display for ExtractInsnsError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let backtrace = match self { + ExtractInsnsError::InsnParseError(msg, backtrace) => { + writeln!(f, "instruction parse error: {msg}")?; + backtrace + } + ExtractInsnsError::PageParseError(msg, backtrace) => { + writeln!(f, "page parse error: {msg}")?; + backtrace + } + ExtractInsnsError::Other(e) => return fmt::Display::fmt(&e, f), + }; + backtrace.fmt(f) + } +} + +#[derive(Clone, Debug)] +struct ErrorWithNote { + error: E, + note: String, +} + +impl fmt::Display for ErrorWithNote { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self { error, note } = self; + fmt::Display::fmt(error, f)?; + write!(f, "\nnote: {note}") + } +} + +impl 
Error for ErrorWithNote {} + +impl<'ctx> Parser<'ctx> { fn new() -> Self { Self { pages: Pages::new(None), @@ -1902,34 +2082,40 @@ impl Parser { .clone()) } fn pages_gen( + ctx: impl Into>, file: &str, page_numbers: Option>>, - ) -> Result>>>, Box> { - let page_numbers = page_numbers.map(|page_numbers| { - let mut retval = Vec::from_iter(page_numbers.into_iter().map(|v| v.get() - 1)); + ) -> Result>> + 'ctx>, Box> { + let ctx = ctx.into(); + let page_indexes = page_numbers.map(|page_numbers| { + let mut retval = Vec::from_iter(page_numbers.into_iter().map(|v| v.get() as usize - 1)); retval.sort(); retval }); - let document = mupdf::Document::open(file)?; - let pages: Vec = document.pages().and_then(|pages| pages.collect())?; - Ok(Box::new(pages.into_iter().enumerate().map( - move |(i, page)| { - let page_num = match &page_numbers { - Some(page_numbers) => page_numbers[i] + 1, - None => i as u32 + 1, - }; - println!("page {page_num}"); - Ok(Page::from_mupdf_page(page_num, page) - .map_err(|e| format!("error reading pdf page {page_num}: {e}"))?) 
- }, - ))) + let document = mupdf_ffi::Document::open(ctx, &std::ffi::CString::new(file)?)?; + let page_count = document.page_count()?; + let page_indexes = page_indexes.unwrap_or_else(|| (0..page_count).collect()); + let mut first_seen_fonts = BTreeMap::new(); + Ok(Box::new(page_indexes.into_iter().map(move |page_index| { + let page_num = page_index as u32 + 1; + println!("page {page_num}"); + let page = document + .load_page(page_index) + .map_err(|e| format!("error reading pdf page {page_num}: {e}"))?; + Ok( + Page::from_mupdf_page(page_num, &page, &mut first_seen_fonts) + .map_err(|e| format!("error reading pdf page {page_num}: {e}"))?, + ) + }))) } fn parse_pdf>>( &mut self, + ctx: impl Into>, file: &str, page_numbers: Option, ) -> Result<(), Box> { self.pages = Pages::new(Some(Self::pages_gen( + ctx, file, page_numbers.map(|v| v.into_iter().collect()), )?)); @@ -1945,14 +2131,14 @@ impl Parser { } } } - fn note_text_section( + fn note_text_section( &mut self, - f: impl FnOnce(&mut Self) -> Result<(), Box>, - ) -> Result<(), Box> { + f: impl FnOnce(&mut Self) -> Result<(), E>, + ) -> Result<(), ErrorWithNote> { let start_text_section = self.text_section; match f(self) { Ok(()) => Ok(()), - Err(e) => { + Err(error) => { let note = if self.text_section == start_text_section { format!("text_section={:?}", self.text_section) } else { @@ -1961,66 +2147,1448 @@ impl Parser { self.text_section ) }; - Err(format!("{e}\nnote: {note}").into()) + Err(ErrorWithNote { error, note }) } } } - fn parse_text_section(&mut self) -> Result<(), Box> { - todo!() + fn parse_text_section(&mut self) -> Result<(), ErrorWithNote>> { + match self.note_text_section(Self::extract_insns) { + Ok(()) => Ok(()), + Err( + e @ ErrorWithNote { + error: + ExtractInsnsError::InsnParseError(..) | ExtractInsnsError::PageParseError(..), + .. 
+ }, + ) => { + println!("{e}"); + Ok(()) + } + Err(ErrorWithNote { + error: ExtractInsnsError::Other(error), + note, + }) => Err(ErrorWithNote { error, note }), + } + } + fn find_top_left_char_in_range( + &mut self, + min_x: f32, + max_x: f32, + min_y: f32, + max_y: f32, + allow_processed: bool, + ) -> Result, Box> { + let mut retval = None; + let page = self.page()?; + let unprocessed_chars = self.unprocessed_chars()?; + let ControlFlow::::Continue(()) = + page.qt[&self.text_section].range(min_x, max_x, min_y, max_y, |_x, _y, ch| { + let PageItem::Char(ch) = ch else { + return ControlFlow::Continue(()); + }; + if !allow_processed && !RefCell::borrow(&*unprocessed_chars)[&ch.font].contains(ch) + { + return ControlFlow::Continue(()); + } + match &mut retval { + None => retval = Some(ch.clone()), + Some(retval) + if ch.min_x.get() - ch.min_y.get() + < retval.min_x.get() - retval.min_y.get() => + { + *retval = ch.clone(); + } + Some(_) => {} + } + ControlFlow::Continue(()) + }); + Ok(retval) + } + fn extract_text_line( + &mut self, + start_char: Option, + mut start_min_y: f32, + min_x: f32, + max_x: f32, + fonts: TextLineFonts, + preceding_blank_lines: u32, + mut skip_initial_spaces: bool, + allowed_start_min_y_error: Option, + ) -> Result, ExtractInsnsError> { + let mut chars: Vec = Vec::new(); + let mut chars_set: IndexSet = IndexSet::new(); + if let Some(start_char) = start_char.clone() { + chars.push(start_char.clone()); + chars_set.insert(start_char); + } + if let Some(start_char) = start_char + && start_char.text == "*" + && self.text_section.page_num == 168 + && fonts + .subscript() + .is_some_and(|v| v.contains(&start_char.font)) + { + start_min_y = start_char.max_y.get() - fonts.regular()[0].size(); + } + let page = self.page().map_err(ExtractInsnsError::Other)?; + let unprocessed_chars = self.unprocessed_chars().map_err(ExtractInsnsError::Other)?; + let ControlFlow::::Continue(()) = page.qt[&self.text_section].range( + min_x - fonts.regular()[0].size() * 
0.5, + max_x, + start_min_y - fonts.regular()[0].size() * 0.4, + start_min_y + fonts.regular()[0].size() * 0.6, + |_x, _y, ch| { + let PageItem::Char(ch) = ch else { + return ControlFlow::Continue(()); + }; + if !RefCell::borrow(&*unprocessed_chars)[&ch.font].contains(ch) + || chars_set.contains(ch) + { + return ControlFlow::Continue(()); + } + chars_set.insert(ch.clone()); + chars.push(ch.clone()); + ControlFlow::Continue(()) + }, + ); + if chars.is_empty() { + return Ok(None); + } + chars.sort_by(|a, b| (a.min_x, &a.text).cmp(&(b.min_x, &b.text))); + let mut regular_min_y = chars[0].min_y.get(); + let mut regular_max_y = chars[0].max_y.get(); + for ch in &chars { + let Some(kind) = fonts.get_kind(ch.font.clone(), BaselinePos::Below) else { + continue; + }; + if kind.sub_super() == FontVariantSubSuper::NotSubSuper { + regular_min_y = ch.min_y.get(); + regular_max_y = ch.max_y.get(); + break; + } + } + let mut retval = ParsedTextLine { + element: xml_tree::Element::new("text-line".into(), []), + regular_min_y, + regular_max_y, + fonts, + chars, + preceding_blank_lines, + }; + let mut text_and_tag_stacks: Vec<(String, Vec<&str>)> = Vec::new(); + let mut last_max_x = min_x; + let mut last_kind = None; + let mut last_char: Option = None; + for ch in &retval.chars { + let baseline_pos = if (ch.max_y.get() + ch.min_y.get()) * 0.5 + > (retval.regular_max_y + retval.regular_min_y) * 0.5 + { + BaselinePos::Above + } else { + BaselinePos::Below + }; + let Some(kind) = fonts.get_kind(ch.font.clone(), baseline_pos) else { + println!( + "font kind is None:\n\ + regular_min_y={}\n\ + fonts={fonts:?}\n\ + ch={ch:?}\n\ + baseline_pos={baseline_pos:?}\n\ + chars[0]={:?}", + retval.regular_min_y, retval.chars[0], + ); + return Ok(None); + }; + let space_kind = match last_kind { + None => kind, + Some(last_kind) if last_kind != kind => TextLineFontKind::Regular, + _ => kind, + }; + let (space_fonts, _) = fonts + .get_fonts(space_kind) + .unwrap_or((fonts.regular(), None)); + let 
space_width = ch.min_x.get() - last_max_x; + let space_count_f = space_width / space_fonts[0].space_width(); + let mut space_count = space_count_f.round() as usize; + if space_count == 0 && space_count_f > 0.35 { + space_count = 1 + } + if space_count_f > 0.25 && f32::abs(space_count as f32 - space_count_f) > 0.15 { + println!("spaces: space_count_f={space_count_f} space_width={space_width}"); + } + if space_count > 0 && !skip_initial_spaces { + text_and_tag_stacks.push(( + " ".repeat(space_count), + space_kind.text_line_tags().collect(), + )); + } + skip_initial_spaces = false; + if ch.text == "\u{0338}" + && let Some(last_char) = last_char + && last_char.text == "=" + && f32::abs(ch.min_x.get() - last_char.min_x.get()) < 0.01 + && f32::abs(ch.min_y.get() - last_char.min_y.get()) < 0.01 + { + *text_and_tag_stacks + .last_mut() + .expect("known to be non-empty") = ("\u{2260}".into(), Vec::new()); + last_max_x = last_char.max_x.get(); + } else { + let char_text = match &*ch.text { + "\u{fb00}" => "ff", + "\u{fb01}" => "fi", + "\u{fb02}" => "fl", + "\u{fb03}" => "ffi", + "\u{fb04}" => "ffl", + v => v, + }; + text_and_tag_stacks.push((char_text.into(), kind.text_line_tags().collect())); + last_max_x = ch.max_x.get(); + } + last_kind = Some(kind); + last_char = Some(ch.clone()); + } + ElementBodyBuilder::scope( + &mut ElementBodyBuilder::new(&mut retval.element), + |body_builder| { + for (text, tag_stack) in text_and_tag_stacks { + body_builder.set_tag_stack(tag_stack); + body_builder.write_text(text) + } + }, + ); + for ch in &retval.chars { + RefCell::borrow_mut(&*unprocessed_chars) + .get_mut(&ch.font) + .expect("known to exist") + .shift_remove(ch); + } + let allowed_start_min_y_error = allowed_start_min_y_error.unwrap_or(0.01); + if f32::abs(start_min_y - retval.regular_min_y) > allowed_start_min_y_error { + return Err(ExtractInsnsError::PageParseError( + format!( + "start_min_y={start_min_y} regular_min_y={}\n\ + start_min_y error: {}\n\ + 
allowed_start_min_y_error={allowed_start_min_y_error}", + retval.regular_min_y, + start_min_y - retval.regular_min_y, + ), + Backtrace::capture(), + )); + } + Ok(Some(retval)) + } + fn extract_following_text_lines( + &mut self, + first_text_line: ParsedTextLine, + min_x: f32, + max_x: f32, + allowed_start_min_y_error: Option, + ) -> Result, ExtractInsnsError> { + let mut retval = Vec::new(); + let fonts = first_text_line.fonts; + let mut line = Some(first_text_line); + while let Some(cur_line) = line { + let start_min_y = cur_line.regular_min_y - fonts.regular()[0].line_height(); + retval.push(cur_line); + line = self.extract_text_line( + None, + start_min_y, + min_x, + max_x, + fonts, + 0, + false, + allowed_start_min_y_error, + )?; + } + return Ok(retval); + } + fn extract_insn_bit_fields( + &mut self, + mnemonic_lines: &[ParsedTextLine], + ) -> Result, ExtractInsnsError> { + let mut found_non_affix_line = false; + let [.., last_mnemonic_line] = mnemonic_lines else { + unreachable!(); + }; + let expected_non_affix_line_y = last_mnemonic_line.regular_min_y + - if mnemonic_lines.len() > 1 { + INSN_BIT_FIELDS_TOP_PAD_HEIGHT2 + } else { + INSN_BIT_FIELDS_TOP_PAD_HEIGHT + }; + let page = self.page().map_err(ExtractInsnsError::Other)?; + let _ = page.qt[&self.text_section].range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + expected_non_affix_line_y - 5.0, + expected_non_affix_line_y + 5.0, + |_x, _y, line| { + let PageItem::LineOrRect(LineOrRect::Line(line)) = line else { + return ControlFlow::Continue(()); + }; + if line.width() > line.height() { + found_non_affix_line = true; + return ControlFlow::Break(()); + } + ControlFlow::Continue(()) + }, + ); + if found_non_affix_line { + return self.extract_insn_bit_fields_box(expected_non_affix_line_y); + }; + let prefix_text = self.extract_text_line( + None, + last_mnemonic_line.regular_min_y - INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT, + self.text_section.min_x.get(), + 
self.text_section.max_x.get(), + TextLineFonts::InsnBitFieldsAffixTitleFonts, + 0, + true, + Some(2.0), + )?; + let Some(prefix_text) = prefix_text else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn prefix bit fields title".into(), + Backtrace::capture(), + )); + }; + let prefix_text_str = prefix_text.element.inner_text(); + if prefix_text_str != "Prefix:" { + return Err(ExtractInsnsError::InsnParseError( + format!("insn prefix bit fields title is not as expected: {prefix_text_str:?}"), + Backtrace::capture(), + )); + } + let prefix_bit_fields = self.extract_insn_bit_fields_box( + prefix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT, + )?; + let Some(prefix_bit_fields) = prefix_bit_fields else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn prefix bit fields".into(), + Backtrace::capture(), + )); + }; + let suffix_text = self.extract_text_line( + None, + prefix_bit_fields.box_min_y - INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + TextLineFonts::InsnBitFieldsAffixTitleFonts, + 0, + true, + Some(2.0), + )?; + let Some(suffix_text) = suffix_text else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn suffix bit fields title".into(), + Backtrace::capture(), + )); + }; + let suffix_text_str = suffix_text.element.inner_text(); + if suffix_text_str != "Suffix:" { + return Err(ExtractInsnsError::InsnParseError( + format!("insn suffix bit fields title is not as expected: {suffix_text_str:?}"), + Backtrace::capture(), + )); + } + let suffix_bit_fields = self.extract_insn_bit_fields_box( + suffix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT, + )?; + let Some(suffix_bit_fields) = suffix_bit_fields else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn suffix bit fields".into(), + Backtrace::capture(), + )); + }; + return Ok(Some(InsnBitFields { + prefix: Some(InsnBitFieldsPrefix { 
+ box_min_x: prefix_bit_fields.box_min_x, + box_min_y: prefix_bit_fields.box_min_y, + box_max_x: prefix_bit_fields.box_max_x, + box_max_y: prefix_bit_fields.box_max_y, + prefix_text: prefix_text, + fields: prefix_bit_fields.fields, + suffix_text: suffix_text, + }), + box_min_x: suffix_bit_fields.box_min_x, + box_min_y: suffix_bit_fields.box_min_y, + box_max_x: suffix_bit_fields.box_max_x, + box_max_y: suffix_bit_fields.box_max_y, + fields: suffix_bit_fields.fields, + })); + } + fn extract_insn_bit_fields_box( + &mut self, + expected_box_max_y: f32, + ) -> Result, ExtractInsnsError> { + let mut h_lines = Vec::new(); + let mut v_lines = Vec::new(); + let page = self.page().map_err(ExtractInsnsError::Other)?; + let ControlFlow::::Continue(()) = page.qt[&self.text_section].range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + expected_box_max_y - INSN_BIT_FIELDS_BOX_HEIGHT - 5.0, + expected_box_max_y + 5.0, + |_x, _y, line| { + let PageItem::LineOrRect(LineOrRect::Line(line)) = *line else { + return ControlFlow::Continue(()); + }; + if line.width() > line.height() { + h_lines.push(line); + } else { + v_lines.push(line); + } + ControlFlow::Continue(()) + }, + ); + h_lines.sort_by_key(|line| line.min_y()); + v_lines.sort_by_key(|line| line.min_x()); + for i in (0..v_lines.len().saturating_sub(1)).rev() { + if f32::abs(v_lines[i].min_x().get() - v_lines[i + 1].min_x().get()) < 0.5 { + v_lines.remove(i + 1); // remove duplicates + } + } + if h_lines.is_empty() && v_lines.is_empty() { + return Ok(None); + } + let [bottom_line, top_line] = &*h_lines else { + return Err(ExtractInsnsError::InsnParseError( + format!( + "instruction bit fields box has wrong number of horizontal lines:\n{h_lines:?}" + ), + Backtrace::capture(), + )); + }; + let [leftmost_line, .., rightmost_line] = &*v_lines else { + return Err(ExtractInsnsError::InsnParseError( + format!("instruction bit fields box has too few vertical lines:\n{v_lines:?}"), + 
Backtrace::capture(), + )); + }; + let box_min_x = leftmost_line.min_x().get(); + let box_max_x = rightmost_line.min_x().get(); + let box_min_y = bottom_line.min_y().get(); + let box_max_y = top_line.max_y().get(); + let box_mid_y = (box_min_y + box_max_y) * 0.5; + println!("bottom_line={bottom_line:?}"); + println!("top_line={top_line:?}"); + println!("{v_lines:?}"); + let mut fields = Vec::new(); + for i in 0..v_lines.len() - 1 { + let left_line = v_lines[i]; + let right_line = v_lines[i + 1]; + let field_box_min_x = left_line.max_x().get(); + let field_box_max_x = right_line.min_x().get(); + let bit_field_name_start_min_y = box_mid_y + 3.288; + let bit_field_name = self.extract_text_line( + None, + bit_field_name_start_min_y, + field_box_min_x, + field_box_max_x, + TextLineFonts::InsnBitFieldNameFonts, + 0, + true, + Some(0.4), + )?; + let Some(bit_field_name) = bit_field_name else { + return Err(ExtractInsnsError::InsnParseError( + format!( + "instruction bit field name not found:\n\ + start_min_y={bit_field_name_start_min_y} \ + field_box_min_x={field_box_min_x} \ + field_box_max_x={field_box_max_x}" + ), + Backtrace::capture(), + )); + }; + let bit_field_number_start_min_y = box_min_y + 3.487; + let bit_number = self.extract_text_line( + None, + bit_field_number_start_min_y, + field_box_min_x, + field_box_max_x, + TextLineFonts::InsnBitFieldBitNumberFonts, + 0, + true, + None, + )?; + let Some(bit_number) = bit_number else { + return Err(ExtractInsnsError::InsnParseError( + format!( + "instruction bit field bit number not found:\n\ + start_min_y={bit_field_number_start_min_y} \ + field_box_min_x={field_box_min_x} \ + field_box_max_x={field_box_max_x}" + ), + Backtrace::capture(), + )); + }; + fields.push(InsnBitField { + box_min_x: field_box_min_x, + box_max_x: field_box_max_x, + name: bit_field_name, + bit_number: bit_number, + }); + } + return Ok(Some(InsnBitFields { + prefix: None, + box_min_x, + box_min_y, + box_max_x, + box_max_y, + fields, + })); + } + 
fn extract_insn_header_mnemonics_and_bit_fields( + &mut self, + start_min_y: f32, + header_start_char: Option, + ) -> Result, ExtractInsnsError> { + assert!( + header_start_char + .as_ref() + .is_none_or(|v| v.font == Font::InsnHeader) + ); + let Some(header_line) = self.extract_text_line( + header_start_char, + start_min_y, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + TextLineFonts::InsnHeaderFonts, + 0, + true, + Some(6.0), + )? + else { + return Ok(None); + }; + println!("found header line:\n{header_line}"); + let header_lines = self.extract_following_text_lines( + header_line, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + Some(1.5), + )?; + println!("insn header lines:"); + for header_line in &header_lines { + println!("{header_line}"); + } + let [.., last_header_line] = &*header_lines else { + unreachable!(); + }; + let Some(mnemonic_start_char) = self + .find_top_left_char_in_range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + last_header_line.regular_min_y - 50.0, + last_header_line.regular_min_y - 5.0, + false, + ) + .map_err(ExtractInsnsError::Other)? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn mnemonic text line".into(), + Backtrace::capture(), + )); + }; + let mnemonic_start_char_min_y = mnemonic_start_char.min_y.get(); + let Some(mnemonic_line) = self.extract_text_line( + Some(mnemonic_start_char), + mnemonic_start_char_min_y, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + TextLineFonts::InsnMnemonicFonts, + 0, + true, + None, + )? 
+ else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn mnemonic text line".into(), + Backtrace::capture(), + )); + }; + let mnemonic_line_first_char_min_x = mnemonic_line.chars[0].min_x.get(); + let mnemonic_lines = self.extract_following_text_lines( + mnemonic_line, + mnemonic_line_first_char_min_x, + self.text_section.max_x.get(), + None, + )?; + println!("insn mnemonic lines:"); + for mnemonic_line in &mnemonic_lines { + println!("{mnemonic_line}"); + } + let Some(insn_bit_fields) = self.extract_insn_bit_fields(&mnemonic_lines)? else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn bit fields".into(), + Backtrace::capture(), + )); + }; + println!("{insn_bit_fields}"); + return Ok(Some(InsnHeader { + header_lines, + mnemonic_lines, + bit_fields: insn_bit_fields, + })); + } + fn extract_insn_sp_regs_altered( + &mut self, + mut sp_regs_altered_text: ParsedTextLine, + ) -> Result { + sp_regs_altered_text.preceding_blank_lines = 0; + let fonts = TextLineFonts::InsnDescFonts; + let column_min_x = sp_regs_altered_text.chars[0].min_x.get(); + let Some(table_header_reg_char) = self + .find_top_left_char_in_range( + column_min_x - 1.0, + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 1.0, + sp_regs_altered_text.regular_min_y - 30.0, + sp_regs_altered_text.regular_min_y - 5.0, + false, + ) + .map_err(ExtractInsnsError::Other)? 
+ else { + return Err(ExtractInsnsError::InsnParseError( + "can't find special registers altered table's register-column's header".into(), + Backtrace::capture(), + )); + }; + const KNOWN_SPECIAL_TEXTS: &[&str] = &[ + "None", + "Dependent on the system service", + "See above.", + "See Table 5.1", + ]; + match &*table_header_reg_char.text { + "R" => {} + text if KNOWN_SPECIAL_TEXTS.iter().any(|i| text == &i[..1]) => { + let start_min_y = table_header_reg_char.min_y.get(); + let special_text = self.extract_text_line( + Some(table_header_reg_char), + start_min_y, + column_min_x, + self.text_section.max_x.get(), + fonts, + 0, + true, + None, + )?; + let special_text = match special_text { + Some(special_text) + if KNOWN_SPECIAL_TEXTS.contains(&&*special_text.element.text) => + { + special_text + } + _ => return Err(ExtractInsnsError::Other( + format!( + "can't find special-registers-altered special-text:\n{special_text:?}" + ) + .into(), + )), + }; + let final_regular_min_y = special_text.regular_min_y; + return Ok(InsnSpRegsAltered { + sp_regs_altered_text, + special_text: Some(special_text), + table_header_reg: None, + table_header_fields: None, + entries: vec![], + final_regular_min_y, + }); + } + text => { + return Err(ExtractInsnsError::InsnParseError( + format!( + "unknown special-registers-altered special-text start character: {text:?}" + ), + Backtrace::capture(), + )); + } + } + let Some(table_header_fields_char) = self + .find_top_left_char_in_range( + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 10.0, + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, + table_header_reg_char.min_y.get() - 5.0, + table_header_reg_char.min_y.get() + 5.0, + false, + ) + .map_err(ExtractInsnsError::Other)? 
+ else { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's fields-column's header".into(), + )); + }; + if table_header_fields_char.text != "F" { + return Err(ExtractInsnsError::Other( + format!( + "can't find special registers altered table's fields-column's header:\n\ + table_header_fields_char={table_header_fields_char:?}" + ) + .into(), + )); + } + let columns_x_bounds = [ + ( + table_header_reg_char.min_x.get(), + table_header_fields_char.min_x.get() - 1.0, + ), + ( + table_header_fields_char.min_x.get(), + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, + ), + ( + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, + self.text_section.max_x.get(), + ), + ]; + let start_min_y = table_header_reg_char.min_y.get(); + let Some(table_header_reg) = self.extract_text_line( + Some(table_header_reg_char), + start_min_y, + columns_x_bounds[0].0, + columns_x_bounds[0].1, + fonts, + 0, + false, + None, + )? + else { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's register-column's header".into(), + )); + }; + let table_header_reg_text = table_header_reg.element.inner_text(); + if table_header_reg_text != "Register" { + return Err(ExtractInsnsError::Other( + format!( + "can't find special registers altered table's register-column's header:\n\ + table_header_reg_text={table_header_reg_text:?}" + ) + .into(), + )); + } + let start_min_y = table_header_fields_char.min_y.get(); + let Some(table_header_fields) = self.extract_text_line( + Some(table_header_fields_char), + start_min_y, + columns_x_bounds[1].0, + columns_x_bounds[1].1, + fonts, + 0, + false, + None, + )? 
+ else { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's fields-column's header".into(), + )); + }; + let table_header_fields_text = table_header_fields.element.inner_text(); + if table_header_reg_text != "Field(s)" { + return Err(ExtractInsnsError::Other( + format!( + "can't find special registers altered table's fields-column's header:\n\ + table_header_fields_text={table_header_fields_text:?}" + ) + .into(), + )); + } + let mut regular_min_y = table_header_reg.regular_min_y; + let mut entries = Vec::new(); + let mut cur_reg = None; + let mut cur_fields = Vec::new(); + let mut cur_conds = Vec::new(); + loop { + let mut row = [None, None, None]; + let mut next_regular_min_y = None; + for (i, (min_x, max_x)) in columns_x_bounds.into_iter().enumerate() { + row[i] = self.extract_text_line( + None, + regular_min_y - fonts.regular()[0].line_height(), + min_x, + max_x, + fonts, + 0, + true, + Some(2.0), + )?; + if let Some(cell) = &row[i] + && next_regular_min_y.is_none() + { + next_regular_min_y = Some(cell.regular_min_y); + } + } + match next_regular_min_y { + Some(v) => regular_min_y = v, + None => break, + } + let [cur_reg_cell, cur_fields_cell, cur_conds_cell] = row; + if cur_reg_cell.is_none() { + if cur_reg.is_none() { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's first register".into(), + )); + } + cur_fields.extend(cur_fields_cell); + cur_conds.extend(cur_conds_cell); + continue; + } + if let Some(cur_reg) = cur_reg { + entries.push(InsnSpRegsAlteredEntry { + reg: cur_reg, + fields: cur_fields, + conds: cur_conds, + }); + cur_fields = Vec::new(); + cur_conds = Vec::new(); + } + cur_reg = cur_reg_cell; + cur_fields.extend(cur_fields_cell); + cur_conds.extend(cur_conds_cell); + } + let Some(cur_reg) = cur_reg else { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's first register".into(), + )); + }; + entries.push(InsnSpRegsAlteredEntry { + reg: 
cur_reg, + fields: cur_fields, + conds: cur_conds, + }); + return Ok(InsnSpRegsAltered { + sp_regs_altered_text: sp_regs_altered_text, + special_text: None, + table_header_reg: Some(table_header_reg), + table_header_fields: Some(table_header_fields), + entries, + final_regular_min_y: regular_min_y, + }); + } + fn extract_insn(&mut self, header_start_char: Char) -> Result { + assert_eq!(header_start_char.font, Font::InsnHeader); + println!("{header_start_char:?}"); + let Some(header) = self.extract_insn_header_mnemonics_and_bit_fields( + header_start_char.min_y.get(), + Some(header_start_char), + )? + else { + return Err(ExtractInsnsError::PageParseError( + "can't find header text line".into(), + Backtrace::capture(), + )); + }; + let mut next_start_min_y = header.min_y() - 5.0; + let mut headers = vec![header]; + let mut code_lines: Vec = Vec::new(); + let mut desc_lines: Vec = Vec::new(); + let mut sp_regs_altered = None; + loop { + let search_min_y = next_start_min_y - 70.0; + let Some(next_char) = self + .find_top_left_char_in_range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + search_min_y.max(self.text_section.min_y.get()), + next_start_min_y, + false, + ) + .map_err(ExtractInsnsError::Other)? + else { + if search_min_y <= self.text_section.min_y.get() + && self + .pages + .get(self.text_section.next().page_num) + .map_err(ExtractInsnsError::Other)? 
+ .is_some() + { + // go to next section + self.text_section = self.text_section.next(); + next_start_min_y = self.text_section.max_y.get(); + continue; + } else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn code or description text".into(), + Backtrace::capture(), + )); + } + }; + let next_section = match &next_char.font { + font if TextLineFonts::InsnCodeFonts.fonts().contains(font) => { + InsnParseSection::Code + } + font if TextLineFonts::InsnDescFonts.fonts().contains(font) => { + InsnParseSection::Desc + } + Font::InsnHeader => InsnParseSection::Header, + font => { + return Err(ExtractInsnsError::InsnParseError( + format!("can't find insn code or description text\nfont={font:?}"), + Backtrace::capture(), + )); + } + }; + match next_section { + InsnParseSection::Code => { + if !desc_lines.is_empty() { + break; + } + let start_min_y = next_char.min_y.get(); + let min_x = next_char.min_x.get(); + let Some(code_line) = self.extract_text_line( + Some(next_char), + start_min_y, + min_x, + self.text_section.max_x.get(), + TextLineFonts::InsnCodeFonts, + if code_lines.is_empty() { 0 } else { 1 }, + false, + None, + )? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn code text line".into(), + Backtrace::capture(), + )); + }; + let min_x = code_line.chars[0].min_x.get(); + let more_code_lines = self.extract_following_text_lines( + code_line, + min_x, + self.text_section.max_x.get(), + Some(0.05), + )?; + println!("more insn code lines:"); + for i in &more_code_lines { + println!("{i}"); + } + code_lines.extend(more_code_lines); + let Some(last) = code_lines.last() else { + unreachable!() + }; + next_start_min_y = last.regular_min_y - 5.0; + } + InsnParseSection::Header => { + if !(code_lines.is_empty() && desc_lines.is_empty()) { + break; + } + let Some(header) = self.extract_insn_header_mnemonics_and_bit_fields( + next_char.min_y.get(), + Some(next_char), + )? 
+ else { + return Err(ExtractInsnsError::InsnParseError( + "can't find header text line".into(), + Backtrace::capture(), + )); + }; + next_start_min_y = header.min_y() - 5.0; + headers.push(header); + } + InsnParseSection::Desc => { + let start_min_y = next_char.min_y.get(); + let min_x = next_char.min_x.get(); + let Some(desc_line) = self.extract_text_line( + Some(next_char), + start_min_y, + min_x, + self.text_section.max_x.get(), + TextLineFonts::InsnDescFonts, + if desc_lines.is_empty() { 0 } else { 1 }, + false, + Some(3.0), + )? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn desc text line".into(), + Backtrace::capture(), + )); + }; + match desc_line.get_header_text() { + None => { + let min_x = desc_line.chars[0].min_x.get(); + let more_desc_lines = self.extract_following_text_lines( + desc_line, + min_x, + self.text_section.max_x.get(), + Some(3.5), + )?; + println!("more insn desc lines:"); + for i in &more_desc_lines { + println!("{i}"); + } + desc_lines.extend(more_desc_lines); + next_start_min_y = desc_lines + .last() + .expect("known to be non-empty") + .regular_min_y + - 5.0; + } + Some(header_text) if header_text == "Special Registers Altered:" => { + let new_sp_regs_altered = + self.extract_insn_sp_regs_altered(desc_line)?; + next_start_min_y = new_sp_regs_altered.final_regular_min_y; + sp_regs_altered = Some(new_sp_regs_altered); + break; + } + Some(header_text) => { + return Err(ExtractInsnsError::Other( + format!("unhandled header text: {header_text:?}\n{desc_line}") + .into(), + )); + } + } + } + } + } + println!("insn code lines:"); + for i in &code_lines { + println!("{i}"); + } + println!("insn desc lines:"); + for i in &desc_lines { + println!("{i}"); + } + println!("sp_regs_altered:"); + println!("{sp_regs_altered:?}"); + // TODO: finish + return Ok(Insn { + headers, + code_lines, + desc_lines, + sp_regs_altered, + }); + } + fn extract_insns(&mut self) -> Result<(), ExtractInsnsError> { + loop { + let 
Some(header_start_char) = + RefCell::borrow(&*self.unprocessed_chars().map_err(ExtractInsnsError::Other)?) + .get(&Font::InsnHeader) + .and_then(|v| v.first().cloned()) + else { + return Ok(()); + }; + let insn = self.extract_insn(header_start_char)?; + self.insns.push(insn); + } } } -#[derive(Clone, Debug, Default)] -struct MyDevice { - qt: Rc>>>, +#[derive(Debug)] +struct MyDevice<'a> { + page_num: u32, + qt: RefCell>>, + unprocessed_chars: + Rc>>>>>>, unprocessed_non_text: Rc>>, + first_seen_fonts: RefCell<&'a mut BTreeMap>>, + error: RefCell>>, } -impl mupdf::NativeDevice for MyDevice { +impl<'a> MyDevice<'a> { + fn new(page_num: u32, first_seen_fonts: &'a mut BTreeMap>) -> Self { + Self { + page_num, + qt: Default::default(), + unprocessed_chars: Default::default(), + unprocessed_non_text: Default::default(), + first_seen_fonts: RefCell::new(first_seen_fonts), + error: RefCell::new(Ok(())), + } + } + fn path(&self, path: &mupdf_ffi::Path<'_>, ctm: fz_matrix) { + if self.error.borrow().is_err() { + return; + } + enum Walker { + Empty, + Moved { x: f32, y: f32 }, + Line(Line), + Rect { x1: f32, y1: f32, x2: f32, y2: f32 }, + NotRecognized, + } + fn new_line(p0_x: f32, p0_y: f32, p1_x: f32, p1_y: f32) -> Option { + Some(Line { + p0_x: NonNaNF32::new(p0_x)?, + p0_y: NonNaNF32::new(p0_y)?, + p1_x: NonNaNF32::new(p1_x)?, + p1_y: NonNaNF32::new(p1_y)?, + }) + } + impl<'ctx> mupdf_ffi::PathWalker<'ctx> for Walker { + fn move_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) { + *self = match *self { + Walker::Empty | Walker::Moved { .. } => Walker::Moved { x, y }, + Walker::Line(_) | Walker::Rect { .. 
} | Walker::NotRecognized => { + Walker::NotRecognized + } + }; + } + fn line_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) { + *self = match *self { + Walker::Empty => Walker::NotRecognized, + Walker::Moved { x: p0_x, y: p0_y } => new_line(p0_x, p0_y, x, y) + .map(Walker::Line) + .unwrap_or(Walker::NotRecognized), + Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => { + Walker::NotRecognized + } + }; + } + fn curve_to( + &mut self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + _cx1: f32, + _cy1: f32, + _cx2: f32, + _cy2: f32, + _ex: f32, + _ey: f32, + ) { + *self = Walker::NotRecognized; + } + fn close_path(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>) {} + fn rect_to( + &mut self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + ) { + *self = match *self { + Walker::Empty => Walker::Rect { x1, y1, x2, y2 }, + Walker::Moved { .. } + | Walker::Line(..) + | Walker::Rect { .. } + | Walker::NotRecognized => Walker::NotRecognized, + }; + } + } + let mut walker = Walker::Empty; + path.walk(&mut walker); + let component = match walker { + Walker::Empty | Walker::Moved { .. 
} | Walker::NotRecognized => return, + Walker::Line(Line { + p0_x, + p0_y, + p1_x, + p1_y, + }) => { + let mupdf_sys::fz_point { x: p0_x, y: p0_y } = + mupdf_ffi::transform_point_xy(p0_x.get(), p0_y.get(), ctm); + let mupdf_sys::fz_point { x: p1_x, y: p1_y } = + mupdf_ffi::transform_point_xy(p1_x.get(), p1_y.get(), ctm); + let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else { + return; + }; + LineOrRect::Line(line) + } + Walker::Rect { x1, y1, x2, y2 } => { + let p1 = mupdf_ffi::transform_point_xy(x1, y1, ctm); + let p2 = mupdf_ffi::transform_point_xy(x2, y1, ctm); + let p3 = mupdf_ffi::transform_point_xy(x2, y2, ctm); + let p4 = mupdf_ffi::transform_point_xy(x1, y2, ctm); + let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x)); + let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x)); + let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y)); + let max_y = NonNaNF32::new(p1.y.max(p2.y).max(p3.y).max(p4.y)); + let (Some(min_x), Some(max_x), Some(min_y), Some(max_y)) = + (min_x, max_x, min_y, max_y) + else { + return; + }; + LineOrRect::Rect(Rect { + min_x, + max_x, + min_y, + max_y, + }) + } + }; + if component.width() > 100.0 + && component.min_x().get() < COLUMN_SPLIT_X - 10.0 + && component.max_x().get() > COLUMN_SPLIT_X + 10.0 + { + println!("wide component: {component:?}"); + } else { + println!("component: {component:?}"); + } + let text_section = TextSection::for_position( + self.page_num, + (component.min_x().get() + component.max_x().get()) * 0.5, + (component.min_y().get() + component.max_y().get()) * 0.5, + ); + if let Some(text_section) = text_section { + self.qt + .borrow_mut() + .entry(text_section) + .or_default() + .insert( + component.min_x().get(), + component.min_y().get(), + PageItem::LineOrRect(component), + ); + } + } + fn text(&self, text: &mupdf_ffi::Text<'_>, ctm: fz_matrix) { + if self.error.borrow().is_err() { + return; + } + let mut first_seen_fonts = self.first_seen_fonts.borrow_mut(); + for span in 
text.spans() { + let tm = span.trm(); + const ROUND_FACTOR: f32 = 1000.0; + let font_size = (mupdf_ffi::matrix_expansion(tm) * ROUND_FACTOR).round() / ROUND_FACTOR; + let Some(font_size) = NonNaNF32::new(font_size) else { + continue; + }; + let font_name_with_tag = span.font().name(); + let font_name_with_tag = match font_name_with_tag { + "CGMSHV+DejaVuSansCondensed-Obli" => "CGMSHV+DejaVuSansCondensed-Oblique", + "YDJYQV+DejaVuSansCondensed-Bold" => "YDJYQV+DejaVuSansCondensed-BoldOblique", + "NHUPPK+DejaVuSansCondensed-Bold" => "NHUPPK+DejaVuSansCondensed-Bold", + _ if font_name_with_tag.len() == 31 => { + let _ = self.error.replace(Err(format!( + "probably truncated font name: {font_name_with_tag:?}" + ) + .into())); + return; + } + _ => font_name_with_tag, + }; + for &fz_text_item { + x, + y, + adv, + gid, + ucs, + cid: _, + } in span.items() + { + let adv = if gid >= 0 { adv } else { 0.0 }; + let tm = fz_matrix { e: x, f: y, ..tm }; + let trm = mupdf_ffi::concat(tm, ctm); + let dir = match span.write_mode() { + WriteMode::Horizontal => fz_point { x: 1.0, y: 0.0 }, + WriteMode::Vertical => fz_point { x: 0.0, y: -1.0 }, + }; + let dir = mupdf_ffi::transform_vector(dir, trm); + let glyph_start; + let glyph_stop; + let glyph_ascender; + let glyph_descender; + match span.write_mode() { + WriteMode::Horizontal => { + glyph_start = fz_point { x: trm.e, y: trm.f }; + glyph_stop = fz_point { + x: trm.e + adv * dir.x, + y: trm.f + adv * dir.y, + }; + glyph_ascender = fz_point { + x: 0.0, + y: span.font().ascender(), + }; + glyph_descender = fz_point { + x: 0.0, + y: span.font().descender(), + }; + } + WriteMode::Vertical => { + glyph_start = fz_point { + x: trm.e - adv * dir.x, + y: trm.f - adv * dir.y, + }; + glyph_stop = fz_point { x: trm.e, y: trm.f }; + glyph_ascender = fz_point { x: 1.0, y: 0.0 }; + glyph_descender = fz_point { x: 0.0, y: 0.0 }; + } + }; + let glyph_ascender = transform_vector(glyph_ascender, trm); + let glyph_descender = 
transform_vector(glyph_descender, trm); + let points = [ + add_points(glyph_start, glyph_descender), + add_points(glyph_start, glyph_ascender), + add_points(glyph_stop, glyph_descender), + add_points(glyph_stop, glyph_ascender), + ]; + let min = point_min_components( + point_min_components(point_min_components(points[0], points[1]), points[2]), + points[3], + ); + let max = point_max_components( + point_max_components(point_max_components(points[0], points[1]), points[2]), + points[3], + ); + let Some(ch) = u32::try_from(ucs).ok().and_then(|v| char::try_from(v).ok()) else { + continue; + }; + let text = String::from(ch); + if text.trim().is_empty() { + continue; + } + let font = Font::known_from_name_with_tag(font_name_with_tag, font_size) + .unwrap_or_else(|| Font::Other { + font_name: font_name_with_tag.into(), + size: font_size, + }); + let Some(text_section) = TextSection::for_position( + self.page_num, + (min.x + max.x) * 0.5, + (min.y + max.y) * 0.5, + ) else { + if PAGE_BODY_MIN_Y <= min.y && min.y <= PAGE_BODY_MAX_Y { + if self.page_num != 1072 { + // page 1072 has characters in the margins + let _ = self.error.replace(Err(format!( + "char not in text section: {text:?}\npage_num={}", + self.page_num, + ) + .into())); + return; + } + } + continue; + }; + let (Some(min_x), Some(min_y), Some(max_x), Some(max_y)) = ( + NonNaNF32::new(min.x), + NonNaNF32::new(min.y), + NonNaNF32::new(max.x), + NonNaNF32::new(max.y), + ) else { + let _ = self + .error + .replace(Err("char position shouldn't be NaN".into())); + return; + }; + let char = Char { + font, + text, + min_x, + min_y, + max_x, + max_y, + }; + let set = match first_seen_fonts.get_mut(font_name_with_tag) { + Some(v) => v, + None => first_seen_fonts + .entry(String::from(font_name_with_tag)) + .or_default(), + }; + if set.insert(font_size) { + println!( + "first seen font: {font_name_with_tag:?} {font_size}: page {} {char:?}", + self.page_num, + ); + } + self.qt + .borrow_mut() + .entry(text_section) + 
.or_default() + .insert(min_x.get(), min_y.get(), PageItem::Char(char.clone())); + self.unprocessed_chars + .borrow_mut() + .entry(text_section) + .or_default() + .borrow_mut() + .entry(char.font.clone()) + .or_default() + .insert(char); + } + } + } +} + +impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice<'_> { fn fill_path( - &mut self, - path: &mupdf::Path, - even_odd: bool, - cmt: mupdf::Matrix, - color_space: &mupdf::Colorspace, - color: &[f32], - alpha: f32, - cp: mupdf::ColorParams, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + path: &mupdf_ffi::Path<'ctx>, + _even_odd: bool, + ctm: fz_matrix, ) { - // TODO + self.path(path, ctm); } fn stroke_path( - &mut self, - path: &mupdf::Path, - stroke_state: &mupdf::StrokeState, - cmt: mupdf::Matrix, - color_space: &mupdf::Colorspace, - color: &[f32], - alpha: f32, - cp: mupdf::ColorParams, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + path: &mupdf_ffi::Path<'ctx>, + ctm: fz_matrix, ) { - // TODO + self.path(path, ctm); } fn clip_path( - &mut self, - path: &mupdf::Path, - even_odd: bool, - cmt: mupdf::Matrix, - scissor: mupdf::Rect, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + path: &mupdf_ffi::Path<'ctx>, + _even_odd: bool, + ctm: fz_matrix, + _scissor: mupdf_sys::fz_rect, ) { - // TODO + self.path(path, ctm); } fn clip_stroke_path( - &mut self, - path: &mupdf::Path, - stroke_state: &mupdf::StrokeState, - cmt: mupdf::Matrix, - scissor: mupdf::Rect, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + path: &mupdf_ffi::Path<'ctx>, + ctm: fz_matrix, + _scissor: mupdf_sys::fz_rect, ) { - // TODO + self.path(path, ctm); + } + + fn fill_text( + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + text: &mupdf_ffi::Text<'ctx>, + ctm: fz_matrix, + ) { + self.text(text, ctm); + } + + fn stroke_text( + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + text: &mupdf_ffi::Text<'ctx>, + ctm: fz_matrix, + ) { + self.text(text, ctm); + } + + fn clip_text( + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + text: &mupdf_ffi::Text<'ctx>, + 
ctm: fz_matrix, + _scissor: mupdf_sys::fz_rect, + ) { + self.text(text, ctm); + } + + fn clip_stroke_text( + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + text: &mupdf_ffi::Text<'ctx>, + ctm: fz_matrix, + _scissor: mupdf_sys::fz_rect, + ) { + self.text(text, ctm); + } + + fn ignore_text( + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + text: &mupdf_ffi::Text<'ctx>, + ctm: fz_matrix, + ) { + self.text(text, ctm); } } @@ -2095,83 +3663,37 @@ struct MuPdfXmlChar<'a> { impl Page { fn from_mupdf_page( page_num: u32, - page: mupdf::Page, + page: &mupdf_ffi::Page<'_>, + first_seen_fonts: &mut BTreeMap>, ) -> Result> { - let device = MyDevice::default(); + let device = mupdf_ffi::Device::new( + page.ctx(), + Box::new(MyDevice::new(page_num, first_seen_fonts)), + )?; page.run( - &mupdf::Device::from_native(device.clone())?, - &mupdf::Matrix::IDENTITY, + &device, + fz_matrix { + a: 1.0, + b: 0.0, + c: 0.0, + d: 1.0, + e: 0.0, + f: 0.0, + }, )?; let MyDevice { + page_num: _, qt, + unprocessed_chars, unprocessed_non_text, - } = device; - let mut qt = Rc::try_unwrap(qt) - .ok() - .expect("already dropped all other references") - .into_inner(); - let unprocessed_chars: Rc< - RefCell>>>>>, - > = Rc::default(); - // we convert to xml and parse that becuase the mupdf rust crate doesn't include all the API surface we need. 
- let xml = page.to_xml()?; - let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?; - for xml_block in xml_page.block { - for xml_line in xml_block.line { - for xml_font in xml_line.font { - const ROUND_FACTOR: f32 = 1000.0; - let font_size = (xml_font.size * ROUND_FACTOR).round() / ROUND_FACTOR; - let font_size = NonNaNF32::new(font_size).ok_or("font size must not be NaN")?; - let font = Font::new(&xml_font.name, font_size); - for xml_char in xml_font.char { - let [x0, y0, x1, y1, x2, y2, x3, y3] = xml_char.quad; - let min_x = x0.min(x1).min(x2).min(x3); - let max_x = x0.max(x1).max(x2).max(x3); - let min_y = y0.min(y1).min(y2).min(y3); - let max_y = y0.max(y1).max(y2).max(y3); - let Some(text_section) = TextSection::for_position( - page_num, - (min_x + max_x) * 0.5, - (min_y + max_y) * 0.5, - ) else { - if PAGE_BODY_MIN_Y <= min_y && min_y <= PAGE_BODY_MAX_Y { - if page_num != 1072 { - // page 1072 has characters in the margins - return Err( - format!("char not in text section: {xml_char:?}\npage_num={page_num}").into(), - ); - } - } - continue; - }; - let char = Char { - font: font.clone(), - text: xml_char.c.into_owned(), - min_x: NonNaNF32::new(min_x).ok_or("char position shouldn't be NaN")?, - min_y: NonNaNF32::new(min_y).ok_or("char position shouldn't be NaN")?, - max_x: NonNaNF32::new(max_x).ok_or("char position shouldn't be NaN")?, - max_y: NonNaNF32::new(max_y).ok_or("char position shouldn't be NaN")?, - }; - qt.entry(text_section).or_default().insert( - min_x, - min_y, - PageItem::Char(char.clone()), - ); - unprocessed_chars - .borrow_mut() - .entry(text_section) - .or_default() - .borrow_mut() - .entry(char.font.clone()) - .or_default() - .insert(char); - } - } - } - } - for i in unprocessed_chars.borrow_mut().values_mut() { - for j in i.borrow_mut().values_mut() { - j.sort_by_key(Char::top_down_left_to_right_sort_key); + first_seen_fonts: _, + error, + } = device.get(); + error.replace(Ok(()))?; + for (text_section, i) in 
unprocessed_chars.borrow_mut().iter_mut() { + for chars in i.borrow_mut().values_mut() { + chars.sort_by_key(Char::top_down_left_to_right_sort_key); + println!("first char: {text_section:?}: {:?}", chars.first()); } } let mut unknown_fonts = Vec::new(); @@ -2202,14 +3724,14 @@ impl Page { } Ok(Self { page_num, - qt, - unprocessed_chars, - unprocessed_non_text, + qt: qt.take(), + unprocessed_chars: unprocessed_chars.clone(), + unprocessed_non_text: unprocessed_non_text.clone(), }) } } -fn main() -> Result<(), Box> { +fn main_inner() -> Result<(), Box> { let args: Vec = std::env::args().collect(); let page_numbers: Option>>> = if 2 < args.len() { Some(if let Some((start, end)) = args[2].split_once(":") { @@ -2229,25 +3751,37 @@ fn main() -> Result<(), Box> { } else { None }; - let mut parser = Parser::new(); - let is_subset = page_numbers.is_some(); - let file_name = &args[1]; - parser.parse_pdf(file_name, page_numbers)?; - let mut insns = xml_tree::Element::new( - "instructions".into(), - [("is-subset".into(), is_subset.to_string())], - ); - insns.text = "\n".into(); - insns.tail = "\n".into(); - let mut comment = - xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); - comment.tail = "\n".into(); - insns.children.push(comment); - for insn in parser.insns { - insn.write_xml(&mut insns); - } - let mut output = Vec::new(); - insns.write(&mut output, true)?; - std::fs::write("powerisa-instructions.xml", output)?; - Ok(()) + mupdf_ffi::Context::with(|ctx| { + let mut parser = Parser::new(); + let is_subset = page_numbers.is_some(); + let file_name = &args[1]; + parser.parse_pdf(ctx, file_name, page_numbers)?; + let mut insns = xml_tree::Element::new( + "instructions".into(), + [("is-subset".into(), is_subset.to_string())], + ); + insns.text = "\n".into(); + insns.tail = "\n".into(); + let mut comment = + xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); + comment.tail = "\n".into(); + 
insns.children.push(comment); + for insn in parser.insns { + insn.write_xml(&mut insns); + } + let mut output = Vec::new(); + insns.write(&mut output, true)?; + std::fs::write("powerisa-instructions.xml", output)?; + Ok(()) + }) +} + +fn main() -> std::process::ExitCode { + match main_inner() { + Ok(()) => std::process::ExitCode::SUCCESS, + Err(e) => { + println!("Error: {e}"); + std::process::ExitCode::FAILURE + } + } } diff --git a/src/mupdf_ffi.rs b/src/mupdf_ffi.rs new file mode 100644 index 0000000..942bcfc --- /dev/null +++ b/src/mupdf_ffi.rs @@ -0,0 +1,804 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use mupdf_sys::{ + fz_clone_context, fz_color_params, fz_colorspace, fz_concat, fz_context, fz_device, + fz_document, fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, + fz_drop_text, fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_font_ascender, fz_font_descender, + fz_font_is_bold, fz_font_is_italic, fz_font_name, fz_matrix, fz_matrix_expansion, fz_page, + fz_path, fz_path_walker, fz_point, fz_rect, fz_stroke_state, fz_text, fz_text_item, + fz_text_span, fz_transform_point, fz_transform_point_xy, fz_transform_vector, fz_walk_path, + mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page, + mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page, +}; +use std::{ + cell::{Cell, UnsafeCell}, + ffi::{CStr, c_int, c_void}, + fmt, + marker::PhantomData, + mem::ManuallyDrop, + ptr::{self, NonNull}, + sync::{Mutex, OnceLock}, +}; + +#[derive(Debug)] +pub(crate) struct MuPdfError { + type_: c_int, + message: String, +} + +impl MuPdfError { + fn new_generic(message: impl ToString) -> Self { + Self { + type_: fz_error_type_FZ_ERROR_GENERIC as _, + message: message.to_string(), + } + } +} + +impl fmt::Display for MuPdfError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "MuPDF error: type: {}, message: {}", + 
self.type_, self.message + ) + } +} + +impl std::error::Error for MuPdfError {} + +struct OwnedMuPdfError(NonNull); + +impl Drop for OwnedMuPdfError { + fn drop(&mut self) { + unsafe { + mupdf_drop_error(self.0.as_ptr()); + } + } +} + +unsafe fn mupdf_try(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result { + let mut err = ptr::null_mut(); + let retval = f(&mut err); + let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else { + return Ok(retval); + }; + unsafe { + Err(MuPdfError { + type_: (*err.0.as_ptr()).type_, + message: CStr::from_ptr((*err.0.as_ptr()).message) + .to_string_lossy() + .into_owned(), + }) + } +} + +pub(crate) struct Context(NonNull); + +impl Context { + fn new() -> Self { + struct BaseContext(NonNull); + unsafe impl Send for BaseContext {} + static CTX: OnceLock> = OnceLock::new(); + let base = CTX + .get_or_init(|| { + let ctx = unsafe { mupdf_new_base_context() }; + let Some(ctx) = NonNull::new(ctx).map(BaseContext) else { + panic!("failed to allocate a MuPDF context"); + }; + Mutex::new(ctx) + }) + .lock() + .expect("not poisoned"); + let ctx = unsafe { fz_clone_context(base.0.as_ptr()) }; + let Some(ctx) = NonNull::new(ctx).map(Self) else { + drop(base); + panic!("failed to clone a MuPDF context"); + }; + ctx + } + pub(crate) fn with(f: impl FnOnce(&Self) -> R) -> R { + thread_local! 
{ + static CTX: Context = Context::new(); + } + CTX.with(f) + } + pub(crate) fn as_ref(&self) -> ContextRef<'_> { + unsafe { ContextRef::from_ptr(self.0.as_ptr()) } + } +} + +impl Drop for Context { + fn drop(&mut self) { + unsafe { + fz_drop_context(self.0.as_ptr()); + } + } +} + +#[derive(Clone, Copy)] +pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell); + +impl<'ctx> ContextRef<'ctx> { + unsafe fn from_ptr(ptr: *mut fz_context) -> Self { + Self(unsafe { &*ptr.cast() }) + } +} + +impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> { + fn from(value: &'ctx Context) -> Self { + value.as_ref() + } +} + +pub(crate) struct Document<'ctx> { + ptr: *mut fz_document, + ctx: ContextRef<'ctx>, +} + +impl<'ctx> Document<'ctx> { + pub(crate) fn open( + ctx: impl Into>, + file_name: &CStr, + ) -> Result, MuPdfError> { + let ctx = ctx.into(); + unsafe { + mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr)) + .map(|ptr| Document { ptr, ctx }) + } + } + pub(crate) fn page_count(&self) -> Result { + unsafe { + mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))? 
+ .try_into() + .map_err(MuPdfError::new_generic) + } + } + pub(crate) fn load_page(&self, page: usize) -> Result, MuPdfError> { + let page = page.try_into().map_err(MuPdfError::new_generic)?; + unsafe { + mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr)) + .map(|ptr| Page { ptr, ctx: self.ctx }) + } + } +} + +impl<'ctx> Drop for Document<'ctx> { + fn drop(&mut self) { + unsafe { + fz_drop_document(self.ctx.0.get(), self.ptr); + } + } +} + +pub(crate) struct Page<'ctx> { + ptr: *mut fz_page, + ctx: ContextRef<'ctx>, +} + +impl<'ctx> Page<'ctx> { + pub(crate) fn ctx(&self) -> ContextRef<'ctx> { + self.ctx + } + pub(crate) fn run( + &self, + device: &Device<'ctx, T>, + ctm: fz_matrix, + ) -> Result<(), MuPdfError> { + unsafe { + mupdf_try(|errptr| { + mupdf_run_page( + self.ctx.0.get(), + self.ptr, + device.dev, + ctm, + ptr::null_mut(), + errptr, + ) + }) + } + } +} + +impl<'ctx> Drop for Page<'ctx> { + fn drop(&mut self) { + unsafe { + fz_drop_page(self.ctx.0.get(), self.ptr); + } + } +} + +pub(crate) struct Device<'ctx, T: 'ctx> { + dev: *mut fz_device, + ctx: ContextRef<'ctx>, + _phantom: PhantomData>>, +} + +pub(crate) trait DeviceCallbacks<'ctx> { + fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix); + fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix); + fn clip_path( + &self, + ctx: ContextRef<'ctx>, + path: &Path<'ctx>, + even_odd: bool, + ctm: fz_matrix, + scissor: fz_rect, + ); + fn clip_stroke_path( + &self, + ctx: ContextRef<'ctx>, + path: &Path<'ctx>, + ctm: fz_matrix, + scissor: fz_rect, + ); + fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); + fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); + fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect); + fn clip_stroke_text( + &self, + ctx: ContextRef<'ctx>, + text: &Text<'ctx>, + ctm: fz_matrix, + 
scissor: fz_rect, + ); + fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); +} + +impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { + pub(crate) fn new(ctx: impl Into>, value: Box) -> Result { + let ctx = ctx.into(); + unsafe { + let dev_ptr = mupdf_try(|errptr| { + mupdf_new_derived_device::>( + ctx.0.get(), + c"parse_powerisa_pdf::mupdf_ffi::Device", + errptr, + ) + })?; + let retval = Device { + dev: dev_ptr.cast(), + ctx, + _phantom: PhantomData, + }; + (&raw mut (*dev_ptr).value).write(value); + let fz_device { + drop_device, + fill_path, + stroke_path, + clip_path, + clip_stroke_path, + fill_text, + stroke_text, + clip_text, + clip_stroke_text, + ignore_text, + .. + } = &mut (*dev_ptr).base; + *drop_device = Some(Self::drop_device_fn); + *fill_path = Some(Self::fill_path_fn); + *stroke_path = Some(Self::stroke_path_fn); + *clip_path = Some(Self::clip_path_fn); + *clip_stroke_path = Some(Self::clip_stroke_path_fn); + *fill_text = Some(Self::fill_text_fn); + *stroke_text = Some(Self::stroke_text_fn); + *clip_text = Some(Self::clip_text_fn); + *clip_stroke_text = Some(Self::clip_stroke_text_fn); + *ignore_text = Some(Self::ignore_text_fn); + Ok(retval) + } + } + pub(crate) fn get(&self) -> &T { + unsafe { &(*self.dev.cast::>()).value } + } + unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) { + unsafe { + (&raw mut (*dev.cast::>()).value).drop_in_place(); + } + } + unsafe extern "C" fn fill_path_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + path: *const fz_path, + even_odd: c_int, + ctm: fz_matrix, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.fill_path( + ctx, + &ManuallyDrop::new(Path { + ptr: path.cast_mut(), + ctx, + }), + even_odd != 0, + ctm, + ); + } + unsafe extern "C" fn stroke_path_fn( + ctx: *mut 
fz_context, + dev: *mut fz_device, + path: *const fz_path, + _stroke_state: *const fz_stroke_state, + ctm: fz_matrix, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.stroke_path( + ctx, + &ManuallyDrop::new(Path { + ptr: path.cast_mut(), + ctx, + }), + ctm, + ); + } + unsafe extern "C" fn clip_path_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + path: *const fz_path, + even_odd: ::std::os::raw::c_int, + ctm: fz_matrix, + scissor: fz_rect, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.clip_path( + ctx, + &ManuallyDrop::new(Path { + ptr: path.cast_mut(), + ctx, + }), + even_odd != 0, + ctm, + scissor, + ); + } + unsafe extern "C" fn clip_stroke_path_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + path: *const fz_path, + _stroke_state: *const fz_stroke_state, + ctm: fz_matrix, + scissor: fz_rect, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.clip_stroke_path( + ctx, + &ManuallyDrop::new(Path { + ptr: path.cast_mut(), + ctx, + }), + ctm, + scissor, + ); + } + unsafe extern "C" fn fill_text_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + text: *const fz_text, + ctm: fz_matrix, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.fill_text( + ctx, + &ManuallyDrop::new(Text { + ptr: text.cast_mut(), + ctx, + }), + ctm, + ); + } + unsafe extern "C" fn stroke_text_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + text: *const fz_text, + _stroke_state: *const fz_stroke_state, + ctm: fz_matrix, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: 
fz_color_params, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.stroke_text( + ctx, + &ManuallyDrop::new(Text { + ptr: text.cast_mut(), + ctx, + }), + ctm, + ); + } + unsafe extern "C" fn clip_text_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + text: *const fz_text, + ctm: fz_matrix, + scissor: fz_rect, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.clip_text( + ctx, + &ManuallyDrop::new(Text { + ptr: text.cast_mut(), + ctx, + }), + ctm, + scissor, + ); + } + unsafe extern "C" fn clip_stroke_text_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + text: *const fz_text, + _stroke_state: *const fz_stroke_state, + ctm: fz_matrix, + scissor: fz_rect, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.clip_stroke_text( + ctx, + &ManuallyDrop::new(Text { + ptr: text.cast_mut(), + ctx, + }), + ctm, + scissor, + ); + } + unsafe extern "C" fn ignore_text_fn( + ctx: *mut fz_context, + dev: *mut fz_device, + text: *const fz_text, + ctm: fz_matrix, + ) { + let ctx = unsafe { ContextRef::from_ptr(ctx) }; + let this = unsafe { &mut (*dev.cast::>()).value }; + this.ignore_text( + ctx, + &ManuallyDrop::new(Text { + ptr: text.cast_mut(), + ctx, + }), + ctm, + ); + } +} + +impl<'ctx, T> Drop for Device<'ctx, T> { + fn drop(&mut self) { + unsafe { + // FIXME: fz_close_device may throw exceptions + // fz_close_device(self.ctx.0.get(), self.dev); + fz_drop_device(self.ctx.0.get(), self.dev); + } + } +} + +#[repr(C)] +struct DeviceStruct { + base: fz_device, + value: Box, +} + +pub(crate) trait PathWalker<'ctx> { + fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32); + fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32); + fn curve_to( + &mut self, + ctx: ContextRef<'ctx>, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + x3: f32, + y3: f32, + ); + fn close_path(&mut 
self, ctx: ContextRef<'ctx>);
+    /// Handles a rectangle whose opposite corners are `(x1, y1)` and `(x2, y2)`.
+    ///
+    /// The default implementation traces the rectangle as one closed subpath:
+    /// a `move_to` to the first corner, a `line_to` to each of the remaining
+    /// three corners, then `close_path`.
+    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
+        self.move_to(ctx, x1, y1);
+        // Only the first corner starts a subpath; the other corners must be
+        // joined with line segments, otherwise no edges are produced at all.
+        self.line_to(ctx, x2, y1);
+        self.line_to(ctx, x2, y2);
+        self.line_to(ctx, x1, y2);
+        self.close_path(ctx);
+    }
+}
+
+/// Lets a mutable reference to a walker be used wherever a walker is expected;
+/// every method simply forwards to the referenced `T`.
+impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T {
+    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
+        T::move_to(self, ctx, x, y);
+    }
+
+    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
+        T::line_to(self, ctx, x, y);
+    }
+
+    fn curve_to(
+        &mut self,
+        ctx: ContextRef<'ctx>,
+        x1: f32,
+        y1: f32,
+        x2: f32,
+        y2: f32,
+        x3: f32,
+        y3: f32,
+    ) {
+        T::curve_to(self, ctx, x1, y1, x2, y2, x3, y3);
+    }
+
+    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
+        T::close_path(self, ctx);
+    }
+
+    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
+        T::rect_to(self, ctx, x1, y1, x2, y2);
+    }
+}
+
+/// Wrapper around a raw `fz_path` pointer that calls `fz_drop_path` on drop.
+pub(crate) struct Path<'ctx> {
+    ptr: *mut fz_path,
+    ctx: ContextRef<'ctx>,
+}
+
+impl<'ctx> Path<'ctx> {
+    /// Walks the path with `fz_walk_path`, dispatching each segment to the
+    /// matching method of `walker`.
+    ///
+    /// `quadto`, `curvetov` and `curvetoy` are left unset in the callback table.
+    /// NOTE(review): this relies on MuPDF converting those segment kinds to
+    /// `curveto` calls when no callback is supplied -- confirm against path.h.
+    pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) {
+        // SAFETY: `walker` lives on this stack frame for the whole call and the
+        // callbacks below cast `arg` back to the same `W`.
+        // NOTE(review): assumes `self.ptr` is a valid path for `self.ctx` and
+        // that MuPDF does not retain `arg` after returning -- confirm.
+        unsafe {
+            fz_walk_path(
+                self.ctx.0.get(),
+                self.ptr,
+                const {
+                    &fz_path_walker {
+                        moveto: Some(Self::move_to_fn::<W>),
+                        lineto: Some(Self::line_to_fn::<W>),
+                        curveto: Some(Self::curve_to_fn::<W>),
+                        closepath: Some(Self::close_path_fn::<W>),
+                        quadto: None,
+                        curvetov: None,
+                        curvetoy: None,
+                        rectto: Some(Self::rect_to_fn::<W>),
+                    }
+                },
+                (&raw mut walker).cast(),
+            );
+        }
+    }
+    /// C callback for `moveto`; `arg` must point to a live `W`.
+    unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>(
+        ctx: *mut fz_context,
+        arg: *mut c_void,
+        x: f32,
+        y: f32,
+    ) {
+        let ctx = unsafe { ContextRef::from_ptr(ctx) };
+        let this = unsafe { &mut *arg.cast::<W>() };
+        this.move_to(ctx, x, y);
+    }
+    /// C callback for `lineto`; `arg` must point to a live `W`.
+    unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>(
+        ctx: *mut fz_context,
+        arg: *mut c_void,
+        x: f32,
+        y: f32,
+    ) {
+        let ctx = unsafe { ContextRef::from_ptr(ctx) };
+        let this = unsafe { &mut *arg.cast::<W>() };
+        this.line_to(ctx, x, y);
+    }
+    /// C callback for `curveto`; `arg` must point to a live `W`.
+    unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>(
+        ctx: *mut fz_context,
+        arg: *mut c_void,
+        x1: f32,
+        y1: f32,
+        x2: f32,
+        y2: f32,
+        x3: f32,
+        y3: f32,
+    ) {
+        let ctx = unsafe { ContextRef::from_ptr(ctx) };
+        let this = unsafe { &mut *arg.cast::<W>() };
+        this.curve_to(ctx, x1, y1, x2, y2, x3, y3);
+    }
+    /// C callback for `closepath`; `arg` must point to a live `W`.
+    unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>(
+        ctx: *mut fz_context,
+        arg: *mut c_void,
+    ) {
+        let ctx = unsafe { ContextRef::from_ptr(ctx) };
+        let this = unsafe { &mut *arg.cast::<W>() };
+        this.close_path(ctx);
+    }
+    /// C callback for `rectto`; `arg` must point to a live `W`.
+    unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>(
+        ctx: *mut fz_context,
+        arg: *mut c_void,
+        x1: f32,
+        y1: f32,
+        x2: f32,
+        y2: f32,
+    ) {
+        let ctx = unsafe { ContextRef::from_ptr(ctx) };
+        let this = unsafe { &mut *arg.cast::<W>() };
+        this.rect_to(ctx, x1, y1, x2, y2);
+    }
+}
+
+impl<'ctx> Drop for Path<'ctx> {
+    fn drop(&mut self) {
+        // NOTE(review): assumes this wrapper holds its own reference to the
+        // path -- confirm where `Path` is constructed.
+        unsafe {
+            fz_drop_path(self.ctx.0.get(), self.ptr);
+        }
+    }
+}
+
+/// Wrapper around a raw `fz_text` pointer that calls `fz_drop_text` on drop.
+pub(crate) struct Text<'ctx> {
+    ptr: *mut fz_text,
+    ctx: ContextRef<'ctx>,
+}
+
+impl<'ctx> Drop for Text<'ctx> {
+    fn drop(&mut self) {
+        // NOTE(review): assumes this wrapper holds its own reference to the
+        // text object -- confirm where `Text` is constructed.
+        unsafe {
+            fz_drop_text(self.ctx.0.get(), self.ptr);
+        }
+    }
+}
+
+impl<'ctx> Text<'ctx> {
+    /// Returns an iterator over the text's spans, starting at `head` and
+    /// following each span's `next` pointer until it is null.
+    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
+        TextSpanIter {
+            // A null `head` yields an empty iterator.
+            ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) },
+            ctx: self.ctx,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// Iterator over the linked list of spans borrowed from a [`Text`].
+#[derive(Clone)]
+pub(crate) struct TextSpanIter<'a, 'ctx> {
+    ptr: Option<&'a UnsafeCell<fz_text_span>>,
+    ctx: ContextRef<'ctx>,
+    _phantom: PhantomData<&'a Text<'ctx>>,
+}
+
+impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
+    type Item = TextSpanRef<'a, 'ctx>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let ptr = self.ptr?;
+        // Advance to the successor before handing out the current span.
+        self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) };
+        Some(TextSpanRef {
+            ptr,
+            ctx: self.ctx,
+            _phantom: PhantomData,
+        })
+    }
+}
+
+/// Borrowed handle to a single `fz_text_span` of a [`Text`].
+#[derive(Copy, Clone)]
+pub(crate) struct TextSpanRef<'a, 'ctx> {
+    ptr: &'a UnsafeCell<fz_text_span>,
+    ctx: ContextRef<'ctx>,
+    _phantom: PhantomData<&'a Text<'ctx>>,
+}
+
+/// Writing direction of a text span, derived from the span's `wmode` value.
+#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
+pub(crate) enum WriteMode {
+    Horizontal,
+    Vertical,
+}
+
+impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
+    /// Returns the underlying span cell.
+    pub(crate) fn get(self) -> &'a UnsafeCell<fz_text_span> {
+        self.ptr
+    }
+    /// Returns a handle to the font referenced by the span's `font` field.
+    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
+        FontRef {
+            // NOTE(review): assumes `font` is non-null for every span -- confirm.
+            ptr: unsafe { &*(*self.ptr.get()).font.cast::<UnsafeCell<fz_font>>() },
+            ctx: self.ctx,
+            _phantom: PhantomData,
+        }
+    }
+    /// Returns a copy of the span's `trm` matrix.
+    pub(crate) fn trm(self) -> fz_matrix {
+        unsafe { (*self.ptr.get()).trm }
+    }
+    /// Returns `Vertical` when the span's `wmode` is non-zero, else `Horizontal`.
+    pub(crate) fn write_mode(self) -> WriteMode {
+        if unsafe { (*self.ptr.get()).wmode() != 0 } {
+            WriteMode::Vertical
+        } else {
+            WriteMode::Horizontal
+        }
+    }
+    /// Returns the span's items as a slice of `len` elements.
+    pub(crate) fn items(self) -> &'a [fz_text_item] {
+        let len = unsafe { (*self.ptr.get()).len } as usize;
+        // `from_raw_parts` requires a non-null pointer even for empty slices,
+        // so an empty span never touches `items`.
+        if len == 0 {
+            return &[];
+        }
+        unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) }
+    }
+}
+
+/// Borrowed handle to an `fz_font` referenced by a text span.
+#[derive(Clone, Copy)]
+pub(crate) struct FontRef<'a, 'ctx> {
+    ptr: &'a UnsafeCell<fz_font>,
+    ctx: ContextRef<'ctx>,
+    _phantom: PhantomData<&'a Text<'ctx>>,
+}
+
+impl<'a, 'ctx> FontRef<'a, 'ctx> {
+    /// Returns the underlying font cell.
+    pub(crate) fn get(self) -> &'a UnsafeCell<fz_font> {
+        self.ptr
+    }
+    /// Returns the name reported by `fz_font_name`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the name is not valid UTF-8.
+    pub(crate) fn name(self) -> &'a str {
+        unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) }
+            .to_str()
+            .expect("font name isn't valid UTF-8")
+    }
+    /// Returns `true` when `fz_font_is_bold` reports a non-zero value.
+    pub(crate) fn is_bold(self) -> bool {
+        unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 }
+    }
+    /// Returns `true` when `fz_font_is_italic` reports a non-zero value.
+    pub(crate) fn is_italic(self) -> bool {
+        unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 }
+    }
+    /// Returns the value of `fz_font_ascender` for this font.
+    pub(crate) fn ascender(self) -> f32 {
+        unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) }
+    }
+    /// Returns the value of `fz_font_descender` for this font.
+    pub(crate) fn descender(self) -> f32 {
+        unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) }
+    }
+}
+
+/// Safe wrapper around `fz_transform_point`; all arguments are passed by value.
+pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
+    unsafe { fz_transform_point(point, m) }
+}
+
+/// Safe wrapper around `fz_transform_point_xy`; all arguments are passed by value.
+pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point {
+    unsafe { fz_transform_point_xy(x, y, m) }
+}
+
+/// Safe wrapper around `fz_transform_vector`; all arguments are passed by value.
+pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point {
+    unsafe { fz_transform_vector(vector, m) }
+}
+
+/// Safe wrapper around `fz_matrix_expansion`; the matrix is passed by value.
+pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 {
+    unsafe { fz_matrix_expansion(m) }
+}
+
+/// Safe wrapper around `fz_concat`; both matrices are passed by value.
+pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix {
+    unsafe { fz_concat(left, right) }
+}
+
+/// Returns the component-wise sum of `a` and `b`.
+pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point {
+    fz_point {
+        x: a.x + b.x,
+        y: a.y + b.y,
+    }
+}
+
+/// Returns the point whose `x` and `y` are the smaller of the two inputs' components.
+pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point {
+    fz_point {
+        x: a.x.min(b.x),
+        y: a.y.min(b.y),
+    }
+}
+
+/// Returns the point whose `x` and `y` are the larger of the two inputs' components.
+pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point {
+    fz_point {
+        x: a.x.max(b.x),
+        y: a.y.max(b.y),
+    }
+}