diff --git a/Cargo.lock b/Cargo.lock index 4de1b68..0281106 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,6 +154,19 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "mupdf" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6499267155b9ae03ff8e53c456d0bfff988b2647d62ff1df038f39ebe93a0c" +dependencies = [ + "bitflags", + "mupdf-sys", + "num_enum", + "once_cell", + "zerocopy", +] + [[package]] name = "mupdf-sys" version = "0.5.0" @@ -177,13 +190,41 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "parse_powerisa_pdf" version = "0.1.0" dependencies = [ "indexmap", "libm", - "mupdf-sys", + "mupdf", "quick-xml", "serde", ] @@ -194,6 +235,15 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.104" @@ -257,6 +307,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "serde" version = "1.0.228" @@ -304,6 +360,36 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.10+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +dependencies = [ + "winnow", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -316,6 +402,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + [[package]] name = "zerocopy" version = "0.8.31" diff --git a/Cargo.toml b/Cargo.toml index 09de0ba..224dad3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,6 @@ rust-version = "1.89.0" [dependencies] indexmap = "2.12.1" libm = "0.2.15" -mupdf-sys = { version = "0.5.0", default-features = false } +mupdf = { version = "0.5.0", default-features = false } quick-xml = { version = "0.38.4", features = ["serialize"] } serde = { version = "1.0.228", features = ["derive"] } diff --git a/parse_powerisa_pdf/parse_powerisa_pdf.py b/parse_powerisa_pdf/parse_powerisa_pdf.py index 3c2afe5..a4afd09 100755 --- a/parse_powerisa_pdf/parse_powerisa_pdf.py +++ b/parse_powerisa_pdf/parse_powerisa_pdf.py @@ -765,7 +765,7 @@ class Page: unprocessed_non_text: SetById[LTLine | LTRect] @staticmethod - def from_lt_page(page_num: int, page: LTPage, first_seen_fonts: defaultdict[str, set[float]]) -> Page: + def from_lt_page(page_num: int, page: LTPage) -> Page: qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree) unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char])) unprocessed_non_text: SetById[LTLine | LTRect] = SetById() @@ -804,25 +804,20 @@ class Page: raise AssertionError( f"char not in text section: {element}\npage_num={page_num}") continue - font_size = round(element.size, 3) char = Char( text=element.get_text(), - font=Font(font_name=element.fontname, size=font_size), + font=Font(font_name=element.fontname, size=round(element.size, 3)), adv=element.adv, min_x=element.x0, min_y=element.y0, max_x=element.x1, max_y=element.y1, ) - if font_size not in first_seen_fonts[element.fontname]: - first_seen_fonts[element.fontname].add(font_size) - print(f"first seen font: {element.fontname!r} {font_size}: page {page_num} {char!r}") qt[text_section].insert(char.min_x, char.min_y, char) unprocessed_chars[text_section][char.font].add(char) - for text_section, i in unprocessed_chars.items(): - for chars in i.values(): - chars.sort(key=Char.top_down_left_to_right_sort_key) - print(f"first char: {text_section!r}: {next(iter(chars), None)!r}") + for i in unprocessed_chars.values(): + for j in i.values(): + j.sort(key=Char.top_down_left_to_right_sort_key) unknown_fonts=[] unknown_font_errors=[] for i in unprocessed_chars.values(): @@ -1186,14 +1181,13 @@ class Parser: def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]: if page_numbers is not None: page_numbers = sorted(i - 1 for i in page_numbers) - first_seen_fonts = defaultdict(set) for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)): if page_numbers is not None: page_num = page_numbers[i] + 1 else: page_num = i + 1 print(f"page {page_num}") - yield Page.from_lt_page(page_num=page_num, page=page, first_seen_fonts=first_seen_fonts) + yield Page.from_lt_page(page_num=page_num, page=page) def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None): self.pages = Pages(pages_gen=Parser.__pages_gen( @@ -1509,7 +1503,7 @@ class Parser: f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}") if len(v_lines) < 2: raise InsnParseError( - f"instruction bit fields box has too few vertical lines:\n{v_lines}") + f"instruction bit fields box has too few vertical lines:\n{h_lines}") bottom_line, top_line = h_lines box_min_x = v_lines[0].x0 box_max_x = v_lines[-1].x0 diff --git a/src/main.rs b/src/main.rs index a6a36e6..d9c54ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,30 +1,20 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::{ - mupdf_ffi::{ - WriteMode, add_points, point_max_components, point_min_components, transform_vector, - }, - quad_tree::QuadTree, -}; +use crate::quad_tree::QuadTree; use indexmap::IndexSet; -use mupdf_sys::{fz_matrix, fz_point, fz_text_item}; use non_nan_float::NonNaNF32; use std::{ - backtrace::Backtrace, - borrow::Cow, + borrow::{Borrow, Cow}, cell::RefCell, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, - convert::Infallible, error::Error, fmt, num::NonZero, - ops::ControlFlow, rc::Rc, sync::OnceLock, }; -mod mupdf_ffi; mod quad_tree; mod xml_tree; @@ -51,12 +41,6 @@ mod non_nan_float { pub(crate) const fn get(self) -> f32 { self.0 } - pub(crate) const fn min(self, other: Self) -> Self { - Self(self.0.min(other.0)) - } - pub(crate) const fn max(self, other: Self) -> Self { - Self(self.0.max(other.0)) - } } impl std::hash::Hash for NonNaNF32 { @@ -84,22 +68,6 @@ mod non_nan_float { } } -const fn str_eq(a: &str, b: &str) -> bool { - let a = a.as_bytes(); - let b = b.as_bytes(); - if a.len() != b.len() { - return false; - } - let mut i = 0; - while i < a.len() { - if a[i] != b[i] { - return false; - } - i += 1; - } - true -} - macro_rules! make_enum_font { ( enum $Font:ident { @@ -107,7 +75,7 @@ macro_rules! make_enum_font { $Other:ident $other_body:tt, $(#[group] $KnownFontGroup:ident { - $(#[name_with_tag = $known_font_name_with_tag:literal, size = $known_font_size:literal] + $(#[name = $known_font_name:literal, size = $known_font_size:literal] $KnownFont:ident,)* },)* } @@ -133,32 +101,10 @@ macro_rules! make_enum_font { } impl $Font { - const fn extract_font_name_from_font_name_with_tag(font_name_with_tag: &str) -> &str { - if let [b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'+',_,..] = font_name_with_tag.as_bytes() { - font_name_with_tag.split_at(7).1 - } else { - panic!("invalid font name with id") - } - } - const fn known_from_name_with_tag(font_name_with_tag: &str, size: NonNaNF32) -> Option { - match size.get() { - $($($known_font_size if str_eq(font_name_with_tag, $known_font_name_with_tag) => Some(Self::$KnownFont),)*)* - _ => None, - } - } - const fn new_known(font_name: &str, size: NonNaNF32) -> Option { - match size.get() { - $($($known_font_size if str_eq(font_name, const { - Self::extract_font_name_from_font_name_with_tag($known_font_name_with_tag) - }) => Some(Self::$KnownFont),)*)* - _ => None, - } - } fn new(font_name: &str, size: NonNaNF32) -> Self { - if let Some(v) = Self::new_known(font_name, size) { - v - } else { - Self::Other { + match (font_name, size.get()) { + $($(($known_font_name, $known_font_size) => Self::$KnownFont,)*)* + _ => Self::Other { font_name: Box::from(font_name), size, } @@ -173,7 +119,7 @@ macro_rules! make_enum_font { const fn font_name(&self) -> &str { match self { Self::$Other { font_name, .. } => font_name, - $($(Self::$KnownFont => const { Self::extract_font_name_from_font_name_with_tag($known_font_name_with_tag) },)*)* + $($(Self::$KnownFont => $known_font_name,)*)* } } const fn known_font_group(&self) -> Option { @@ -189,17 +135,6 @@ macro_rules! make_enum_font { } } } - - const _: () = { - $($( - let (known_font_name, known_font) = const { - let known_font_name = Font::extract_font_name_from_font_name_with_tag($known_font_name_with_tag); - (known_font_name, &Font::new_known(known_font_name, NonNaNF32::new($known_font_size).unwrap()).unwrap()) - }; - assert!(str_eq(known_font_name, known_font.font_name())); - assert!(matches!(known_font, Font::$KnownFont)); - )*)* - }; }; } @@ -212,356 +147,352 @@ make_enum_font! { }, #[group] InsnHeader { - #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963] + #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963] InsnHeader, }, #[group] RtlFnHeader { - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 9.963] + #[name = "APUYSQ+zcoN-Regular", size = 9.963] RtlFnHeader, }, #[group] PageHeader { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 9.963] + #[name = "MJBFWM+DejaVuSansCondensed", size = 9.963] PageHeader, }, #[group] PageFooter { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.981] + #[name = "MJBFWM+DejaVuSansCondensed", size = 4.981] PageFooter, }, #[group] InsnDesc { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 8.966] + #[name = "MJBFWM+DejaVuSansCondensed", size = 8.966] InsnDesc0, - #[name_with_tag = "FZTIYT+CMMI9", size = 8.966] + #[name = "FZTIYT+CMMI9", size = 8.966] InsnDesc1, - #[name_with_tag = "ONUAYC+CMSSI9", size = 8.966] + #[name = "ONUAYC+CMSSI9", size = 8.966] InsnDesc2, - #[name_with_tag = "TNGBFZ+CMSY9", size = 8.966] + #[name = "TNGBFZ+CMSY9", size = 8.966] InsnDesc3, - #[name_with_tag = "WHMZPU+CMEX9", size = 8.966] + #[name = "WHMZPU+CMEX9", size = 8.966] InsnDesc4, - #[name_with_tag = "ZJTMSG+CMSS9", size = 8.966] + #[name = "ZJTMSG+CMSS9", size = 8.966] InsnDesc5, }, #[group] InsnDescMisc { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 2.377] + #[name = "MJBFWM+DejaVuSansCondensed", size = 2.377] InsnDescMisc0, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 2.561] + #[name = "MJBFWM+DejaVuSansCondensed", size = 2.561] InsnDescMisc1, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.492] + #[name = "MJBFWM+DejaVuSansCondensed", size = 4.492] InsnDescMisc2, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.641] + #[name = "MJBFWM+DejaVuSansCondensed", size = 4.641] InsnDescMisc3, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.772] + #[name = "MJBFWM+DejaVuSansCondensed", size = 4.772] InsnDescMisc4, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.864] + #[name = "MJBFWM+DejaVuSansCondensed", size = 4.864] InsnDescMisc5, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.925] + #[name = "MJBFWM+DejaVuSansCondensed", size = 4.925] InsnDescMisc6, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.097] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.097] InsnDescMisc7, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.123] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.123] InsnDescMisc8, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.131] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.131] InsnDescMisc9, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.516] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.516] InsnDescMisc10, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.604] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.604] InsnDescMisc11, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.634] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.634] InsnDescMisc12, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.906] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.906] InsnDescMisc13, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.033] + #[name = "MJBFWM+DejaVuSansCondensed", size = 6.033] InsnDescMisc14, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.068] + #[name = "MJBFWM+DejaVuSansCondensed", size = 6.068] InsnDescMisc15, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.213] + #[name = "MJBFWM+DejaVuSansCondensed", size = 6.213] InsnDescMisc16, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.238] + #[name = "MJBFWM+DejaVuSansCondensed", size = 6.252] InsnDescMisc17, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.252] + #[name = "MJBFWM+DejaVuSansCondensed", size = 6.962] InsnDescMisc18, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.962] + #[name = "MJBFWM+DejaVuSansCondensed", size = 7.977] InsnDescMisc19, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 7.977] - InsnDescMisc20, - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 8.506] - InsnDescMisc21, }, #[group] InsnDescCode { - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 6.974] + #[name = "APUYSQ+zcoN-Regular", size = 6.974] InsnDescCode, }, #[group] InsnDescCodeMisc { - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 3.587] + #[name = "APUYSQ+zcoN-Regular", size = 3.587] InsnDescCodeMisc0, - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 4.483] + #[name = "APUYSQ+zcoN-Regular", size = 4.483] InsnDescCodeMisc1, }, #[group] InsnDescItalic { - #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966] + #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966] InsnDescItalic, }, #[group] InsnDescBold { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966] InsnDescBold, }, #[group] InsnDescBoldItalic { - #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966] + #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966] InsnDescBoldItalic, }, #[group] InsnDescSmall { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 7.97] + #[name = "MJBFWM+DejaVuSansCondensed", size = 7.97] InsnDescSmall, }, #[group] InsnDescSmallItalic { - #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97] + #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97] InsnDescSmallItalic, }, #[group] InsnDescSmallBold { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97] InsnDescSmallBold, }, #[group] InsnDescSmallBoldItalic { - #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97] + #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97] InsnDescSmallBoldItalic, }, #[group] InsnDescBoldMisc { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.21] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.21] InsnDescBoldMisc0, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399] InsnDescBoldMisc1, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.763] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.763] InsnDescBoldMisc2, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946] InsnDescBoldMisc3, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949] InsnDescBoldMisc4, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999] InsnDescBoldMisc5, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065] InsnDescBoldMisc6, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086] InsnDescBoldMisc7, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183] InsnDescBoldMisc8, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686] InsnDescBoldMisc9, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744] InsnDescBoldMisc10, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825] InsnDescBoldMisc11, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842] InsnDescBoldMisc12, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857] InsnDescBoldMisc13, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979] InsnDescBoldMisc14, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032] InsnDescBoldMisc15, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112] InsnDescBoldMisc16, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161] InsnDescBoldMisc17, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206] InsnDescBoldMisc18, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353] InsnDescBoldMisc19, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378] InsnDescBoldMisc20, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434] InsnDescBoldMisc21, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595] InsnDescBoldMisc22, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619] InsnDescBoldMisc23, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647] InsnDescBoldMisc24, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68] InsnDescBoldMisc25, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693] InsnDescBoldMisc26, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736] InsnDescBoldMisc27, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.781] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.781] InsnDescBoldMisc28, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802] InsnDescBoldMisc29, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995] InsnDescBoldMisc30, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201] InsnDescBoldMisc31, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258] InsnDescBoldMisc32, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363] InsnDescBoldMisc33, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442] InsnDescBoldMisc34, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473] InsnDescBoldMisc35, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485] InsnDescBoldMisc36, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512] InsnDescBoldMisc37, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543] InsnDescBoldMisc38, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613] InsnDescBoldMisc39, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.744] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.744] InsnDescBoldMisc40, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774] InsnDescBoldMisc41, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809] InsnDescBoldMisc42, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849] InsnDescBoldMisc43, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911] InsnDescBoldMisc44, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92] InsnDescBoldMisc45, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962] InsnDescBoldMisc46, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981] InsnDescBoldMisc47, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146] InsnDescBoldMisc48, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213] InsnDescBoldMisc49, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221] InsnDescBoldMisc50, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243] InsnDescBoldMisc51, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55] InsnDescBoldMisc52, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62] InsnDescBoldMisc53, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.699] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.699] InsnDescBoldMisc54, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725] InsnDescBoldMisc55, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751] InsnDescBoldMisc56, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856] InsnDescBoldMisc57, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029] InsnDescBoldMisc58, - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406] InsnDescBoldMisc59, }, #[group] InsnDescSubscript { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.978] + #[name = "MJBFWM+DejaVuSansCondensed", size = 5.978] InsnDescSubscript, }, #[group] InsnDescBoldSubscript { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978] InsnDescBoldSubscript, }, #[group] InsnDescItalicSubscript { - #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978] + #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978] InsnDescItalicSubscript, }, #[group] InsnDescBoldItalicSubscript { - #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978] + #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978] InsnDescBoldItalicSubscript, }, #[group] InsnExtMnemonic { - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 8.966] + #[name = "APUYSQ+zcoN-Regular", size = 8.966] InsnExtMnemonic, }, #[group] InsnCode { - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 7.97] + #[name = "APUYSQ+zcoN-Regular", size = 7.97] InsnCode0, - #[name_with_tag = "RRFUNA+CMSY8", size = 7.97] + #[name = "RRFUNA+CMSY8", size = 7.97] InsnCode1, - #[name_with_tag = "HPXOZC+CMSS8", size = 7.97] + #[name = "HPXOZC+CMSS8", size = 7.97] InsnCode2, }, #[group] InsnCodeSubscript { - #[name_with_tag = "APUYSQ+zcoN-Regular", size = 5.978] + #[name = "APUYSQ+zcoN-Regular", size = 5.978] InsnCodeSubscript0, - #[name_with_tag = "DBQTKF+CMSY6", size = 5.978] + #[name = "DBQTKF+CMSY6", size = 5.978] InsnCodeSubscript1, }, #[group] TitlePageBig { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787] TitlePageBig, }, #[group] TitlePageVersion { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963] TitlePageVersion, }, #[group] TitlePageTm { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974] TitlePageTm, }, #[group] TitlePageRev { - #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.974] + #[name = "MJBFWM+DejaVuSansCondensed", size = 6.974] TitlePageRev, }, #[group] TitlePageBook { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663] TitlePageBook, }, #[group] LegalPageItalic { - #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963] + #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963] LegalPageItalic, }, #[group] ChangeSummaryPageBold { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955] ChangeSummaryPageBold, }, #[group] ChapterTitle { - #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215] + #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215] ChapterTitle, }, #[group] MathMisc { - #[name_with_tag = "AAJMKT+CMMI6", size = 5.978] + #[name = "AAJMKT+CMMI6", size = 5.978] MathMisc0, - #[name_with_tag = "CUTMFD+CMSSI8", size = 5.978] + #[name = "CUTMFD+CMSSI8", size = 5.978] MathMisc1, - #[name_with_tag = "CUTMFD+CMSSI8", size = 7.97] + #[name = "CUTMFD+CMSSI8", size = 7.97] MathMisc2, - #[name_with_tag = "FZTIYT+CMMI9", size = 5.734] + #[name = "FZTIYT+CMMI9", size = 5.734] MathMisc3, - #[name_with_tag = "FZTIYT+CMMI9", size = 7.168] + #[name = "FZTIYT+CMMI9", size = 7.168] MathMisc4, - #[name_with_tag = "HONFQS+CMMI8", size = 7.97] + #[name = "HONFQS+CMMI8", size = 7.97] MathMisc5, - #[name_with_tag = "HPXOZC+CMSS8", size = 5.978] + #[name = "HPXOZC+CMSS8", size = 5.978] MathMisc6, - #[name_with_tag = "LLVRDD+CMSY10", size = 11.955] + #[name = "LLVRDD+CMSY10", size = 11.955] MathMisc7, - #[name_with_tag = "ZJTMSG+CMSS9", size = 7.168] + #[name = "ZJTMSG+CMSS9", size = 7.168] MathMisc8, }, } @@ -572,6 +503,21 @@ impl Font { self.size() * const { 3.985 / Font::InsnCode0.size() } } const fn line_height_helper(&self) -> f32 { + const fn str_eq(a: &str, b: &str) -> bool { + let a = a.as_bytes(); + let b = b.as_bytes(); + if a.len() != b.len() { + return false; + } + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + true + } let font_name = self.font_name(); let mut i = 0; while i < KnownFontGroup::INSN_CODE_FONT_GROUPS.len() { @@ -692,7 +638,7 @@ impl ParsedTextLine { } } fn write_xml_lines( - lines: impl IntoIterator>, + lines: impl IntoIterator>, parent: &mut xml_tree::Element, trailing_nl: bool, preceding_nl: bool, @@ -702,7 +648,7 @@ impl ParsedTextLine { } let mut first = true; for line in lines { - let line = std::borrow::Borrow::borrow(&line); + let line = line.borrow(); if first { first = false; } else { @@ -1193,8 +1139,8 @@ impl<'a> ElementBodyBuilder<'a> { } self.shrink_stack(new_len); } - fn write_text(&mut self, text: impl std::borrow::Borrow) { - let text = std::borrow::Borrow::borrow(&text); + fn write_text(&mut self, text: impl Borrow) { + let text = text.borrow(); let insert_point = self.insert_point(); if let Some(child) = insert_point.children.last_mut() { child.tail += text; @@ -1339,14 +1285,14 @@ impl fmt::Display for InsnBitFields { impl InsnBitFields { fn write_xml_fields( - fields: impl IntoIterator>, + fields: impl IntoIterator>, parent: &mut xml_tree::Element, ) { let fields_elm = parent.sub_element("fields".into(), []); fields_elm.text = "\n".into(); fields_elm.tail = "\n".into(); for field in fields { - std::borrow::Borrow::borrow(&field).write_xml(fields_elm); + field.borrow().write_xml(fields_elm); } } fn write_xml(&self, parent: &mut xml_tree::Element) { @@ -1509,95 +1455,10 @@ enum PageItem { LineOrRect(LineOrRect), } -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] enum LineOrRect { - Line(Line), - Rect(Rect), -} - -impl LineOrRect { - fn width(self) -> f32 { - match self { - Self::Line(v) => v.width(), - Self::Rect(v) => v.width(), - } - } - fn height(self) -> f32 { - match self { - Self::Line(v) => v.height(), - Self::Rect(v) => v.height(), - } - } - fn min_x(self) -> NonNaNF32 { - match self { - Self::Line(v) => v.min_x(), - Self::Rect(v) => v.min_x, - } - } - fn max_x(self) -> NonNaNF32 { - match self { - Self::Line(v) => v.max_x(), - Self::Rect(v) => v.max_x, - } - } - fn min_y(self) -> NonNaNF32 { - match self { - Self::Line(v) => v.min_y(), - Self::Rect(v) => v.min_y, - } - } - fn max_y(self) -> NonNaNF32 { - match self { - Self::Line(v) => v.max_y(), - Self::Rect(v) => v.max_y, - } - } -} - -#[derive(Copy, Clone, Debug)] -struct Line { - p0_x: NonNaNF32, - p0_y: NonNaNF32, - p1_x: NonNaNF32, - p1_y: NonNaNF32, -} - -impl Line { - fn width(self) -> f32 { - f32::abs(self.p0_x.get() - self.p1_x.get()) - } - fn height(self) -> f32 { - f32::abs(self.p0_y.get() - self.p1_y.get()) - } - fn min_x(self) -> NonNaNF32 { - self.p0_x.min(self.p1_x) - } - fn max_x(self) -> NonNaNF32 { - self.p0_x.max(self.p1_x) - } - fn min_y(self) -> NonNaNF32 { - self.p0_y.min(self.p1_y) - } - fn max_y(self) -> NonNaNF32 { - self.p0_y.max(self.p1_y) - } -} - -#[derive(Copy, Clone, Debug)] -struct Rect { - min_x: NonNaNF32, - max_x: NonNaNF32, - min_y: NonNaNF32, - max_y: NonNaNF32, -} - -impl Rect { - fn width(self) -> f32 { - self.max_x.get() - self.min_x.get() - } - fn height(self) -> f32 { - self.max_y.get() - self.min_y.get() - } + Line(()), + Rect(()), } #[derive(Debug)] @@ -1609,13 +1470,13 @@ struct Page { unprocessed_non_text: Rc>>, } -struct Pages<'ctx> { - pages_gen: Option>> + 'ctx>>, +struct Pages { + pages_gen: Option>>>>, pages: BTreeMap>, max_page_num: u32, } -impl<'ctx> fmt::Debug for Pages<'ctx> { +impl fmt::Debug for Pages { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Self { pages_gen, @@ -1633,10 +1494,8 @@ impl<'ctx> fmt::Debug for Pages<'ctx> { } } -impl<'ctx> Pages<'ctx> { - fn new( - pages_gen: Option>> + 'ctx>>, - ) -> Self { +impl Pages { + fn new(pages_gen: Option>>>>) -> Self { Self { pages_gen, pages: BTreeMap::new(), @@ -2011,52 +1870,13 @@ impl Insn { } #[derive(Debug)] -struct Parser<'ctx> { - pages: Pages<'ctx>, +struct Parser { + pages: Pages, text_section: TextSection, insns: Vec, } -enum ExtractInsnsError { - InsnParseError(String, std::backtrace::Backtrace), - PageParseError(String, std::backtrace::Backtrace), - Other(Box), -} - -impl fmt::Display for ExtractInsnsError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let backtrace = match self { - ExtractInsnsError::InsnParseError(msg, backtrace) => { - writeln!(f, "instruction parse error: {msg}")?; - backtrace - } - ExtractInsnsError::PageParseError(msg, backtrace) => { - writeln!(f, "page parse error: {msg}")?; - backtrace - } - ExtractInsnsError::Other(e) => return fmt::Display::fmt(&e, f), - }; - backtrace.fmt(f) - } -} - -#[derive(Clone, Debug)] -struct ErrorWithNote { - error: E, - note: String, -} - -impl fmt::Display for ErrorWithNote { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { error, note } = self; - fmt::Display::fmt(error, f)?; - write!(f, "\nnote: {note}") - } -} - -impl Error for ErrorWithNote {} - -impl<'ctx> Parser<'ctx> { +impl Parser { fn new() -> Self { Self { pages: Pages::new(None), @@ -2082,40 +1902,34 @@ impl<'ctx> Parser<'ctx> { .clone()) } fn pages_gen( - ctx: impl Into>, file: &str, page_numbers: Option>>, - ) -> Result>> + 'ctx>, Box> { - let ctx = ctx.into(); - let page_indexes = page_numbers.map(|page_numbers| { - let mut retval = Vec::from_iter(page_numbers.into_iter().map(|v| v.get() as usize - 1)); + ) -> Result>>>, Box> { + let page_numbers = page_numbers.map(|page_numbers| { + let mut retval = Vec::from_iter(page_numbers.into_iter().map(|v| v.get() - 1)); retval.sort(); retval }); - let document = mupdf_ffi::Document::open(ctx, &std::ffi::CString::new(file)?)?; - let page_count = document.page_count()?; - let page_indexes = page_indexes.unwrap_or_else(|| (0..page_count).collect()); - let mut first_seen_fonts = BTreeMap::new(); - Ok(Box::new(page_indexes.into_iter().map(move |page_index| { - let page_num = page_index as u32 + 1; - println!("page {page_num}"); - let page = document - .load_page(page_index) - .map_err(|e| format!("error reading pdf page {page_num}: {e}"))?; - Ok( - Page::from_mupdf_page(page_num, &page, &mut first_seen_fonts) - .map_err(|e| format!("error reading pdf page {page_num}: {e}"))?, - ) - }))) + let document = mupdf::Document::open(file)?; + let pages: Vec = document.pages().and_then(|pages| pages.collect())?; + Ok(Box::new(pages.into_iter().enumerate().map( + move |(i, page)| { + let page_num = match &page_numbers { + Some(page_numbers) => page_numbers[i] + 1, + None => i as u32 + 1, + }; + println!("page {page_num}"); + Ok(Page::from_mupdf_page(page_num, page) + .map_err(|e| format!("error reading pdf page {page_num}: {e}"))?) + }, + ))) } fn parse_pdf>>( &mut self, - ctx: impl Into>, file: &str, page_numbers: Option, ) -> Result<(), Box> { self.pages = Pages::new(Some(Self::pages_gen( - ctx, file, page_numbers.map(|v| v.into_iter().collect()), )?)); @@ -2131,14 +1945,14 @@ impl<'ctx> Parser<'ctx> { } } } - fn note_text_section( + fn note_text_section( &mut self, - f: impl FnOnce(&mut Self) -> Result<(), E>, - ) -> Result<(), ErrorWithNote> { + f: impl FnOnce(&mut Self) -> Result<(), Box>, + ) -> Result<(), Box> { let start_text_section = self.text_section; match f(self) { Ok(()) => Ok(()), - Err(error) => { + Err(e) => { let note = if self.text_section == start_text_section { format!("text_section={:?}", self.text_section) } else { @@ -2147,1448 +1961,66 @@ impl<'ctx> Parser<'ctx> { self.text_section ) }; - Err(ErrorWithNote { error, note }) + Err(format!("{e}\nnote: {note}").into()) } } } - fn parse_text_section(&mut self) -> Result<(), ErrorWithNote>> { - match self.note_text_section(Self::extract_insns) { - Ok(()) => Ok(()), - Err( - e @ ErrorWithNote { - error: - ExtractInsnsError::InsnParseError(..) | ExtractInsnsError::PageParseError(..), - .. - }, - ) => { - println!("{e}"); - Ok(()) - } - Err(ErrorWithNote { - error: ExtractInsnsError::Other(error), - note, - }) => Err(ErrorWithNote { error, note }), - } - } - fn find_top_left_char_in_range( - &mut self, - min_x: f32, - max_x: f32, - min_y: f32, - max_y: f32, - allow_processed: bool, - ) -> Result, Box> { - let mut retval = None; - let page = self.page()?; - let unprocessed_chars = self.unprocessed_chars()?; - let ControlFlow::::Continue(()) = - page.qt[&self.text_section].range(min_x, max_x, min_y, max_y, |_x, _y, ch| { - let PageItem::Char(ch) = ch else { - return ControlFlow::Continue(()); - }; - if !allow_processed && !RefCell::borrow(&*unprocessed_chars)[&ch.font].contains(ch) - { - return ControlFlow::Continue(()); - } - match &mut retval { - None => retval = Some(ch.clone()), - Some(retval) - if ch.min_x.get() - ch.min_y.get() - < retval.min_x.get() - retval.min_y.get() => - { - *retval = ch.clone(); - } - Some(_) => {} - } - ControlFlow::Continue(()) - }); - Ok(retval) - } - fn extract_text_line( - &mut self, - start_char: Option, - mut start_min_y: f32, - min_x: f32, - max_x: f32, - fonts: TextLineFonts, - preceding_blank_lines: u32, - mut skip_initial_spaces: bool, - allowed_start_min_y_error: Option, - ) -> Result, ExtractInsnsError> { - let mut chars: Vec = Vec::new(); - let mut chars_set: IndexSet = IndexSet::new(); - if let Some(start_char) = start_char.clone() { - chars.push(start_char.clone()); - chars_set.insert(start_char); - } - if let Some(start_char) = start_char - && start_char.text == "*" - && self.text_section.page_num == 168 - && fonts - .subscript() - .is_some_and(|v| v.contains(&start_char.font)) - { - start_min_y = start_char.max_y.get() - fonts.regular()[0].size(); - } - let page = self.page().map_err(ExtractInsnsError::Other)?; - let unprocessed_chars = self.unprocessed_chars().map_err(ExtractInsnsError::Other)?; - let ControlFlow::::Continue(()) = page.qt[&self.text_section].range( - min_x - fonts.regular()[0].size() * 0.5, - max_x, - start_min_y - fonts.regular()[0].size() * 0.4, - start_min_y + fonts.regular()[0].size() * 0.6, - |_x, _y, ch| { - let PageItem::Char(ch) = ch else { - return ControlFlow::Continue(()); - }; - if !RefCell::borrow(&*unprocessed_chars)[&ch.font].contains(ch) - || chars_set.contains(ch) - { - return ControlFlow::Continue(()); - } - chars_set.insert(ch.clone()); - chars.push(ch.clone()); - ControlFlow::Continue(()) - }, - ); - if chars.is_empty() { - return Ok(None); - } - chars.sort_by(|a, b| (a.min_x, &a.text).cmp(&(b.min_x, &b.text))); - let mut regular_min_y = chars[0].min_y.get(); - let mut regular_max_y = chars[0].max_y.get(); - for ch in &chars { - let Some(kind) = fonts.get_kind(ch.font.clone(), BaselinePos::Below) else { - continue; - }; - if kind.sub_super() == FontVariantSubSuper::NotSubSuper { - regular_min_y = ch.min_y.get(); - regular_max_y = ch.max_y.get(); - break; - } - } - let mut retval = ParsedTextLine { - element: xml_tree::Element::new("text-line".into(), []), - regular_min_y, - regular_max_y, - fonts, - chars, - preceding_blank_lines, - }; - let mut text_and_tag_stacks: Vec<(String, Vec<&str>)> = Vec::new(); - let mut last_max_x = min_x; - let mut last_kind = None; - let mut last_char: Option = None; - for ch in &retval.chars { - let baseline_pos = if (ch.max_y.get() + ch.min_y.get()) * 0.5 - > (retval.regular_max_y + retval.regular_min_y) * 0.5 - { - BaselinePos::Above - } else { - BaselinePos::Below - }; - let Some(kind) = fonts.get_kind(ch.font.clone(), baseline_pos) else { - println!( - "font kind is None:\n\ - regular_min_y={}\n\ - fonts={fonts:?}\n\ - ch={ch:?}\n\ - baseline_pos={baseline_pos:?}\n\ - chars[0]={:?}", - retval.regular_min_y, retval.chars[0], - ); - return Ok(None); - }; - let space_kind = match last_kind { - None => kind, - Some(last_kind) if last_kind != kind => TextLineFontKind::Regular, - _ => kind, - }; - let (space_fonts, _) = fonts - .get_fonts(space_kind) - .unwrap_or((fonts.regular(), None)); - let space_width = ch.min_x.get() - last_max_x; - let space_count_f = space_width / space_fonts[0].space_width(); - let mut space_count = space_count_f.round() as usize; - if space_count == 0 && space_count_f > 0.35 { - space_count = 1 - } - if space_count_f > 0.25 && f32::abs(space_count as f32 - space_count_f) > 0.15 { - println!("spaces: space_count_f={space_count_f} space_width={space_width}"); - } - if space_count > 0 && !skip_initial_spaces { - text_and_tag_stacks.push(( - " ".repeat(space_count), - space_kind.text_line_tags().collect(), - )); - } - skip_initial_spaces = false; - if ch.text == "\u{0338}" - && let Some(last_char) = last_char - && last_char.text == "=" - && f32::abs(ch.min_x.get() - last_char.min_x.get()) < 0.01 - && f32::abs(ch.min_y.get() - last_char.min_y.get()) < 0.01 - { - *text_and_tag_stacks - .last_mut() - .expect("known to be non-empty") = ("\u{2260}".into(), Vec::new()); - last_max_x = last_char.max_x.get(); - } else { - let char_text = match &*ch.text { - "\u{fb00}" => "ff", - "\u{fb01}" => "fi", - "\u{fb02}" => "fl", - "\u{fb03}" => "ffi", - "\u{fb04}" => "ffl", - v => v, - }; - text_and_tag_stacks.push((char_text.into(), kind.text_line_tags().collect())); - last_max_x = ch.max_x.get(); - } - last_kind = Some(kind); - last_char = Some(ch.clone()); - } - ElementBodyBuilder::scope( - &mut ElementBodyBuilder::new(&mut retval.element), - |body_builder| { - for (text, tag_stack) in text_and_tag_stacks { - body_builder.set_tag_stack(tag_stack); - body_builder.write_text(text) - } - }, - ); - for ch in &retval.chars { - RefCell::borrow_mut(&*unprocessed_chars) - .get_mut(&ch.font) - .expect("known to exist") - .shift_remove(ch); - } - let allowed_start_min_y_error = allowed_start_min_y_error.unwrap_or(0.01); - if f32::abs(start_min_y - retval.regular_min_y) > allowed_start_min_y_error { - return Err(ExtractInsnsError::PageParseError( - format!( - "start_min_y={start_min_y} regular_min_y={}\n\ - start_min_y error: {}\n\ - allowed_start_min_y_error={allowed_start_min_y_error}", - retval.regular_min_y, - start_min_y - retval.regular_min_y, - ), - Backtrace::capture(), - )); - } - Ok(Some(retval)) - } - fn extract_following_text_lines( - &mut self, - first_text_line: ParsedTextLine, - min_x: f32, - max_x: f32, - allowed_start_min_y_error: Option, - ) -> Result, ExtractInsnsError> { - let mut retval = Vec::new(); - let fonts = first_text_line.fonts; - let mut line = Some(first_text_line); - while let Some(cur_line) = line { - let start_min_y = cur_line.regular_min_y - fonts.regular()[0].line_height(); - retval.push(cur_line); - line = self.extract_text_line( - None, - start_min_y, - min_x, - max_x, - fonts, - 0, - false, - allowed_start_min_y_error, - )?; - } - return Ok(retval); - } - fn extract_insn_bit_fields( - &mut self, - mnemonic_lines: &[ParsedTextLine], - ) -> Result, ExtractInsnsError> { - let mut found_non_affix_line = false; - let [.., last_mnemonic_line] = mnemonic_lines else { - unreachable!(); - }; - let expected_non_affix_line_y = last_mnemonic_line.regular_min_y - - if mnemonic_lines.len() > 1 { - INSN_BIT_FIELDS_TOP_PAD_HEIGHT2 - } else { - INSN_BIT_FIELDS_TOP_PAD_HEIGHT - }; - let page = self.page().map_err(ExtractInsnsError::Other)?; - let _ = page.qt[&self.text_section].range( - self.text_section.min_x.get() - 5.0, - self.text_section.max_x.get() + 5.0, - expected_non_affix_line_y - 5.0, - expected_non_affix_line_y + 5.0, - |_x, _y, line| { - let PageItem::LineOrRect(LineOrRect::Line(line)) = line else { - return ControlFlow::Continue(()); - }; - if line.width() > line.height() { - found_non_affix_line = true; - return ControlFlow::Break(()); - } - ControlFlow::Continue(()) - }, - ); - if found_non_affix_line { - return self.extract_insn_bit_fields_box(expected_non_affix_line_y); - }; - let prefix_text = self.extract_text_line( - None, - last_mnemonic_line.regular_min_y - INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT, - self.text_section.min_x.get(), - self.text_section.max_x.get(), - TextLineFonts::InsnBitFieldsAffixTitleFonts, - 0, - true, - Some(2.0), - )?; - let Some(prefix_text) = prefix_text else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn prefix bit fields title".into(), - Backtrace::capture(), - )); - }; - let prefix_text_str = prefix_text.element.inner_text(); - if prefix_text_str != "Prefix:" { - return Err(ExtractInsnsError::InsnParseError( - format!("insn prefix bit fields title is not as expected: {prefix_text_str:?}"), - Backtrace::capture(), - )); - } - let prefix_bit_fields = self.extract_insn_bit_fields_box( - prefix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT, - )?; - let Some(prefix_bit_fields) = prefix_bit_fields else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn prefix bit fields".into(), - Backtrace::capture(), - )); - }; - let suffix_text = self.extract_text_line( - None, - prefix_bit_fields.box_min_y - INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT, - self.text_section.min_x.get(), - self.text_section.max_x.get(), - TextLineFonts::InsnBitFieldsAffixTitleFonts, - 0, - true, - Some(2.0), - )?; - let Some(suffix_text) = suffix_text else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn suffix bit fields title".into(), - Backtrace::capture(), - )); - }; - let suffix_text_str = suffix_text.element.inner_text(); - if suffix_text_str != "Suffix:" { - return Err(ExtractInsnsError::InsnParseError( - format!("insn suffix bit fields title is not as expected: {suffix_text_str:?}"), - Backtrace::capture(), - )); - } - let suffix_bit_fields = self.extract_insn_bit_fields_box( - suffix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT, - )?; - let Some(suffix_bit_fields) = suffix_bit_fields else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn suffix bit fields".into(), - Backtrace::capture(), - )); - }; - return Ok(Some(InsnBitFields { - prefix: Some(InsnBitFieldsPrefix { - box_min_x: prefix_bit_fields.box_min_x, - box_min_y: prefix_bit_fields.box_min_y, - box_max_x: prefix_bit_fields.box_max_x, - box_max_y: prefix_bit_fields.box_max_y, - prefix_text: prefix_text, - fields: prefix_bit_fields.fields, - suffix_text: suffix_text, - }), - box_min_x: suffix_bit_fields.box_min_x, - box_min_y: suffix_bit_fields.box_min_y, - box_max_x: suffix_bit_fields.box_max_x, - box_max_y: suffix_bit_fields.box_max_y, - fields: suffix_bit_fields.fields, - })); - } - fn extract_insn_bit_fields_box( - &mut self, - expected_box_max_y: f32, - ) -> Result, ExtractInsnsError> { - let mut h_lines = Vec::new(); - let mut v_lines = Vec::new(); - let page = self.page().map_err(ExtractInsnsError::Other)?; - let ControlFlow::::Continue(()) = page.qt[&self.text_section].range( - self.text_section.min_x.get() - 5.0, - self.text_section.max_x.get() + 5.0, - expected_box_max_y - INSN_BIT_FIELDS_BOX_HEIGHT - 5.0, - expected_box_max_y + 5.0, - |_x, _y, line| { - let PageItem::LineOrRect(LineOrRect::Line(line)) = *line else { - return ControlFlow::Continue(()); - }; - if line.width() > line.height() { - h_lines.push(line); - } else { - v_lines.push(line); - } - ControlFlow::Continue(()) - }, - ); - h_lines.sort_by_key(|line| line.min_y()); - v_lines.sort_by_key(|line| line.min_x()); - for i in (0..v_lines.len().saturating_sub(1)).rev() { - if f32::abs(v_lines[i].min_x().get() - v_lines[i + 1].min_x().get()) < 0.5 { - v_lines.remove(i + 1); // remove duplicates - } - } - if h_lines.is_empty() && v_lines.is_empty() { - return Ok(None); - } - let [bottom_line, top_line] = &*h_lines else { - return Err(ExtractInsnsError::InsnParseError( - format!( - "instruction bit fields box has wrong number of horizontal lines:\n{h_lines:?}" - ), - Backtrace::capture(), - )); - }; - let [leftmost_line, .., rightmost_line] = &*v_lines else { - return Err(ExtractInsnsError::InsnParseError( - format!("instruction bit fields box has too few vertical lines:\n{v_lines:?}"), - Backtrace::capture(), - )); - }; - let box_min_x = leftmost_line.min_x().get(); - let box_max_x = rightmost_line.min_x().get(); - let box_min_y = bottom_line.min_y().get(); - let box_max_y = top_line.max_y().get(); - let box_mid_y = (box_min_y + box_max_y) * 0.5; - println!("bottom_line={bottom_line:?}"); - println!("top_line={top_line:?}"); - println!("{v_lines:?}"); - let mut fields = Vec::new(); - for i in 0..v_lines.len() - 1 { - let left_line = v_lines[i]; - let right_line = v_lines[i + 1]; - let field_box_min_x = left_line.max_x().get(); - let field_box_max_x = right_line.min_x().get(); - let bit_field_name_start_min_y = box_mid_y + 3.288; - let bit_field_name = self.extract_text_line( - None, - bit_field_name_start_min_y, - field_box_min_x, - field_box_max_x, - TextLineFonts::InsnBitFieldNameFonts, - 0, - true, - Some(0.4), - )?; - let Some(bit_field_name) = bit_field_name else { - return Err(ExtractInsnsError::InsnParseError( - format!( - "instruction bit field name not found:\n\ - start_min_y={bit_field_name_start_min_y} \ - field_box_min_x={field_box_min_x} \ - field_box_max_x={field_box_max_x}" - ), - Backtrace::capture(), - )); - }; - let bit_field_number_start_min_y = box_min_y + 3.487; - let bit_number = self.extract_text_line( - None, - bit_field_number_start_min_y, - field_box_min_x, - field_box_max_x, - TextLineFonts::InsnBitFieldBitNumberFonts, - 0, - true, - None, - )?; - let Some(bit_number) = bit_number else { - return Err(ExtractInsnsError::InsnParseError( - format!( - "instruction bit field bit number not found:\n\ - start_min_y={bit_field_number_start_min_y} \ - field_box_min_x={field_box_min_x} \ - field_box_max_x={field_box_max_x}" - ), - Backtrace::capture(), - )); - }; - fields.push(InsnBitField { - box_min_x: field_box_min_x, - box_max_x: field_box_max_x, - name: bit_field_name, - bit_number: bit_number, - }); - } - return Ok(Some(InsnBitFields { - prefix: None, - box_min_x, - box_min_y, - box_max_x, - box_max_y, - fields, - })); - } - fn extract_insn_header_mnemonics_and_bit_fields( - &mut self, - start_min_y: f32, - header_start_char: Option, - ) -> Result, ExtractInsnsError> { - assert!( - header_start_char - .as_ref() - .is_none_or(|v| v.font == Font::InsnHeader) - ); - let Some(header_line) = self.extract_text_line( - header_start_char, - start_min_y, - self.text_section.min_x.get(), - self.text_section.max_x.get(), - TextLineFonts::InsnHeaderFonts, - 0, - true, - Some(6.0), - )? - else { - return Ok(None); - }; - println!("found header line:\n{header_line}"); - let header_lines = self.extract_following_text_lines( - header_line, - self.text_section.min_x.get(), - self.text_section.max_x.get(), - Some(1.5), - )?; - println!("insn header lines:"); - for header_line in &header_lines { - println!("{header_line}"); - } - let [.., last_header_line] = &*header_lines else { - unreachable!(); - }; - let Some(mnemonic_start_char) = self - .find_top_left_char_in_range( - self.text_section.min_x.get() - 5.0, - self.text_section.max_x.get() + 5.0, - last_header_line.regular_min_y - 50.0, - last_header_line.regular_min_y - 5.0, - false, - ) - .map_err(ExtractInsnsError::Other)? - else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn mnemonic text line".into(), - Backtrace::capture(), - )); - }; - let mnemonic_start_char_min_y = mnemonic_start_char.min_y.get(); - let Some(mnemonic_line) = self.extract_text_line( - Some(mnemonic_start_char), - mnemonic_start_char_min_y, - self.text_section.min_x.get(), - self.text_section.max_x.get(), - TextLineFonts::InsnMnemonicFonts, - 0, - true, - None, - )? - else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn mnemonic text line".into(), - Backtrace::capture(), - )); - }; - let mnemonic_line_first_char_min_x = mnemonic_line.chars[0].min_x.get(); - let mnemonic_lines = self.extract_following_text_lines( - mnemonic_line, - mnemonic_line_first_char_min_x, - self.text_section.max_x.get(), - None, - )?; - println!("insn mnemonic lines:"); - for mnemonic_line in &mnemonic_lines { - println!("{mnemonic_line}"); - } - let Some(insn_bit_fields) = self.extract_insn_bit_fields(&mnemonic_lines)? else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn bit fields".into(), - Backtrace::capture(), - )); - }; - println!("{insn_bit_fields}"); - return Ok(Some(InsnHeader { - header_lines, - mnemonic_lines, - bit_fields: insn_bit_fields, - })); - } - fn extract_insn_sp_regs_altered( - &mut self, - mut sp_regs_altered_text: ParsedTextLine, - ) -> Result { - sp_regs_altered_text.preceding_blank_lines = 0; - let fonts = TextLineFonts::InsnDescFonts; - let column_min_x = sp_regs_altered_text.chars[0].min_x.get(); - let Some(table_header_reg_char) = self - .find_top_left_char_in_range( - column_min_x - 1.0, - column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 1.0, - sp_regs_altered_text.regular_min_y - 30.0, - sp_regs_altered_text.regular_min_y - 5.0, - false, - ) - .map_err(ExtractInsnsError::Other)? - else { - return Err(ExtractInsnsError::InsnParseError( - "can't find special registers altered table's register-column's header".into(), - Backtrace::capture(), - )); - }; - const KNOWN_SPECIAL_TEXTS: &[&str] = &[ - "None", - "Dependent on the system service", - "See above.", - "See Table 5.1", - ]; - match &*table_header_reg_char.text { - "R" => {} - text if KNOWN_SPECIAL_TEXTS.iter().any(|i| text == &i[..1]) => { - let start_min_y = table_header_reg_char.min_y.get(); - let special_text = self.extract_text_line( - Some(table_header_reg_char), - start_min_y, - column_min_x, - self.text_section.max_x.get(), - fonts, - 0, - true, - None, - )?; - let special_text = match special_text { - Some(special_text) - if KNOWN_SPECIAL_TEXTS.contains(&&*special_text.element.text) => - { - special_text - } - _ => return Err(ExtractInsnsError::Other( - format!( - "can't find special-registers-altered special-text:\n{special_text:?}" - ) - .into(), - )), - }; - let final_regular_min_y = special_text.regular_min_y; - return Ok(InsnSpRegsAltered { - sp_regs_altered_text, - special_text: Some(special_text), - table_header_reg: None, - table_header_fields: None, - entries: vec![], - final_regular_min_y, - }); - } - text => { - return Err(ExtractInsnsError::InsnParseError( - format!( - "unknown special-registers-altered special-text start character: {text:?}" - ), - Backtrace::capture(), - )); - } - } - let Some(table_header_fields_char) = self - .find_top_left_char_in_range( - column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 10.0, - column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, - table_header_reg_char.min_y.get() - 5.0, - table_header_reg_char.min_y.get() + 5.0, - false, - ) - .map_err(ExtractInsnsError::Other)? - else { - return Err(ExtractInsnsError::Other( - "can't find special registers altered table's fields-column's header".into(), - )); - }; - if table_header_fields_char.text != "F" { - return Err(ExtractInsnsError::Other( - format!( - "can't find special registers altered table's fields-column's header:\n\ - table_header_fields_char={table_header_fields_char:?}" - ) - .into(), - )); - } - let columns_x_bounds = [ - ( - table_header_reg_char.min_x.get(), - table_header_fields_char.min_x.get() - 1.0, - ), - ( - table_header_fields_char.min_x.get(), - column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, - ), - ( - column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, - self.text_section.max_x.get(), - ), - ]; - let start_min_y = table_header_reg_char.min_y.get(); - let Some(table_header_reg) = self.extract_text_line( - Some(table_header_reg_char), - start_min_y, - columns_x_bounds[0].0, - columns_x_bounds[0].1, - fonts, - 0, - false, - None, - )? - else { - return Err(ExtractInsnsError::Other( - "can't find special registers altered table's register-column's header".into(), - )); - }; - let table_header_reg_text = table_header_reg.element.inner_text(); - if table_header_reg_text != "Register" { - return Err(ExtractInsnsError::Other( - format!( - "can't find special registers altered table's register-column's header:\n\ - table_header_reg_text={table_header_reg_text:?}" - ) - .into(), - )); - } - let start_min_y = table_header_fields_char.min_y.get(); - let Some(table_header_fields) = self.extract_text_line( - Some(table_header_fields_char), - start_min_y, - columns_x_bounds[1].0, - columns_x_bounds[1].1, - fonts, - 0, - false, - None, - )? - else { - return Err(ExtractInsnsError::Other( - "can't find special registers altered table's fields-column's header".into(), - )); - }; - let table_header_fields_text = table_header_fields.element.inner_text(); - if table_header_reg_text != "Field(s)" { - return Err(ExtractInsnsError::Other( - format!( - "can't find special registers altered table's fields-column's header:\n\ - table_header_fields_text={table_header_fields_text:?}" - ) - .into(), - )); - } - let mut regular_min_y = table_header_reg.regular_min_y; - let mut entries = Vec::new(); - let mut cur_reg = None; - let mut cur_fields = Vec::new(); - let mut cur_conds = Vec::new(); - loop { - let mut row = [None, None, None]; - let mut next_regular_min_y = None; - for (i, (min_x, max_x)) in columns_x_bounds.into_iter().enumerate() { - row[i] = self.extract_text_line( - None, - regular_min_y - fonts.regular()[0].line_height(), - min_x, - max_x, - fonts, - 0, - true, - Some(2.0), - )?; - if let Some(cell) = &row[i] - && next_regular_min_y.is_none() - { - next_regular_min_y = Some(cell.regular_min_y); - } - } - match next_regular_min_y { - Some(v) => regular_min_y = v, - None => break, - } - let [cur_reg_cell, cur_fields_cell, cur_conds_cell] = row; - if cur_reg_cell.is_none() { - if cur_reg.is_none() { - return Err(ExtractInsnsError::Other( - "can't find special registers altered table's first register".into(), - )); - } - cur_fields.extend(cur_fields_cell); - cur_conds.extend(cur_conds_cell); - continue; - } - if let Some(cur_reg) = cur_reg { - entries.push(InsnSpRegsAlteredEntry { - reg: cur_reg, - fields: cur_fields, - conds: cur_conds, - }); - cur_fields = Vec::new(); - cur_conds = Vec::new(); - } - cur_reg = cur_reg_cell; - cur_fields.extend(cur_fields_cell); - cur_conds.extend(cur_conds_cell); - } - let Some(cur_reg) = cur_reg else { - return Err(ExtractInsnsError::Other( - "can't find special registers altered table's first register".into(), - )); - }; - entries.push(InsnSpRegsAlteredEntry { - reg: cur_reg, - fields: cur_fields, - conds: cur_conds, - }); - return Ok(InsnSpRegsAltered { - sp_regs_altered_text: sp_regs_altered_text, - special_text: None, - table_header_reg: Some(table_header_reg), - table_header_fields: Some(table_header_fields), - entries, - final_regular_min_y: regular_min_y, - }); - } - fn extract_insn(&mut self, header_start_char: Char) -> Result { - assert_eq!(header_start_char.font, Font::InsnHeader); - println!("{header_start_char:?}"); - let Some(header) = self.extract_insn_header_mnemonics_and_bit_fields( - header_start_char.min_y.get(), - Some(header_start_char), - )? - else { - return Err(ExtractInsnsError::PageParseError( - "can't find header text line".into(), - Backtrace::capture(), - )); - }; - let mut next_start_min_y = header.min_y() - 5.0; - let mut headers = vec![header]; - let mut code_lines: Vec = Vec::new(); - let mut desc_lines: Vec = Vec::new(); - let mut sp_regs_altered = None; - loop { - let search_min_y = next_start_min_y - 70.0; - let Some(next_char) = self - .find_top_left_char_in_range( - self.text_section.min_x.get() - 5.0, - self.text_section.max_x.get() + 5.0, - search_min_y.max(self.text_section.min_y.get()), - next_start_min_y, - false, - ) - .map_err(ExtractInsnsError::Other)? - else { - if search_min_y <= self.text_section.min_y.get() - && self - .pages - .get(self.text_section.next().page_num) - .map_err(ExtractInsnsError::Other)? - .is_some() - { - // go to next section - self.text_section = self.text_section.next(); - next_start_min_y = self.text_section.max_y.get(); - continue; - } else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn code or description text".into(), - Backtrace::capture(), - )); - } - }; - let next_section = match &next_char.font { - font if TextLineFonts::InsnCodeFonts.fonts().contains(font) => { - InsnParseSection::Code - } - font if TextLineFonts::InsnDescFonts.fonts().contains(font) => { - InsnParseSection::Desc - } - Font::InsnHeader => InsnParseSection::Header, - font => { - return Err(ExtractInsnsError::InsnParseError( - format!("can't find insn code or description text\nfont={font:?}"), - Backtrace::capture(), - )); - } - }; - match next_section { - InsnParseSection::Code => { - if !desc_lines.is_empty() { - break; - } - let start_min_y = next_char.min_y.get(); - let min_x = next_char.min_x.get(); - let Some(code_line) = self.extract_text_line( - Some(next_char), - start_min_y, - min_x, - self.text_section.max_x.get(), - TextLineFonts::InsnCodeFonts, - if code_lines.is_empty() { 0 } else { 1 }, - false, - None, - )? - else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn code text line".into(), - Backtrace::capture(), - )); - }; - let min_x = code_line.chars[0].min_x.get(); - let more_code_lines = self.extract_following_text_lines( - code_line, - min_x, - self.text_section.max_x.get(), - Some(0.05), - )?; - println!("more insn code lines:"); - for i in &more_code_lines { - println!("{i}"); - } - code_lines.extend(more_code_lines); - let Some(last) = code_lines.last() else { - unreachable!() - }; - next_start_min_y = last.regular_min_y - 5.0; - } - InsnParseSection::Header => { - if !(code_lines.is_empty() && desc_lines.is_empty()) { - break; - } - let Some(header) = self.extract_insn_header_mnemonics_and_bit_fields( - next_char.min_y.get(), - Some(next_char), - )? - else { - return Err(ExtractInsnsError::InsnParseError( - "can't find header text line".into(), - Backtrace::capture(), - )); - }; - next_start_min_y = header.min_y() - 5.0; - headers.push(header); - } - InsnParseSection::Desc => { - let start_min_y = next_char.min_y.get(); - let min_x = next_char.min_x.get(); - let Some(desc_line) = self.extract_text_line( - Some(next_char), - start_min_y, - min_x, - self.text_section.max_x.get(), - TextLineFonts::InsnDescFonts, - if desc_lines.is_empty() { 0 } else { 1 }, - false, - Some(3.0), - )? - else { - return Err(ExtractInsnsError::InsnParseError( - "can't find insn desc text line".into(), - Backtrace::capture(), - )); - }; - match desc_line.get_header_text() { - None => { - let min_x = desc_line.chars[0].min_x.get(); - let more_desc_lines = self.extract_following_text_lines( - desc_line, - min_x, - self.text_section.max_x.get(), - Some(3.5), - )?; - println!("more insn desc lines:"); - for i in &more_desc_lines { - println!("{i}"); - } - desc_lines.extend(more_desc_lines); - next_start_min_y = desc_lines - .last() - .expect("known to be non-empty") - .regular_min_y - - 5.0; - } - Some(header_text) if header_text == "Special Registers Altered:" => { - let new_sp_regs_altered = - self.extract_insn_sp_regs_altered(desc_line)?; - next_start_min_y = new_sp_regs_altered.final_regular_min_y; - sp_regs_altered = Some(new_sp_regs_altered); - break; - } - Some(header_text) => { - return Err(ExtractInsnsError::Other( - format!("unhandled header text: {header_text:?}\n{desc_line}") - .into(), - )); - } - } - } - } - } - println!("insn code lines:"); - for i in &code_lines { - println!("{i}"); - } - println!("insn desc lines:"); - for i in &desc_lines { - println!("{i}"); - } - println!("sp_regs_altered:"); - println!("{sp_regs_altered:?}"); - // TODO: finish - return Ok(Insn { - headers, - code_lines, - desc_lines, - sp_regs_altered, - }); - } - fn extract_insns(&mut self) -> Result<(), ExtractInsnsError> { - loop { - let Some(header_start_char) = - RefCell::borrow(&*self.unprocessed_chars().map_err(ExtractInsnsError::Other)?) - .get(&Font::InsnHeader) - .and_then(|v| v.first().cloned()) - else { - return Ok(()); - }; - let insn = self.extract_insn(header_start_char)?; - self.insns.push(insn); - } + fn parse_text_section(&mut self) -> Result<(), Box> { + todo!() } } -#[derive(Debug)] -struct MyDevice<'a> { - page_num: u32, - qt: RefCell>>, - unprocessed_chars: - Rc>>>>>>, +#[derive(Clone, Debug, Default)] +struct MyDevice { + qt: Rc>>>, unprocessed_non_text: Rc>>, - first_seen_fonts: RefCell<&'a mut BTreeMap>>, - error: RefCell>>, } -impl<'a> MyDevice<'a> { - fn new(page_num: u32, first_seen_fonts: &'a mut BTreeMap>) -> Self { - Self { - page_num, - qt: Default::default(), - unprocessed_chars: Default::default(), - unprocessed_non_text: Default::default(), - first_seen_fonts: RefCell::new(first_seen_fonts), - error: RefCell::new(Ok(())), - } - } - fn path(&self, path: &mupdf_ffi::Path<'_>, ctm: fz_matrix) { - if self.error.borrow().is_err() { - return; - } - enum Walker { - Empty, - Moved { x: f32, y: f32 }, - Line(Line), - Rect { x1: f32, y1: f32, x2: f32, y2: f32 }, - NotRecognized, - } - fn new_line(p0_x: f32, p0_y: f32, p1_x: f32, p1_y: f32) -> Option { - Some(Line { - p0_x: NonNaNF32::new(p0_x)?, - p0_y: NonNaNF32::new(p0_y)?, - p1_x: NonNaNF32::new(p1_x)?, - p1_y: NonNaNF32::new(p1_y)?, - }) - } - impl<'ctx> mupdf_ffi::PathWalker<'ctx> for Walker { - fn move_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) { - *self = match *self { - Walker::Empty | Walker::Moved { .. } => Walker::Moved { x, y }, - Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => { - Walker::NotRecognized - } - }; - } - fn line_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) { - *self = match *self { - Walker::Empty => Walker::NotRecognized, - Walker::Moved { x: p0_x, y: p0_y } => new_line(p0_x, p0_y, x, y) - .map(Walker::Line) - .unwrap_or(Walker::NotRecognized), - Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => { - Walker::NotRecognized - } - }; - } - fn curve_to( - &mut self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - _cx1: f32, - _cy1: f32, - _cx2: f32, - _cy2: f32, - _ex: f32, - _ey: f32, - ) { - *self = Walker::NotRecognized; - } - fn close_path(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>) {} - fn rect_to( - &mut self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - x1: f32, - y1: f32, - x2: f32, - y2: f32, - ) { - *self = match *self { - Walker::Empty => Walker::Rect { x1, y1, x2, y2 }, - Walker::Moved { .. } - | Walker::Line(..) - | Walker::Rect { .. } - | Walker::NotRecognized => Walker::NotRecognized, - }; - } - } - let mut walker = Walker::Empty; - path.walk(&mut walker); - let component = match walker { - Walker::Empty | Walker::Moved { .. } | Walker::NotRecognized => return, - Walker::Line(Line { - p0_x, - p0_y, - p1_x, - p1_y, - }) => { - let mupdf_sys::fz_point { x: p0_x, y: p0_y } = - mupdf_ffi::transform_point_xy(p0_x.get(), p0_y.get(), ctm); - let mupdf_sys::fz_point { x: p1_x, y: p1_y } = - mupdf_ffi::transform_point_xy(p1_x.get(), p1_y.get(), ctm); - let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else { - return; - }; - LineOrRect::Line(line) - } - Walker::Rect { x1, y1, x2, y2 } => { - let p1 = mupdf_ffi::transform_point_xy(x1, y1, ctm); - let p2 = mupdf_ffi::transform_point_xy(x2, y1, ctm); - let p3 = mupdf_ffi::transform_point_xy(x2, y2, ctm); - let p4 = mupdf_ffi::transform_point_xy(x1, y2, ctm); - let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x)); - let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x)); - let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y)); - let max_y = NonNaNF32::new(p1.y.max(p2.y).max(p3.y).max(p4.y)); - let (Some(min_x), Some(max_x), Some(min_y), Some(max_y)) = - (min_x, max_x, min_y, max_y) - else { - return; - }; - LineOrRect::Rect(Rect { - min_x, - max_x, - min_y, - max_y, - }) - } - }; - if component.width() > 100.0 - && component.min_x().get() < COLUMN_SPLIT_X - 10.0 - && component.max_x().get() > COLUMN_SPLIT_X + 10.0 - { - println!("wide component: {component:?}"); - } else { - println!("component: {component:?}"); - } - let text_section = TextSection::for_position( - self.page_num, - (component.min_x().get() + component.max_x().get()) * 0.5, - (component.min_y().get() + component.max_y().get()) * 0.5, - ); - if let Some(text_section) = text_section { - self.qt - .borrow_mut() - .entry(text_section) - .or_default() - .insert( - component.min_x().get(), - component.min_y().get(), - PageItem::LineOrRect(component), - ); - } - } - fn text(&self, text: &mupdf_ffi::Text<'_>, ctm: fz_matrix) { - if self.error.borrow().is_err() { - return; - } - let mut first_seen_fonts = self.first_seen_fonts.borrow_mut(); - for span in text.spans() { - let tm = span.trm(); - const ROUND_FACTOR: f32 = 1000.0; - let font_size = (mupdf_ffi::matrix_expansion(tm) * ROUND_FACTOR).round() / ROUND_FACTOR; - let Some(font_size) = NonNaNF32::new(font_size) else { - continue; - }; - let font_name_with_tag = span.font().name(); - let font_name_with_tag = match font_name_with_tag { - "CGMSHV+DejaVuSansCondensed-Obli" => "CGMSHV+DejaVuSansCondensed-Oblique", - "YDJYQV+DejaVuSansCondensed-Bold" => "YDJYQV+DejaVuSansCondensed-BoldOblique", - "NHUPPK+DejaVuSansCondensed-Bold" => "NHUPPK+DejaVuSansCondensed-Bold", - _ if font_name_with_tag.len() == 31 => { - let _ = self.error.replace(Err(format!( - "probably truncated font name: {font_name_with_tag:?}" - ) - .into())); - return; - } - _ => font_name_with_tag, - }; - for &fz_text_item { - x, - y, - adv, - gid, - ucs, - cid: _, - } in span.items() - { - let adv = if gid >= 0 { adv } else { 0.0 }; - let tm = fz_matrix { e: x, f: y, ..tm }; - let trm = mupdf_ffi::concat(tm, ctm); - let dir = match span.write_mode() { - WriteMode::Horizontal => fz_point { x: 1.0, y: 0.0 }, - WriteMode::Vertical => fz_point { x: 0.0, y: -1.0 }, - }; - let dir = mupdf_ffi::transform_vector(dir, trm); - let glyph_start; - let glyph_stop; - let glyph_ascender; - let glyph_descender; - match span.write_mode() { - WriteMode::Horizontal => { - glyph_start = fz_point { x: trm.e, y: trm.f }; - glyph_stop = fz_point { - x: trm.e + adv * dir.x, - y: trm.f + adv * dir.y, - }; - glyph_ascender = fz_point { - x: 0.0, - y: span.font().ascender(), - }; - glyph_descender = fz_point { - x: 0.0, - y: span.font().descender(), - }; - } - WriteMode::Vertical => { - glyph_start = fz_point { - x: trm.e - adv * dir.x, - y: trm.f - adv * dir.y, - }; - glyph_stop = fz_point { x: trm.e, y: trm.f }; - glyph_ascender = fz_point { x: 1.0, y: 0.0 }; - glyph_descender = fz_point { x: 0.0, y: 0.0 }; - } - }; - let glyph_ascender = transform_vector(glyph_ascender, trm); - let glyph_descender = transform_vector(glyph_descender, trm); - let points = [ - add_points(glyph_start, glyph_descender), - add_points(glyph_start, glyph_ascender), - add_points(glyph_stop, glyph_descender), - add_points(glyph_stop, glyph_ascender), - ]; - let min = point_min_components( - point_min_components(point_min_components(points[0], points[1]), points[2]), - points[3], - ); - let max = point_max_components( - point_max_components(point_max_components(points[0], points[1]), points[2]), - points[3], - ); - let Some(ch) = u32::try_from(ucs).ok().and_then(|v| char::try_from(v).ok()) else { - continue; - }; - let text = String::from(ch); - if text.trim().is_empty() { - continue; - } - let font = Font::known_from_name_with_tag(font_name_with_tag, font_size) - .unwrap_or_else(|| Font::Other { - font_name: font_name_with_tag.into(), - size: font_size, - }); - let Some(text_section) = TextSection::for_position( - self.page_num, - (min.x + max.x) * 0.5, - (min.y + max.y) * 0.5, - ) else { - if PAGE_BODY_MIN_Y <= min.y && min.y <= PAGE_BODY_MAX_Y { - if self.page_num != 1072 { - // page 1072 has characters in the margins - let _ = self.error.replace(Err(format!( - "char not in text section: {text:?}\npage_num={}", - self.page_num, - ) - .into())); - return; - } - } - continue; - }; - let (Some(min_x), Some(min_y), Some(max_x), Some(max_y)) = ( - NonNaNF32::new(min.x), - NonNaNF32::new(min.y), - NonNaNF32::new(max.x), - NonNaNF32::new(max.y), - ) else { - let _ = self - .error - .replace(Err("char position shouldn't be NaN".into())); - return; - }; - let char = Char { - font, - text, - min_x, - min_y, - max_x, - max_y, - }; - let set = match first_seen_fonts.get_mut(font_name_with_tag) { - Some(v) => v, - None => first_seen_fonts - .entry(String::from(font_name_with_tag)) - .or_default(), - }; - if set.insert(font_size) { - println!( - "first seen font: {font_name_with_tag:?} {font_size}: page {} {char:?}", - self.page_num, - ); - } - self.qt - .borrow_mut() - .entry(text_section) - .or_default() - .insert(min_x.get(), min_y.get(), PageItem::Char(char.clone())); - self.unprocessed_chars - .borrow_mut() - .entry(text_section) - .or_default() - .borrow_mut() - .entry(char.font.clone()) - .or_default() - .insert(char); - } - } - } -} - -impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice<'_> { +impl mupdf::NativeDevice for MyDevice { fn fill_path( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - path: &mupdf_ffi::Path<'ctx>, - _even_odd: bool, - ctm: fz_matrix, + &mut self, + path: &mupdf::Path, + even_odd: bool, + cmt: mupdf::Matrix, + color_space: &mupdf::Colorspace, + color: &[f32], + alpha: f32, + cp: mupdf::ColorParams, ) { - self.path(path, ctm); + // TODO } fn stroke_path( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - path: &mupdf_ffi::Path<'ctx>, - ctm: fz_matrix, + &mut self, + path: &mupdf::Path, + stroke_state: &mupdf::StrokeState, + cmt: mupdf::Matrix, + color_space: &mupdf::Colorspace, + color: &[f32], + alpha: f32, + cp: mupdf::ColorParams, ) { - self.path(path, ctm); + // TODO } fn clip_path( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - path: &mupdf_ffi::Path<'ctx>, - _even_odd: bool, - ctm: fz_matrix, - _scissor: mupdf_sys::fz_rect, + &mut self, + path: &mupdf::Path, + even_odd: bool, + cmt: mupdf::Matrix, + scissor: mupdf::Rect, ) { - self.path(path, ctm); + // TODO } fn clip_stroke_path( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - path: &mupdf_ffi::Path<'ctx>, - ctm: fz_matrix, - _scissor: mupdf_sys::fz_rect, + &mut self, + path: &mupdf::Path, + stroke_state: &mupdf::StrokeState, + cmt: mupdf::Matrix, + scissor: mupdf::Rect, ) { - self.path(path, ctm); - } - - fn fill_text( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - text: &mupdf_ffi::Text<'ctx>, - ctm: fz_matrix, - ) { - self.text(text, ctm); - } - - fn stroke_text( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - text: &mupdf_ffi::Text<'ctx>, - ctm: fz_matrix, - ) { - self.text(text, ctm); - } - - fn clip_text( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - text: &mupdf_ffi::Text<'ctx>, - ctm: fz_matrix, - _scissor: mupdf_sys::fz_rect, - ) { - self.text(text, ctm); - } - - fn clip_stroke_text( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - text: &mupdf_ffi::Text<'ctx>, - ctm: fz_matrix, - _scissor: mupdf_sys::fz_rect, - ) { - self.text(text, ctm); - } - - fn ignore_text( - &self, - _ctx: mupdf_ffi::ContextRef<'ctx>, - text: &mupdf_ffi::Text<'ctx>, - ctm: fz_matrix, - ) { - self.text(text, ctm); + // TODO } } @@ -3663,37 +2095,83 @@ struct MuPdfXmlChar<'a> { impl Page { fn from_mupdf_page( page_num: u32, - page: &mupdf_ffi::Page<'_>, - first_seen_fonts: &mut BTreeMap>, + page: mupdf::Page, ) -> Result> { - let device = mupdf_ffi::Device::new( - page.ctx(), - Box::new(MyDevice::new(page_num, first_seen_fonts)), - )?; + let device = MyDevice::default(); page.run( - &device, - fz_matrix { - a: 1.0, - b: 0.0, - c: 0.0, - d: 1.0, - e: 0.0, - f: 0.0, - }, + &mupdf::Device::from_native(device.clone())?, + &mupdf::Matrix::IDENTITY, )?; let MyDevice { - page_num: _, qt, - unprocessed_chars, unprocessed_non_text, - first_seen_fonts: _, - error, - } = device.get(); - error.replace(Ok(()))?; - for (text_section, i) in unprocessed_chars.borrow_mut().iter_mut() { - for chars in i.borrow_mut().values_mut() { - chars.sort_by_key(Char::top_down_left_to_right_sort_key); - println!("first char: {text_section:?}: {:?}", chars.first()); + } = device; + let mut qt = Rc::try_unwrap(qt) + .ok() + .expect("already dropped all other references") + .into_inner(); + let unprocessed_chars: Rc< + RefCell>>>>>, + > = Rc::default(); + // we convert to xml and parse that becuase the mupdf rust crate doesn't include all the API surface we need. + let xml = page.to_xml()?; + let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?; + for xml_block in xml_page.block { + for xml_line in xml_block.line { + for xml_font in xml_line.font { + const ROUND_FACTOR: f32 = 1000.0; + let font_size = (xml_font.size * ROUND_FACTOR).round() / ROUND_FACTOR; + let font_size = NonNaNF32::new(font_size).ok_or("font size must not be NaN")?; + let font = Font::new(&xml_font.name, font_size); + for xml_char in xml_font.char { + let [x0, y0, x1, y1, x2, y2, x3, y3] = xml_char.quad; + let min_x = x0.min(x1).min(x2).min(x3); + let max_x = x0.max(x1).max(x2).max(x3); + let min_y = y0.min(y1).min(y2).min(y3); + let max_y = y0.max(y1).max(y2).max(y3); + let Some(text_section) = TextSection::for_position( + page_num, + (min_x + max_x) * 0.5, + (min_y + max_y) * 0.5, + ) else { + if PAGE_BODY_MIN_Y <= min_y && min_y <= PAGE_BODY_MAX_Y { + if page_num != 1072 { + // page 1072 has characters in the margins + return Err( + format!("char not in text section: {xml_char:?}\npage_num={page_num}").into(), + ); + } + } + continue; + }; + let char = Char { + font: font.clone(), + text: xml_char.c.into_owned(), + min_x: NonNaNF32::new(min_x).ok_or("char position shouldn't be NaN")?, + min_y: NonNaNF32::new(min_y).ok_or("char position shouldn't be NaN")?, + max_x: NonNaNF32::new(max_x).ok_or("char position shouldn't be NaN")?, + max_y: NonNaNF32::new(max_y).ok_or("char position shouldn't be NaN")?, + }; + qt.entry(text_section).or_default().insert( + min_x, + min_y, + PageItem::Char(char.clone()), + ); + unprocessed_chars + .borrow_mut() + .entry(text_section) + .or_default() + .borrow_mut() + .entry(char.font.clone()) + .or_default() + .insert(char); + } + } + } + } + for i in unprocessed_chars.borrow_mut().values_mut() { + for j in i.borrow_mut().values_mut() { + j.sort_by_key(Char::top_down_left_to_right_sort_key); } } let mut unknown_fonts = Vec::new(); @@ -3724,14 +2202,14 @@ impl Page { } Ok(Self { page_num, - qt: qt.take(), - unprocessed_chars: unprocessed_chars.clone(), - unprocessed_non_text: unprocessed_non_text.clone(), + qt, + unprocessed_chars, + unprocessed_non_text, }) } } -fn main_inner() -> Result<(), Box> { +fn main() -> Result<(), Box> { let args: Vec = std::env::args().collect(); let page_numbers: Option>>> = if 2 < args.len() { Some(if let Some((start, end)) = args[2].split_once(":") { @@ -3751,37 +2229,25 @@ fn main_inner() -> Result<(), Box> { } else { None }; - mupdf_ffi::Context::with(|ctx| { - let mut parser = Parser::new(); - let is_subset = page_numbers.is_some(); - let file_name = &args[1]; - parser.parse_pdf(ctx, file_name, page_numbers)?; - let mut insns = xml_tree::Element::new( - "instructions".into(), - [("is-subset".into(), is_subset.to_string())], - ); - insns.text = "\n".into(); - insns.tail = "\n".into(); - let mut comment = - xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); - comment.tail = "\n".into(); - insns.children.push(comment); - for insn in parser.insns { - insn.write_xml(&mut insns); - } - let mut output = Vec::new(); - insns.write(&mut output, true)?; - std::fs::write("powerisa-instructions.xml", output)?; - Ok(()) - }) -} - -fn main() -> std::process::ExitCode { - match main_inner() { - Ok(()) => std::process::ExitCode::SUCCESS, - Err(e) => { - println!("Error: {e}"); - std::process::ExitCode::FAILURE - } + let mut parser = Parser::new(); + let is_subset = page_numbers.is_some(); + let file_name = &args[1]; + parser.parse_pdf(file_name, page_numbers)?; + let mut insns = xml_tree::Element::new( + "instructions".into(), + [("is-subset".into(), is_subset.to_string())], + ); + insns.text = "\n".into(); + insns.tail = "\n".into(); + let mut comment = + xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); + comment.tail = "\n".into(); + insns.children.push(comment); + for insn in parser.insns { + insn.write_xml(&mut insns); } + let mut output = Vec::new(); + insns.write(&mut output, true)?; + std::fs::write("powerisa-instructions.xml", output)?; + Ok(()) } diff --git a/src/mupdf_ffi.rs b/src/mupdf_ffi.rs deleted file mode 100644 index 942bcfc..0000000 --- a/src/mupdf_ffi.rs +++ /dev/null @@ -1,804 +0,0 @@ -// SPDX-License-Identifier: LGPL-3.0-or-later -// See Notices.txt for copyright information - -use mupdf_sys::{ - fz_clone_context, fz_color_params, fz_colorspace, fz_concat, fz_context, fz_device, - fz_document, fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, - fz_drop_text, fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_font_ascender, fz_font_descender, - fz_font_is_bold, fz_font_is_italic, fz_font_name, fz_matrix, fz_matrix_expansion, fz_page, - fz_path, fz_path_walker, fz_point, fz_rect, fz_stroke_state, fz_text, fz_text_item, - fz_text_span, fz_transform_point, fz_transform_point_xy, fz_transform_vector, fz_walk_path, - mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page, - mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page, -}; -use std::{ - cell::{Cell, UnsafeCell}, - ffi::{CStr, c_int, c_void}, - fmt, - marker::PhantomData, - mem::ManuallyDrop, - ptr::{self, NonNull}, - sync::{Mutex, OnceLock}, -}; - -#[derive(Debug)] -pub(crate) struct MuPdfError { - type_: c_int, - message: String, -} - -impl MuPdfError { - fn new_generic(message: impl ToString) -> Self { - Self { - type_: fz_error_type_FZ_ERROR_GENERIC as _, - message: message.to_string(), - } - } -} - -impl fmt::Display for MuPdfError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "MuPDF error: type: {}, message: {}", - self.type_, self.message - ) - } -} - -impl std::error::Error for MuPdfError {} - -struct OwnedMuPdfError(NonNull); - -impl Drop for OwnedMuPdfError { - fn drop(&mut self) { - unsafe { - mupdf_drop_error(self.0.as_ptr()); - } - } -} - -unsafe fn mupdf_try(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result { - let mut err = ptr::null_mut(); - let retval = f(&mut err); - let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else { - return Ok(retval); - }; - unsafe { - Err(MuPdfError { - type_: (*err.0.as_ptr()).type_, - message: CStr::from_ptr((*err.0.as_ptr()).message) - .to_string_lossy() - .into_owned(), - }) - } -} - -pub(crate) struct Context(NonNull); - -impl Context { - fn new() -> Self { - struct BaseContext(NonNull); - unsafe impl Send for BaseContext {} - static CTX: OnceLock> = OnceLock::new(); - let base = CTX - .get_or_init(|| { - let ctx = unsafe { mupdf_new_base_context() }; - let Some(ctx) = NonNull::new(ctx).map(BaseContext) else { - panic!("failed to allocate a MuPDF context"); - }; - Mutex::new(ctx) - }) - .lock() - .expect("not poisoned"); - let ctx = unsafe { fz_clone_context(base.0.as_ptr()) }; - let Some(ctx) = NonNull::new(ctx).map(Self) else { - drop(base); - panic!("failed to clone a MuPDF context"); - }; - ctx - } - pub(crate) fn with(f: impl FnOnce(&Self) -> R) -> R { - thread_local! { - static CTX: Context = Context::new(); - } - CTX.with(f) - } - pub(crate) fn as_ref(&self) -> ContextRef<'_> { - unsafe { ContextRef::from_ptr(self.0.as_ptr()) } - } -} - -impl Drop for Context { - fn drop(&mut self) { - unsafe { - fz_drop_context(self.0.as_ptr()); - } - } -} - -#[derive(Clone, Copy)] -pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell); - -impl<'ctx> ContextRef<'ctx> { - unsafe fn from_ptr(ptr: *mut fz_context) -> Self { - Self(unsafe { &*ptr.cast() }) - } -} - -impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> { - fn from(value: &'ctx Context) -> Self { - value.as_ref() - } -} - -pub(crate) struct Document<'ctx> { - ptr: *mut fz_document, - ctx: ContextRef<'ctx>, -} - -impl<'ctx> Document<'ctx> { - pub(crate) fn open( - ctx: impl Into>, - file_name: &CStr, - ) -> Result, MuPdfError> { - let ctx = ctx.into(); - unsafe { - mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr)) - .map(|ptr| Document { ptr, ctx }) - } - } - pub(crate) fn page_count(&self) -> Result { - unsafe { - mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))? - .try_into() - .map_err(MuPdfError::new_generic) - } - } - pub(crate) fn load_page(&self, page: usize) -> Result, MuPdfError> { - let page = page.try_into().map_err(MuPdfError::new_generic)?; - unsafe { - mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr)) - .map(|ptr| Page { ptr, ctx: self.ctx }) - } - } -} - -impl<'ctx> Drop for Document<'ctx> { - fn drop(&mut self) { - unsafe { - fz_drop_document(self.ctx.0.get(), self.ptr); - } - } -} - -pub(crate) struct Page<'ctx> { - ptr: *mut fz_page, - ctx: ContextRef<'ctx>, -} - -impl<'ctx> Page<'ctx> { - pub(crate) fn ctx(&self) -> ContextRef<'ctx> { - self.ctx - } - pub(crate) fn run( - &self, - device: &Device<'ctx, T>, - ctm: fz_matrix, - ) -> Result<(), MuPdfError> { - unsafe { - mupdf_try(|errptr| { - mupdf_run_page( - self.ctx.0.get(), - self.ptr, - device.dev, - ctm, - ptr::null_mut(), - errptr, - ) - }) - } - } -} - -impl<'ctx> Drop for Page<'ctx> { - fn drop(&mut self) { - unsafe { - fz_drop_page(self.ctx.0.get(), self.ptr); - } - } -} - -pub(crate) struct Device<'ctx, T: 'ctx> { - dev: *mut fz_device, - ctx: ContextRef<'ctx>, - _phantom: PhantomData>>, -} - -pub(crate) trait DeviceCallbacks<'ctx> { - fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix); - fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix); - fn clip_path( - &self, - ctx: ContextRef<'ctx>, - path: &Path<'ctx>, - even_odd: bool, - ctm: fz_matrix, - scissor: fz_rect, - ); - fn clip_stroke_path( - &self, - ctx: ContextRef<'ctx>, - path: &Path<'ctx>, - ctm: fz_matrix, - scissor: fz_rect, - ); - fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); - fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); - fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect); - fn clip_stroke_text( - &self, - ctx: ContextRef<'ctx>, - text: &Text<'ctx>, - ctm: fz_matrix, - scissor: fz_rect, - ); - fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); -} - -impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { - pub(crate) fn new(ctx: impl Into>, value: Box) -> Result { - let ctx = ctx.into(); - unsafe { - let dev_ptr = mupdf_try(|errptr| { - mupdf_new_derived_device::>( - ctx.0.get(), - c"parse_powerisa_pdf::mupdf_ffi::Device", - errptr, - ) - })?; - let retval = Device { - dev: dev_ptr.cast(), - ctx, - _phantom: PhantomData, - }; - (&raw mut (*dev_ptr).value).write(value); - let fz_device { - drop_device, - fill_path, - stroke_path, - clip_path, - clip_stroke_path, - fill_text, - stroke_text, - clip_text, - clip_stroke_text, - ignore_text, - .. - } = &mut (*dev_ptr).base; - *drop_device = Some(Self::drop_device_fn); - *fill_path = Some(Self::fill_path_fn); - *stroke_path = Some(Self::stroke_path_fn); - *clip_path = Some(Self::clip_path_fn); - *clip_stroke_path = Some(Self::clip_stroke_path_fn); - *fill_text = Some(Self::fill_text_fn); - *stroke_text = Some(Self::stroke_text_fn); - *clip_text = Some(Self::clip_text_fn); - *clip_stroke_text = Some(Self::clip_stroke_text_fn); - *ignore_text = Some(Self::ignore_text_fn); - Ok(retval) - } - } - pub(crate) fn get(&self) -> &T { - unsafe { &(*self.dev.cast::>()).value } - } - unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) { - unsafe { - (&raw mut (*dev.cast::>()).value).drop_in_place(); - } - } - unsafe extern "C" fn fill_path_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - path: *const fz_path, - even_odd: c_int, - ctm: fz_matrix, - _color_space: *mut fz_colorspace, - _color: *const f32, - _alpha: f32, - _color_params: fz_color_params, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.fill_path( - ctx, - &ManuallyDrop::new(Path { - ptr: path.cast_mut(), - ctx, - }), - even_odd != 0, - ctm, - ); - } - unsafe extern "C" fn stroke_path_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - path: *const fz_path, - _stroke_state: *const fz_stroke_state, - ctm: fz_matrix, - _color_space: *mut fz_colorspace, - _color: *const f32, - _alpha: f32, - _color_params: fz_color_params, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.stroke_path( - ctx, - &ManuallyDrop::new(Path { - ptr: path.cast_mut(), - ctx, - }), - ctm, - ); - } - unsafe extern "C" fn clip_path_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - path: *const fz_path, - even_odd: ::std::os::raw::c_int, - ctm: fz_matrix, - scissor: fz_rect, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.clip_path( - ctx, - &ManuallyDrop::new(Path { - ptr: path.cast_mut(), - ctx, - }), - even_odd != 0, - ctm, - scissor, - ); - } - unsafe extern "C" fn clip_stroke_path_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - path: *const fz_path, - _stroke_state: *const fz_stroke_state, - ctm: fz_matrix, - scissor: fz_rect, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.clip_stroke_path( - ctx, - &ManuallyDrop::new(Path { - ptr: path.cast_mut(), - ctx, - }), - ctm, - scissor, - ); - } - unsafe extern "C" fn fill_text_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - text: *const fz_text, - ctm: fz_matrix, - _color_space: *mut fz_colorspace, - _color: *const f32, - _alpha: f32, - _color_params: fz_color_params, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.fill_text( - ctx, - &ManuallyDrop::new(Text { - ptr: text.cast_mut(), - ctx, - }), - ctm, - ); - } - unsafe extern "C" fn stroke_text_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - text: *const fz_text, - _stroke_state: *const fz_stroke_state, - ctm: fz_matrix, - _color_space: *mut fz_colorspace, - _color: *const f32, - _alpha: f32, - _color_params: fz_color_params, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.stroke_text( - ctx, - &ManuallyDrop::new(Text { - ptr: text.cast_mut(), - ctx, - }), - ctm, - ); - } - unsafe extern "C" fn clip_text_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - text: *const fz_text, - ctm: fz_matrix, - scissor: fz_rect, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.clip_text( - ctx, - &ManuallyDrop::new(Text { - ptr: text.cast_mut(), - ctx, - }), - ctm, - scissor, - ); - } - unsafe extern "C" fn clip_stroke_text_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - text: *const fz_text, - _stroke_state: *const fz_stroke_state, - ctm: fz_matrix, - scissor: fz_rect, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.clip_stroke_text( - ctx, - &ManuallyDrop::new(Text { - ptr: text.cast_mut(), - ctx, - }), - ctm, - scissor, - ); - } - unsafe extern "C" fn ignore_text_fn( - ctx: *mut fz_context, - dev: *mut fz_device, - text: *const fz_text, - ctm: fz_matrix, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut (*dev.cast::>()).value }; - this.ignore_text( - ctx, - &ManuallyDrop::new(Text { - ptr: text.cast_mut(), - ctx, - }), - ctm, - ); - } -} - -impl<'ctx, T> Drop for Device<'ctx, T> { - fn drop(&mut self) { - unsafe { - // FIXME: fz_close_device may throw exceptions - // fz_close_device(self.ctx.0.get(), self.dev); - fz_drop_device(self.ctx.0.get(), self.dev); - } - } -} - -#[repr(C)] -struct DeviceStruct { - base: fz_device, - value: Box, -} - -pub(crate) trait PathWalker<'ctx> { - fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32); - fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32); - fn curve_to( - &mut self, - ctx: ContextRef<'ctx>, - x1: f32, - y1: f32, - x2: f32, - y2: f32, - x3: f32, - y3: f32, - ); - fn close_path(&mut self, ctx: ContextRef<'ctx>); - fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) { - self.move_to(ctx, x1, y1); - self.move_to(ctx, x2, y1); - self.move_to(ctx, x2, y2); - self.move_to(ctx, x1, y2); - self.close_path(ctx); - } -} - -impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T { - fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) { - T::move_to(self, ctx, x, y); - } - - fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) { - T::line_to(self, ctx, x, y); - } - - fn curve_to( - &mut self, - ctx: ContextRef<'ctx>, - x1: f32, - y1: f32, - x2: f32, - y2: f32, - x3: f32, - y3: f32, - ) { - T::curve_to(self, ctx, x1, y1, x2, y2, x3, y3); - } - - fn close_path(&mut self, ctx: ContextRef<'ctx>) { - T::close_path(self, ctx); - } - - fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) { - T::rect_to(self, ctx, x1, y1, x2, y2); - } -} - -pub(crate) struct Path<'ctx> { - ptr: *mut fz_path, - ctx: ContextRef<'ctx>, -} - -impl<'ctx> Path<'ctx> { - pub(crate) fn walk>(&self, mut walker: W) { - unsafe { - fz_walk_path( - self.ctx.0.get(), - self.ptr, - const { - &fz_path_walker { - moveto: Some(Self::move_to_fn::), - lineto: Some(Self::line_to_fn::), - curveto: Some(Self::curve_to_fn::), - closepath: Some(Self::close_path_fn::), - quadto: None, - curvetov: None, - curvetoy: None, - rectto: Some(Self::rect_to_fn::), - } - }, - (&raw mut walker).cast(), - ); - } - } - unsafe extern "C" fn move_to_fn>( - ctx: *mut fz_context, - arg: *mut c_void, - x: f32, - y: f32, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut *arg.cast::() }; - this.move_to(ctx, x, y); - } - unsafe extern "C" fn line_to_fn>( - ctx: *mut fz_context, - arg: *mut c_void, - x: f32, - y: f32, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut *arg.cast::() }; - this.line_to(ctx, x, y); - } - unsafe extern "C" fn curve_to_fn>( - ctx: *mut fz_context, - arg: *mut c_void, - x1: f32, - y1: f32, - x2: f32, - y2: f32, - x3: f32, - y3: f32, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut *arg.cast::() }; - this.curve_to(ctx, x1, y1, x2, y2, x3, y3); - } - unsafe extern "C" fn close_path_fn>( - ctx: *mut fz_context, - arg: *mut c_void, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut *arg.cast::() }; - this.close_path(ctx); - } - unsafe extern "C" fn rect_to_fn>( - ctx: *mut fz_context, - arg: *mut c_void, - x1: f32, - y1: f32, - x2: f32, - y2: f32, - ) { - let ctx = unsafe { ContextRef::from_ptr(ctx) }; - let this = unsafe { &mut *arg.cast::() }; - this.rect_to(ctx, x1, y1, x2, y2); - } -} - -impl<'ctx> Drop for Path<'ctx> { - fn drop(&mut self) { - unsafe { - fz_drop_path(self.ctx.0.get(), self.ptr); - } - } -} - -pub(crate) struct Text<'ctx> { - ptr: *mut fz_text, - ctx: ContextRef<'ctx>, -} - -impl<'ctx> Drop for Text<'ctx> { - fn drop(&mut self) { - unsafe { - fz_drop_text(self.ctx.0.get(), self.ptr); - } - } -} - -impl<'ctx> Text<'ctx> { - pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> { - TextSpanIter { - ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) }, - ctx: self.ctx, - _phantom: PhantomData, - } - } -} - -#[derive(Clone)] -pub(crate) struct TextSpanIter<'a, 'ctx> { - ptr: Option<&'a UnsafeCell>, - ctx: ContextRef<'ctx>, - _phantom: PhantomData<&'a Text<'ctx>>, -} - -impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> { - type Item = TextSpanRef<'a, 'ctx>; - - fn next(&mut self) -> Option { - let ptr = self.ptr?; - self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) }; - Some(TextSpanRef { - ptr, - ctx: self.ctx, - _phantom: PhantomData, - }) - } -} - -#[derive(Copy, Clone)] -pub(crate) struct TextSpanRef<'a, 'ctx> { - ptr: &'a UnsafeCell, - ctx: ContextRef<'ctx>, - _phantom: PhantomData<&'a Text<'ctx>>, -} - -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub(crate) enum WriteMode { - Horizontal, - Vertical, -} - -impl<'a, 'ctx> TextSpanRef<'a, 'ctx> { - pub(crate) fn get(self) -> &'a UnsafeCell { - self.ptr - } - pub(crate) fn font(self) -> FontRef<'a, 'ctx> { - FontRef { - ptr: unsafe { &*(*self.ptr.get()).font.cast::>() }, - ctx: self.ctx, - _phantom: PhantomData, - } - } - pub(crate) fn trm(self) -> fz_matrix { - unsafe { (*self.ptr.get()).trm } - } - pub(crate) fn write_mode(self) -> WriteMode { - if unsafe { (*self.ptr.get()).wmode() != 0 } { - WriteMode::Vertical - } else { - WriteMode::Horizontal - } - } - pub(crate) fn items(self) -> &'a [fz_text_item] { - let len = unsafe { (*self.ptr.get()).len } as usize; - if len == 0 { - return &[]; - } - unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) } - } -} - -#[derive(Clone, Copy)] -pub(crate) struct FontRef<'a, 'ctx> { - ptr: &'a UnsafeCell, - ctx: ContextRef<'ctx>, - _phantom: PhantomData<&'a Text<'ctx>>, -} - -impl<'a, 'ctx> FontRef<'a, 'ctx> { - pub(crate) fn get(self) -> &'a UnsafeCell { - self.ptr - } - pub(crate) fn name(self) -> &'a str { - unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) } - .to_str() - .expect("font name isn't valid UTF-8") - } - pub(crate) fn is_bold(self) -> bool { - unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 } - } - pub(crate) fn is_italic(self) -> bool { - unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 } - } - pub(crate) fn ascender(self) -> f32 { - unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) } - } - pub(crate) fn descender(self) -> f32 { - unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) } - } -} - -pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point { - unsafe { fz_transform_point(point, m) } -} - -pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point { - unsafe { fz_transform_point_xy(x, y, m) } -} - -pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point { - unsafe { fz_transform_vector(vector, m) } -} - -pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 { - unsafe { fz_matrix_expansion(m) } -} - -pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix { - unsafe { fz_concat(left, right) } -} - -pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point { - fz_point { - x: a.x + b.x, - y: a.y + b.y, - } -} - -pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point { - fz_point { - x: a.x.min(b.x), - y: a.y.min(b.y), - } -} - -pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point { - fz_point { - x: a.x.max(b.x), - y: a.y.max(b.y), - } -}