diff --git a/Cargo.lock b/Cargo.lock
index 4de1b68..4321809 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -185,7 +185,6 @@ dependencies = [
"libm",
"mupdf-sys",
"quick-xml",
- "serde",
]
[[package]]
@@ -210,7 +209,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
- "serde",
]
[[package]]
@@ -257,36 +255,6 @@ version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
-[[package]]
-name = "serde"
-version = "1.0.228"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
-dependencies = [
- "serde_core",
- "serde_derive",
-]
-
-[[package]]
-name = "serde_core"
-version = "1.0.228"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.228"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
[[package]]
name = "shlex"
version = "1.3.0"
diff --git a/Cargo.toml b/Cargo.toml
index 09de0ba..21175d6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,5 +14,4 @@ rust-version = "1.89.0"
indexmap = "2.12.1"
libm = "0.2.15"
mupdf-sys = { version = "0.5.0", default-features = false }
-quick-xml = { version = "0.38.4", features = ["serialize"] }
-serde = { version = "1.0.228", features = ["derive"] }
+quick-xml = "0.38.4"
diff --git a/README.md b/README.md
index 7fd652f..f589559 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,35 @@ See Notices.txt for copyright information
-->
parser for the OPF PowerISA 3.1C pdf to attempt to extract all instructions' pseudo-code including subscripts/superscripts and other formatting
+# Using the new Rust code:
+
+Usage:
+* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
+
+* Install Rust -- you need version 1.89.0 or later.
+
+ Getting it from https://rustup.rs/ is recommended.
+
+* Install required build dependencies:
+
+ On Debian 12:
+
+ ```bash
+ sudo apt update
+ sudo apt install build-essential clang unzip
+ ```
+
+* Compile and run:
+
+ ```bash
+ cargo run -- path/to/downloaded/OPF_PowerISA_v3.1C.pdf > out.log
+ ```
+
+* This will spit out lots of errors and then successfully create
+ the output file -- `powerisa-instructions.xml` in the current directory.
+
+# Using the old Python code:
+
Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from
* Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/))
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..0ea6a19
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,3828 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// See Notices.txt for copyright information
+
+use crate::{
+ mupdf_ffi::{
+ MuPdfError, WriteMode, add_points, point_max_components, point_min_components,
+ transform_vector,
+ },
+ quad_tree::QuadTree,
+};
+use indexmap::IndexSet;
+use mupdf_sys::{fz_matrix, fz_point, fz_text_item};
+use non_nan_float::NonNaNF32;
+use std::{
+ backtrace::Backtrace,
+ cell::RefCell,
+ collections::{BTreeMap, BTreeSet, HashMap, HashSet},
+ convert::Infallible,
+ fmt,
+ num::NonZero,
+ ops::ControlFlow,
+ rc::Rc,
+ sync::OnceLock,
+};
+
+mod mupdf_ffi;
+mod quad_tree;
+mod xml_tree;
+
+mod non_nan_float { // `f32` wrapper that can never hold NaN, which lets it implement `Eq`, `Ord` and `Hash`
+    #[derive(Default, PartialEq, PartialOrd, Clone, Copy)]
+    pub(crate) struct NonNaNF32(f32);
+
+    impl std::fmt::Debug for NonNaNF32 {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            self.0.fmt(f) // formats exactly like the wrapped `f32`
+        }
+    }
+
+    impl std::fmt::Display for NonNaNF32 {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            self.0.fmt(f) // formats exactly like the wrapped `f32`
+        }
+    }
+
+    impl NonNaNF32 {
+        pub(crate) const fn new(v: f32) -> Option<Self> { // returns `None` exactly when `v` is NaN
+            if v.is_nan() { None } else { Some(Self(v)) }
+        }
+        pub(crate) const fn get(self) -> f32 {
+            self.0
+        }
+        pub(crate) const fn min(self, other: Self) -> Self {
+            Self(self.0.min(other.0)) // both inputs are non-NaN, so the result is too
+        }
+        pub(crate) const fn max(self, other: Self) -> Self {
+            Self(self.0.max(other.0)) // both inputs are non-NaN, so the result is too
+        }
+    }
+
+    impl std::hash::Hash for NonNaNF32 {
+        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+            if self.0 == 0.0 { 0.0 } else { self.0 } // -0.0 == 0.0, so both zeros must hash alike
+                .to_bits()
+                .hash(state);
+        }
+    }
+
+    impl Eq for NonNaNF32 {}
+
+    impl Ord for NonNaNF32 {
+        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+            self.partial_cmp(other).expect("known to be non-NaN") // only NaN makes `partial_cmp` return `None`
+        }
+    }
+
+    impl std::ops::Neg for NonNaNF32 {
+        type Output = Self;
+
+        fn neg(self) -> Self::Output {
+            Self(-self.0) // negating a non-NaN value never produces NaN
+        }
+    }
+}
+
+const fn str_eq(a: &str, b: &str) -> bool {
+    // Byte-wise string equality written as a `const fn` so it can run in `const` contexts.
+    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
+    if lhs.len() != rhs.len() {
+        return false;
+    }
+    let mut remaining = lhs.len();
+    while remaining > 0 {
+        remaining -= 1;
+        if lhs[remaining] != rhs[remaining] {
+            return false;
+        }
+    }
+    true
+}
+
+macro_rules! make_enum_font { // generates the font enum, `KnownFontGroup`, lookup helpers and compile-time self-checks
+    (
+        enum $Font:ident {
+            #[other]
+            $Other:ident $other_body:tt,
+            $(#[group]
+            $KnownFontGroup:ident {
+                $(#[name_with_tag = $known_font_name_with_tag:literal, size = $known_font_size:literal]
+                $KnownFont:ident,)*
+            },)*
+        }
+    ) => {
+        #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Clone)]
+        enum $Font {
+            $Other $other_body,
+            $($($KnownFont,)*)*
+        }
+
+        #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Copy, Clone)]
+        enum KnownFontGroup {
+            $($KnownFontGroup,)*
+        }
+
+        impl KnownFontGroup {
+            const fn fonts(self) -> &'static [Font] { // every known font declared inside this group
+                match self {
+                    $(Self::$KnownFontGroup => &[$(Font::$KnownFont,)*],)*
+                }
+            }
+            const INSN_CODE_FONT_GROUPS: &[Self] = &[Self::InsnCode, Self::InsnCodeSubscript];
+        }
+
+        impl $Font {
+            const fn extract_font_name_from_font_name_with_tag(font_name_with_tag: &str) -> &str {
+                if let [b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'A'..=b'Z',b'+',_,..] = font_name_with_tag.as_bytes() {
+                    font_name_with_tag.split_at(7).1 // drop the six-letter tag and the `+`
+                } else {
+                    panic!("invalid font name with id")
+                }
+            }
+            const fn known_from_name_with_tag(font_name_with_tag: &str, size: NonNaNF32) -> Option<Self> {
+                match size.get() { // the size must equal the declared literal exactly
+                    $($($known_font_size if str_eq(font_name_with_tag, $known_font_name_with_tag) => Some(Self::$KnownFont),)*)*
+                    _ => None,
+                }
+            }
+            const fn new_known(font_name: &str, size: NonNaNF32) -> Option<Self> { // like above, but `font_name` has no tag prefix
+                match size.get() {
+                    $($($known_font_size if str_eq(font_name, const {
+                        Self::extract_font_name_from_font_name_with_tag($known_font_name_with_tag)
+                    }) => Some(Self::$KnownFont),)*)*
+                    _ => None,
+                }
+            }
+            #[allow(dead_code)]
+            fn new(font_name: &str, size: NonNaNF32) -> Self { // known font if one matches, otherwise the catch-all variant
+                if let Some(v) = Self::new_known(font_name, size) {
+                    v
+                } else {
+                    Self::Other {
+                        font_name: Box::from(font_name),
+                        size,
+                    }
+                }
+            }
+            const fn size(&self) -> f32 {
+                match *self {
+                    Self::$Other { size, .. } => size.get(),
+                    $($(Self::$KnownFont => $known_font_size,)*)*
+                }
+            }
+            const fn font_name(&self) -> &str { // always the name without the tag prefix
+                match self {
+                    Self::$Other { font_name, .. } => font_name,
+                    $($(Self::$KnownFont => const { Self::extract_font_name_from_font_name_with_tag($known_font_name_with_tag) },)*)*
+                }
+            }
+            const fn known_font_group(&self) -> Option<KnownFontGroup> { // `None` only for the catch-all variant
+                match self {
+                    Self::$Other { .. } => None,
+                    $($(Self::$KnownFont => Some(KnownFontGroup::$KnownFontGroup),)*)*
+                }
+            }
+            const fn line_height(&self) -> f32 { // known fonts get theirs computed at compile time
+                match self {
+                    Self::$Other { .. } => self.line_height_helper(),
+                    $($(Self::$KnownFont => const { Self::$KnownFont.line_height_helper() },)*)*
+                }
+            }
+        }
+
+        const _: () = { // compile-time check: each declared font must be found again by `new_known`
+            $($(
+                let (known_font_name, known_font) = const {
+                    let known_font_name = Font::extract_font_name_from_font_name_with_tag($known_font_name_with_tag);
+                    (known_font_name, &Font::new_known(known_font_name, NonNaNF32::new($known_font_size).unwrap()).unwrap())
+                };
+                assert!(str_eq(known_font_name, known_font.font_name()));
+                assert!(matches!(known_font, Font::$KnownFont));
+            )*)*
+        };
+    };
+}
+
+make_enum_font! { // every (tagged font name, size) pair the parser recognises, grouped by role
+    enum Font {
+        #[other]
+        Other { // fallback for any (font name, size) pair not listed below
+            font_name: Box<str>,
+            size: NonNaNF32,
+        },
+        #[group]
+        InsnHeader {
+            #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963]
+            InsnHeader,
+        },
+        #[group]
+        RtlFnHeader {
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 9.963]
+            RtlFnHeader,
+        },
+        #[group]
+        PageHeader {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 9.963]
+            PageHeader,
+        },
+        #[group]
+        PageFooter {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.981]
+            PageFooter,
+        },
+        #[group]
+        InsnDesc {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 8.966]
+            InsnDesc0,
+            #[name_with_tag = "FZTIYT+CMMI9", size = 8.966]
+            InsnDesc1,
+            #[name_with_tag = "ONUAYC+CMSSI9", size = 8.966]
+            InsnDesc2,
+            #[name_with_tag = "TNGBFZ+CMSY9", size = 8.966]
+            InsnDesc3,
+            #[name_with_tag = "WHMZPU+CMEX9", size = 8.966]
+            InsnDesc4,
+            #[name_with_tag = "ZJTMSG+CMSS9", size = 8.966]
+            InsnDesc5,
+        },
+        #[group]
+        InsnDescMisc {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 2.377]
+            InsnDescMisc0,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 2.561]
+            InsnDescMisc1,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.492]
+            InsnDescMisc2,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.641]
+            InsnDescMisc3,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.772]
+            InsnDescMisc4,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.864]
+            InsnDescMisc5,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 4.925]
+            InsnDescMisc6,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.097]
+            InsnDescMisc7,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.123]
+            InsnDescMisc8,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.131]
+            InsnDescMisc9,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.516]
+            InsnDescMisc10,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.604]
+            InsnDescMisc11,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.634]
+            InsnDescMisc12,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.906]
+            InsnDescMisc13,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.033]
+            InsnDescMisc14,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.068]
+            InsnDescMisc15,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.213]
+            InsnDescMisc16,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.238]
+            InsnDescMisc17,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.252]
+            InsnDescMisc18,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.962]
+            InsnDescMisc19,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 7.977]
+            InsnDescMisc20,
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 8.506]
+            InsnDescMisc21,
+        },
+        #[group]
+        InsnDescCode {
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 6.974]
+            InsnDescCode,
+        },
+        #[group]
+        InsnDescCodeMisc {
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 3.587]
+            InsnDescCodeMisc0,
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 4.483]
+            InsnDescCodeMisc1,
+        },
+        #[group]
+        InsnDescItalic {
+            #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966]
+            InsnDescItalic,
+        },
+        #[group]
+        InsnDescBold {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966]
+            InsnDescBold,
+        },
+        #[group]
+        InsnDescBoldItalic {
+            #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966]
+            InsnDescBoldItalic,
+        },
+        #[group]
+        InsnDescSmall {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 7.97]
+            InsnDescSmall,
+        },
+        #[group]
+        InsnDescSmallItalic {
+            #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97]
+            InsnDescSmallItalic,
+        },
+        #[group]
+        InsnDescSmallBold {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97]
+            InsnDescSmallBold,
+        },
+        #[group]
+        InsnDescSmallBoldItalic {
+            #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97]
+            InsnDescSmallBoldItalic,
+        },
+        #[group]
+        InsnDescBoldMisc {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.21]
+            InsnDescBoldMisc0,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399]
+            InsnDescBoldMisc1,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.763]
+            InsnDescBoldMisc2,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946]
+            InsnDescBoldMisc3,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949]
+            InsnDescBoldMisc4,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999]
+            InsnDescBoldMisc5,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065]
+            InsnDescBoldMisc6,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086]
+            InsnDescBoldMisc7,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183]
+            InsnDescBoldMisc8,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686]
+            InsnDescBoldMisc9,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744]
+            InsnDescBoldMisc10,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825]
+            InsnDescBoldMisc11,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842]
+            InsnDescBoldMisc12,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857]
+            InsnDescBoldMisc13,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979]
+            InsnDescBoldMisc14,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032]
+            InsnDescBoldMisc15,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112]
+            InsnDescBoldMisc16,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161]
+            InsnDescBoldMisc17,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206]
+            InsnDescBoldMisc18,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353]
+            InsnDescBoldMisc19,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378]
+            InsnDescBoldMisc20,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434]
+            InsnDescBoldMisc21,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595]
+            InsnDescBoldMisc22,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619]
+            InsnDescBoldMisc23,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647]
+            InsnDescBoldMisc24,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68]
+            InsnDescBoldMisc25,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693]
+            InsnDescBoldMisc26,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736]
+            InsnDescBoldMisc27,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.781]
+            InsnDescBoldMisc28,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802]
+            InsnDescBoldMisc29,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995]
+            InsnDescBoldMisc30,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201]
+            InsnDescBoldMisc31,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258]
+            InsnDescBoldMisc32,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363]
+            InsnDescBoldMisc33,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442]
+            InsnDescBoldMisc34,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473]
+            InsnDescBoldMisc35,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485]
+            InsnDescBoldMisc36,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512]
+            InsnDescBoldMisc37,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543]
+            InsnDescBoldMisc38,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613]
+            InsnDescBoldMisc39,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.744]
+            InsnDescBoldMisc40,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774]
+            InsnDescBoldMisc41,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809]
+            InsnDescBoldMisc42,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849]
+            InsnDescBoldMisc43,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911]
+            InsnDescBoldMisc44,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92]
+            InsnDescBoldMisc45,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962]
+            InsnDescBoldMisc46,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981]
+            InsnDescBoldMisc47,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146]
+            InsnDescBoldMisc48,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213]
+            InsnDescBoldMisc49,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221]
+            InsnDescBoldMisc50,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243]
+            InsnDescBoldMisc51,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55]
+            InsnDescBoldMisc52,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62]
+            InsnDescBoldMisc53,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.699]
+            InsnDescBoldMisc54,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725]
+            InsnDescBoldMisc55,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751]
+            InsnDescBoldMisc56,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856]
+            InsnDescBoldMisc57,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029]
+            InsnDescBoldMisc58,
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406]
+            InsnDescBoldMisc59,
+        },
+        #[group]
+        InsnDescSubscript {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 5.978]
+            InsnDescSubscript,
+        },
+        #[group]
+        InsnDescBoldSubscript {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978]
+            InsnDescBoldSubscript,
+        },
+        #[group]
+        InsnDescItalicSubscript {
+            #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978]
+            InsnDescItalicSubscript,
+        },
+        #[group]
+        InsnDescBoldItalicSubscript {
+            #[name_with_tag = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978]
+            InsnDescBoldItalicSubscript,
+        },
+        #[group]
+        InsnExtMnemonic {
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 8.966]
+            InsnExtMnemonic,
+        },
+        #[group]
+        InsnCode {
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 7.97]
+            InsnCode0,
+            #[name_with_tag = "RRFUNA+CMSY8", size = 7.97]
+            InsnCode1,
+            #[name_with_tag = "HPXOZC+CMSS8", size = 7.97]
+            InsnCode2,
+        },
+        #[group]
+        InsnCodeSubscript {
+            #[name_with_tag = "APUYSQ+zcoN-Regular", size = 5.978]
+            InsnCodeSubscript0,
+            #[name_with_tag = "DBQTKF+CMSY6", size = 5.978]
+            InsnCodeSubscript1,
+        },
+        #[group]
+        TitlePageBig {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787]
+            TitlePageBig,
+        },
+        #[group]
+        TitlePageVersion {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963]
+            TitlePageVersion,
+        },
+        #[group]
+        TitlePageTm {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974]
+            TitlePageTm,
+        },
+        #[group]
+        TitlePageRev {
+            #[name_with_tag = "MJBFWM+DejaVuSansCondensed", size = 6.974]
+            TitlePageRev,
+        },
+        #[group]
+        TitlePageBook {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663]
+            TitlePageBook,
+        },
+        #[group]
+        LegalPageItalic {
+            #[name_with_tag = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963]
+            LegalPageItalic,
+        },
+        #[group]
+        ChangeSummaryPageBold {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955]
+            ChangeSummaryPageBold,
+        },
+        #[group]
+        ChapterTitle {
+            #[name_with_tag = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215]
+            ChapterTitle,
+        },
+        #[group]
+        MathMisc {
+            #[name_with_tag = "AAJMKT+CMMI6", size = 5.978]
+            MathMisc0,
+            #[name_with_tag = "CUTMFD+CMSSI8", size = 5.978]
+            MathMisc1,
+            #[name_with_tag = "CUTMFD+CMSSI8", size = 7.97]
+            MathMisc2,
+            #[name_with_tag = "FZTIYT+CMMI9", size = 5.734]
+            MathMisc3,
+            #[name_with_tag = "FZTIYT+CMMI9", size = 7.168]
+            MathMisc4,
+            #[name_with_tag = "HONFQS+CMMI8", size = 7.97]
+            MathMisc5,
+            #[name_with_tag = "HPXOZC+CMSS8", size = 5.978]
+            MathMisc6,
+            #[name_with_tag = "LLVRDD+CMSY10", size = 11.955]
+            MathMisc7,
+            #[name_with_tag = "ZJTMSG+CMSS9", size = 7.168]
+            MathMisc8,
+        },
+    }
+}
+
+impl Font {
+    const fn space_width(&self) -> f32 { // InsnCode0's 3.985 space width, scaled by the size ratio
+        const WIDTH_PER_UNIT_SIZE: f32 = 3.985 / Font::InsnCode0.size();
+        self.size() * WIDTH_PER_UNIT_SIZE
+    }
+    const fn line_height_helper(&self) -> f32 { // panics for a font name outside the families checked below
+        let (name, size) = (self.font_name(), self.size());
+        let (mut group_idx, mut font_idx) = (0, 0);
+        while group_idx < KnownFontGroup::INSN_CODE_FONT_GROUPS.len() {
+            let fonts = KnownFontGroup::INSN_CODE_FONT_GROUPS[group_idx].fonts();
+            if font_idx == fonts.len() {
+                group_idx += 1;
+                font_idx = 0;
+            } else if str_eq(name, fonts[font_idx].font_name()) {
+                return 9.464 * size / Font::InsnCode0.size();
+            } else {
+                font_idx += 1;
+            }
+        }
+        let group = self.known_font_group();
+        let desc_like = matches!(group, Some(KnownFontGroup::InsnDesc | KnownFontGroup::MathMisc));
+        let desc_font_name = str_eq(name, Font::InsnDesc0.font_name())
+            || str_eq(name, Font::InsnDescBold.font_name())
+            || str_eq(name, Font::InsnDescItalic.font_name())
+            || str_eq(name, Font::InsnDescBoldItalic.font_name());
+        if !desc_like && !desc_font_name {
+            panic!("no line height")
+        }
+        10.959 * size / Font::InsnDesc0.size()
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] // derived ordering/hashing follow the field order below
+struct Char { // a piece of text together with its font and bounding box
+ font: Font,
+ text: String, // NOTE(review): a `String`, so possibly more than one code point per `Char` — confirm
+ min_x: NonNaNF32, // bounding-box edges; NOTE(review): coordinate system and units are not visible here — confirm
+ min_y: NonNaNF32,
+ max_x: NonNaNF32,
+ max_y: NonNaNF32,
+}
+
+impl Char {
+ #[allow(dead_code)]
+ fn width(&self) -> f32 { // horizontal extent of the bounding box
+ self.max_x.get() - self.min_x.get()
+ }
+ #[allow(dead_code)]
+ fn height(&self) -> f32 { // vertical extent of the bounding box
+ self.max_y.get() - self.min_y.get()
+ }
+ fn top_down_left_to_right_sort_key(&self) -> impl Ord + use<> { // `use<>`: the returned key borrows nothing from `self`
+ (-self.min_y, self.min_x) // ascending key order = descending `min_y`, then ascending `min_x`
+ }
+}
+
+const COLUMN_SPLIT_X: f32 = 300.0; // layout constants follow; NOTE(review): presumably page coordinates measured from the 3.1C PDF — confirm units
+const PAGE_BODY_MAX_X: f32 = 600.0; // PAGE_BODY_* : NOTE(review): presumably the rectangle treated as page body — confirm against users
+const PAGE_BODY_MIN_X: f32 = 50.0;
+const PAGE_BODY_MAX_Y: f32 = 780.0;
+const PAGE_BODY_MIN_Y: f32 = 45.0;
+const ONE_TITLE_LINE_SPLIT_Y: f32 = 734.0; // NOTE(review): presumably the y split below a one-line page title — confirm
+const TWO_TITLE_LINES_SPLIT_Y: f32 = 715.0; // NOTE(review): same, for a two-line page title — confirm
+const INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT: f32 = 29.938; // INSN_BIT_FIELDS_* : vertical distances; exact use is not visible in this chunk
+const INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT: f32 = 9.278;
+const INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT: f32 = 20.971;
+const INSN_BIT_FIELDS_TOP_PAD_HEIGHT: f32 = 20.175;
+const INSN_BIT_FIELDS_TOP_PAD_HEIGHT2: f32 = 14.694;
+const INSN_BIT_FIELDS_BOX_HEIGHT: f32 = 22.317;
+#[allow(dead_code)] // the constant below is kept even if nothing reads it
+const INSN_SP_REGS_ALTERED_REGISTER_COLUMN_X: f32 = 34.405; // INSN_SP_REGS_ALTERED_* : x positions; NOTE(review): reference origin not visible here — confirm
+const INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X: f32 = 86.692;
+const INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X: f32 = 188.74;
+
+#[derive(Clone)]
+struct ParsedTextLine { // one parsed line of text: its XML form plus layout metadata
+    element: xml_tree::Element,
+    regular_min_y: f32,
+    regular_max_y: f32,
+    fonts: TextLineFonts, // which set of fonts this line was parsed with
+    chars: Vec<Char>,
+    preceding_blank_lines: u32, // written out as leading `br` elements by `write_xml` and as "\n" by `Display`
+}
+
+impl ParsedTextLine {
+    #[allow(dead_code)]
+    fn regular_height(&self) -> f32 { // `regular_max_y - regular_min_y`
+        self.regular_max_y - self.regular_min_y
+    }
+    fn get_header_text(&self) -> Option<String> { // `Some` only for a line that is a single childless `<b>` reading like "Title:"
+        assert_eq!(self.fonts, TextLineFonts::InsnDescFonts);
+        if !self.element.text.trim().is_empty() {
+            return None;
+        }
+        if !self.element.tail.trim().is_empty() {
+            return None;
+        }
+        let [b] = &*self.element.children else {
+            return None;
+        };
+        if b.tag.normal() != Some("b") {
+            return None;
+        }
+        if !b.children.is_empty() {
+            return None;
+        }
+        let text = self.element.inner_text();
+        // should also check titlecase, but rust doesn't include that in std
+        if text.ends_with(":") && text.chars().next().is_some_and(|ch| ch.is_uppercase()) {
+            Some(text)
+        } else {
+            None
+        }
+    }
+    fn write_xml(&self, parent: &mut xml_tree::Element, trailing_nl: bool) { // appends this line's content to `parent`
+        for _ in 0..self.preceding_blank_lines {
+            parent.sub_element("br".into(), []).tail = "\n".into();
+        }
+        if let Some(last_child) = parent.children.last_mut() { // leading text goes after the last existing child, if any
+            last_child.tail += &self.element.text;
+        } else {
+            parent.text += &self.element.text;
+        }
+        parent.children.extend_from_slice(&self.element.children);
+        if trailing_nl {
+            parent.sub_element("br".into(), []).tail = "\n".into();
+        }
+    }
+    fn write_xml_lines(
+        lines: impl IntoIterator<Item = impl std::borrow::Borrow<ParsedTextLine>>,
+        parent: &mut xml_tree::Element,
+        trailing_nl: bool,
+        preceding_nl: bool,
+    ) {
+        if preceding_nl {
+            parent.sub_element("br".into(), []).tail = "\n".into();
+        }
+        let mut first = true;
+        for line in lines {
+            let line = std::borrow::Borrow::borrow(&line);
+            if first { // a `br` separator goes between lines, not before the first one
+                first = false;
+            } else {
+                parent.sub_element("br".into(), []).tail = "\n".into();
+            }
+            line.write_xml(parent, false);
+        }
+        if trailing_nl {
+            parent.sub_element("br".into(), []).tail = "\n".into();
+        }
+    }
+}
+
+impl fmt::Debug for ParsedTextLine {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Destructure exhaustively so a newly added field cannot be forgotten here.
+        let Self {
+            element: xml,
+            regular_min_y: min_y,
+            regular_max_y: max_y,
+            fonts: line_fonts,
+            chars: line_chars,
+            preceding_blank_lines: blank_lines,
+        } = self;
+        let mut out = f.debug_struct("ParsedTextLine");
+        out.field("element", &format_args!("{}", xml)); // shown through `Display`, not `Debug`
+        out.field("regular_min_y", min_y);
+        out.field("regular_max_y", max_y);
+        out.field("fonts", line_fonts).field("chars", line_chars);
+        out.field("preceding_blank_lines", blank_lines);
+        out.finish()
+    }
+}
+
+impl fmt::Display for ParsedTextLine {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // One "\n" per preceding blank line, then the element's own `Display` output.
+        let mut blank_lines = 0..self.preceding_blank_lines;
+        blank_lines.try_for_each(|_| f.write_str("\n"))?;
+        fmt::Display::fmt(&self.element, f)
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+enum BaselinePos { // position relative to a baseline; NOTE(review): presumably tells superscript from subscript — confirm
+ Above,
+ Below,
+}
+
+macro_rules! make_enum_with_values { // declares a field-less enum plus a `VALUES` slice listing all of its variants
+ (
+ $(#[$enum_meta:meta])*
+ enum $Enum:ident {
+ $($Variant:ident,)*
+ }
+ ) => {
+ $(#[$enum_meta])* // attributes are forwarded unchanged onto the generated enum
+ enum $Enum {
+ $($Variant,)*
+ }
+
+ impl $Enum {
+ const VALUES: &[Self] = &[$(Self::$Variant,)*]; // every variant, in declaration order
+ }
+ };
+}
+
+make_enum_with_values! { // also generates `TextLineFonts::VALUES`
+ #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+ enum TextLineFonts { // each variant selects the fonts accepted per text style (see `impl TextLineFonts`)
+ InsnMnemonicFonts,
+ InsnHeaderFonts,
+ InsnBitFieldBitNumberFonts,
+ InsnBitFieldNameFonts,
+ InsnBitFieldsAffixTitleFonts,
+ InsnCodeFonts,
+ InsnDescFonts,
+ }
+}
+
+impl TextLineFonts {
+ fn regular(self) -> &'static [Font] {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => KnownFontGroup::InsnDesc.fonts(),
+ TextLineFonts::InsnHeaderFonts => &[Font::InsnHeader],
+ TextLineFonts::InsnBitFieldBitNumberFonts => &[Font::InsnDescSmall, Font::TitlePageRev],
+ TextLineFonts::InsnBitFieldNameFonts => KnownFontGroup::InsnDesc.fonts(),
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => &[Font::InsnDescSmall],
+ TextLineFonts::InsnCodeFonts => KnownFontGroup::InsnCode.fonts(),
+ TextLineFonts::InsnDescFonts => {
+ static FONTS: OnceLock> = OnceLock::new();
+ FONTS.get_or_init(|| {
+ Box::from_iter(
+ KnownFontGroup::InsnDesc
+ .fonts()
+ .iter()
+ .cloned()
+ .chain([Font::InsnDescSmall]),
+ )
+ })
+ }
+ }
+ }
+ fn italic(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => {
+ Some(&[Font::InsnDescItalic, Font::InsnDescSmallItalic])
+ }
+ }
+ }
+ fn bold(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => Some(&[Font::InsnDescSmallBold]),
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescBold, Font::InsnDescSmallBold]),
+ }
+ }
+ fn bold_italic(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => {
+ Some(&[Font::InsnDescBoldItalic, Font::InsnDescSmallBoldItalic])
+ }
+ }
+ }
+ fn subscript(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => Some(&[Font::InsnDescSubscript]),
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => Some(KnownFontGroup::InsnCodeSubscript.fonts()),
+ TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescSubscript]),
+ }
+ }
+ fn bold_subscript(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescBoldSubscript]),
+ }
+ }
+ fn italic_subscript(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescItalicSubscript]),
+ }
+ }
+ fn bold_italic_subscript(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescBoldItalicSubscript]),
+ }
+ }
+ fn code(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescCode, Font::InsnExtMnemonic]),
+ }
+ }
+ fn code_subscript(self) -> Option<&'static [Font]> {
+ match self {
+ TextLineFonts::InsnMnemonicFonts => None,
+ TextLineFonts::InsnHeaderFonts => None,
+ TextLineFonts::InsnBitFieldBitNumberFonts => None,
+ TextLineFonts::InsnBitFieldNameFonts => None,
+ TextLineFonts::InsnBitFieldsAffixTitleFonts => None,
+ TextLineFonts::InsnCodeFonts => None,
+ TextLineFonts::InsnDescFonts => Some(KnownFontGroup::InsnCodeSubscript.fonts()),
+ }
+ }
+ fn get_fonts(
+ self,
+ part_kind: TextLineFontKind,
+ ) -> Option<(&'static [Font], Option)> {
+ let fonts = match part_kind {
+ TextLineFontKind::Regular => self.regular(),
+ TextLineFontKind::Italic => self.italic()?,
+ TextLineFontKind::Bold => self.bold()?,
+ TextLineFontKind::BoldItalic => self.bold_italic()?,
+ TextLineFontKind::Subscript => self.subscript()?,
+ TextLineFontKind::Superscript => self.subscript()?,
+ TextLineFontKind::BoldSubscript => self.bold_subscript()?,
+ TextLineFontKind::BoldSuperscript => self.bold_subscript()?,
+ TextLineFontKind::ItalicSubscript => self.italic_subscript()?,
+ TextLineFontKind::ItalicSuperscript => self.italic_subscript()?,
+ TextLineFontKind::BoldItalicSubscript => self.bold_italic_subscript()?,
+ TextLineFontKind::BoldItalicSuperscript => self.bold_italic_subscript()?,
+ TextLineFontKind::Code => self.code()?,
+ TextLineFontKind::CodeSubscript => self.code_subscript()?,
+ TextLineFontKind::CodeSuperscript => self.code_subscript()?,
+ };
+ Some((fonts, part_kind.sub_super().baseline_pos()))
+ }
+ /// Returns the cached lookup table from `(font, baseline position)` to
+ /// `TextLineFontKind` for this line kind.
+ ///
+ /// The tables for every entry of `Self::VALUES` are built together on first use and
+ /// stored in a `OnceLock`; later calls only index into the cached result.
+ ///
+ /// # Panics
+ ///
+ /// Panics while building the tables if one line kind would map the same
+ /// `(font, baseline position)` key to two different kinds.
+ fn font_to_kind_map(self) -> &'static HashMap<(Font, Option), TextLineFontKind> {
+ static MAPS: OnceLock<
+ HashMap), TextLineFontKind>>,
+ > = OnceLock::new();
+ &MAPS.get_or_init(|| {
+ Self::VALUES
+ .iter()
+ .map(|&this: &TextLineFonts| {
+ let mut map = HashMap::new();
+ for &kind in TextLineFontKind::VALUES {
+ // Kinds this line kind has no fonts for contribute nothing.
+ let Some((fonts, baseline_pos)) = this.get_fonts(kind) else {
+ continue;
+ };
+ for font in fonts {
+ let old_kind = map.insert((font.clone(), baseline_pos), kind);
+ // A key must identify exactly one kind, otherwise lookups would be ambiguous.
+ assert!(
+ old_kind.is_none(),
+ "duplicate font: kind={kind:?} old_kind={old_kind:?} font={font:?}"
+ );
+ }
+ }
+ (this, map)
+ })
+ .collect()
+ })[&self]
+ }
+ /// Returns the cached set of every font that `get_fonts` reports for this line kind,
+ /// across all entries of `TextLineFontKind::VALUES`.
+ ///
+ /// The sets for every entry of `Self::VALUES` are built together on first use and
+ /// stored in a `OnceLock`.
+ fn fonts(self) -> &'static HashSet {
+ static SETS: OnceLock>> = OnceLock::new();
+ &SETS.get_or_init(|| {
+ Self::VALUES
+ .iter()
+ .map(|&this: &TextLineFonts| {
+ let mut set = HashSet::new();
+ for &kind in TextLineFontKind::VALUES {
+ // The baseline position is irrelevant here; only the fonts are collected.
+ let Some((fonts, _baseline_pos)) = this.get_fonts(kind) else {
+ continue;
+ };
+ set.extend(fonts.iter().cloned());
+ }
+ (this, set)
+ })
+ .collect()
+ })[&self]
+ }
+ /// Looks up the kind registered for `font` on this line kind.
+ ///
+ /// The key `(font, Some(baseline_pos))` is tried first; if it is absent the lookup
+ /// falls back to `(font, None)`. Returns `None` when neither key is present.
+ fn get_kind(self, font: Font, baseline_pos: BaselinePos) -> Option {
+ let font_to_kind_map = self.font_to_kind_map();
+ font_to_kind_map
+ .get(&(font.clone(), Some(baseline_pos)))
+ .or_else(|| font_to_kind_map.get(&(font, None)))
+ .copied()
+ }
+}
+
+/// Code / not-code axis of a text style.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+enum FontVariantCode {
+    Code,
+    NotCode,
+}
+
+impl FontVariantCode {
+    /// Returns `["code"]` for [`Self::Code`] and an empty slice for [`Self::NotCode`].
+    const fn value(self) -> &'static [&'static str] {
+        const CODE_TAGS: &[&str] = &["code"];
+        match self {
+            FontVariantCode::NotCode => &[],
+            FontVariantCode::Code => CODE_TAGS,
+        }
+    }
+}
+
+/// Bold / not-bold axis of a text style.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+enum FontVariantBold {
+    Bold,
+    NotBold,
+}
+
+impl FontVariantBold {
+    /// Returns `["b"]` for [`Self::Bold`] and an empty slice for [`Self::NotBold`].
+    const fn value(self) -> &'static [&'static str] {
+        const BOLD_TAGS: &[&str] = &["b"];
+        match self {
+            FontVariantBold::NotBold => &[],
+            FontVariantBold::Bold => BOLD_TAGS,
+        }
+    }
+}
+
+/// Italic / not-italic axis of a text style.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+enum FontVariantItalic {
+    Italic,
+    NotItalic,
+}
+
+impl FontVariantItalic {
+    /// Returns `["i"]` for [`Self::Italic`] and an empty slice for [`Self::NotItalic`].
+    const fn value(self) -> &'static [&'static str] {
+        const ITALIC_TAGS: &[&str] = &["i"];
+        match self {
+            FontVariantItalic::NotItalic => &[],
+            FontVariantItalic::Italic => ITALIC_TAGS,
+        }
+    }
+}
+
+/// Subscript / superscript / neither axis of a text style.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+enum FontVariantSubSuper {
+    NotSubSuper,
+    Subscript,
+    Superscript,
+}
+
+impl FontVariantSubSuper {
+    /// Returns `["sub"]` for [`Self::Subscript`], `["sup"]` for [`Self::Superscript`]
+    /// and an empty slice for [`Self::NotSubSuper`].
+    const fn value(self) -> &'static [&'static str] {
+        const SUB_TAGS: &[&str] = &["sub"];
+        const SUP_TAGS: &[&str] = &["sup"];
+        match self {
+            FontVariantSubSuper::Subscript => SUB_TAGS,
+            FontVariantSubSuper::Superscript => SUP_TAGS,
+            FontVariantSubSuper::NotSubSuper => &[],
+        }
+    }
+}
+
+impl FontVariantSubSuper {
+ /// Maps this variant to a baseline position: `Below` for subscript, `Above` for
+ /// superscript, and `None` when the text is neither.
+ fn baseline_pos(self) -> Option {
+ match self {
+ FontVariantSubSuper::NotSubSuper => None,
+ FontVariantSubSuper::Subscript => Some(BaselinePos::Below),
+ FontVariantSubSuper::Superscript => Some(BaselinePos::Above),
+ }
+ }
+}
+
+// Declares `TextLineFontKind` through `make_enum_with_values!`. Other code in this file
+// iterates `TextLineFontKind::VALUES`, which is presumably generated by the macro from
+// the variants listed here (confirm against the macro definition).
+//
+// Each variant combines a base style (regular, italic, bold, bold-italic or code) with
+// an optional subscript/superscript position.
+make_enum_with_values! {
+ #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+ enum TextLineFontKind {
+ Regular,
+ Subscript,
+ Superscript,
+ Italic,
+ ItalicSubscript,
+ ItalicSuperscript,
+ Bold,
+ BoldSubscript,
+ BoldSuperscript,
+ BoldItalic,
+ BoldItalicSubscript,
+ BoldItalicSuperscript,
+ Code,
+ CodeSubscript,
+ CodeSuperscript,
+ }
+}
+
+impl TextLineFontKind {
+ /// Classifies this kind as code or not code.
+ ///
+ /// Only the three `Code*` kinds are code.
+ fn code(self) -> FontVariantCode {
+     match self {
+         Self::Code | Self::CodeSubscript | Self::CodeSuperscript => FontVariantCode::Code,
+         Self::BoldItalic
+         | Self::BoldItalicSubscript
+         | Self::BoldItalicSuperscript
+         | Self::Bold
+         | Self::BoldSubscript
+         | Self::BoldSuperscript
+         | Self::Italic
+         | Self::ItalicSubscript
+         | Self::ItalicSuperscript
+         | Self::Regular
+         | Self::Subscript
+         | Self::Superscript => FontVariantCode::NotCode,
+     }
+ }
+ /// Classifies this kind as bold or not bold.
+ ///
+ /// The `Bold*` and `BoldItalic*` kinds are bold; everything else, including the
+ /// `Code*` kinds, is not.
+ fn bold(self) -> FontVariantBold {
+     match self {
+         Self::Bold
+         | Self::BoldSubscript
+         | Self::BoldSuperscript
+         | Self::BoldItalic
+         | Self::BoldItalicSubscript
+         | Self::BoldItalicSuperscript => FontVariantBold::Bold,
+         Self::Regular
+         | Self::Subscript
+         | Self::Superscript
+         | Self::Italic
+         | Self::ItalicSubscript
+         | Self::ItalicSuperscript
+         | Self::Code
+         | Self::CodeSubscript
+         | Self::CodeSuperscript => FontVariantBold::NotBold,
+     }
+ }
+ /// Classifies this kind as italic or not italic.
+ ///
+ /// The `Italic*` and `BoldItalic*` kinds are italic; everything else, including the
+ /// `Code*` kinds, is not.
+ fn italic(self) -> FontVariantItalic {
+     match self {
+         Self::Italic
+         | Self::ItalicSubscript
+         | Self::ItalicSuperscript
+         | Self::BoldItalic
+         | Self::BoldItalicSubscript
+         | Self::BoldItalicSuperscript => FontVariantItalic::Italic,
+         Self::Regular
+         | Self::Subscript
+         | Self::Superscript
+         | Self::Bold
+         | Self::BoldSubscript
+         | Self::BoldSuperscript
+         | Self::Code
+         | Self::CodeSubscript
+         | Self::CodeSuperscript => FontVariantItalic::NotItalic,
+     }
+ }
+ /// Extracts the subscript/superscript component of this kind.
+ fn sub_super(self) -> FontVariantSubSuper {
+     match self {
+         Self::Subscript
+         | Self::ItalicSubscript
+         | Self::BoldSubscript
+         | Self::BoldItalicSubscript
+         | Self::CodeSubscript => FontVariantSubSuper::Subscript,
+         Self::Superscript
+         | Self::ItalicSuperscript
+         | Self::BoldSuperscript
+         | Self::BoldItalicSuperscript
+         | Self::CodeSuperscript => FontVariantSubSuper::Superscript,
+         Self::Regular | Self::Italic | Self::Bold | Self::BoldItalic | Self::Code => {
+             FontVariantSubSuper::NotSubSuper
+         }
+     }
+ }
+ /// Yields the `value()` strings of `code()`, `bold()`, `italic()` and `sub_super()`
+ /// for this kind, chained in that order.
+ fn text_line_tags(self) -> impl Clone + Iterator- {
+ self.code()
+ .value()
+ .iter()
+ .copied()
+ .chain(self.bold().value().iter().copied())
+ .chain(self.italic().value().iter().copied())
+ .chain(self.sub_super().value().iter().copied())
+ }
+}
+
+/// Incrementally fills the body of an XML element with text wrapped in nested tags.
+///
+/// `stack` holds the currently open elements, outermost first. An open element is not
+/// yet part of the tree: it is pushed onto its parent's `children` (the next element
+/// down the stack, or `containing_element` when the stack becomes empty) only when
+/// `shrink_stack` closes it.
+#[derive(Debug)]
+struct ElementBodyBuilder<'a> {
+    containing_element: &'a mut xml_tree::Element,
+    stack: Vec<xml_tree::Element>,
+}
+
+impl<'a> ElementBodyBuilder<'a> {
+    /// Creates a builder that writes into `containing_element`, with no tags open.
+    fn new(containing_element: &'a mut xml_tree::Element) -> Self {
+        Self {
+            containing_element,
+            stack: Vec::with_capacity(5),
+        }
+    }
+    /// Closes open elements, innermost first, until at most `new_len` remain open.
+    ///
+    /// Each closed element is appended to the children of whatever is the insert point
+    /// once it has been popped.
+    fn shrink_stack(&mut self, new_len: usize) {
+        while new_len < self.stack.len() {
+            let Some(element) = self.stack.pop() else {
+                // The loop condition guarantees the stack is non-empty here.
+                unreachable!();
+            };
+            self.insert_point().children.push(element);
+        }
+    }
+    /// Makes the open-tag stack equal to `tag_stack` (outermost tag first).
+    ///
+    /// Open elements that already match a leading run of `tag_stack` are kept so text
+    /// keeps flowing into them. From the first depth where the open tag differs, the
+    /// open elements are closed and fresh elements are opened for the remaining tags.
+    /// Anything still open beyond the end of `tag_stack` is closed as well.
+    fn set_tag_stack<'b>(&mut self, tag_stack: impl IntoIterator<Item = &'b str>) {
+        let mut new_len = 0;
+        for (i, tag) in tag_stack.into_iter().enumerate() {
+            new_len = i + 1;
+            if i < self.stack.len() && self.stack[i].tag.normal() != Some(tag) {
+                // The element open at this depth has the wrong tag: close it together
+                // with everything nested inside it. Closing only the deeper levels
+                // would leave the wrong tag open and never open `tag`.
+                self.shrink_stack(i);
+            }
+            if i >= self.stack.len() {
+                self.stack.push(xml_tree::Element::new(tag.into(), []));
+            }
+        }
+        self.shrink_stack(new_len);
+    }
+    /// Appends `text` at the current insert point.
+    ///
+    /// The text goes to the `tail` of the insert point's last child when there is one,
+    /// otherwise to the insert point's own `text`.
+    fn write_text(&mut self, text: impl std::borrow::Borrow<str>) {
+        let text = std::borrow::Borrow::borrow(&text);
+        let insert_point = self.insert_point();
+        if let Some(child) = insert_point.children.last_mut() {
+            child.tail += text;
+        } else {
+            insert_point.text += text;
+        }
+    }
+    /// Returns the innermost open element, or `containing_element` when none is open.
+    fn insert_point(&mut self) -> &mut xml_tree::Element {
+        self.stack.last_mut().unwrap_or(self.containing_element)
+    }
+    /// Runs `f` with this builder, then closes every tag that is still open.
+    fn scope<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
+        let retval = f(self);
+        self.flush();
+        retval
+    }
+    /// Closes every open tag, attaching the elements to the tree.
+    fn flush(&mut self) {
+        self.set_tag_stack([]);
+    }
+}
+
+/// One bit field: a horizontal extent (`box_min_x`..`box_max_x`) plus the parsed text
+/// lines for its name and its bit number.
+#[derive(Clone, Debug)]
+struct InsnBitField {
+ box_min_x: f32,
+ box_max_x: f32,
+ name: ParsedTextLine,
+ bit_number: ParsedTextLine,
+}
+
+impl fmt::Display for InsnBitField {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Destructuring every field makes this fail to compile when a field is added
+ // without updating the formatting here.
+ let Self {
+ box_min_x,
+ box_max_x,
+ name,
+ bit_number,
+ } = self;
+ // NOTE(review): the format string below is empty, so none of the destructured
+ // fields reach the output - confirm this is intended.
+ write!(
+ f,
+ ""
+ )
+ }
+}
+
+impl InsnBitField {
+    /// Appends a `field` element to `parent`, containing a `name` child followed by a
+    /// `bit-number` child, each holding the corresponding parsed text line.
+    fn write_xml(&self, parent: &mut xml_tree::Element) {
+        let field_elm = parent.sub_element("field".into(), []);
+        field_elm.text = "\n".into();
+        field_elm.tail = "\n".into();
+        // Both children are written the same way; only the tag and the source line differ.
+        for (tag, line) in [("name", &self.name), ("bit-number", &self.bit_number)] {
+            let child = field_elm.sub_element(tag.into(), []);
+            child.tail = "\n".into();
+            line.write_xml(child, false);
+        }
+    }
+}
+
+/// The prefix part of an instruction's bit fields: a bounding box, a text line before
+/// the fields, the fields themselves, and a text line after them.
+#[derive(Clone, Debug)]
+struct InsnBitFieldsPrefix {
+ box_min_x: f32,
+ box_min_y: f32,
+ box_max_x: f32,
+ box_max_y: f32,
+ prefix_text: ParsedTextLine,
+ fields: Vec,
+ suffix_text: ParsedTextLine,
+}
+
+impl fmt::Display for InsnBitFieldsPrefix {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Destructuring every field makes this fail to compile when a field is added
+ // without updating the formatting here.
+ let Self {
+ box_min_x,
+ box_min_y,
+ box_max_x,
+ box_max_y,
+ prefix_text,
+ fields,
+ suffix_text,
+ } = self;
+ // NOTE(review): the format string below is empty, so only a newline is written
+ // and none of the destructured fields reach the output - confirm this is intended.
+ writeln!(
+ f,
+ "")
+ }
+}
+
+impl InsnBitFieldsPrefix {
+    /// Appends a `prefix` element to `parent` containing, in order, a `prefix-text`
+    /// child, the `fields` element written by `InsnBitFields::write_xml_fields`, and a
+    /// `suffix-text` child.
+    fn write_xml(&self, parent: &mut xml_tree::Element) {
+        /// Adds a child named `tag` to `owner` and writes `line` into it.
+        fn write_line(owner: &mut xml_tree::Element, tag: &'static str, line: &ParsedTextLine) {
+            let child = owner.sub_element(tag.into(), []);
+            child.tail = "\n".into();
+            line.write_xml(child, false);
+        }
+
+        let container = parent.sub_element("prefix".into(), []);
+        container.text = "\n".into();
+        container.tail = "\n".into();
+        write_line(container, "prefix-text", &self.prefix_text);
+        InsnBitFields::write_xml_fields(&self.fields, container);
+        write_line(container, "suffix-text", &self.suffix_text);
+    }
+}
+
+/// The bit fields of an instruction: an optional prefix, a bounding box, and the list
+/// of fields.
+#[derive(Clone, Debug)]
+struct InsnBitFields {
+ prefix: Option,
+ box_min_x: f32,
+ box_min_y: f32,
+ box_max_x: f32,
+ box_max_y: f32,
+ fields: Vec,
+}
+
+impl fmt::Display for InsnBitFields {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Destructuring every field makes this fail to compile when a field is added
+ // without updating the formatting here.
+ let Self {
+ prefix,
+ box_min_x,
+ box_min_y,
+ box_max_x,
+ box_max_y,
+ fields,
+ } = self;
+ // The prefix, when present, is formatted first.
+ if let Some(prefix) = prefix {
+ prefix.fmt(f)?;
+ }
+ // NOTE(review): the format string below is empty, so only a newline is written
+ // and the box and field values never reach the output - confirm this is intended.
+ writeln!(
+ f,
+ "")
+ }
+}
+
+impl InsnBitFields {
+ /// Appends a `fields` element to `parent` and writes every item of `fields` into it
+ /// via the item's `write_xml`.
+ fn write_xml_fields(
+ fields: impl IntoIterator>,
+ parent: &mut xml_tree::Element,
+ ) {
+ let fields_elm = parent.sub_element("fields".into(), []);
+ fields_elm.text = "\n".into();
+ fields_elm.tail = "\n".into();
+ for field in fields {
+ std::borrow::Borrow::borrow(&field).write_xml(fields_elm);
+ }
+ }
+ /// Appends a `bit-fields` element to `parent`, containing the prefix (when present)
+ /// followed by the `fields` element.
+ fn write_xml(&self, parent: &mut xml_tree::Element) {
+ let bit_fields = parent.sub_element("bit-fields".into(), []);
+ bit_fields.text = "\n".into();
+ bit_fields.tail = "\n".into();
+ if let Some(prefix) = &self.prefix {
+ prefix.write_xml(bit_fields);
+ }
+ Self::write_xml_fields(&self.fields, bit_fields)
+ }
+}
+
+/// One entry of a "special registers altered" listing: the register line plus the
+/// lists of field lines and condition lines that go with it.
+#[derive(Clone, Debug)]
+struct InsnSpRegsAlteredEntry {
+ reg: ParsedTextLine,
+ fields: Vec,
+ conds: Vec,
+}
+
+impl InsnSpRegsAlteredEntry {
+    /// Writes this entry as a multi-line `Entry(...)` listing.
+    ///
+    /// The opening `Entry(` line is written without `indent`; every following line
+    /// starts with `indent`. Empty `fields` / `conds` lists are written as `()`.
+    fn display_fmt_with_indent(&self, f: &mut fmt::Formatter<'_>, indent: &str) -> fmt::Result {
+        let Self { reg, fields, conds } = self;
+        writeln!(f, "Entry(")?;
+        writeln!(f, "{indent} reg={reg},")?;
+        write!(f, "{indent} fields=")?;
+        if fields.is_empty() {
+            write!(f, "()")?;
+        } else {
+            writeln!(f, "(")?;
+            for field in fields {
+                writeln!(f, "{indent} {field},")?;
+            }
+            write!(f, "{indent} )")?;
+        }
+        writeln!(f, ",")?;
+        // `write!`, not `writeln!`: the list that follows must start on the same line
+        // as its label, exactly as for `fields=` above.
+        write!(f, "{indent} conds=")?;
+        if conds.is_empty() {
+            write!(f, "()")?;
+        } else {
+            writeln!(f, "(")?;
+            for cond in conds {
+                writeln!(f, "{indent} {cond},")?;
+            }
+            write!(f, "{indent} )")?;
+        }
+        writeln!(f, ",")?;
+        write!(f, "{indent})")
+    }
+    /// Appends an `entry` element to `parent` with `register`, `fields` and
+    /// `conditions` children, in that order.
+    fn write_xml(&self, parent: &mut xml_tree::Element) {
+        let entry = parent.sub_element("entry".into(), []);
+        entry.text = "\n".into();
+        entry.tail = "\n".into();
+        let reg = entry.sub_element("register".into(), []);
+        reg.tail = "\n".into();
+        self.reg.write_xml(reg, false);
+        let fields = entry.sub_element("fields".into(), []);
+        fields.tail = "\n".into();
+        ParsedTextLine::write_xml_lines(&self.fields, fields, false, false);
+        let conds = entry.sub_element("conditions".into(), []);
+        conds.tail = "\n".into();
+        ParsedTextLine::write_xml_lines(&self.conds, conds, false, false);
+    }
+}
+
+impl fmt::Display for InsnSpRegsAlteredEntry {
+    /// Formats the entry with an empty indent.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.display_fmt_with_indent(f, "")
+    }
+}
+
+/// The "special registers altered" part of an instruction: its title line, optional
+/// special text and table headers, the table entries, and `final_regular_min_y`.
+#[derive(Clone, Debug)]
+struct InsnSpRegsAltered {
+ sp_regs_altered_text: ParsedTextLine,
+ special_text: Option,
+ table_header_reg: Option,
+ table_header_fields: Option,
+ entries: Vec,
+ final_regular_min_y: f32,
+}
+
+impl fmt::Display for InsnSpRegsAltered {
+ /// Writes a multi-line `InsnSpRegsAltered(...)` listing. Optional members are
+ /// omitted when absent, and an empty entry list is written as `entries=(),`.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Destructuring every field makes this fail to compile when a field is added
+ // without updating the formatting here.
+ let Self {
+ sp_regs_altered_text,
+ special_text,
+ table_header_reg,
+ table_header_fields,
+ entries,
+ final_regular_min_y,
+ } = self;
+ writeln!(f, "InsnSpRegsAltered(")?;
+ writeln!(f, " sp_regs_altered_text={sp_regs_altered_text},")?;
+ if let Some(special_text) = special_text {
+ writeln!(f, " special_text={special_text},")?;
+ }
+ if let Some(table_header_reg) = table_header_reg {
+ writeln!(f, " table_header_reg={table_header_reg},")?;
+ }
+ if let Some(table_header_fields) = table_header_fields {
+ writeln!(f, " table_header_fields={table_header_fields},")?;
+ }
+ if self.entries.is_empty() {
+ writeln!(f, " entries=(),")?;
+ } else {
+ writeln!(f, " entries=(")?;
+ for entry in entries {
+ // The entry writes its own first line without indent, so the leading
+ // indentation for that line is written here.
+ write!(f, " ")?;
+ entry.display_fmt_with_indent(f, " ")?;
+ writeln!(f, ",")?;
+ }
+ writeln!(f, " ),")?;
+ }
+ writeln!(f, " final_regular_min_y={final_regular_min_y},")?;
+ write!(f, ")")
+ }
+}
+
+impl InsnSpRegsAltered {
+    /// Appends a `special-registers-altered` element to `parent`.
+    ///
+    /// Children are written in this order: `title`, then `special-text`,
+    /// `table-header-register` and `table-header-fields` (each only when present), then
+    /// one `entry` per element of `entries`.
+    fn write_xml(&self, parent: &mut xml_tree::Element) {
+        let root = parent.sub_element("special-registers-altered".into(), []);
+        root.text = "\n".into();
+        root.tail = "\n".into();
+
+        let title_el = root.sub_element("title".into(), []);
+        title_el.tail = "\n".into();
+        self.sp_regs_altered_text.write_xml(title_el, false);
+
+        if let Some(line) = self.special_text.as_ref() {
+            let el = root.sub_element("special-text".into(), []);
+            el.tail = "\n".into();
+            line.write_xml(el, false);
+        }
+        if let Some(line) = self.table_header_reg.as_ref() {
+            let el = root.sub_element("table-header-register".into(), []);
+            el.tail = "\n".into();
+            line.write_xml(el, false);
+        }
+        if let Some(line) = self.table_header_fields.as_ref() {
+            let el = root.sub_element("table-header-fields".into(), []);
+            el.tail = "\n".into();
+            line.write_xml(el, false);
+        }
+
+        for altered in self.entries.iter() {
+            altered.write_xml(root);
+        }
+    }
+}
+
+/// Names the three sections distinguished while parsing an instruction: code, header
+/// and description.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+enum InsnParseSection {
+ Code,
+ Header,
+ Desc,
+}
+
+/// An item found on a page: either a character or a line/rectangle shape.
+#[derive(Clone, Debug)]
+enum PageItem {
+ Char(Char),
+ LineOrRect(LineOrRect),
+}
+
+/// Either a [`Line`] or a [`Rect`], with accessors that forward to whichever is held.
+#[derive(Copy, Clone, Debug)]
+enum LineOrRect {
+    Line(Line),
+    Rect(Rect),
+}
+
+impl LineOrRect {
+    /// Width of the wrapped shape.
+    fn width(self) -> f32 {
+        match self {
+            LineOrRect::Rect(rect) => rect.width(),
+            LineOrRect::Line(line) => line.width(),
+        }
+    }
+    /// Height of the wrapped shape.
+    #[allow(dead_code)]
+    fn height(self) -> f32 {
+        match self {
+            LineOrRect::Rect(rect) => rect.height(),
+            LineOrRect::Line(line) => line.height(),
+        }
+    }
+    /// Smallest x coordinate of the wrapped shape.
+    fn min_x(self) -> NonNaNF32 {
+        match self {
+            LineOrRect::Rect(rect) => rect.min_x,
+            LineOrRect::Line(line) => line.min_x(),
+        }
+    }
+    /// Largest x coordinate of the wrapped shape.
+    fn max_x(self) -> NonNaNF32 {
+        match self {
+            LineOrRect::Rect(rect) => rect.max_x,
+            LineOrRect::Line(line) => line.max_x(),
+        }
+    }
+    /// Smallest y coordinate of the wrapped shape.
+    fn min_y(self) -> NonNaNF32 {
+        match self {
+            LineOrRect::Rect(rect) => rect.min_y,
+            LineOrRect::Line(line) => line.min_y(),
+        }
+    }
+    /// Largest y coordinate of the wrapped shape.
+    fn max_y(self) -> NonNaNF32 {
+        match self {
+            LineOrRect::Rect(rect) => rect.max_y,
+            LineOrRect::Line(line) => line.max_y(),
+        }
+    }
+}
+
+/// A line segment between the points `(p0_x, p0_y)` and `(p1_x, p1_y)`.
+#[derive(Copy, Clone, Debug)]
+struct Line {
+    p0_x: NonNaNF32,
+    p0_y: NonNaNF32,
+    p1_x: NonNaNF32,
+    p1_y: NonNaNF32,
+}
+
+impl Line {
+    /// Absolute horizontal distance between the two end points.
+    fn width(self) -> f32 {
+        let Self { p0_x, p1_x, .. } = self;
+        (p0_x.get() - p1_x.get()).abs()
+    }
+    /// Absolute vertical distance between the two end points.
+    fn height(self) -> f32 {
+        let Self { p0_y, p1_y, .. } = self;
+        (p0_y.get() - p1_y.get()).abs()
+    }
+    /// Smaller of the two x coordinates.
+    fn min_x(self) -> NonNaNF32 {
+        let Self { p0_x, p1_x, .. } = self;
+        p0_x.min(p1_x)
+    }
+    /// Larger of the two x coordinates.
+    fn max_x(self) -> NonNaNF32 {
+        let Self { p0_x, p1_x, .. } = self;
+        p0_x.max(p1_x)
+    }
+    /// Smaller of the two y coordinates.
+    fn min_y(self) -> NonNaNF32 {
+        let Self { p0_y, p1_y, .. } = self;
+        p0_y.min(p1_y)
+    }
+    /// Larger of the two y coordinates.
+    fn max_y(self) -> NonNaNF32 {
+        let Self { p0_y, p1_y, .. } = self;
+        p0_y.max(p1_y)
+    }
+}
+
+/// An axis-aligned rectangle stored as its minimum and maximum coordinates.
+#[derive(Copy, Clone, Debug)]
+struct Rect {
+    min_x: NonNaNF32,
+    max_x: NonNaNF32,
+    min_y: NonNaNF32,
+    max_y: NonNaNF32,
+}
+
+impl Rect {
+    /// `max_x - min_x`.
+    fn width(self) -> f32 {
+        let (low, high) = (self.min_x.get(), self.max_x.get());
+        high - low
+    }
+    /// `max_y - min_y`.
+    fn height(self) -> f32 {
+        let (low, high) = (self.min_y.get(), self.max_y.get());
+        high - low
+    }
+}
+
+/// A single page, identified by `page_num`, together with its lookup structure (`qt`)
+/// and the characters and non-text items that have not been processed yet.
+#[derive(Debug)]
+struct Page {
+ page_num: u32,
+ qt: BTreeMap>,
+ unprocessed_chars:
+ Rc>>>>>>,
+ // Kept even though nothing reads it; the attribute silences the unused-field warning.
+ #[allow(dead_code)]
+ unprocessed_non_text: Rc>>,
+}
+
+/// Lazily filled collection of pages.
+///
+/// `pages_gen` is the source that pages are pulled from (`None` once closed), `pages`
+/// holds the pages pulled so far keyed by page number, and `max_page_num` is the
+/// highest page number stored so far (0 before any page is stored).
+struct Pages<'ctx> {
+ pages_gen: Option> + 'ctx>>,
+ pages: BTreeMap>,
+ max_page_num: u32,
+}
+
+impl<'ctx> fmt::Debug for Pages<'ctx> {
+    /// Formats as a `Pages` struct. The page source itself is not printed: it shows up
+    /// as `Some(...)` while it is still open and as `None` once closed.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = f.debug_struct("Pages");
+        out.field(
+            "pages_gen",
+            &self.pages_gen.is_some().then_some(format_args!("...")),
+        );
+        out.field("pages", &self.pages);
+        out.field("max_page_num", &self.max_page_num);
+        out.finish()
+    }
+}
+
+impl<'ctx> Pages<'ctx> {
+ /// Creates an empty collection that will pull its pages from `pages_gen`.
+ ///
+ /// No page is pulled here; `max_page_num` starts at 0.
+ fn new(pages_gen: Option> + 'ctx>>) -> Self {
+ Self {
+ pages_gen,
+ pages: BTreeMap::new(),
+ max_page_num: 0,
+ }
+ }
+ /// Drops the page source so that no further pages will be pulled.
+ fn close(&mut self) {
+     drop(self.pages_gen.take());
+ }
+ /// Reports whether `page_num` lies beyond the last available page.
+ ///
+ /// Pages are pulled from the source until `page_num` is covered or the source is
+ /// closed; any error from `fill_page` is returned to the caller.
+ fn is_past_end(&mut self, page_num: u32) -> Result {
+ while self.pages_gen.is_some() && page_num > self.max_page_num {
+ self.fill_page()?;
+ }
+ Ok(page_num > self.max_page_num)
+ }
+ /// Pulls the next page from the source and stores it.
+ ///
+ /// Returns `Ok(true)` when a page was stored and `Ok(false)` when the source is
+ /// already closed or has just run out (in which case it is closed). An error
+ /// produced by the source is returned as-is.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the new page's number is not greater than `max_page_num`.
+ fn fill_page(&mut self) -> Result {
+ let Some(pages_gen) = &mut self.pages_gen else {
+ return Ok(false);
+ };
+ let page = pages_gen.next();
+ let Some(page) = page else {
+ // The source is exhausted; drop it so later calls return immediately.
+ self.close();
+ return Ok(false);
+ };
+ let page = page?;
+ let page_num = page.page_num;
+ assert!(
+ page_num > self.max_page_num,
+ "page numbers must be a strictly-increasing positive integer sequence:\n\
+ got {page_num} which isn't more than {}",
+ self.max_page_num
+ );
+ self.pages.insert(page_num, Rc::new(page));
+ self.max_page_num = page_num;
+ Ok(true)
+ }
+ fn get(&mut self, page_num: u32) -> Result