// SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information use non_nan_float::NonNaNF32; use std::{ borrow::Borrow, collections::{HashMap, HashSet}, fmt, sync::OnceLock, }; mod quad_tree; mod xml_tree; mod non_nan_float { #[derive(Default, PartialEq, PartialOrd, Clone, Copy)] pub(crate) struct NonNaNF32(f32); impl std::fmt::Debug for NonNaNF32 { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } impl std::fmt::Display for NonNaNF32 { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } impl NonNaNF32 { pub(crate) const fn new(v: f32) -> Option { if v.is_nan() { None } else { Some(Self(v)) } } pub(crate) const fn get(self) -> f32 { self.0 } } impl std::hash::Hash for NonNaNF32 { fn hash(&self, state: &mut H) { if self.0 == 0.0 { 0.0 } else { self.0 } .to_bits() .hash(state); } } impl Eq for NonNaNF32 {} impl Ord for NonNaNF32 { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.partial_cmp(other).expect("known to be non-NaN") } } impl std::ops::Neg for NonNaNF32 { type Output = Self; fn neg(self) -> Self::Output { Self(-self.0) } } } macro_rules! make_enum_font { ( enum $Font:ident { #[other] $Other:ident $other_body:tt, $(#[group] $KnownFontGroup:ident { $(#[name = $known_font_name:literal, size = $known_font_size:literal] $KnownFont:ident,)* },)* } ) => { #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Clone)] enum $Font { $Other $other_body, $($($KnownFont,)*)* } #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Copy, Clone)] enum KnownFontGroup { $($KnownFontGroup,)* } impl KnownFontGroup { const fn fonts(self) -> &'static [Font] { match self { $(Self::$KnownFontGroup => &[$(Font::$KnownFont,)*],)* } } const INSN_CODE_FONT_GROUPS: &[Self] = &[Self::InsnCode, Self::InsnCodeSubscript]; } impl $Font { const fn size(&self) -> f32 { match *self { Self::$Other { size, .. 
} => size.get(), $($(Self::$KnownFont => $known_font_size,)*)* } } const fn font_name(&self) -> &str { match self { Self::$Other { font_name, .. } => font_name, $($(Self::$KnownFont => $known_font_name,)*)* } } const fn known_font_group(&self) -> Option { match self { Self::$Other { .. } => None, $($(Self::$KnownFont => Some(KnownFontGroup::$KnownFontGroup),)*)* } } const fn line_height(&self) -> f32 { match self { Self::$Other { .. } => self.line_height_helper(), $($(Self::$KnownFont => const { Self::$KnownFont.line_height_helper() },)*)* } } } }; } make_enum_font! { enum Font { #[other] Other { font_name: Box, size: NonNaNF32, }, #[group] InsnHeader { #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963] InsnHeader, }, #[group] RtlFnHeader { #[name = "APUYSQ+zcoN-Regular", size = 9.963] RtlFnHeader, }, #[group] PageHeader { #[name = "MJBFWM+DejaVuSansCondensed", size = 9.963] PageHeader, }, #[group] PageFooter { #[name = "MJBFWM+DejaVuSansCondensed", size = 4.981] PageFooter, }, #[group] InsnDesc { #[name = "MJBFWM+DejaVuSansCondensed", size = 8.966] InsnDesc0, #[name = "FZTIYT+CMMI9", size = 8.966] InsnDesc1, #[name = "ONUAYC+CMSSI9", size = 8.966] InsnDesc2, #[name = "TNGBFZ+CMSY9", size = 8.966] InsnDesc3, #[name = "WHMZPU+CMEX9", size = 8.966] InsnDesc4, #[name = "ZJTMSG+CMSS9", size = 8.966] InsnDesc5, }, #[group] InsnDescMisc { #[name = "MJBFWM+DejaVuSansCondensed", size = 2.377] InsnDescMisc0, #[name = "MJBFWM+DejaVuSansCondensed", size = 2.561] InsnDescMisc1, #[name = "MJBFWM+DejaVuSansCondensed", size = 4.492] InsnDescMisc2, #[name = "MJBFWM+DejaVuSansCondensed", size = 4.641] InsnDescMisc3, #[name = "MJBFWM+DejaVuSansCondensed", size = 4.772] InsnDescMisc4, #[name = "MJBFWM+DejaVuSansCondensed", size = 4.864] InsnDescMisc5, #[name = "MJBFWM+DejaVuSansCondensed", size = 4.925] InsnDescMisc6, #[name = "MJBFWM+DejaVuSansCondensed", size = 5.097] InsnDescMisc7, #[name = "MJBFWM+DejaVuSansCondensed", size = 5.123] InsnDescMisc8, #[name = 
"MJBFWM+DejaVuSansCondensed", size = 5.131] InsnDescMisc9, #[name = "MJBFWM+DejaVuSansCondensed", size = 5.516] InsnDescMisc10, #[name = "MJBFWM+DejaVuSansCondensed", size = 5.604] InsnDescMisc11, #[name = "MJBFWM+DejaVuSansCondensed", size = 5.634] InsnDescMisc12, #[name = "MJBFWM+DejaVuSansCondensed", size = 5.906] InsnDescMisc13, #[name = "MJBFWM+DejaVuSansCondensed", size = 6.033] InsnDescMisc14, #[name = "MJBFWM+DejaVuSansCondensed", size = 6.068] InsnDescMisc15, #[name = "MJBFWM+DejaVuSansCondensed", size = 6.213] InsnDescMisc16, #[name = "MJBFWM+DejaVuSansCondensed", size = 6.252] InsnDescMisc17, #[name = "MJBFWM+DejaVuSansCondensed", size = 6.962] InsnDescMisc18, #[name = "MJBFWM+DejaVuSansCondensed", size = 7.977] InsnDescMisc19, }, #[group] InsnDescCode { #[name = "APUYSQ+zcoN-Regular", size = 6.974] InsnDescCode, }, #[group] InsnDescCodeMisc { #[name = "APUYSQ+zcoN-Regular", size = 3.587] InsnDescCodeMisc0, #[name = "APUYSQ+zcoN-Regular", size = 4.483] InsnDescCodeMisc1, }, #[group] InsnDescItalic { #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966] InsnDescItalic, }, #[group] InsnDescBold { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966] InsnDescBold, }, #[group] InsnDescBoldItalic { #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966] InsnDescBoldItalic, }, #[group] InsnDescSmall { #[name = "MJBFWM+DejaVuSansCondensed", size = 7.97] InsnDescSmall, }, #[group] InsnDescSmallItalic { #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97] InsnDescSmallItalic, }, #[group] InsnDescSmallBold { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97] InsnDescSmallBold, }, #[group] InsnDescSmallBoldItalic { #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97] InsnDescSmallBoldItalic, }, #[group] InsnDescBoldMisc { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.21] InsnDescBoldMisc0, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399] InsnDescBoldMisc1, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 
2.763] InsnDescBoldMisc2, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946] InsnDescBoldMisc3, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949] InsnDescBoldMisc4, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999] InsnDescBoldMisc5, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065] InsnDescBoldMisc6, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086] InsnDescBoldMisc7, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183] InsnDescBoldMisc8, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686] InsnDescBoldMisc9, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744] InsnDescBoldMisc10, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825] InsnDescBoldMisc11, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842] InsnDescBoldMisc12, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857] InsnDescBoldMisc13, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979] InsnDescBoldMisc14, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032] InsnDescBoldMisc15, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112] InsnDescBoldMisc16, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161] InsnDescBoldMisc17, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206] InsnDescBoldMisc18, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353] InsnDescBoldMisc19, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378] InsnDescBoldMisc20, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434] InsnDescBoldMisc21, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595] InsnDescBoldMisc22, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619] InsnDescBoldMisc23, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647] InsnDescBoldMisc24, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68] InsnDescBoldMisc25, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693] InsnDescBoldMisc26, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736] InsnDescBoldMisc27, #[name = 
"NHUPPK+DejaVuSansCondensed-Bold", size = 4.781] InsnDescBoldMisc28, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802] InsnDescBoldMisc29, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995] InsnDescBoldMisc30, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201] InsnDescBoldMisc31, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258] InsnDescBoldMisc32, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363] InsnDescBoldMisc33, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442] InsnDescBoldMisc34, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473] InsnDescBoldMisc35, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485] InsnDescBoldMisc36, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512] InsnDescBoldMisc37, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543] InsnDescBoldMisc38, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613] InsnDescBoldMisc39, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.744] InsnDescBoldMisc40, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774] InsnDescBoldMisc41, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809] InsnDescBoldMisc42, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849] InsnDescBoldMisc43, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911] InsnDescBoldMisc44, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92] InsnDescBoldMisc45, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962] InsnDescBoldMisc46, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981] InsnDescBoldMisc47, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146] InsnDescBoldMisc48, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213] InsnDescBoldMisc49, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221] InsnDescBoldMisc50, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243] InsnDescBoldMisc51, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55] InsnDescBoldMisc52, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62] 
InsnDescBoldMisc53, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.699] InsnDescBoldMisc54, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725] InsnDescBoldMisc55, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751] InsnDescBoldMisc56, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856] InsnDescBoldMisc57, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029] InsnDescBoldMisc58, #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406] InsnDescBoldMisc59, }, #[group] InsnDescSubscript { #[name = "MJBFWM+DejaVuSansCondensed", size = 5.978] InsnDescSubscript, }, #[group] InsnDescBoldSubscript { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978] InsnDescBoldSubscript, }, #[group] InsnDescItalicSubscript { #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978] InsnDescItalicSubscript, }, #[group] InsnDescBoldItalicSubscript { #[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978] InsnDescBoldItalicSubscript, }, #[group] InsnExtMnemonic { #[name = "APUYSQ+zcoN-Regular", size = 8.966] InsnExtMnemonic, }, #[group] InsnCode { #[name = "APUYSQ+zcoN-Regular", size = 7.97] InsnCode0, #[name = "RRFUNA+CMSY8", size = 7.97] InsnCode1, #[name = "HPXOZC+CMSS8", size = 7.97] InsnCode2, }, #[group] InsnCodeSubscript { #[name = "APUYSQ+zcoN-Regular", size = 5.978] InsnCodeSubscript0, #[name = "DBQTKF+CMSY6", size = 5.978] InsnCodeSubscript1, }, #[group] TitlePageBig { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787] TitlePageBig, }, #[group] TitlePageVersion { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963] TitlePageVersion, }, #[group] TitlePageTm { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974] TitlePageTm, }, #[group] TitlePageRev { #[name = "MJBFWM+DejaVuSansCondensed", size = 6.974] TitlePageRev, }, #[group] TitlePageBook { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663] TitlePageBook, }, #[group] LegalPageItalic { #[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963] 
LegalPageItalic, }, #[group] ChangeSummaryPageBold { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955] ChangeSummaryPageBold, }, #[group] ChapterTitle { #[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215] ChapterTitle, }, #[group] MathMisc { #[name = "AAJMKT+CMMI6", size = 5.978] MathMisc0, #[name = "CUTMFD+CMSSI8", size = 5.978] MathMisc1, #[name = "CUTMFD+CMSSI8", size = 7.97] MathMisc2, #[name = "FZTIYT+CMMI9", size = 5.734] MathMisc3, #[name = "FZTIYT+CMMI9", size = 7.168] MathMisc4, #[name = "HONFQS+CMMI8", size = 7.97] MathMisc5, #[name = "HPXOZC+CMSS8", size = 5.978] MathMisc6, #[name = "LLVRDD+CMSY10", size = 11.955] MathMisc7, #[name = "ZJTMSG+CMSS9", size = 7.168] MathMisc8, }, } } impl Font { const fn space_width(&self) -> f32 { self.size() * const { 3.985 / Font::InsnCode0.size() } } const fn line_height_helper(&self) -> f32 { const fn str_eq(a: &str, b: &str) -> bool { let a = a.as_bytes(); let b = b.as_bytes(); if a.len() != b.len() { return false; } let mut i = 0; while i < a.len() { if a[i] != b[i] { return false; } i += 1; } true } let font_name = self.font_name(); let mut i = 0; while i < KnownFontGroup::INSN_CODE_FONT_GROUPS.len() { let fonts = KnownFontGroup::INSN_CODE_FONT_GROUPS[i].fonts(); let mut j = 0; while j < fonts.len() { if str_eq(font_name, fonts[j].font_name()) { return 9.464 * self.size() / Font::InsnCode0.size(); } j += 1; } i += 1; } let group = self.known_font_group(); if matches!(group, Some(KnownFontGroup::InsnDesc)) || str_eq(font_name, Font::InsnDesc0.font_name()) || str_eq(font_name, Font::InsnDescBold.font_name()) || str_eq(font_name, Font::InsnDescItalic.font_name()) || str_eq(font_name, Font::InsnDescBoldItalic.font_name()) || matches!(group, Some(KnownFontGroup::MathMisc)) { return 10.959 * self.size() / Font::InsnDesc0.size(); } panic!("no line height") } } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] struct Char { font: Font, text: String, adv: NonNaNF32, min_x: NonNaNF32, min_y: 
NonNaNF32, max_x: NonNaNF32, max_y: NonNaNF32, } impl Char { fn width(&self) -> f32 { self.max_x.get() - self.min_x.get() } fn height(&self) -> f32 { self.max_y.get() - self.min_y.get() } fn top_down_left_to_right_sort_key(&self) -> impl Ord { (-self.min_y, self.min_x) } } const COLUMN_SPLIT_X: f32 = 300.0; const PAGE_BODY_MAX_X: f32 = 600.0; const PAGE_BODY_MIN_X: f32 = 50.0; const PAGE_BODY_MAX_Y: f32 = 780.0; const PAGE_BODY_MIN_Y: f32 = 45.0; const ONE_TITLE_LINE_SPLIT_Y: f32 = 734.0; const TWO_TITLE_LINES_SPLIT_Y: f32 = 715.0; const INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT: f32 = 29.938; const INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT: f32 = 9.278; const INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT: f32 = 20.971; const INSN_BIT_FIELDS_TOP_PAD_HEIGHT: f32 = 20.175; const INSN_BIT_FIELDS_TOP_PAD_HEIGHT2: f32 = 14.694; const INSN_BIT_FIELDS_BOX_HEIGHT: f32 = 22.317; const INSN_SP_REGS_ALTERED_REGISTER_COLUMN_X: f32 = 34.405; const INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X: f32 = 86.692; const INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X: f32 = 188.74; #[derive(Clone)] struct ParsedTextLine { element: xml_tree::Element, regular_min_y: f32, regular_max_y: f32, fonts: TextLineFonts, chars: Vec, preceding_blank_lines: u32, } impl ParsedTextLine { fn regular_height(&self) -> f32 { self.regular_max_y - self.regular_min_y } fn get_header_text(&self) -> Option { assert_eq!(self.fonts, TextLineFonts::InsnDescFonts); if !self.element.text.trim().is_empty() { return None; } if !self.element.tail.trim().is_empty() { return None; } let [b] = &*self.element.children else { return None; }; if b.tag.normal() != Some("b") { return None; } if b.children.len() != 0 { return None; } let text = self.element.inner_text(); // should also check titlecase, but rust doesn't include that in std if text.ends_with(":") && text.chars().next().is_some_and(|ch| ch.is_uppercase()) { Some(text) } else { None } } fn write_xml(&self, parent: &mut xml_tree::Element, trailing_nl: bool) { for 
_ in 0..self.preceding_blank_lines { parent.sub_element("br".into(), []).tail = "\n".into(); } if let Some(last_child) = parent.children.last_mut() { last_child.tail += &self.element.text; } else { parent.text += &self.element.text; } parent.children.extend_from_slice(&self.element.children); if trailing_nl { parent.sub_element("br".into(), []).tail = "\n".into(); } } fn write_xml_lines( lines: impl IntoIterator>, parent: &mut xml_tree::Element, trailing_nl: bool, preceding_nl: bool, ) { if preceding_nl { parent.sub_element("br".into(), []).tail = "\n".into(); } let mut first = true; for line in lines { let line = line.borrow(); if first { first = false; } else { parent.sub_element("br".into(), []).tail = "\n".into(); } line.write_xml(parent, false); } if trailing_nl { parent.sub_element("br".into(), []).tail = "\n".into(); } } } impl fmt::Debug for ParsedTextLine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Self { element, regular_min_y, regular_max_y, fonts, chars, preceding_blank_lines, } = self; f.debug_struct("ParsedTextLine") .field("element", &format_args!("{element}")) .field("regular_min_y", regular_min_y) .field("regular_max_y", regular_max_y) .field("fonts", fonts) .field("chars", chars) .field("preceding_blank_lines", preceding_blank_lines) .finish() } } impl fmt::Display for ParsedTextLine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for _ in 0..self.preceding_blank_lines { f.write_str("\n")?; } self.element.fmt(f) } } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] enum BaselinePos { Above, Below, } macro_rules! make_enum_with_values { ( $(#[$enum_meta:meta])* enum $Enum:ident { $($Variant:ident,)* } ) => { $(#[$enum_meta])* enum $Enum { $($Variant,)* } impl $Enum { const VALUES: &[Self] = &[$(Self::$Variant,)*]; } }; } make_enum_with_values! 
{ #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] enum TextLineFonts { InsnMnemonicFonts, InsnHeaderFonts, InsnBitFieldBitNumberFonts, InsnBitFieldNameFonts, InsnBitFieldsAffixTitleFonts, InsnCodeFonts, InsnDescFonts, } } impl TextLineFonts { fn regular(self) -> &'static [Font] { match self { TextLineFonts::InsnMnemonicFonts => KnownFontGroup::InsnDesc.fonts(), TextLineFonts::InsnHeaderFonts => &[Font::InsnHeader], TextLineFonts::InsnBitFieldBitNumberFonts => &[Font::InsnDescSmall, Font::TitlePageRev], TextLineFonts::InsnBitFieldNameFonts => KnownFontGroup::InsnDesc.fonts(), TextLineFonts::InsnBitFieldsAffixTitleFonts => &[Font::InsnDescSmall], TextLineFonts::InsnCodeFonts => KnownFontGroup::InsnCode.fonts(), TextLineFonts::InsnDescFonts => { static FONTS: OnceLock> = OnceLock::new(); FONTS.get_or_init(|| { Box::from_iter( KnownFontGroup::InsnDesc .fonts() .iter() .cloned() .chain([Font::InsnDescSmall]), ) }) } } } fn italic(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => { Some(&[Font::InsnDescItalic, Font::InsnDescSmallItalic]) } } } fn bold(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => Some(&[Font::InsnDescSmallBold]), TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescBold, Font::InsnDescSmallBold]), } } fn bold_italic(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, 
TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => { Some(&[Font::InsnDescBoldItalic, Font::InsnDescSmallBoldItalic]) } } } fn subscript(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => Some(&[Font::InsnDescSubscript]), TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => Some(KnownFontGroup::InsnCodeSubscript.fonts()), TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescSubscript]), } } fn bold_subscript(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescBoldSubscript]), } } fn italic_subscript(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescItalicSubscript]), } } fn bold_italic_subscript(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescBoldItalicSubscript]), } } fn code(self) -> Option<&'static 
[Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => Some(&[Font::InsnDescCode, Font::InsnExtMnemonic]), } } fn code_subscript(self) -> Option<&'static [Font]> { match self { TextLineFonts::InsnMnemonicFonts => None, TextLineFonts::InsnHeaderFonts => None, TextLineFonts::InsnBitFieldBitNumberFonts => None, TextLineFonts::InsnBitFieldNameFonts => None, TextLineFonts::InsnBitFieldsAffixTitleFonts => None, TextLineFonts::InsnCodeFonts => None, TextLineFonts::InsnDescFonts => Some(KnownFontGroup::InsnCodeSubscript.fonts()), } } fn get_fonts( self, part_kind: TextLineFontKind, ) -> Option<(&'static [Font], Option)> { let fonts = match part_kind { TextLineFontKind::Regular => self.regular(), TextLineFontKind::Italic => self.italic()?, TextLineFontKind::Bold => self.bold()?, TextLineFontKind::BoldItalic => self.bold_italic()?, TextLineFontKind::Subscript => self.subscript()?, TextLineFontKind::Superscript => self.subscript()?, TextLineFontKind::BoldSubscript => self.bold_subscript()?, TextLineFontKind::BoldSuperscript => self.bold_subscript()?, TextLineFontKind::ItalicSubscript => self.italic_subscript()?, TextLineFontKind::ItalicSuperscript => self.italic_subscript()?, TextLineFontKind::BoldItalicSubscript => self.bold_italic_subscript()?, TextLineFontKind::BoldItalicSuperscript => self.bold_italic_subscript()?, TextLineFontKind::Code => self.code()?, TextLineFontKind::CodeSubscript => self.code_subscript()?, TextLineFontKind::CodeSuperscript => self.code_subscript()?, }; Some((fonts, part_kind.sub_super().baseline_pos())) } fn font_to_kind_map(self) -> &'static HashMap<(Font, Option), TextLineFontKind> { static MAPS: OnceLock< HashMap), TextLineFontKind>>, > = OnceLock::new(); &MAPS.get_or_init(|| { 
Self::VALUES .iter() .map(|&this: &TextLineFonts| { let mut map = HashMap::new(); for &kind in TextLineFontKind::VALUES { let Some((fonts, baseline_pos)) = this.get_fonts(kind) else { continue; }; for font in fonts { let old_kind = map.insert((font.clone(), baseline_pos), kind); assert!( old_kind.is_none(), "duplicate font: kind={kind:?} old_kind={old_kind:?} font={font:?}" ); } } (this, map) }) .collect() })[&self] } fn fonts(self) -> &'static HashSet { static SETS: OnceLock>> = OnceLock::new(); &SETS.get_or_init(|| { Self::VALUES .iter() .map(|&this: &TextLineFonts| { let mut set = HashSet::new(); for &kind in TextLineFontKind::VALUES { let Some((fonts, _baseline_pos)) = this.get_fonts(kind) else { continue; }; set.extend(fonts.iter().cloned()); } (this, set) }) .collect() })[&self] } fn get_kind(self, font: Font, baseline_pos: BaselinePos) -> Option { let font_to_kind_map = self.font_to_kind_map(); font_to_kind_map .get(&(font.clone(), Some(baseline_pos))) .or_else(|| font_to_kind_map.get(&(font, None))) .copied() } } #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] enum FontVariantCode { Code, NotCode, } impl FontVariantCode { const fn value(self) -> &'static [&'static str] { match self { Self::Code => &["code"], Self::NotCode => &[], } } } #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] enum FontVariantBold { Bold, NotBold, } impl FontVariantBold { const fn value(self) -> &'static [&'static str] { match self { Self::Bold => &["b"], Self::NotBold => &[], } } } #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] enum FontVariantItalic { Italic, NotItalic, } impl FontVariantItalic { const fn value(self) -> &'static [&'static str] { match self { Self::Italic => &["i"], Self::NotItalic => &[], } } } #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] enum FontVariantSubSuper { NotSubSuper, Subscript, Superscript, } impl FontVariantSubSuper { const fn value(self) -> &'static [&'static str] { match self { Self::NotSubSuper => &[], Self::Subscript => &["sub"], 
Self::Superscript => &["sup"], } } } impl FontVariantSubSuper { fn baseline_pos(self) -> Option { match self { FontVariantSubSuper::NotSubSuper => None, FontVariantSubSuper::Subscript => Some(BaselinePos::Below), FontVariantSubSuper::Superscript => Some(BaselinePos::Above), } } } make_enum_with_values! { #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] enum TextLineFontKind { Regular, Subscript, Superscript, Italic, ItalicSubscript, ItalicSuperscript, Bold, BoldSubscript, BoldSuperscript, BoldItalic, BoldItalicSubscript, BoldItalicSuperscript, Code, CodeSubscript, CodeSuperscript, } } impl TextLineFontKind { fn code(self) -> FontVariantCode { match self { Self::Regular | Self::Subscript | Self::Superscript | Self::Italic | Self::ItalicSubscript | Self::ItalicSuperscript | Self::Bold | Self::BoldSubscript | Self::BoldSuperscript | Self::BoldItalic | Self::BoldItalicSubscript | Self::BoldItalicSuperscript => FontVariantCode::NotCode, Self::Code | Self::CodeSubscript | Self::CodeSuperscript => FontVariantCode::Code, } } fn bold(self) -> FontVariantBold { match self { Self::Regular | Self::Subscript | Self::Superscript | Self::Italic | Self::ItalicSubscript | Self::ItalicSuperscript => FontVariantBold::NotBold, Self::Bold | Self::BoldSubscript | Self::BoldSuperscript | Self::BoldItalic | Self::BoldItalicSubscript | Self::BoldItalicSuperscript => FontVariantBold::Bold, Self::Code | Self::CodeSubscript | Self::CodeSuperscript => FontVariantBold::NotBold, } } fn italic(self) -> FontVariantItalic { match self { Self::Regular | Self::Subscript | Self::Superscript => FontVariantItalic::NotItalic, Self::Italic | Self::ItalicSubscript | Self::ItalicSuperscript => { FontVariantItalic::Italic } Self::Bold | Self::BoldSubscript | Self::BoldSuperscript => { FontVariantItalic::NotItalic } Self::BoldItalic | Self::BoldItalicSubscript | Self::BoldItalicSuperscript => { FontVariantItalic::Italic } Self::Code | Self::CodeSubscript | Self::CodeSuperscript => { 
FontVariantItalic::NotItalic } } } fn sub_super(self) -> FontVariantSubSuper { match self { Self::Regular => FontVariantSubSuper::NotSubSuper, Self::Subscript => FontVariantSubSuper::Subscript, Self::Superscript => FontVariantSubSuper::Superscript, Self::Italic => FontVariantSubSuper::NotSubSuper, Self::ItalicSubscript => FontVariantSubSuper::Subscript, Self::ItalicSuperscript => FontVariantSubSuper::Superscript, Self::Bold => FontVariantSubSuper::NotSubSuper, Self::BoldSubscript => FontVariantSubSuper::Subscript, Self::BoldSuperscript => FontVariantSubSuper::Superscript, Self::BoldItalic => FontVariantSubSuper::NotSubSuper, Self::BoldItalicSubscript => FontVariantSubSuper::Subscript, Self::BoldItalicSuperscript => FontVariantSubSuper::Superscript, Self::Code => FontVariantSubSuper::NotSubSuper, Self::CodeSubscript => FontVariantSubSuper::Subscript, Self::CodeSuperscript => FontVariantSubSuper::Superscript, } } fn text_line_tags(self) -> impl Clone + Iterator { self.code() .value() .iter() .copied() .chain(self.bold().value().iter().copied()) .chain(self.italic().value().iter().copied()) .chain(self.sub_super().value().iter().copied()) } } #[derive(Debug)] struct ElementBodyBuilder<'a> { containing_element: &'a mut xml_tree::Element, stack: Vec, } impl<'a> ElementBodyBuilder<'a> { fn new(containing_element: &'a mut xml_tree::Element) -> Self { Self { containing_element, stack: Vec::with_capacity(5), } } fn shrink_stack(&mut self, new_len: usize) { while new_len < self.stack.len() { let Some(element) = self.stack.pop() else { unreachable!(); }; self.insert_point().children.push(element); } } fn set_tag_stack<'b>(&mut self, tag_stack: impl IntoIterator) { let mut new_len = 0; for (i, tag) in tag_stack.into_iter().enumerate() { new_len = i + 1; if i >= self.stack.len() { self.stack.push(xml_tree::Element::new(tag.into(), [])); } else if self.stack[i].tag.normal() != Some(tag) { self.shrink_stack(new_len); } } self.shrink_stack(new_len); } fn write_text(&mut self, 
text: impl Borrow) {
        let text = text.borrow();
        let insert_point = self.insert_point();
        // If the insertion point already has children, the text is appended to
        // the last child's `tail`; otherwise it is appended to the insertion
        // point's own `text`.
        if let Some(child) = insert_point.children.last_mut() {
            child.tail += text;
        } else {
            insert_point.text += text;
        }
    }

    /// Returns the element that new content is added to: the last entry of
    /// `self.stack`, or `self.containing_element` when `last_mut()` yields
    /// nothing.
    fn insert_point(&mut self) -> &mut xml_tree::Element {
        self.stack.last_mut().unwrap_or(self.containing_element)
    }

    /// Runs `f` on `self`, then calls `flush`, and returns whatever `f`
    /// returned.
    fn scope(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
        let retval = f(self);
        self.flush();
        retval
    }

    /// Calls `set_tag_stack` with an empty array.
    ///
    /// NOTE(review): presumably this closes every currently open tag; confirm
    /// against `set_tag_stack`, which is defined elsewhere.
    fn flush(&mut self) {
        self.set_tag_stack([]);
    }
}

/// One bit field: the horizontal extent of its box (`box_min_x`..`box_max_x`)
/// plus the parsed text lines for its name and its bit number.
#[derive(Clone, Debug)]
struct InsnBitField {
    box_min_x: f32,
    box_max_x: f32,
    name: ParsedTextLine,
    bit_number: ParsedTextLine,
}

impl fmt::Display for InsnBitField {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            box_min_x,
            box_max_x,
            name,
            bit_number,
        } = self;
        // NOTE(review): the format string below is empty, so none of the
        // fields destructured above are printed. Confirm the literal is intact.
        write!(
            f,
            ""
        )
    }
}

impl InsnBitField {
    /// Appends a `field` element to `parent` containing a `name` child and a
    /// `bit-number` child.
    ///
    /// The `field` element gets `"\n"` as both text and tail; each child gets
    /// `"\n"` as tail and is filled by `ParsedTextLine::write_xml(.., false)`.
    /// The meaning of the `false` flag is defined by that method.
    fn write_xml(&self, parent: &mut xml_tree::Element) {
        let field = parent.sub_element("field".into(), []);
        field.text = "\n".into();
        field.tail = "\n".into();
        let name = field.sub_element("name".into(), []);
        name.tail = "\n".into();
        self.name.write_xml(name, false);
        let bit_number = field.sub_element("bit-number".into(), []);
        bit_number.tail = "\n".into();
        self.bit_number.write_xml(bit_number, false);
    }
}

/// A prefix part of a bit-field group: a bounding box, a prefix text line, a
/// list of fields and a suffix text line.
///
/// NOTE(review): `fields: Vec` carries no element type; presumably
/// `Vec<InsnBitField>`, since the entries are passed to
/// `InsnBitFields::write_xml_fields`. Confirm.
#[derive(Clone, Debug)]
struct InsnBitFieldsPrefix {
    box_min_x: f32,
    box_min_y: f32,
    box_max_x: f32,
    box_max_y: f32,
    prefix_text: ParsedTextLine,
    fields: Vec,
    suffix_text: ParsedTextLine,
}

impl fmt::Display for InsnBitFieldsPrefix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            box_min_x,
            box_min_y,
            box_max_x,
            box_max_y,
            prefix_text,
            fields,
            suffix_text,
        } = self;
        // NOTE(review): the format string is empty, so only a newline is
        // written and the destructured fields are unused. Confirm the literal
        // is intact.
        writeln!(
            f,
            "")
    }
}

impl InsnBitFieldsPrefix {
    /// Appends a `prefix` element to `parent` with, in order, a `prefix-text`
    /// child, a `fields` child (via `InsnBitFields::write_xml_fields`) and a
    /// `suffix-text` child.
    fn write_xml(&self, parent: &mut xml_tree::Element) {
        let prefix_elm = parent.sub_element("prefix".into(), []);
        prefix_elm.text = "\n".into();
        prefix_elm.tail = "\n".into();
        let prefix_text = prefix_elm.sub_element("prefix-text".into(), []);
        prefix_text.tail = "\n".into();
        self.prefix_text.write_xml(prefix_text, false);
        InsnBitFields::write_xml_fields(&self.fields, prefix_elm);
        let suffix_text = prefix_elm.sub_element("suffix-text".into(), []);
        suffix_text.tail = "\n".into();
        self.suffix_text.write_xml(suffix_text, false);
    }
}

/// A group of bit fields with a bounding box and an optional prefix part.
///
/// NOTE(review): `prefix: Option` and `fields: Vec` carry no type arguments;
/// presumably `Option<InsnBitFieldsPrefix>` and `Vec<InsnBitField>`. Confirm.
#[derive(Clone, Debug)]
struct InsnBitFields {
    prefix: Option,
    box_min_x: f32,
    box_min_y: f32,
    box_max_x: f32,
    box_max_y: f32,
    fields: Vec,
}

impl fmt::Display for InsnBitFields {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            prefix,
            box_min_x,
            box_min_y,
            box_max_x,
            box_max_y,
            fields,
        } = self;
        // The prefix, when present, is formatted first.
        if let Some(prefix) = prefix {
            prefix.fmt(f)?;
        }
        // NOTE(review): the format string is empty, so only a newline is
        // written after the optional prefix. Confirm the literal is intact.
        writeln!(
            f,
            "")
    }
}

impl InsnBitFields {
    /// Appends a `fields` element (text and tail both `"\n"`) to `parent` and
    /// lets every item of `fields` write itself into it through
    /// `field.borrow().write_xml(..)`.
    fn write_xml_fields(
        fields: impl IntoIterator>,
        parent: &mut xml_tree::Element,
    ) {
        let fields_elm = parent.sub_element("fields".into(), []);
        fields_elm.text = "\n".into();
        fields_elm.tail = "\n".into();
        for field in fields {
            field.borrow().write_xml(fields_elm);
        }
    }

    /// Appends a `bit-fields` element to `parent`, containing the prefix (when
    /// present) followed by the `fields` element for `self.fields`.
    fn write_xml(&self, parent: &mut xml_tree::Element) {
        let bit_fields = parent.sub_element("bit-fields".into(), []);
        bit_fields.text = "\n".into();
        bit_fields.tail = "\n".into();
        if let Some(prefix) = &self.prefix {
            prefix.write_xml(bit_fields);
        }
        Self::write_xml_fields(&self.fields, bit_fields)
    }
}

/// One entry made of a register text line plus lists of field lines and
/// condition lines.
///
/// NOTE(review): `fields: Vec` and `conds: Vec` carry no element type;
/// presumably `Vec<ParsedTextLine>`, since both are handed to
/// `ParsedTextLine::write_xml_lines`. Confirm.
#[derive(Clone, Debug)]
struct InsnSpRegsAlteredEntry {
    reg: ParsedTextLine,
    fields: Vec,
    conds: Vec,
}

impl InsnSpRegsAlteredEntry {
    /// Writes a multi-line `Entry(...)` rendering to `f`, prefixing every line
    /// after the first with `indent`.
    ///
    /// An empty `fields` or `conds` list is written as `()`; otherwise each
    /// item goes on its own line between `(` and `)`.
    fn display_fmt_with_indent(&self, f: &mut fmt::Formatter<'_>, indent: &str) -> fmt::Result {
        let Self { reg, fields, conds } = self;
        writeln!(f, "Entry(")?;
        writeln!(f, "{indent} reg={reg},")?;
        write!(f, "{indent} fields=")?;
        if fields.is_empty() {
            write!(f, "()")?;
        } else {
            writeln!(f, "(")?;
            for field in fields {
                writeln!(f, "{indent} {field},")?;
            }
            write!(f, "{indent} )")?;
        }
        writeln!(f, ",")?;
        // NOTE(review): `conds=` is written with `writeln!`, so a line break
        // separates it from the `()` / `(` that follows, whereas `fields=`
        // above uses `write!`. Confirm whether this asymmetry is intended.
        writeln!(f, "{indent} conds=")?;
        if conds.is_empty() {
            write!(f, "()")?;
        } else {
            writeln!(f, "(")?;
            for cond in conds {
                writeln!(f, "{indent} {cond},")?;
            }
            write!(f, "{indent} )")?;
        }
        writeln!(f, ",")?;
        write!(f, "{indent})")
    }

    /// Appends an `entry` element to `parent` with `register`, `fields` and
    /// `conditions` children.
    ///
    /// The two list children are filled by `ParsedTextLine::write_xml_lines`;
    /// the meaning of its two `false` flags is defined by that function.
    fn write_xml(&self, parent: &mut xml_tree::Element) {
        let entry = parent.sub_element("entry".into(), []);
        entry.text = "\n".into();
        entry.tail = "\n".into();
        let reg = entry.sub_element("register".into(), []);
        reg.tail = "\n".into();
        self.reg.write_xml(reg, false);
        let fields = entry.sub_element("fields".into(), []);
        fields.tail = "\n".into();
        ParsedTextLine::write_xml_lines(&self.fields, fields, false, false);
        let conds = entry.sub_element("conditions".into(), []);
        conds.tail = "\n".into();
        ParsedTextLine::write_xml_lines(&self.conds, conds, false, false);
    }
}

impl fmt::Display for InsnSpRegsAlteredEntry {
    /// Same output as `display_fmt_with_indent` with an empty indent.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.display_fmt_with_indent(f, "")
    }
}

/// A title text line, optional special text, optional table-header lines, a
/// list of entries and a `final_regular_min_y` value.
///
/// NOTE(review): the `Option` and `Vec` fields carry no type arguments;
/// presumably `Option<ParsedTextLine>` and `Vec<InsnSpRegsAlteredEntry>`,
/// judging by how `write_xml` uses them. Confirm.
#[derive(Clone, Debug)]
struct InsnSpRegsAltered {
    sp_regs_altered_text: ParsedTextLine,
    special_text: Option,
    table_header_reg: Option,
    table_header_fields: Option,
    entries: Vec,
    final_regular_min_y: f32,
}

impl fmt::Display for InsnSpRegsAltered {
    /// Writes a multi-line `InsnSpRegsAltered(...)` rendering. The three
    /// optional fields are printed only when they are `Some`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            sp_regs_altered_text,
            special_text,
            table_header_reg,
            table_header_fields,
            entries,
            final_regular_min_y,
        } = self;
        writeln!(f, "InsnSpRegsAltered(")?;
        writeln!(f, " sp_regs_altered_text={sp_regs_altered_text},")?;
        if let Some(special_text) = special_text {
            writeln!(f, " special_text={special_text},")?;
        }
        if let Some(table_header_reg) = table_header_reg {
            writeln!(f, " table_header_reg={table_header_reg},")?;
        }
        if let Some(table_header_fields) = table_header_fields {
            writeln!(f, " table_header_fields={table_header_fields},")?;
        }
        if self.entries.is_empty() {
            writeln!(f, " entries=(),")?;
        } else {
            writeln!(f, " entries=(")?;
            // Each entry is preceded by the literal written here and rendered
            // with the indent string passed to `display_fmt_with_indent`.
            for entry in entries {
                write!(f, " ")?;
                entry.display_fmt_with_indent(f, " ")?;
                writeln!(f, ",")?;
            }
            writeln!(f, " ),")?;
        }
        writeln!(f, " final_regular_min_y={final_regular_min_y},")?;
        write!(f, ")")
    }
}

impl InsnSpRegsAltered {
    /// Appends a `special-registers-altered` element to `parent`.
    ///
    /// Children, in order: `title`; then `special-text`,
    /// `table-header-register` and `table-header-fields`, each only when the
    /// corresponding field is `Some`; then one `entry` element per item of
    /// `self.entries`.
    fn write_xml(&self, parent: &mut xml_tree::Element) {
        let sp_regs_altered = parent.sub_element("special-registers-altered".into(), []);
        sp_regs_altered.text = "\n".into();
        sp_regs_altered.tail = "\n".into();
        let title = sp_regs_altered.sub_element("title".into(), []);
        title.tail = "\n".into();
        self.sp_regs_altered_text.write_xml(title, false);
        if let Some(special_text) = &self.special_text {
            let special_text_el = sp_regs_altered.sub_element("special-text".into(), []);
            special_text_el.tail = "\n".into();
            special_text.write_xml(special_text_el, false);
        }
        if let Some(table_header_reg) = &self.table_header_reg {
            let table_header_reg_el =
                sp_regs_altered.sub_element("table-header-register".into(), []);
            table_header_reg_el.tail = "\n".into();
            table_header_reg.write_xml(table_header_reg_el, false);
        }
        if let Some(table_header_fields) = &self.table_header_fields {
            let table_header_fields_el =
                sp_regs_altered.sub_element("table-header-fields".into(), []);
            table_header_fields_el.tail = "\n".into();
            table_header_fields.write_xml(table_header_fields_el, false);
        }
        for entry in &self.entries {
            entry.write_xml(sp_regs_altered);
        }
    }
}

/// Section kinds: `Code`, `Header` and `Desc`.
///
/// NOTE(review): presumably identifies which part of an instruction is being
/// parsed; confirm against the code that uses this enum.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum InsnParseSection {
    Code,
    Header,
    Desc,
}

/// Empty program entry point.
fn main() {}