parse_powerisa_pdf/src/main.rs

1150 lines
40 KiB
Rust

// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use non_nan_float::NonNaNF32;
use std::{
borrow::Borrow,
collections::{HashMap, HashSet},
fmt,
sync::OnceLock,
};
mod quad_tree;
mod xml_tree;
/// Home of [`NonNaNF32`], an `f32` wrapper that is guaranteed not to hold NaN.
mod non_nan_float {
    use std::cmp::Ordering;
    use std::fmt;
    use std::hash::{Hash, Hasher};
    use std::ops::Neg;

    /// An `f32` that is never NaN, so equality, total ordering and hashing are
    /// all well defined for it.
    #[derive(Default, PartialEq, PartialOrd, Clone, Copy)]
    pub(crate) struct NonNaNF32(f32);

    impl NonNaNF32 {
        /// Wraps `v`, returning `None` when `v` is NaN.
        pub(crate) const fn new(v: f32) -> Option<Self> {
            match v.is_nan() {
                true => None,
                false => Some(Self(v)),
            }
        }

        /// Returns the wrapped float.
        pub(crate) const fn get(self) -> f32 {
            let Self(value) = self;
            value
        }
    }

    /// Formats exactly like the wrapped `f32` does under `{:?}`.
    impl fmt::Debug for NonNaNF32 {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            fmt::Debug::fmt(&self.0, f)
        }
    }

    /// Formats exactly like the wrapped `f32` does under `{}`.
    impl fmt::Display for NonNaNF32 {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            fmt::Display::fmt(&self.0, f)
        }
    }

    // Sound because NaN, the only `f32` that is not equal to itself, is excluded.
    impl Eq for NonNaNF32 {}

    impl Ord for NonNaNF32 {
        fn cmp(&self, other: &Self) -> Ordering {
            PartialOrd::partial_cmp(self, other).expect("known to be non-NaN")
        }
    }

    impl Hash for NonNaNF32 {
        fn hash<H: Hasher>(&self, state: &mut H) {
            // `-0.0 == 0.0`, so both zeros must feed the same bits to the hasher
            // to stay consistent with `PartialEq`.
            let canonical = if self.0 == 0.0 { 0.0 } else { self.0 };
            let bits = canonical.to_bits();
            Hash::hash(&bits, state);
        }
    }

    impl Neg for NonNaNF32 {
        type Output = Self;

        fn neg(self) -> Self::Output {
            let Self(value) = self;
            Self(-value)
        }
    }
}
// Generates the font enum and its lookup helpers from a grouped table of known fonts.
//
// Input shape: `enum Name { #[other] Variant {..}, #[group] Group { #[name = "..", size = ..] Font, .. }, .. }`.
//
// The expansion contains:
// * the font enum itself: the `#[other]` variant followed by every known font, flattened
//   across all groups;
// * `KnownFontGroup`, one variant per `#[group]`, with `fonts()` listing a group's members;
// * `size()`, `font_name()`, `known_font_group()` and `line_height()` on the font enum.
//
// NOTE: `KnownFontGroup` and `Font` are spelled literally in the expansion (not via `$Font`),
// and `INSN_CODE_FONT_GROUPS` names the `InsnCode` and `InsnCodeSubscript` groups, so the macro
// only works for an enum called `Font` whose table defines those two groups.
macro_rules! make_enum_font {
(
enum $Font:ident {
#[other]
$Other:ident $other_body:tt,
$(#[group]
$KnownFontGroup:ident {
$(#[name = $known_font_name:literal, size = $known_font_size:literal]
$KnownFont:ident,)*
},)*
}
) => {
// The font enum: the catch-all variant first, then every known font as a unit variant.
#[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Clone)]
enum $Font {
$Other $other_body,
$($($KnownFont,)*)*
}
// One variant per `#[group]` in the table.
#[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Copy, Clone)]
enum KnownFontGroup {
$($KnownFontGroup,)*
}
impl KnownFontGroup {
// All fonts declared inside this group, in table order.
const fn fonts(self) -> &'static [Font] {
match self {
$(Self::$KnownFontGroup => &[$(Font::$KnownFont,)*],)*
}
}
// The groups scanned first by `line_height_helper` when matching a font by name.
const INSN_CODE_FONT_GROUPS: &[Self] = &[Self::InsnCode, Self::InsnCodeSubscript];
}
impl $Font {
// Font size: the stored value for the catch-all variant, the table literal otherwise.
const fn size(&self) -> f32 {
match *self {
Self::$Other { size, .. } => size.get(),
$($(Self::$KnownFont => $known_font_size,)*)*
}
}
// Font name: the stored string for the catch-all variant, the table literal otherwise.
const fn font_name(&self) -> &str {
match self {
Self::$Other { font_name, .. } => font_name,
$($(Self::$KnownFont => $known_font_name,)*)*
}
}
// The group a known font was declared in; `None` for the catch-all variant.
const fn known_font_group(&self) -> Option<KnownFontGroup> {
match self {
Self::$Other { .. } => None,
$($(Self::$KnownFont => Some(KnownFontGroup::$KnownFontGroup),)*)*
}
}
// Line height via `line_height_helper` (defined outside this macro). For known fonts the
// call sits in a `const { }` block, so it is evaluated at compile time and a font for which
// the helper panics becomes a build error; the catch-all variant is computed at run time.
const fn line_height(&self) -> f32 {
match self {
Self::$Other { .. } => self.line_height_helper(),
$($(Self::$KnownFont => const { Self::$KnownFont.line_height_helper() },)*)*
}
}
}
};
}
// Table of every font the parser knows by name and size, organised into groups.
//
// Each entry becomes a unit variant of `Font`; each `#[group]` becomes a `KnownFontGroup`
// variant. Any other font is represented by `Font::Other`, which stores its name and size.
//
// NOTE(review): the `XXXXXX+` prefixes on the names look like PDF font-subset tags, which
// would tie this table to one specific build of the PDF -- confirm before reusing it for
// another document revision. Sizes are presumably in PDF points -- TODO confirm.
make_enum_font! {
enum Font {
// Catch-all for fonts that are not listed below.
#[other]
Other {
font_name: Box<str>,
size: NonNaNF32,
},
#[group]
InsnHeader {
#[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 9.963]
InsnHeader,
},
#[group]
RtlFnHeader {
#[name = "APUYSQ+zcoN-Regular", size = 9.963]
RtlFnHeader,
},
#[group]
PageHeader {
#[name = "MJBFWM+DejaVuSansCondensed", size = 9.963]
PageHeader,
},
#[group]
PageFooter {
#[name = "MJBFWM+DejaVuSansCondensed", size = 4.981]
PageFooter,
},
#[group]
InsnDesc {
#[name = "MJBFWM+DejaVuSansCondensed", size = 8.966]
InsnDesc0,
#[name = "FZTIYT+CMMI9", size = 8.966]
InsnDesc1,
#[name = "ONUAYC+CMSSI9", size = 8.966]
InsnDesc2,
#[name = "TNGBFZ+CMSY9", size = 8.966]
InsnDesc3,
#[name = "WHMZPU+CMEX9", size = 8.966]
InsnDesc4,
#[name = "ZJTMSG+CMSS9", size = 8.966]
InsnDesc5,
},
// Same typeface as `InsnDesc0` at a range of other sizes.
#[group]
InsnDescMisc {
#[name = "MJBFWM+DejaVuSansCondensed", size = 2.377]
InsnDescMisc0,
#[name = "MJBFWM+DejaVuSansCondensed", size = 2.561]
InsnDescMisc1,
#[name = "MJBFWM+DejaVuSansCondensed", size = 4.492]
InsnDescMisc2,
#[name = "MJBFWM+DejaVuSansCondensed", size = 4.641]
InsnDescMisc3,
#[name = "MJBFWM+DejaVuSansCondensed", size = 4.772]
InsnDescMisc4,
#[name = "MJBFWM+DejaVuSansCondensed", size = 4.864]
InsnDescMisc5,
#[name = "MJBFWM+DejaVuSansCondensed", size = 4.925]
InsnDescMisc6,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.097]
InsnDescMisc7,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.123]
InsnDescMisc8,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.131]
InsnDescMisc9,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.516]
InsnDescMisc10,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.604]
InsnDescMisc11,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.634]
InsnDescMisc12,
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.906]
InsnDescMisc13,
#[name = "MJBFWM+DejaVuSansCondensed", size = 6.033]
InsnDescMisc14,
#[name = "MJBFWM+DejaVuSansCondensed", size = 6.068]
InsnDescMisc15,
#[name = "MJBFWM+DejaVuSansCondensed", size = 6.213]
InsnDescMisc16,
#[name = "MJBFWM+DejaVuSansCondensed", size = 6.252]
InsnDescMisc17,
#[name = "MJBFWM+DejaVuSansCondensed", size = 6.962]
InsnDescMisc18,
#[name = "MJBFWM+DejaVuSansCondensed", size = 7.977]
InsnDescMisc19,
},
#[group]
InsnDescCode {
#[name = "APUYSQ+zcoN-Regular", size = 6.974]
InsnDescCode,
},
#[group]
InsnDescCodeMisc {
#[name = "APUYSQ+zcoN-Regular", size = 3.587]
InsnDescCodeMisc0,
#[name = "APUYSQ+zcoN-Regular", size = 4.483]
InsnDescCodeMisc1,
},
#[group]
InsnDescItalic {
#[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 8.966]
InsnDescItalic,
},
#[group]
InsnDescBold {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.966]
InsnDescBold,
},
#[group]
InsnDescBoldItalic {
#[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 8.966]
InsnDescBoldItalic,
},
#[group]
InsnDescSmall {
#[name = "MJBFWM+DejaVuSansCondensed", size = 7.97]
InsnDescSmall,
},
#[group]
InsnDescSmallItalic {
#[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 7.97]
InsnDescSmallItalic,
},
#[group]
InsnDescSmallBold {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 7.97]
InsnDescSmallBold,
},
#[group]
InsnDescSmallBoldItalic {
#[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 7.97]
InsnDescSmallBoldItalic,
},
// Same typeface as `InsnDescBold` at a range of other sizes.
#[group]
InsnDescBoldMisc {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.21]
InsnDescBoldMisc0,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.399]
InsnDescBoldMisc1,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.763]
InsnDescBoldMisc2,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.946]
InsnDescBoldMisc3,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.949]
InsnDescBoldMisc4,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 2.999]
InsnDescBoldMisc5,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.065]
InsnDescBoldMisc6,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.086]
InsnDescBoldMisc7,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.183]
InsnDescBoldMisc8,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.686]
InsnDescBoldMisc9,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.744]
InsnDescBoldMisc10,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.825]
InsnDescBoldMisc11,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.842]
InsnDescBoldMisc12,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.857]
InsnDescBoldMisc13,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 3.979]
InsnDescBoldMisc14,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.032]
InsnDescBoldMisc15,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.112]
InsnDescBoldMisc16,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.161]
InsnDescBoldMisc17,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.206]
InsnDescBoldMisc18,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.353]
InsnDescBoldMisc19,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.378]
InsnDescBoldMisc20,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.434]
InsnDescBoldMisc21,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.595]
InsnDescBoldMisc22,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.619]
InsnDescBoldMisc23,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.647]
InsnDescBoldMisc24,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.68]
InsnDescBoldMisc25,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.693]
InsnDescBoldMisc26,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.736]
InsnDescBoldMisc27,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.781]
InsnDescBoldMisc28,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.802]
InsnDescBoldMisc29,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 4.995]
InsnDescBoldMisc30,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.201]
InsnDescBoldMisc31,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.258]
InsnDescBoldMisc32,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.363]
InsnDescBoldMisc33,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.442]
InsnDescBoldMisc34,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.473]
InsnDescBoldMisc35,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.485]
InsnDescBoldMisc36,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.512]
InsnDescBoldMisc37,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.543]
InsnDescBoldMisc38,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.613]
InsnDescBoldMisc39,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.744]
InsnDescBoldMisc40,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.774]
InsnDescBoldMisc41,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.809]
InsnDescBoldMisc42,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.849]
InsnDescBoldMisc43,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.911]
InsnDescBoldMisc44,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.92]
InsnDescBoldMisc45,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.962]
InsnDescBoldMisc46,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.981]
InsnDescBoldMisc47,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.146]
InsnDescBoldMisc48,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.213]
InsnDescBoldMisc49,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.221]
InsnDescBoldMisc50,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.243]
InsnDescBoldMisc51,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.55]
InsnDescBoldMisc52,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.62]
InsnDescBoldMisc53,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.699]
InsnDescBoldMisc54,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.725]
InsnDescBoldMisc55,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.751]
InsnDescBoldMisc56,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.856]
InsnDescBoldMisc57,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.029]
InsnDescBoldMisc58,
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 8.406]
InsnDescBoldMisc59,
},
#[group]
InsnDescSubscript {
#[name = "MJBFWM+DejaVuSansCondensed", size = 5.978]
InsnDescSubscript,
},
#[group]
InsnDescBoldSubscript {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 5.978]
InsnDescBoldSubscript,
},
#[group]
InsnDescItalicSubscript {
#[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 5.978]
InsnDescItalicSubscript,
},
#[group]
InsnDescBoldItalicSubscript {
#[name = "YDJYQV+DejaVuSansCondensed-BoldOblique", size = 5.978]
InsnDescBoldItalicSubscript,
},
#[group]
InsnExtMnemonic {
#[name = "APUYSQ+zcoN-Regular", size = 8.966]
InsnExtMnemonic,
},
// `InsnCode` and `InsnCodeSubscript` are the groups named by
// `KnownFontGroup::INSN_CODE_FONT_GROUPS`; `InsnCode0` is also the reference font for
// `Font::space_width` and the code line height.
#[group]
InsnCode {
#[name = "APUYSQ+zcoN-Regular", size = 7.97]
InsnCode0,
#[name = "RRFUNA+CMSY8", size = 7.97]
InsnCode1,
#[name = "HPXOZC+CMSS8", size = 7.97]
InsnCode2,
},
#[group]
InsnCodeSubscript {
#[name = "APUYSQ+zcoN-Regular", size = 5.978]
InsnCodeSubscript0,
#[name = "DBQTKF+CMSY6", size = 5.978]
InsnCodeSubscript1,
},
#[group]
TitlePageBig {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 24.787]
TitlePageBig,
},
#[group]
TitlePageVersion {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 9.963]
TitlePageVersion,
},
#[group]
TitlePageTm {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 6.974]
TitlePageTm,
},
#[group]
TitlePageRev {
#[name = "MJBFWM+DejaVuSansCondensed", size = 6.974]
TitlePageRev,
},
#[group]
TitlePageBook {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 20.663]
TitlePageBook,
},
#[group]
LegalPageItalic {
#[name = "CGMSHV+DejaVuSansCondensed-Oblique", size = 9.963]
LegalPageItalic,
},
#[group]
ChangeSummaryPageBold {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 11.955]
ChangeSummaryPageBold,
},
#[group]
ChapterTitle {
#[name = "NHUPPK+DejaVuSansCondensed-Bold", size = 17.215]
ChapterTitle,
},
#[group]
MathMisc {
#[name = "AAJMKT+CMMI6", size = 5.978]
MathMisc0,
#[name = "CUTMFD+CMSSI8", size = 5.978]
MathMisc1,
#[name = "CUTMFD+CMSSI8", size = 7.97]
MathMisc2,
#[name = "FZTIYT+CMMI9", size = 5.734]
MathMisc3,
#[name = "FZTIYT+CMMI9", size = 7.168]
MathMisc4,
#[name = "HONFQS+CMMI8", size = 7.97]
MathMisc5,
#[name = "HPXOZC+CMSS8", size = 5.978]
MathMisc6,
#[name = "LLVRDD+CMSY10", size = 11.955]
MathMisc7,
#[name = "ZJTMSG+CMSS9", size = 7.168]
MathMisc8,
},
}
}
impl Font {
    /// Width of a space in this font: `3.985` at the size of `Font::InsnCode0`,
    /// scaled linearly with this font's size.
    const fn space_width(&self) -> f32 {
        // Ratio is fixed at compile time; only the multiplication happens per call.
        const SPACE_WIDTH_PER_SIZE: f32 = 3.985 / Font::InsnCode0.size();
        self.size() * SPACE_WIDTH_PER_SIZE
    }

    /// Computes the line height for this font, scaled linearly with its size.
    ///
    /// Fonts sharing a name with any font in `KnownFontGroup::INSN_CODE_FONT_GROUPS`
    /// use `9.464` at the size of `Font::InsnCode0`. Fonts in the `InsnDesc` or
    /// `MathMisc` groups, or sharing a name with `InsnDesc0`, `InsnDescBold`,
    /// `InsnDescItalic` or `InsnDescBoldItalic`, use `10.959` at the size of
    /// `Font::InsnDesc0`.
    ///
    /// # Panics
    ///
    /// Panics with `"no line height"` for every other font.
    const fn line_height_helper(&self) -> f32 {
        /// Byte-wise string equality usable in `const fn`.
        const fn str_eq(lhs: &str, rhs: &str) -> bool {
            let (lhs, rhs) = (lhs.as_bytes(), rhs.as_bytes());
            if lhs.len() != rhs.len() {
                return false;
            }
            let mut pos = 0;
            loop {
                if pos == lhs.len() {
                    return true;
                }
                if lhs[pos] != rhs[pos] {
                    return false;
                }
                pos += 1;
            }
        }

        /// Whether `name` is the name of any font in the instruction-code groups.
        const fn is_insn_code_font_name(name: &str) -> bool {
            let groups = KnownFontGroup::INSN_CODE_FONT_GROUPS;
            let mut group_idx = 0;
            while group_idx < groups.len() {
                let members = groups[group_idx].fonts();
                let mut member_idx = 0;
                while member_idx < members.len() {
                    if str_eq(name, members[member_idx].font_name()) {
                        return true;
                    }
                    member_idx += 1;
                }
                group_idx += 1;
            }
            false
        }

        /// Whether `name` is the name of one of the four description typefaces.
        const fn is_insn_desc_font_name(name: &str) -> bool {
            str_eq(name, Font::InsnDesc0.font_name())
                || str_eq(name, Font::InsnDescBold.font_name())
                || str_eq(name, Font::InsnDescItalic.font_name())
                || str_eq(name, Font::InsnDescBoldItalic.font_name())
        }

        let name = self.font_name();
        // The code-font check comes first, so it wins when both checks would match.
        if is_insn_code_font_name(name) {
            return 9.464 * self.size() / Font::InsnCode0.size();
        }
        let in_desc_like_group = matches!(
            self.known_font_group(),
            Some(KnownFontGroup::InsnDesc | KnownFontGroup::MathMisc)
        );
        if in_desc_like_group || is_insn_desc_font_name(name) {
            10.959 * self.size() / Font::InsnDesc0.size()
        } else {
            panic!("no line height")
        }
    }
}
/// A piece of text in a single font, together with its bounding box.
///
/// NOTE(review): the coordinate system is not visible here; since
/// `top_down_left_to_right_sort_key` negates `min_y` to get top-down order, larger `y`
/// is presumably higher on the page -- confirm against the code that builds `Char`s.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct Char {
/// Font the text is set in.
font: Font,
/// The text itself.
text: String,
/// Presumably the horizontal advance of the glyph(s) -- TODO confirm.
adv: NonNaNF32,
/// Smallest x coordinate of the bounding box.
min_x: NonNaNF32,
/// Smallest y coordinate of the bounding box.
min_y: NonNaNF32,
/// Largest x coordinate of the bounding box.
max_x: NonNaNF32,
/// Largest y coordinate of the bounding box.
max_y: NonNaNF32,
}
impl Char {
    /// Horizontal extent of the bounding box (`max_x - min_x`).
    fn width(&self) -> f32 {
        let (left, right) = (self.min_x.get(), self.max_x.get());
        right - left
    }

    /// Vertical extent of the bounding box (`max_y - min_y`).
    fn height(&self) -> f32 {
        let (low, high) = (self.min_y.get(), self.max_y.get());
        high - low
    }

    /// Sort key ordering by descending `min_y` first and ascending `min_x` second.
    fn top_down_left_to_right_sort_key(&self) -> impl Ord {
        let descending_y = -self.min_y;
        let ascending_x = self.min_x;
        (descending_y, ascending_x)
    }
}
// Page-layout constants. None of their uses are visible in this part of the file, so the
// descriptions below are read off the names only -- NOTE(review): confirm against the code
// that consumes them. Units are presumably the same as the `Char` coordinates.

/// Presumably the x coordinate separating the left and right text columns.
const COLUMN_SPLIT_X: f32 = 300.0;
/// Presumably the right edge of the page body.
const PAGE_BODY_MAX_X: f32 = 600.0;
/// Presumably the left edge of the page body.
const PAGE_BODY_MIN_X: f32 = 50.0;
/// Presumably the top edge of the page body.
const PAGE_BODY_MAX_Y: f32 = 780.0;
/// Presumably the bottom edge of the page body.
const PAGE_BODY_MIN_Y: f32 = 45.0;
/// Presumably the y split used when a page has a one-line title.
const ONE_TITLE_LINE_SPLIT_Y: f32 = 734.0;
/// Presumably the y split used when a page has a two-line title.
const TWO_TITLE_LINES_SPLIT_Y: f32 = 715.0;
// Vertical measurements around instruction bit-field diagrams (by name; usage not shown).
const INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT: f32 = 29.938;
const INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT: f32 = 9.278;
const INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT: f32 = 20.971;
const INSN_BIT_FIELDS_TOP_PAD_HEIGHT: f32 = 20.175;
const INSN_BIT_FIELDS_TOP_PAD_HEIGHT2: f32 = 14.694;
const INSN_BIT_FIELDS_BOX_HEIGHT: f32 = 22.317;
// Horizontal positions for the "special registers altered" section (by name; usage not shown).
const INSN_SP_REGS_ALTERED_REGISTER_COLUMN_X: f32 = 34.405;
const INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X: f32 = 86.692;
const INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X: f32 = 188.74;
/// One parsed line of text, kept both as an XML element and as the characters it came from.
#[derive(Clone)]
struct ParsedTextLine {
/// XML rendering of the line; `write_xml` copies its text and children into a parent.
element: xml_tree::Element,
/// Lower bound used by `regular_height`.
/// NOTE(review): presumably the extent of the regular (non sub/superscript) text -- confirm.
regular_min_y: f32,
/// Upper bound used by `regular_height`.
regular_max_y: f32,
/// Which font set this line was parsed with.
fonts: TextLineFonts,
/// The characters making up the line.
chars: Vec<Char>,
/// Number of blank lines emitted before this line by `write_xml` and `Display`.
preceding_blank_lines: u32,
}
impl ParsedTextLine {
    /// Difference between `regular_max_y` and `regular_min_y`.
    fn regular_height(&self) -> f32 {
        let Self {
            regular_min_y,
            regular_max_y,
            ..
        } = self;
        *regular_max_y - *regular_min_y
    }

    /// Returns the line's text if the line looks like a section header.
    ///
    /// A header is a line whose element contains exactly one childless `<b>` child, with
    /// only whitespace around it, and whose text starts with an uppercase character and
    /// ends with `:`. Anything else yields `None`.
    ///
    /// # Panics
    ///
    /// Panics if the line was not parsed with `TextLineFonts::InsnDescFonts`.
    fn get_header_text(&self) -> Option<String> {
        assert_eq!(self.fonts, TextLineFonts::InsnDescFonts);
        let element = &self.element;
        let has_loose_text =
            !element.text.trim().is_empty() || !element.tail.trim().is_empty();
        if has_loose_text {
            return None;
        }
        let [b] = &*element.children else {
            return None;
        };
        if b.tag.normal() != Some("b") || b.children.len() > 0 {
            return None;
        }
        let text = element.inner_text();
        // should also check titlecase, but rust doesn't include that in std
        let starts_uppercase = text.chars().next().is_some_and(|ch| ch.is_uppercase());
        let looks_like_header = text.ends_with(":") && starts_uppercase;
        looks_like_header.then_some(text)
    }

    /// Appends this line to `parent`.
    ///
    /// Emits one `<br>` per preceding blank line, then the line's leading text and child
    /// elements, then one more `<br>` when `trailing_nl` is set. Every `<br>` gets a
    /// newline as its tail.
    fn write_xml(&self, parent: &mut xml_tree::Element, trailing_nl: bool) {
        let mut blank_lines_left = self.preceding_blank_lines;
        while blank_lines_left > 0 {
            parent.sub_element("br".into(), []).tail = "\n".into();
            blank_lines_left -= 1;
        }
        // Leading text continues whatever was written last: the tail of the previous
        // child if there is one, otherwise the parent's own text.
        match parent.children.last_mut() {
            Some(last_child) => {
                last_child.tail += &self.element.text;
            }
            None => {
                parent.text += &self.element.text;
            }
        }
        parent.children.extend_from_slice(&self.element.children);
        if trailing_nl {
            parent.sub_element("br".into(), []).tail = "\n".into();
        }
    }

    /// Appends `lines` to `parent`, separated by `<br>` elements.
    ///
    /// `preceding_nl` / `trailing_nl` add one extra `<br>` before / after all the lines.
    fn write_xml_lines(
        lines: impl IntoIterator<Item: Borrow<ParsedTextLine>>,
        parent: &mut xml_tree::Element,
        trailing_nl: bool,
        preceding_nl: bool,
    ) {
        if preceding_nl {
            parent.sub_element("br".into(), []).tail = "\n".into();
        }
        for (index, line) in lines.into_iter().enumerate() {
            // A separator goes between lines, so not before the first one.
            if index > 0 {
                parent.sub_element("br".into(), []).tail = "\n".into();
            }
            line.borrow().write_xml(parent, false);
        }
        if trailing_nl {
            parent.sub_element("br".into(), []).tail = "\n".into();
        }
    }
}
impl fmt::Debug for ParsedTextLine {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let Self {
element,
regular_min_y,
regular_max_y,
fonts,
chars,
preceding_blank_lines,
} = self;
f.debug_struct("ParsedTextLine")
.field("element", &format_args!("{element}"))
.field("regular_min_y", regular_min_y)
.field("regular_max_y", regular_max_y)
.field("fonts", fonts)
.field("chars", chars)
.field("preceding_blank_lines", preceding_blank_lines)
.finish()
}
}
/// Writes one newline per preceding blank line, followed by the element's `Display` form.
impl fmt::Display for ParsedTextLine {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        (0..self.preceding_blank_lines).try_for_each(|_| f.write_str("\n"))?;
        fmt::Display::fmt(&self.element, f)
    }
}
/// Position of a piece of text relative to the baseline of its line.
///
/// `FontVariantSubSuper::baseline_pos` maps superscripts to `Above` and subscripts to `Below`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum BaselinePos {
/// Used for superscripts.
Above,
/// Used for subscripts.
Below,
}
// Declares a field-less enum exactly as written (attributes included) and adds an
// associated `VALUES` constant listing every variant in declaration order.
macro_rules! make_enum_with_values {
(
$(#[$enum_meta:meta])*
enum $Enum:ident {
$($Variant:ident,)*
}
) => {
$(#[$enum_meta])*
enum $Enum {
$($Variant,)*
}
impl $Enum {
// Every variant, in declaration order.
const VALUES: &[Self] = &[$(Self::$Variant,)*];
}
};
}
// The font sets a text line can be parsed with. Each variant selects which fonts count as
// regular, bold, italic, subscript, code, ... text (see `impl TextLineFonts`).
// `TextLineFonts::VALUES` lists all variants.
make_enum_with_values! {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum TextLineFonts {
InsnMnemonicFonts,
InsnHeaderFonts,
InsnBitFieldBitNumberFonts,
InsnBitFieldNameFonts,
InsnBitFieldsAffixTitleFonts,
InsnCodeFonts,
InsnDescFonts,
}
}
impl TextLineFonts {
    /// Fonts treated as plain, unstyled text in this font set. Every set has some.
    fn regular(self) -> &'static [Font] {
        // `InsnDesc` plus `InsnDescSmall` cannot be spelled as one constant slice, so the
        // combined list is built once on first use.
        static INSN_DESC_REGULAR: OnceLock<Box<[Font]>> = OnceLock::new();
        match self {
            Self::InsnMnemonicFonts | Self::InsnBitFieldNameFonts => {
                KnownFontGroup::InsnDesc.fonts()
            }
            Self::InsnHeaderFonts => &[Font::InsnHeader],
            Self::InsnBitFieldBitNumberFonts => &[Font::InsnDescSmall, Font::TitlePageRev],
            Self::InsnBitFieldsAffixTitleFonts => &[Font::InsnDescSmall],
            Self::InsnCodeFonts => KnownFontGroup::InsnCode.fonts(),
            Self::InsnDescFonts => {
                let fonts: &'static [Font] = INSN_DESC_REGULAR.get_or_init(|| {
                    let mut fonts = KnownFontGroup::InsnDesc.fonts().to_vec();
                    fonts.push(Font::InsnDescSmall);
                    fonts.into_boxed_slice()
                });
                fonts
            }
        }
    }

    /// Fonts treated as italic text, or `None` if this set has no italic text.
    fn italic(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => Some(&[Font::InsnDescItalic, Font::InsnDescSmallItalic]),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts treated as bold text, or `None` if this set has no bold text.
    fn bold(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnBitFieldsAffixTitleFonts => Some(&[Font::InsnDescSmallBold]),
            Self::InsnDescFonts => Some(&[Font::InsnDescBold, Font::InsnDescSmallBold]),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts treated as bold-italic text, or `None` if this set has none.
    fn bold_italic(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => {
                Some(&[Font::InsnDescBoldItalic, Font::InsnDescSmallBoldItalic])
            }
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts used for plain sub- and superscripts, or `None` if this set has none.
    fn subscript(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnBitFieldNameFonts | Self::InsnDescFonts => {
                Some(&[Font::InsnDescSubscript])
            }
            Self::InsnCodeFonts => Some(KnownFontGroup::InsnCodeSubscript.fonts()),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldsAffixTitleFonts => None,
        }
    }

    /// Fonts used for bold sub- and superscripts, or `None` if this set has none.
    fn bold_subscript(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => Some(&[Font::InsnDescBoldSubscript]),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts used for italic sub- and superscripts, or `None` if this set has none.
    fn italic_subscript(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => Some(&[Font::InsnDescItalicSubscript]),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts used for bold-italic sub- and superscripts, or `None` if this set has none.
    fn bold_italic_subscript(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => Some(&[Font::InsnDescBoldItalicSubscript]),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts treated as inline code, or `None` if this set has no inline code.
    fn code(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => Some(&[Font::InsnDescCode, Font::InsnExtMnemonic]),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Fonts used for sub- and superscripts inside inline code, or `None` if this set
    /// has none.
    fn code_subscript(self) -> Option<&'static [Font]> {
        match self {
            Self::InsnDescFonts => Some(KnownFontGroup::InsnCodeSubscript.fonts()),
            Self::InsnMnemonicFonts
            | Self::InsnHeaderFonts
            | Self::InsnBitFieldBitNumberFonts
            | Self::InsnBitFieldNameFonts
            | Self::InsnBitFieldsAffixTitleFonts
            | Self::InsnCodeFonts => None,
        }
    }

    /// Looks up the fonts for `part_kind` in this set, together with the baseline position
    /// that kind requires (`None` for kinds that are neither sub- nor superscript).
    ///
    /// Superscript kinds share the font list of the matching subscript kind; the two are
    /// told apart by the baseline position only. Returns `None` when this set does not
    /// support `part_kind`.
    fn get_fonts(
        self,
        part_kind: TextLineFontKind,
    ) -> Option<(&'static [Font], Option<BaselinePos>)> {
        let fonts = match part_kind {
            TextLineFontKind::Regular => self.regular(),
            TextLineFontKind::Italic => self.italic()?,
            TextLineFontKind::Bold => self.bold()?,
            TextLineFontKind::BoldItalic => self.bold_italic()?,
            TextLineFontKind::Code => self.code()?,
            TextLineFontKind::Subscript | TextLineFontKind::Superscript => self.subscript()?,
            TextLineFontKind::BoldSubscript | TextLineFontKind::BoldSuperscript => {
                self.bold_subscript()?
            }
            TextLineFontKind::ItalicSubscript | TextLineFontKind::ItalicSuperscript => {
                self.italic_subscript()?
            }
            TextLineFontKind::BoldItalicSubscript | TextLineFontKind::BoldItalicSuperscript => {
                self.bold_italic_subscript()?
            }
            TextLineFontKind::CodeSubscript | TextLineFontKind::CodeSuperscript => {
                self.code_subscript()?
            }
        };
        let baseline_pos = part_kind.sub_super().baseline_pos();
        Some((fonts, baseline_pos))
    }

    /// Reverse lookup table for this set: `(font, baseline position)` to font kind.
    ///
    /// The tables for all sets are built together on first use and cached.
    ///
    /// # Panics
    ///
    /// Building the tables panics if, within one set, two kinds claim the same
    /// `(font, baseline position)` pair.
    fn font_to_kind_map(self) -> &'static HashMap<(Font, Option<BaselinePos>), TextLineFontKind> {
        type KindMap = HashMap<(Font, Option<BaselinePos>), TextLineFontKind>;
        static MAPS: OnceLock<HashMap<TextLineFonts, KindMap>> = OnceLock::new();
        let maps = MAPS.get_or_init(|| {
            let mut maps = HashMap::new();
            for &this in Self::VALUES {
                let mut map = KindMap::new();
                for &kind in TextLineFontKind::VALUES {
                    if let Some((fonts, baseline_pos)) = this.get_fonts(kind) {
                        for font in fonts {
                            let old_kind = map.insert((font.clone(), baseline_pos), kind);
                            assert!(
                                old_kind.is_none(),
                                "duplicate font: kind={kind:?} old_kind={old_kind:?} font={font:?}"
                            );
                        }
                    }
                }
                maps.insert(this, map);
            }
            maps
        });
        &maps[&self]
    }

    /// Every font that appears in this set under any kind. Built for all sets on first
    /// use and cached.
    fn fonts(self) -> &'static HashSet<Font> {
        static SETS: OnceLock<HashMap<TextLineFonts, HashSet<Font>>> = OnceLock::new();
        let sets = SETS.get_or_init(|| {
            Self::VALUES
                .iter()
                .map(|&this: &TextLineFonts| {
                    let set: HashSet<Font> = TextLineFontKind::VALUES
                        .iter()
                        .filter_map(|&kind| this.get_fonts(kind))
                        .flat_map(|(fonts, _baseline_pos)| fonts.iter().cloned())
                        .collect();
                    (this, set)
                })
                .collect()
        });
        &sets[&self]
    }

    /// Determines the kind of text that `font` at `baseline_pos` represents in this set.
    ///
    /// An entry recorded for that exact baseline position is preferred; otherwise the
    /// entry recorded without a baseline position is used. Returns `None` if the font is
    /// not part of this set.
    fn get_kind(self, font: Font, baseline_pos: BaselinePos) -> Option<TextLineFontKind> {
        let kinds = self.font_to_kind_map();
        let positioned_key = (font, Some(baseline_pos));
        if let Some(&kind) = kinds.get(&positioned_key) {
            return Some(kind);
        }
        let (font, _) = positioned_key;
        kinds.get(&(font, None)).copied()
    }
}
/// Whether a piece of text is inline code.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum FontVariantCode {
    Code,
    NotCode,
}

impl FontVariantCode {
    /// XML tag names this variant contributes: `["code"]` for code, nothing otherwise.
    const fn value(self) -> &'static [&'static str] {
        const CODE_TAGS: &[&str] = &["code"];
        const NO_TAGS: &[&str] = &[];
        if matches!(self, Self::Code) {
            CODE_TAGS
        } else {
            NO_TAGS
        }
    }
}
/// Whether a piece of text is bold.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum FontVariantBold {
    Bold,
    NotBold,
}

impl FontVariantBold {
    /// XML tag names this variant contributes: `["b"]` for bold, nothing otherwise.
    const fn value(self) -> &'static [&'static str] {
        const BOLD_TAGS: &[&str] = &["b"];
        const NO_TAGS: &[&str] = &[];
        if matches!(self, Self::Bold) {
            BOLD_TAGS
        } else {
            NO_TAGS
        }
    }
}
/// Whether a piece of text is italic.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum FontVariantItalic {
    Italic,
    NotItalic,
}

impl FontVariantItalic {
    /// XML tag names this variant contributes: `["i"]` for italic, nothing otherwise.
    const fn value(self) -> &'static [&'static str] {
        const ITALIC_TAGS: &[&str] = &["i"];
        const NO_TAGS: &[&str] = &[];
        if matches!(self, Self::Italic) {
            ITALIC_TAGS
        } else {
            NO_TAGS
        }
    }
}
/// Whether a piece of text is a subscript, a superscript, or neither.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum FontVariantSubSuper {
    NotSubSuper,
    Subscript,
    Superscript,
}

impl FontVariantSubSuper {
    /// XML tag names this variant contributes: `["sub"]`, `["sup"]`, or nothing.
    const fn value(self) -> &'static [&'static str] {
        const SUB_TAGS: &[&str] = &["sub"];
        const SUP_TAGS: &[&str] = &["sup"];
        const NO_TAGS: &[&str] = &[];
        match self {
            Self::Subscript => SUB_TAGS,
            Self::Superscript => SUP_TAGS,
            Self::NotSubSuper => NO_TAGS,
        }
    }
}
impl FontVariantSubSuper {
    /// Where text of this variant sits relative to the baseline: below for subscripts,
    /// above for superscripts, `None` for ordinary text.
    fn baseline_pos(self) -> Option<BaselinePos> {
        let pos = match self {
            Self::NotSubSuper => return None,
            Self::Superscript => BaselinePos::Above,
            Self::Subscript => BaselinePos::Below,
        };
        Some(pos)
    }
}
// The styles a run of text within a line can have. Each variant is a combination of
// code / bold / italic and sub- / superscript, decomposed by the accessors in
// `impl TextLineFontKind`. `TextLineFontKind::VALUES` lists all variants.
make_enum_with_values! {
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum TextLineFontKind {
Regular,
Subscript,
Superscript,
Italic,
ItalicSubscript,
ItalicSuperscript,
Bold,
BoldSubscript,
BoldSuperscript,
BoldItalic,
BoldItalicSubscript,
BoldItalicSuperscript,
Code,
CodeSubscript,
CodeSuperscript,
}
}
impl TextLineFontKind {
    /// Whether this kind is inline code.
    fn code(self) -> FontVariantCode {
        match self {
            Self::Code | Self::CodeSubscript | Self::CodeSuperscript => FontVariantCode::Code,
            Self::Regular
            | Self::Subscript
            | Self::Superscript
            | Self::Italic
            | Self::ItalicSubscript
            | Self::ItalicSuperscript
            | Self::Bold
            | Self::BoldSubscript
            | Self::BoldSuperscript
            | Self::BoldItalic
            | Self::BoldItalicSubscript
            | Self::BoldItalicSuperscript => FontVariantCode::NotCode,
        }
    }

    /// Whether this kind is bold. Code kinds are never bold.
    fn bold(self) -> FontVariantBold {
        match self {
            Self::Bold
            | Self::BoldSubscript
            | Self::BoldSuperscript
            | Self::BoldItalic
            | Self::BoldItalicSubscript
            | Self::BoldItalicSuperscript => FontVariantBold::Bold,
            Self::Regular
            | Self::Subscript
            | Self::Superscript
            | Self::Italic
            | Self::ItalicSubscript
            | Self::ItalicSuperscript
            | Self::Code
            | Self::CodeSubscript
            | Self::CodeSuperscript => FontVariantBold::NotBold,
        }
    }

    /// Whether this kind is italic. Code kinds are never italic.
    fn italic(self) -> FontVariantItalic {
        match self {
            Self::Italic
            | Self::ItalicSubscript
            | Self::ItalicSuperscript
            | Self::BoldItalic
            | Self::BoldItalicSubscript
            | Self::BoldItalicSuperscript => FontVariantItalic::Italic,
            Self::Regular
            | Self::Subscript
            | Self::Superscript
            | Self::Bold
            | Self::BoldSubscript
            | Self::BoldSuperscript
            | Self::Code
            | Self::CodeSubscript
            | Self::CodeSuperscript => FontVariantItalic::NotItalic,
        }
    }

    /// Whether this kind is a subscript, a superscript, or neither.
    fn sub_super(self) -> FontVariantSubSuper {
        match self {
            Self::Regular | Self::Italic | Self::Bold | Self::BoldItalic | Self::Code => {
                FontVariantSubSuper::NotSubSuper
            }
            Self::Subscript
            | Self::ItalicSubscript
            | Self::BoldSubscript
            | Self::BoldItalicSubscript
            | Self::CodeSubscript => FontVariantSubSuper::Subscript,
            Self::Superscript
            | Self::ItalicSuperscript
            | Self::BoldSuperscript
            | Self::BoldItalicSuperscript
            | Self::CodeSuperscript => FontVariantSubSuper::Superscript,
        }
    }

    /// XML tag names that express this kind, outermost first: code, then bold, then
    /// italic, then sub-/superscript. Empty for `Regular`.
    fn text_line_tags(self) -> impl Clone + Iterator<Item = &'static str> {
        let code_tags = self.code().value().iter().copied();
        let bold_tags = self.bold().value().iter().copied();
        let italic_tags = self.italic().value().iter().copied();
        let sub_super_tags = self.sub_super().value().iter().copied();
        code_tags
            .chain(bold_tags)
            .chain(italic_tags)
            .chain(sub_super_tags)
    }
}
/// Incrementally fills the body of an XML element with text wrapped in nested tags.
///
/// Open tags are kept on `stack`; an element is attached to its parent only when it is
/// popped off the stack (see `shrink_stack`), so `flush` must run before the result is
/// complete.
#[derive(Debug)]
struct ElementBodyBuilder<'a> {
/// The element whose body is being built.
containing_element: &'a mut xml_tree::Element,
/// Currently open nested elements, outermost first. Not yet attached to their parents.
stack: Vec<xml_tree::Element>,
}
impl<'a> ElementBodyBuilder<'a> {
    /// Creates a builder that writes into the body of `containing_element`, with no
    /// tags open.
    fn new(containing_element: &'a mut xml_tree::Element) -> Self {
        Self {
            containing_element,
            stack: Vec::with_capacity(5),
        }
    }

    /// Closes open elements, innermost first, until at most `new_len` remain open.
    ///
    /// Each closed element is appended to the children of the element that is then
    /// innermost (or of the containing element once nothing is open). Does nothing if
    /// `new_len` or fewer elements are open.
    fn shrink_stack(&mut self, new_len: usize) {
        while self.stack.len() > new_len {
            let element = self
                .stack
                .pop()
                .expect("stack is longer than new_len, so it cannot be empty");
            self.insert_point().children.push(element);
        }
    }

    /// Makes the open elements match `tag_stack` (outermost first).
    ///
    /// The longest prefix of already-open elements whose tags match is kept open. From
    /// the first mismatch onwards the open elements are closed and fresh elements are
    /// opened for the requested tags. Open elements beyond the end of `tag_stack` are
    /// closed as well.
    fn set_tag_stack<'b>(&mut self, tag_stack: impl IntoIterator<Item = &'b str>) {
        let mut new_len = 0;
        for (i, tag) in tag_stack.into_iter().enumerate() {
            new_len = i + 1;
            let already_open = self
                .stack
                .get(i)
                .is_some_and(|open| open.tag.normal() == Some(tag));
            if !already_open {
                // Close the mismatching element at depth `i` too (not just the ones
                // nested inside it), otherwise the wrong tag would stay open and later
                // text would be written into it. When nothing is open at depth `i`
                // this is a no-op.
                self.shrink_stack(i);
                self.stack.push(xml_tree::Element::new(tag.into(), []));
            }
        }
        self.shrink_stack(new_len);
    }

    /// Appends `text` at the current insert point: after the last child there if it
    /// has one, otherwise to the insert point's own leading text.
    fn write_text(&mut self, text: impl Borrow<str>) {
        let text = text.borrow();
        let insert_point = self.insert_point();
        if let Some(child) = insert_point.children.last_mut() {
            child.tail += text;
        } else {
            insert_point.text += text;
        }
    }

    /// The element new content goes into: the innermost open element, or the
    /// containing element when nothing is open.
    fn insert_point(&mut self) -> &mut xml_tree::Element {
        self.stack.last_mut().unwrap_or(self.containing_element)
    }

    /// Runs `f` with this builder and then closes every element still open, returning
    /// whatever `f` returned.
    fn scope<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
        let retval = f(self);
        self.flush();
        retval
    }

    /// Closes every open element, attaching all pending content to the containing
    /// element.
    fn flush(&mut self) {
        self.set_tag_stack([]);
    }
}
// Program entry point; currently an empty stub.
fn main() {}