From 8c02483b655d5110e9aa4e4dfe6fbd835e198636 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay
Date: Thu, 8 Jan 2026 00:09:20 -0800
Subject: [PATCH] wip parsing the xml

---
 Cargo.lock                 |   10 +
 Cargo.toml                 |    1 +
 crates/cpu/Cargo.toml      |    1 +
 crates/cpu/src/lib.rs      |    1 +
 crates/cpu/src/powerisa.rs | 1010 ++++++++++++++++++++++++++++++++++++
 5 files changed, 1023 insertions(+)
 create mode 100644 crates/cpu/src/powerisa.rs

diff --git a/Cargo.lock b/Cargo.lock
index 192a826..f33bbae 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -279,6 +279,7 @@ dependencies = [
  "fayalite",
  "hex-literal",
  "parse_powerisa_pdf",
+ "roxmltree",
  "serde",
  "sha2",
  "simple-mermaid",
@@ -897,6 +898,15 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "roxmltree"
+version = "0.21.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1964b10c76125c36f8afe190065a4bf9a87bf324842c05701330bba9f1cacbb"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "rustc-hash"
 version = "2.1.1"
diff --git a/Cargo.toml b/Cargo.toml
index b610217..b7dc4d3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ base16ct = "1.0.0"
 fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" }
 hex-literal = "1.1.0"
 parse_powerisa_pdf = { git = "https://git.libre-chip.org/libre-chip/parse_powerisa_pdf.git", version = "0.1.0", branch = "master" }
+roxmltree = "0.21.1"
 serde = { version = "1.0.202", features = ["derive"] }
 sha2 = "0.10.9"
 simple-mermaid = "0.2.0"
diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml
index b630b3e..ccff0d7 100644
--- a/crates/cpu/Cargo.toml
+++ b/crates/cpu/Cargo.toml
@@ -16,6 +16,7 @@ version.workspace = true
 
 [dependencies]
 fayalite.workspace = true
+roxmltree.workspace = true
 serde.workspace = true
 simple-mermaid.workspace = true
 
diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs
index a00b668..0a8a938 100644
--- a/crates/cpu/src/lib.rs
+++ b/crates/cpu/src/lib.rs
@@ -3,6 +3,7 @@
 pub mod config;
 pub mod instruction;
 pub mod next_pc;
+pub mod powerisa;
 pub mod reg_alloc;
 pub mod register;
 pub mod unit;
diff --git a/crates/cpu/src/powerisa.rs b/crates/cpu/src/powerisa.rs
new file mode 100644
index 0000000..ad3f7d3
--- /dev/null
+++ b/crates/cpu/src/powerisa.rs
@@ -0,0 +1,1010 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// See Notices.txt for copyright information
+
+//! Parser for the Power ISA instruction listing embedded from `powerisa-instructions.xml`.
+//!
+//! NOTE(review): work in progress -- the bodies of `bit-fields` and
+//! `special-registers-altered` elements are currently skipped, not parsed.
+
+use roxmltree::{Attribute, Document, Node, NodeType};
+use std::{fmt, panic::Location, sync::OnceLock};
+
+/// Text of `powerisa-instructions.xml`, included from `OUT_DIR` at compile time.
+const POWERISA_INSTRUCTIONS_XML: &str =
+    include_str!(concat!(env!("OUT_DIR"), "/powerisa-instructions.xml"));
+
+/// Parse failures; every variant except `XmlError` records the `Location` that reported it.
+enum Error<'a> {
+    XmlError(roxmltree::Error),
+    Unexpected {
+        loc: &'static Location<'static>,
+        node: Node<'a, 'static>,
+    },
+    BodyTooShort {
+        loc: &'static Location<'static>,
+        node: Node<'a, 'static>,
+    },
+    ExpectedTag {
+        loc: &'static Location<'static>,
+        expected_tag_name: &'a str,
+        got_element: Node<'a, 'static>,
+    },
+    MissingAttribute {
+        loc: &'static Location<'static>,
+        attribute_name: &'a str,
+        element: Node<'a, 'static>,
+    },
+    ExpectedAttribute {
+        loc: &'static Location<'static>,
+        expected_attribute_name: &'a str,
+        attribute: Attribute<'a, 'static>,
+        element: Node<'a, 'static>,
+    },
+    UnexpectedAttribute {
+        loc: &'static Location<'static>,
+        attribute: Attribute<'a, 'static>,
+        element: Node<'a, 'static>,
+    },
+    IsSubsetMustBeFalse {
+        loc: &'static Location<'static>,
+        is_subset: Attribute<'a, 'static>,
+    },
+}
+
+impl From<roxmltree::Error> for Error<'_> {
+    fn from(v: roxmltree::Error) -> Self {
+        Self::XmlError(v)
+    }
+}
+
+impl fmt::Display for Error<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::XmlError(v) => v.fmt(f),
+            Error::Unexpected { loc, node } => write!(f, "at {loc}: unexpected node: {node:?}"),
+            Error::BodyTooShort { loc, node } => {
+                write!(f, "at {loc}: node's body is too short: {node:?}")
+            }
+            Error::ExpectedTag {
+                loc,
+                expected_tag_name,
+                got_element,
+            } => write!(
+                f,
+                "at {loc}: expected tag {expected_tag_name:?} but got: {got_element:?}"
+            ),
+            Error::MissingAttribute {
+                loc,
+                attribute_name,
+                element,
+            } => write!(
+                f,
+                "at {loc}: missing attribute {attribute_name:?}: {element:?}"
+            ),
+            Error::ExpectedAttribute {
+                loc,
+                expected_attribute_name,
+                attribute,
+                element,
+            } => write!(
+                f,
+                "at {loc}: expected attribute with name {expected_attribute_name:?}: {attribute:?}\n\
+                 in element: {element:?}"
+            ),
+            Error::UnexpectedAttribute {
+                loc,
+                attribute,
+                element,
+            } => write!(
+                f,
+                "at {loc}: unexpected attribute: {attribute:?}\n\
+                 in element: {element:?}"
+            ),
+            Error::IsSubsetMustBeFalse { loc, is_subset } => {
+                write!(
+                    f,
+                    "at {loc}: `is-subset` attribute must be `False`: {is_subset:?}"
+                )
+            }
+        }
+    }
+}
+
+/// All instructions parsed from the embedded XML.
+pub struct Instructions {
+    instructions: Box<[Instruction]>,
+}
+
+impl fmt::Debug for Instructions {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("Instructions ")?;
+        self.instructions.fmt(f)
+    }
+}
+
+/// Cursor over the children of `parent`; `cur_node` is the next unconsumed child, if any.
+#[derive(Clone)]
+struct Parser<'a> {
+    parent: Node<'a, 'static>,
+    cur_node: Option<Node<'a, 'static>>,
+}
+
+impl<'a> Parser<'a> {
+    /// Advances the cursor past any comment nodes.
+    fn skip_comments(&mut self) {
+        while let Some(cur_node) = self.cur_node {
+            match cur_node.node_type() {
+                NodeType::Comment => {}
+                NodeType::Text | NodeType::Root | NodeType::Element | NodeType::PI => break,
+            }
+            self.cur_node = cur_node.next_sibling();
+        }
+    }
+    /// Advances the cursor past comments and text nodes holding only ASCII whitespace.
+    fn skip_ws_and_comments(&mut self) {
+        while let Some(cur_node) = self.cur_node {
+            match cur_node.node_type() {
+                NodeType::Comment => {}
+                NodeType::Text => {
+                    if cur_node
+                        .text()
+                        .is_some_and(|s| !s.trim_ascii_start().is_empty())
+                    {
+                        break;
+                    }
+                }
+                NodeType::Root | NodeType::Element | NodeType::PI => break,
+            }
+            self.cur_node = cur_node.next_sibling();
+        }
+    }
+    /// Drops the remaining children so a not-yet-parsed body is not reported as unexpected.
+    fn element_body_todo(&mut self) {
+        self.cur_node = None;
+    }
+    /// Returns whether `T::peek` accepts the current position; does not move the cursor.
+    fn peek<T: Parse>(&self) -> bool {
+        T::peek(self)
+    }
+    /// Returns the next node if it is an element, looking past whitespace and comments.
+    fn peek_any_element(&self) -> Option<Node<'a, 'static>> {
+        let mut parser = self.clone();
+        parser.skip_ws_and_comments();
+        let element = parser.cur_node?;
+        let NodeType::Element = element.node_type() else {
+            return None;
+        };
+        Some(element)
+    }
+    /// Like `peek_any_element`, but only returns an element whose tag is `tag_name`.
+    fn peek_element(&self, tag_name: &'a str) -> Option<Node<'a, 'static>> {
+        self.peek_any_element()
+            .filter(|element| element.has_tag_name(tag_name))
+    }
+    /// Parses a `T` starting at the cursor.
+    #[track_caller]
+    fn parse<T: Parse>(&mut self) -> Result<T, Error<'a>> {
+        T::parse(self)
+    }
+    /// Parses the next element, which must be named `tag_name` and carry exactly the
+    /// un-namespaced attributes `attr_names` in that order; `f` then parses its children.
+    ///
+    /// NOTE(review): `Location::caller()` is evaluated inside the closure below, so it may
+    /// not report this function's caller despite `#[track_caller]` -- confirm.
+    #[track_caller]
+    fn parse_element<T, const N: usize>(
+        &mut self,
+        tag_name: &'a str,
+        attr_names: [&'a str; N],
+        f: impl FnOnce(
+            Node<'a, 'static>,
+            [Attribute<'a, 'static>; N],
+            &mut Parser<'a>,
+        ) -> Result<T, Error<'a>>,
+    ) -> Result<T, Error<'a>> {
+        self.parse_any_element(|element, parser| {
+            if !element.has_tag_name(tag_name) {
+                return Err(Error::ExpectedTag {
+                    loc: Location::caller(),
+                    expected_tag_name: tag_name,
+                    got_element: element,
+                });
+            }
+            let mut attrs = [const { None }; N];
+            let mut attrs_iter = element.attributes();
+            for i in 0..N {
+                let Some(attr) = attrs_iter.next() else {
+                    return Err(Error::MissingAttribute {
+                        loc: Location::caller(),
+                        attribute_name: attr_names[i],
+                        element,
+                    });
+                };
+                if (attr.namespace(), attr.name()) != (None, attr_names[i]) {
+                    return Err(Error::ExpectedAttribute {
+                        loc: Location::caller(),
+                        expected_attribute_name: attr_names[i],
+                        attribute: attr,
+                        element,
+                    });
+                }
+                attrs[i] = Some(attr);
+            }
+            if let Some(attribute) = attrs_iter.next() {
+                return Err(Error::UnexpectedAttribute {
+                    loc: Location::caller(),
+                    attribute,
+                    element,
+                });
+            }
+            let attrs = attrs.map(|attr| attr.expect("filled in loop above"));
+            f(element, attrs, parser)
+        })
+    }
+    /// Parses the next element with `f`, which receives a parser over the element's children.
+    /// Fails if `f` leaves anything but whitespace and comments unconsumed.
+    fn parse_any_element<T>(
+        &mut self,
+        f: impl FnOnce(Node<'a, 'static>, &mut Parser<'a>) -> Result<T, Error<'a>>,
+    ) -> Result<T, Error<'a>> {
+        self.skip_ws_and_comments();
+        let Some(element) = self.cur_node else {
+            return Err(Error::BodyTooShort {
+                loc: Location::caller(),
+                node: self.parent,
+            });
+        };
+        let NodeType::Element = element.node_type() else {
+            return Err(Error::Unexpected {
+                loc: Location::caller(),
+                node: element,
+            });
+        };
+        let mut parser = Parser {
+            parent: element,
+            cur_node: element.first_child(),
+        };
+        let retval = f(element, &mut parser)?;
+        parser.skip_ws_and_comments();
+        if let Some(node) = parser.cur_node {
+            Err(Error::Unexpected {
+                loc: Location::caller(),
+                node,
+            })
+        } else {
+            self.cur_node = element.next_sibling();
+            Ok(retval)
+        }
+    }
+    /// Parses the children of the document root as one `T`, rejecting any leftover node.
+    fn parse_document<T: Parse>(document: &'a Document<'static>) -> Result<T, Error<'a>> {
+        let parent = document.root();
+        let mut parser = Parser {
+            parent,
+            cur_node: parent.first_child(),
+        };
+        let retval = parser.parse()?;
+        parser.skip_ws_and_comments();
+        if let Some(node) = parser.cur_node {
+            Err(Error::Unexpected {
+                loc: Location::caller(),
+                node,
+            })
+        } else {
+            Ok(retval)
+        }
+    }
+}
+
+/// Types that can be parsed from the nodes at a [`Parser`]'s cursor.
+trait Parse: Sized {
+    /// Lookahead: whether `parse` should be attempted at the cursor.
+    fn peek<'a>(parser: &Parser<'a>) -> bool;
+    /// Parses `Self` starting at the cursor.
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>>;
+}
+
+/// Zero or more `T`s: keeps parsing for as long as `T::peek` returns true.
+impl<T: Parse> Parse for Box<[T]> {
+    fn peek<'a>(_parser: &Parser<'a>) -> bool {
+        true
+    }
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>> {
+        let mut retval = Vec::new();
+        while parser.peek::<T>() {
+            retval.push(parser.parse()?);
+        }
+        Ok(retval.into_boxed_slice())
+    }
+}
+
+/// Zero or one `T`: parses a `T` only when `T::peek` returns true.
+impl<T: Parse> Parse for Option<T> {
+    fn peek<'a>(_parser: &Parser<'a>) -> bool {
+        true
+    }
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>> {
+        parser.peek::<T>().then(|| parser.parse()).transpose()
+    }
+}
+
+/// Implemented by the attribute-name lists (`()` or `[&str; N]`) an element type declares.
+trait ParseElementWithAttributes: Sized {
+    type Attributes<'a>: 'a;
+
+    fn parse_element_with_attributes<'a, T: ParseElement<AttributeNames = Self>>(
+        parser: &mut Parser<'a>,
+    ) -> Result<T, Error<'a>>;
+}
+
+impl<const N: usize> ParseElementWithAttributes for [&'static str; N] {
+    type Attributes<'a> = [Attribute<'a, 'static>; N];
+
+    fn parse_element_with_attributes<'a, T: ParseElement<AttributeNames = Self>>(
+        parser: &mut Parser<'a>,
+    ) -> Result<T, Error<'a>> {
+        parser.parse_element(T::TAG_NAME, T::ATTRIBUTE_NAMES, T::parse_element)
+    }
+}
+
+impl ParseElementWithAttributes for () {
+    type Attributes<'a> = ();
+
+    fn parse_element_with_attributes<'a, T: ParseElement<AttributeNames = Self>>(
+        parser: &mut Parser<'a>,
+    ) -> Result<T, Error<'a>> {
+        parser.parse_element(T::TAG_NAME, [], |element, [], parser| {
+            T::parse_element(element, (), parser)
+        })
+    }
+}
+
+/// An XML element type with a fixed tag name and a fixed, ordered attribute list.
+trait ParseElement: Parse {
+    type AttributeNames: ParseElementWithAttributes;
+    const TAG_NAME: &'static str;
+    const ATTRIBUTE_NAMES: Self::AttributeNames;
+    fn parse_element<'a>(
+        element: Node<'a, 'static>,
+        attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>>;
+}
+
+/// Every [`ParseElement`] is a [`Parse`] that is recognised by its tag name.
+impl<T: ParseElement> Parse for T {
+    fn peek<'a>(parser: &Parser<'a>) -> bool {
+        parser.peek_element(Self::TAG_NAME).is_some()
+    }
+
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>> {
+        T::AttributeNames::parse_element_with_attributes::<T>(parser)
+    }
+}
+
+impl Instructions {
+    /// Returns the parsed instructions as a slice.
+    pub fn instructions(&self) -> &[Instruction] {
+        &self.instructions
+    }
+    /// Returns the shared instruction list, parsing the embedded XML on the first call.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the embedded XML cannot be parsed.
+    pub fn get() -> &'static Self {
+        static INSTRUCTIONS: OnceLock<Instructions> = OnceLock::new();
+        INSTRUCTIONS.get_or_init(|| {
+            let handle_error =
+                |e: Error<'_>| unreachable!("powerisa-instructions.xml failed to parse: {e}");
+            match Document::parse(POWERISA_INSTRUCTIONS_XML) {
+                Ok(document) => match Parser::parse_document(&document) {
+                    Ok(v) => v,
+                    Err(e) => handle_error(e),
+                },
+                Err(e) => handle_error(e.into()),
+            }
+        })
+    }
+}
+
+impl ParseElement for Instructions {
+    type AttributeNames = [&'static str; 1];
+    const TAG_NAME: &'static str = "instructions";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ["is-subset"];
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        let [is_subset] = attributes;
+        if is_subset.value() != "False" {
+            return Err(Error::IsSubsetMustBeFalse {
+                loc: Location::caller(),
+                is_subset,
+            });
+        }
+        Ok(Self {
+            instructions: parser.parse()?,
+        })
+    }
+}
+
+/// One `instruction` element: zero or more headers followed by optional code, description,
+/// and special-registers-altered children, in that order.
+pub struct Instruction {
+    header: Box<[InstructionHeader]>,
+    code: Option<InstructionCode>,
+    description: Option<InstructionDescription>,
+    special_registers_altered: Option<InstructionSpecialRegistersAltered>,
+}
+
+/// `Debug` adapter that prints `Some(v)` as just `v` and `None` as `None`.
+struct FlattenedOption<'a, T>(&'a Option<T>);
+
+impl<T: fmt::Debug> fmt::Debug for FlattenedOption<'_, T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self.0 {
+            Some(v) => v.fmt(f),
+            None => f.write_str("None"),
+        }
+    }
+}
+
+impl fmt::Debug for Instruction {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self {
+            header,
+            code,
+            description,
+            special_registers_altered,
+        } = self;
+        f.debug_struct("Instruction")
+            .field("header", header)
+            .field("code", &FlattenedOption(code))
+            .field("description", &FlattenedOption(description))
+            .field(
+                "special_registers_altered",
+                &FlattenedOption(special_registers_altered),
+            )
+            .finish()
+    }
+}
+
+impl Instruction {
+    pub fn header(&self) -> &[InstructionHeader] {
+        &self.header
+    }
+    pub fn code(&self) -> Option<&InstructionCode> {
+        self.code.as_ref()
+    }
+    pub fn description(&self) -> Option<&InstructionDescription> {
+        self.description.as_ref()
+    }
+    pub fn special_registers_altered(&self) -> Option<&InstructionSpecialRegistersAltered> {
+        self.special_registers_altered.as_ref()
+    }
+}
+
+impl ParseElement for Instruction {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "instruction";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        Ok(Self {
+            header: parser.parse()?,
+            code: parser.parse()?,
+            description: parser.parse()?,
+            special_registers_altered: parser.parse()?,
+        })
+    }
+}
+
+/// A `header` element: a title, mnemonics, and bit fields, in that order.
+#[derive(Debug)]
+pub struct InstructionHeader {
+    title: InstructionTitle,
+    mnemonics: InstructionMnemonics,
+    bit_fields: InstructionBitFields,
+}
+
+impl InstructionHeader {
+    pub fn title(&self) -> &InstructionTitle {
+        &self.title
+    }
+    pub fn mnemonics(&self) -> &InstructionMnemonics {
+        &self.mnemonics
+    }
+    pub fn bit_fields(&self) -> &InstructionBitFields {
+        &self.bit_fields
+    }
+}
+
+impl ParseElement for InstructionHeader {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "header";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        Ok(Self {
+            title: parser.parse()?,
+            mnemonics: parser.parse()?,
+            bit_fields: parser.parse()?,
+        })
+    }
+}
+
+/// Text lines of a `title` element.
+pub struct InstructionTitle {
+    text_lines: TextLines,
+}
+
+impl fmt::Debug for InstructionTitle {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { text_lines } = self;
+        text_lines.debug_fmt("InstructionTitle", f)
+    }
+}
+
+impl InstructionTitle {
+    pub fn text_lines(&self) -> &TextLines {
+        &self.text_lines
+    }
+    pub fn lines(&self) -> &[TextLine] {
+        self.text_lines.lines()
+    }
+}
+
+impl ParseElement for InstructionTitle {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "title";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        Ok(Self {
+            text_lines: parser.parse()?,
+        })
+    }
+}
+
+/// Text lines of a `mnemonics` element.
+pub struct InstructionMnemonics {
+    text_lines: TextLines,
+}
+
+impl fmt::Debug for InstructionMnemonics {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { text_lines } = self;
+        text_lines.debug_fmt("InstructionMnemonics", f)
+    }
+}
+
+impl InstructionMnemonics {
+    pub fn text_lines(&self) -> &TextLines {
+        &self.text_lines
+    }
+    pub fn lines(&self) -> &[TextLine] {
+        self.text_lines.lines()
+    }
+}
+
+impl ParseElement for InstructionMnemonics {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "mnemonics";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        Ok(Self {
+            text_lines: parser.parse()?,
+        })
+    }
+}
+
+/// Placeholder for a `bit-fields` element; its body is not parsed yet.
+#[derive(Debug)]
+pub struct InstructionBitFields {}
+
+impl ParseElement for InstructionBitFields {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "bit-fields";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        // TODO
+        parser.element_body_todo();
+        Ok(Self {})
+    }
+}
+
+/// Text lines of a `code` element that is a direct child of `instruction`.
+pub struct InstructionCode {
+    text_lines: TextLines,
+}
+
+impl InstructionCode {
+    pub fn text_lines(&self) -> &TextLines {
+        &self.text_lines
+    }
+    pub fn lines(&self) -> &[TextLine] {
+        self.text_lines.lines()
+    }
+}
+
+impl fmt::Debug for InstructionCode {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { text_lines } = self;
+        text_lines.debug_fmt("InstructionCode", f)
+    }
+}
+
+impl ParseElement for InstructionCode {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "code";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        Ok(Self {
+            text_lines: parser.parse()?,
+        })
+    }
+}
+
+/// Text lines of a `description` element.
+pub struct InstructionDescription {
+    text_lines: TextLines,
+}
+
+impl InstructionDescription {
+    pub fn text_lines(&self) -> &TextLines {
+        &self.text_lines
+    }
+    pub fn lines(&self) -> &[TextLine] {
+        self.text_lines.lines()
+    }
+}
+
+impl fmt::Debug for InstructionDescription {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { text_lines } = self;
+        text_lines.debug_fmt("InstructionDescription", f)
+    }
+}
+
+impl ParseElement for InstructionDescription {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "description";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        Ok(Self {
+            text_lines: parser.parse()?,
+        })
+    }
+}
+
+/// Placeholder for a `special-registers-altered` element; its body is not parsed yet.
+#[derive(Debug)]
+pub struct InstructionSpecialRegistersAltered {}
+
+impl ParseElement for InstructionSpecialRegistersAltered {
+    type AttributeNames = ();
+    const TAG_NAME: &'static str = "special-registers-altered";
+    const ATTRIBUTE_NAMES: Self::AttributeNames = ();
+
+    fn parse_element<'a>(
+        _element: Node<'a, 'static>,
+        _attributes: <Self::AttributeNames as ParseElementWithAttributes>::Attributes<'a>,
+        parser: &mut Parser<'a>,
+    ) -> Result<Self, Error<'a>> {
+        // TODO
+        parser.element_body_todo();
+        Ok(Self {})
+    }
+}
+
+/// Plain text, or a `code`/`b`/`i`/`sub`/`sup` element wrapping nested items.
+pub enum TextLineItem {
+    Text(Box<str>),
+    Code(Box<[TextLineItem]>),
+    Bold(Box<[TextLineItem]>),
+    Italic(Box<[TextLineItem]>),
+    Subscript(Box<[TextLineItem]>),
+    Superscript(Box<[TextLineItem]>),
+}
+
+impl fmt::Debug for TextLineItem {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Text(v) => v.fmt(f),
+            Self::Code(v) => {
+                f.write_str("Code")?;
+                v.fmt(f)
+            }
+            Self::Bold(v) => {
+                f.write_str("Bold")?;
+                v.fmt(f)
+            }
+            Self::Italic(v) => {
+                f.write_str("Italic")?;
+                v.fmt(f)
+            }
+            Self::Subscript(v) => {
+                f.write_str("Subscript")?;
+                v.fmt(f)
+            }
+            Self::Superscript(v) => {
+                f.write_str("Superscript")?;
+                v.fmt(f)
+            }
+        }
+    }
+}
+
+/// Visitor with one callback per [`TextLineItem`] kind, selected by `match_node`.
+trait TextLineItemMatch<'a>: Sized {
+    type Output;
+    fn text(self, node: Node<'a, 'static>) -> Self::Output;
+    fn code(self, node: Node<'a, 'static>) -> Self::Output;
+    fn bold(self, node: Node<'a, 'static>) -> Self::Output;
+    fn italic(self, node: Node<'a, 'static>) -> Self::Output;
+    fn subscript(self, node: Node<'a, 'static>) -> Self::Output;
+    fn superscript(self, node: Node<'a, 'static>) -> Self::Output;
+    /// Calls the callback matching `node`; returns `None` if `node` is not a text-line item.
+    fn match_node(self, node: Node<'a, 'static>) -> Option<Self::Output> {
+        match node.node_type() {
+            NodeType::Element => {
+                if node.tag_name().namespace().is_none() {
+                    Some(match node.tag_name().name() {
+                        "code" => self.code(node),
+                        "b" => self.bold(node),
+                        "i" => self.italic(node),
+                        "sub" => self.subscript(node),
+                        "sup" => self.superscript(node),
+                        _ => return None,
+                    })
+                } else {
+                    None
+                }
+            }
+            NodeType::Root | NodeType::PI | NodeType::Comment => None,
+            NodeType::Text => Some(self.text(node)),
+        }
+    }
+}
+
+impl Parse for TextLineItem {
+    fn peek<'a>(parser: &Parser<'a>) -> bool {
+        let mut parser = parser.clone();
+        parser.skip_comments();
+        struct PeekMatch;
+        impl<'a> TextLineItemMatch<'a> for PeekMatch {
+            type Output = ();
+
+            fn text(self, _node: Node<'a, 'static>) -> Self::Output {}
+            fn code(self, _node: Node<'a, 'static>) -> Self::Output {}
+            fn bold(self, _node: Node<'a, 'static>) -> Self::Output {}
+            fn italic(self, _node: Node<'a, 'static>) -> Self::Output {}
+            fn subscript(self, _node: Node<'a, 'static>) -> Self::Output {}
+            fn superscript(self, _node: Node<'a, 'static>) -> Self::Output {}
+        }
+        parser
+            .cur_node
+            .is_some_and(|node| PeekMatch.match_node(node).is_some())
+    }
+
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>> {
+        parser.skip_comments();
+        struct ParseMatch<'b, 'a>(&'b mut Parser<'a>);
+        impl<'a> TextLineItemMatch<'a> for ParseMatch<'_, 'a> {
+            type Output = Result<TextLineItem, Error<'a>>;
+
+            fn text(self, node: Node<'a, 'static>) -> Self::Output {
+                self.0.cur_node = node.next_sibling();
+                self.0.skip_comments();
+                Ok(TextLineItem::Text(node.text().unwrap_or("").into()))
+            }
+
+            fn code(self, _node: Node<'a, 'static>) -> Self::Output {
+                let retval = self.0.parse_element("code", [], |_node, [], parser| {
+                    Ok(TextLineItem::Code(TextLine::parse(parser)?.items))
+                })?;
+                self.0.skip_comments();
+                Ok(retval)
+            }
+
+            fn bold(self, _node: Node<'a, 'static>) -> Self::Output {
+                let retval = self.0.parse_element("b", [], |_node, [], parser| {
+                    Ok(TextLineItem::Bold(TextLine::parse(parser)?.items))
+                })?;
+                self.0.skip_comments();
+                Ok(retval)
+            }
+
+            fn italic(self, _node: Node<'a, 'static>) -> Self::Output {
+                let retval = self.0.parse_element("i", [], |_node, [], parser| {
+                    Ok(TextLineItem::Italic(TextLine::parse(parser)?.items))
+                })?;
+                self.0.skip_comments();
+                Ok(retval)
+            }
+
+            fn subscript(self, _node: Node<'a, 'static>) -> Self::Output {
+                let retval = self.0.parse_element("sub", [], |_node, [], parser| {
+                    Ok(TextLineItem::Subscript(TextLine::parse(parser)?.items))
+                })?;
+                self.0.skip_comments();
+                Ok(retval)
+            }
+
+            fn superscript(self, _node: Node<'a, 'static>) -> Self::Output {
+                let retval = self.0.parse_element("sup", [], |_node, [], parser| {
+                    Ok(TextLineItem::Superscript(TextLine::parse(parser)?.items))
+                })?;
+                self.0.skip_comments();
+                Ok(retval)
+            }
+        }
+        let Some(item) = parser
+            .cur_node
+            .and_then(|node| ParseMatch(parser).match_node(node))
+            .transpose()?
+        else {
+            return Err(Error::BodyTooShort {
+                loc: Location::caller(),
+                node: parser.parent,
+            });
+        };
+        Ok(item)
+    }
+}
+
+/// A sequence of [`TextLineItem`]s; parsing stops at the first node that is not an item.
+pub struct TextLine {
+    items: Box<[TextLineItem]>,
+}
+
+impl fmt::Debug for TextLine {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { items } = self;
+        f.write_str("TextLine ")?;
+        items.fmt(f)
+    }
+}
+
+impl TextLine {
+    pub fn items(&self) -> &[TextLineItem] {
+        &self.items
+    }
+    /// Parses items up to the first non-item node. When `remove_leading_nl` is set, one
+    /// leading line break (`\r\n`, `\r`, or `\n`) is dropped from an initial text node.
+    fn parse_with_options<'a>(
+        parser: &mut Parser<'a>,
+        remove_leading_nl: bool,
+    ) -> Result<Self, Error<'a>> {
+        parser.skip_comments();
+        let mut items = Vec::new();
+        if let Some(node) = parser.cur_node {
+            if node.is_text() {
+                let mut text = node.text().expect("known to be text");
+                if remove_leading_nl {
+                    text = text
+                        .strip_prefix("\r\n")
+                        .or_else(|| text.strip_prefix(&['\r', '\n']))
+                        .unwrap_or(text);
+                }
+                if !text.is_empty() {
+                    items.push(TextLineItem::Text(text.into()));
+                }
+                parser.cur_node = node.next_sibling();
+                parser.skip_comments();
+            }
+        }
+        while TextLineItem::peek(parser) {
+            items.push(TextLineItem::parse(parser)?);
+            parser.skip_comments();
+        }
+        Ok(Self {
+            items: items.into_boxed_slice(),
+        })
+    }
+}
+
+impl Parse for TextLine {
+    fn peek<'a>(_parser: &Parser<'a>) -> bool {
+        true
+    }
+
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>> {
+        Self::parse_with_options(parser, false)
+    }
+}
+
+/// One or more [`TextLine`]s; each `br` element starts a new line.
+pub struct TextLines {
+    lines: Box<[TextLine]>,
+}
+
+impl fmt::Debug for TextLines {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.debug_fmt("TextLines", f)
+    }
+}
+
+impl TextLines {
+    pub fn lines(&self) -> &[TextLine] {
+        &self.lines
+    }
+    fn debug_fmt(&self, name: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { lines } = self;
+        f.write_str(name)?;
+        fmt::Debug::fmt(lines, f)
+    }
+}
+
+impl Parse for TextLines {
+    fn peek<'a>(parser: &Parser<'a>) -> bool {
+        parser.peek_element("br").is_some() || TextLine::peek(parser)
+    }
+
+    fn parse<'a>(parser: &mut Parser<'a>) -> Result<Self, Error<'a>> {
+        let mut lines = Vec::new();
+        lines.push(TextLine::parse(parser)?);
+        while parser.peek_element("br").is_some() {
+            parser.parse_element("br", [], |_element, [], _parser| Ok(()))?;
+            lines.push(TextLine::parse_with_options(parser, true)?);
+        }
+        Ok(Self {
+            lines: lines.into_boxed_slice(),
+        })
+    }
+}
+
+// Smoke test: parses the embedded XML and debug-prints every instruction.
+#[cfg(test)]
+#[test]
+fn test_instructions_parses() {
+    use std::fmt::Write;
+    let instructions = Instructions::get();
+    let mut written = String::new();
+    for (i, instruction) in instructions.instructions().iter().enumerate() {
+        written.clear();
+        write!(written, "{instruction:#?}").expect("known to not error");
+        println!("------\n{written}\n------");
+        let expected: &str = match i {
+            #[cfg(todo)]
+            0 => "",
+            _ => continue,
+        };
+        assert!(written == expected);
+    }
+}