From c58bc23904f659d2bb0e1c79e4563b837ea90a9a Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 5 Jan 2026 09:41:56 -0800 Subject: [PATCH] wip --- src/main.rs | 296 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 182 insertions(+), 114 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5182add..2e9e391 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2942,132 +2942,200 @@ impl Parser { final_regular_min_y: regular_min_y, }); } - /*fn extract_insn(&mut self, header_start_char: Char) -> Result { + fn extract_insn(&mut self, header_start_char: Char) -> Result { assert_eq!(header_start_char.font, Font::InsnHeader); println!("{header_start_char:?}"); let Some(header) = self.extract_insn_header_mnemonics_and_bit_fields( header_start_char.min_y.get(), - header_start_char, - )? else { - return Err(ExtractInsnsError::PageParseError("can't find header text line".into(), Backtrace::capture())); + Some(header_start_char), + )? + else { + return Err(ExtractInsnsError::PageParseError( + "can't find header text line".into(), + Backtrace::capture(), + )); }; - let next_start_min_y = header.min_y.get() - 5.0; + let mut next_start_min_y = header.min_y() - 5.0; let mut headers = vec![header]; let mut code_lines: Vec = Vec::new(); let mut desc_lines: Vec = Vec::new(); let mut sp_regs_altered = None; loop { let search_min_y = next_start_min_y - 70.0; - let Some(next_char) = self.find_top_left_char_in_range( - min_x=self.text_section.min_x.get() - 5.0, - max_x=self.text_section.max_x.get() + 5.0, - min_y=max(search_min_y, self.text_section.min_y), - max_y=next_start_min_y, - allow_processed=False, - )?; - if next_char is None: - if search_min_y <= self.text_section.min_y \ - and self.text_section.next is not None and \ - self.text_section.next.page_num in self.pages: - # go to next section - self.text_section = self.text_section.next - next_start_min_y = self.text_section.max_y - continue - else: - raise InsnParseError("can't find insn code or description text") - match next_char.font: - case font if font in TextLineFonts.INSN_CODE_FONTS.fonts: - next_section = _InsnParseSection.CODE - case font if font in TextLineFonts.INSN_DESC_FONTS.fonts: - next_section = _InsnParseSection.DESC - case Font.INSN_HEADER: - next_section = _InsnParseSection.HEADER - case font: - raise InsnParseError(f"can't find insn code or description text\nfont={font}") - match next_section: - case _InsnParseSection.CODE: - if len(desc_lines) != 0: - break - code_line = self.extract_text_line( - start_char=next_char, - start_min_y=next_char.min_y, - min_x=next_char.min_x, - max_x=self.text_section.max_x, - fonts=TextLineFonts.INSN_CODE_FONTS, - preceding_blank_lines=0 if len(code_lines) == 0 else 1, - ) - if code_line is None: - raise InsnParseError("can't find insn code text line") - more_code_lines = self.extract_following_text_lines( - first_text_line=code_line, - min_x=code_line.chars[0].min_x, - max_x=self.text_section.max_x, - allowed_start_min_y_error=0.05, - ) - print("more insn code lines:") - print("\n".join(map(str, more_code_lines))) - code_lines.extend(more_code_lines) - next_start_min_y = code_lines[-1].regular_min_y - 5 - case _InsnParseSection.HEADER: - if len(code_lines) != 0 or len(desc_lines) != 0: - break - header = self.extract_insn_header_mnemonics_and_bit_fields( - start_min_y=next_char.min_y, - header_start_char=next_char, - ) - if header is None: - raise InsnParseError("can't find header text line") - headers.append(header) - next_start_min_y = header.min_y - 5 - case _InsnParseSection.DESC: - desc_line = self.extract_text_line( - start_char=next_char, - start_min_y=next_char.min_y, - min_x=next_char.min_x, - max_x=self.text_section.max_x, - fonts=TextLineFonts.INSN_DESC_FONTS, - preceding_blank_lines=0 if len(desc_lines) == 0 else 1, - allowed_start_min_y_error=3, - ) - if desc_line is None: - raise InsnParseError("can't find insn desc text line") - match desc_line.get_header_text(): - case None: - more_desc_lines = self.extract_following_text_lines( - first_text_line=desc_line, - min_x=desc_line.chars[0].min_x, - max_x=self.text_section.max_x, - allowed_start_min_y_error=3.5, - ) - print("more insn desc lines:") - print("\n".join(map(str, more_desc_lines))) - desc_lines.extend(more_desc_lines) - next_start_min_y = desc_lines[-1].regular_min_y - 5 - case "Special Registers Altered:": - sp_regs_altered = self.extract_insn_sp_regs_altered( - sp_regs_altered_text=desc_line, - ) - next_start_min_y = sp_regs_altered.final_regular_min_y - break - case header_text: - raise AssertionError(f"unhandled header text: {header_text!r}\n{desc_line}") - case _: - assert_never(next_section) + let Some(next_char) = self + .find_top_left_char_in_range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + search_min_y.max(self.text_section.min_y.get()), + next_start_min_y, + false, + ) + .map_err(ExtractInsnsError::Other)? + else { + if search_min_y <= self.text_section.min_y.get() + && self + .pages + .get(self.text_section.next().page_num) + .map_err(ExtractInsnsError::Other)? + .is_some() + { + // go to next section + self.text_section = self.text_section.next(); + next_start_min_y = self.text_section.max_y.get(); + continue; + } else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn code or description text".into(), + Backtrace::capture(), + )); + } + }; + let next_section = match &next_char.font { + font if TextLineFonts::InsnCodeFonts.fonts().contains(font) => { + InsnParseSection::Code + } + font if TextLineFonts::InsnDescFonts.fonts().contains(font) => { + InsnParseSection::Desc + } + Font::InsnHeader => InsnParseSection::Header, + font => { + return Err(ExtractInsnsError::InsnParseError( + format!("can't find insn code or description text\nfont={font:?}"), + Backtrace::capture(), + )); + } + }; + match next_section { + InsnParseSection::Code => { + if !desc_lines.is_empty() { + break; + } + let start_min_y = next_char.min_y.get(); + let min_x = next_char.min_x.get(); + let Some(code_line) = self.extract_text_line( + Some(next_char), + start_min_y, + min_x, + self.text_section.max_x.get(), + TextLineFonts::InsnCodeFonts, + if code_lines.is_empty() { 0 } else { 1 }, + false, + None, + )? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn code text line".into(), + Backtrace::capture(), + )); + }; + let min_x = code_line.chars[0].min_x.get(); + let more_code_lines = self.extract_following_text_lines( + code_line, + min_x, + self.text_section.max_x.get(), + Some(0.05), + )?; + println!("more insn code lines:"); + for i in &more_code_lines { + println!("{i}"); + } + code_lines.extend(more_code_lines); + let Some(last) = code_lines.last() else { + unreachable!() + }; + next_start_min_y = last.regular_min_y - 5.0; + } + InsnParseSection::Header => { + if !(code_lines.is_empty() && desc_lines.is_empty()) { + break; + } + let Some(header) = self.extract_insn_header_mnemonics_and_bit_fields( + next_char.min_y.get(), + Some(next_char), + )? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find header text line".into(), + Backtrace::capture(), + )); + }; + next_start_min_y = header.min_y() - 5.0; + headers.push(header); + } + InsnParseSection::Desc => { + let start_min_y = next_char.min_y.get(); + let min_x = next_char.min_x.get(); + let Some(desc_line) = self.extract_text_line( + Some(next_char), + start_min_y, + min_x, + self.text_section.max_x.get(), + TextLineFonts::InsnDescFonts, + if desc_lines.is_empty() { 0 } else { 1 }, + false, + Some(3.0), + )? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn desc text line".into(), + Backtrace::capture(), + )); + }; + match desc_line.get_header_text() { + None => { + let min_x = desc_line.chars[0].min_x.get(); + let more_desc_lines = self.extract_following_text_lines( + desc_line, + min_x, + self.text_section.max_x.get(), + Some(3.5), + )?; + println!("more insn desc lines:"); + for i in &more_desc_lines { + println!("{i}"); + } + desc_lines.extend(more_desc_lines); + next_start_min_y = desc_lines + .last() + .expect("known to be non-empty") + .regular_min_y + - 5.0; + } + Some(header_text) if header_text == "Special Registers Altered:" => { + let new_sp_regs_altered = + self.extract_insn_sp_regs_altered(desc_line)?; + next_start_min_y = new_sp_regs_altered.final_regular_min_y; + sp_regs_altered = Some(new_sp_regs_altered); + break; + } + Some(header_text) => { + return Err(ExtractInsnsError::Other( + format!("unhandled header text: {header_text:?}\n{desc_line}") + .into(), + )); + } + } + } + } } - print("insn code lines:") - print("\n".join(map(str, code_lines))) - print("insn desc lines:") - print("\n".join(map(str, desc_lines))) - print("sp_regs_altered:") - print(sp_regs_altered) - # TODO: finish - return Insn( - headers=tuple(headers), - code_lines=tuple(code_lines), - desc_lines=tuple(desc_lines), - sp_regs_altered=sp_regs_altered, - ) - }*/ + println!("insn code lines:"); + for i in &code_lines { + println!("{i}"); + } + println!("insn desc lines:"); + for i in &desc_lines { + println!("{i}"); + } + println!("sp_regs_altered:"); + println!("{sp_regs_altered:?}"); + // TODO: finish + return Ok(Insn { + headers, + code_lines, + desc_lines, + sp_regs_altered, + }); + } fn extract_insns(&mut self) -> Result<(), ExtractInsnsError> { loop { let Some(header_start_char) =