diff --git a/src/main.rs b/src/main.rs index 0b5aae3..5182add 100644 --- a/src/main.rs +++ b/src/main.rs @@ -44,6 +44,12 @@ mod non_nan_float { pub(crate) const fn get(self) -> f32 { self.0 } + pub(crate) const fn min(self, other: Self) -> Self { + Self(self.0.min(other.0)) + } + pub(crate) const fn max(self, other: Self) -> Self { + Self(self.0.max(other.0)) + } } impl std::hash::Hash for NonNaNF32 { @@ -1484,10 +1490,95 @@ enum PageItem { LineOrRect(LineOrRect), } -#[derive(Clone, Debug)] +#[derive(Copy, Clone, Debug)] enum LineOrRect { - Line(()), - Rect(()), + Line(Line), + Rect(Rect), +} + +impl LineOrRect { + fn width(self) -> f32 { + match self { + Self::Line(v) => v.width(), + Self::Rect(v) => v.width(), + } + } + fn height(self) -> f32 { + match self { + Self::Line(v) => v.height(), + Self::Rect(v) => v.height(), + } + } + fn min_x(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.min_x(), + Self::Rect(v) => v.min_x, + } + } + fn max_x(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.max_x(), + Self::Rect(v) => v.max_x, + } + } + fn min_y(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.min_y(), + Self::Rect(v) => v.min_y, + } + } + fn max_y(self) -> NonNaNF32 { + match self { + Self::Line(v) => v.max_y(), + Self::Rect(v) => v.max_y, + } + } +} + +#[derive(Copy, Clone, Debug)] +struct Line { + p0_x: NonNaNF32, + p0_y: NonNaNF32, + p1_x: NonNaNF32, + p1_y: NonNaNF32, +} + +impl Line { + fn width(self) -> f32 { + f32::abs(self.p0_x.get() - self.p1_x.get()) + } + fn height(self) -> f32 { + f32::abs(self.p0_y.get() - self.p1_y.get()) + } + fn min_x(self) -> NonNaNF32 { + self.p0_x.min(self.p1_x) + } + fn max_x(self) -> NonNaNF32 { + self.p0_x.max(self.p1_x) + } + fn min_y(self) -> NonNaNF32 { + self.p0_y.min(self.p1_y) + } + fn max_y(self) -> NonNaNF32 { + self.p0_y.max(self.p1_y) + } +} + +#[derive(Copy, Clone, Debug)] +struct Rect { + min_x: NonNaNF32, + max_x: NonNaNF32, + min_y: NonNaNF32, + max_y: NonNaNF32, +} + +impl Rect { + fn 
width(self) -> f32 { + self.max_x.get() - self.min_x.get() + } + fn height(self) -> f32 { + self.max_y.get() - self.min_y.get() + } } #[derive(Debug)] @@ -2039,7 +2130,7 @@ impl Parser { Err( e @ ErrorWithNote { error: - ExtractInsnsError::InsnParseError(_) | ExtractInsnsError::PageParseError(_), + ExtractInsnsError::InsnParseError(..) | ExtractInsnsError::PageParseError(..), .. }, ) => { @@ -2064,7 +2155,7 @@ impl Parser { let page = self.page()?; let unprocessed_chars = self.unprocessed_chars()?; let ControlFlow::::Continue(()) = - page.qt[&self.text_section].range(min_x, max_x, min_y, max_y, |x, y, ch| { + page.qt[&self.text_section].range(min_x, max_x, min_y, max_y, |_x, _y, ch| { let PageItem::Char(ch) = ch else { return ControlFlow::Continue(()); }; @@ -2119,7 +2210,7 @@ impl Parser { max_x, start_min_y - fonts.regular()[0].size() * 0.4, start_min_y + fonts.regular()[0].size() * 0.6, - |x, y, ch| { + |_x, _y, ch| { let PageItem::Char(ch) = ch else { return ControlFlow::Continue(()); }; @@ -2260,6 +2351,597 @@ impl Parser { } Ok(Some(retval)) } + fn extract_following_text_lines( + &mut self, + first_text_line: ParsedTextLine, + min_x: f32, + max_x: f32, + allowed_start_min_y_error: Option, + ) -> Result, ExtractInsnsError> { + let mut retval = Vec::new(); + let fonts = first_text_line.fonts; + let mut line = Some(first_text_line); + while let Some(cur_line) = line { + let start_min_y = cur_line.regular_min_y - fonts.regular()[0].line_height(); + retval.push(cur_line); + line = self.extract_text_line( + None, + start_min_y, + min_x, + max_x, + fonts, + 0, + false, + allowed_start_min_y_error, + )?; + } + return Ok(retval); + } + fn extract_insn_bit_fields( + &mut self, + mnemonic_lines: &[ParsedTextLine], + ) -> Result, ExtractInsnsError> { + let mut found_non_affix_line = false; + let [.., last_mnemonic_line] = mnemonic_lines else { + unreachable!(); + }; + let expected_non_affix_line_y = last_mnemonic_line.regular_min_y + - if mnemonic_lines.len() > 1 { + 
INSN_BIT_FIELDS_TOP_PAD_HEIGHT2 + } else { + INSN_BIT_FIELDS_TOP_PAD_HEIGHT + }; + let page = self.page().map_err(ExtractInsnsError::Other)?; + let _ = page.qt[&self.text_section].range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + expected_non_affix_line_y - 5.0, + expected_non_affix_line_y + 5.0, + |_x, _y, line| { + let PageItem::LineOrRect(LineOrRect::Line(line)) = line else { + return ControlFlow::Continue(()); + }; + if line.width() > line.height() { + found_non_affix_line = true; + return ControlFlow::Break(()); + } + ControlFlow::Continue(()) + }, + ); + if found_non_affix_line { + return self.extract_insn_bit_fields_box(expected_non_affix_line_y); + }; + let prefix_text = self.extract_text_line( + None, + last_mnemonic_line.regular_min_y - INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + TextLineFonts::InsnBitFieldsAffixTitleFonts, + 0, + true, + Some(2.0), + )?; + let Some(prefix_text) = prefix_text else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn prefix bit fields title".into(), + Backtrace::capture(), + )); + }; + let prefix_text_str = prefix_text.element.inner_text(); + if prefix_text_str != "Prefix:" { + return Err(ExtractInsnsError::InsnParseError( + format!("insn prefix bit fields title is not as expected: {prefix_text_str:?}"), + Backtrace::capture(), + )); + } + let prefix_bit_fields = self.extract_insn_bit_fields_box( + prefix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT, + )?; + let Some(prefix_bit_fields) = prefix_bit_fields else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn prefix bit fields".into(), + Backtrace::capture(), + )); + }; + let suffix_text = self.extract_text_line( + None, + prefix_bit_fields.box_min_y - INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + 
TextLineFonts::InsnBitFieldsAffixTitleFonts, + 0, + true, + Some(2.0), + )?; + let Some(suffix_text) = suffix_text else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn suffix bit fields title".into(), + Backtrace::capture(), + )); + }; + let suffix_text_str = suffix_text.element.inner_text(); + if suffix_text_str != "Suffix:" { + return Err(ExtractInsnsError::InsnParseError( + format!("insn suffix bit fields title is not as expected: {suffix_text_str:?}"), + Backtrace::capture(), + )); + } + let suffix_bit_fields = self.extract_insn_bit_fields_box( + suffix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT, + )?; + let Some(suffix_bit_fields) = suffix_bit_fields else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn suffix bit fields".into(), + Backtrace::capture(), + )); + }; + return Ok(Some(InsnBitFields { + prefix: Some(InsnBitFieldsPrefix { + box_min_x: prefix_bit_fields.box_min_x, + box_min_y: prefix_bit_fields.box_min_y, + box_max_x: prefix_bit_fields.box_max_x, + box_max_y: prefix_bit_fields.box_max_y, + prefix_text: prefix_text, + fields: prefix_bit_fields.fields, + suffix_text: suffix_text, + }), + box_min_x: suffix_bit_fields.box_min_x, + box_min_y: suffix_bit_fields.box_min_y, + box_max_x: suffix_bit_fields.box_max_x, + box_max_y: suffix_bit_fields.box_max_y, + fields: suffix_bit_fields.fields, + })); + } + fn extract_insn_bit_fields_box( + &mut self, + expected_box_max_y: f32, + ) -> Result, ExtractInsnsError> { + let mut h_lines = Vec::new(); + let mut v_lines = Vec::new(); + let page = self.page().map_err(ExtractInsnsError::Other)?; + let ControlFlow::::Continue(()) = page.qt[&self.text_section].range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + expected_box_max_y - INSN_BIT_FIELDS_BOX_HEIGHT - 5.0, + expected_box_max_y + 5.0, + |_x, _y, line| { + let PageItem::LineOrRect(LineOrRect::Line(line)) = *line else { + return ControlFlow::Continue(()); + }; + if 
line.width() > line.height() { + h_lines.push(line); + } else { + v_lines.push(line); + } + ControlFlow::Continue(()) + }, + ); + h_lines.sort_by_key(|line| line.min_y()); + v_lines.sort_by_key(|line| line.min_x()); + for i in (0..v_lines.len().saturating_sub(1)).rev() { + if f32::abs(v_lines[i].min_x().get() - v_lines[i + 1].min_x().get()) < 0.5 { + v_lines.remove(i + 1); // remove duplicates + } + } + if h_lines.is_empty() && v_lines.is_empty() { + return Ok(None); + } + let [bottom_line, top_line] = &*h_lines else { + return Err(ExtractInsnsError::InsnParseError( + format!( + "instruction bit fields box has wrong number of horizontal lines:\n{h_lines:?}" + ), + Backtrace::capture(), + )); + }; + let [leftmost_line, .., rightmost_line] = &*v_lines else { + return Err(ExtractInsnsError::InsnParseError( + format!("instruction bit fields box has too few vertical lines:\n{v_lines:?}"), + Backtrace::capture(), + )); + }; + let box_min_x = leftmost_line.min_x().get(); + let box_max_x = rightmost_line.min_x().get(); + let box_min_y = bottom_line.min_y().get(); + let box_max_y = top_line.max_y().get(); + let box_mid_y = (box_min_y + box_max_y) * 0.5; + println!("bottom_line={bottom_line:?}"); + println!("top_line={top_line:?}"); + println!("{v_lines:?}"); + let mut fields = Vec::new(); + for i in 0..v_lines.len() - 1 { + let left_line = v_lines[i]; + let right_line = v_lines[i + 1]; + let field_box_min_x = left_line.max_x().get(); + let field_box_max_x = right_line.min_x().get(); + let bit_field_name_start_min_y = box_mid_y + 3.288; + let bit_field_name = self.extract_text_line( + None, + bit_field_name_start_min_y, + field_box_min_x, + field_box_max_x, + TextLineFonts::InsnBitFieldNameFonts, + 0, + true, + Some(0.4), + )?; + let Some(bit_field_name) = bit_field_name else { + return Err(ExtractInsnsError::InsnParseError( + format!( + "instruction bit field name not found:\n\ + start_min_y={bit_field_name_start_min_y} \ + field_box_min_x={field_box_min_x} \ + 
field_box_max_x={field_box_max_x}" + ), + Backtrace::capture(), + )); + }; + let bit_field_number_start_min_y = box_min_y + 3.487; + let bit_number = self.extract_text_line( + None, + bit_field_number_start_min_y, + field_box_min_x, + field_box_max_x, + TextLineFonts::InsnBitFieldBitNumberFonts, + 0, + true, + None, + )?; + let Some(bit_number) = bit_number else { + return Err(ExtractInsnsError::InsnParseError( + format!( + "instruction bit field bit number not found:\n\ + start_min_y={bit_field_number_start_min_y} \ + field_box_min_x={field_box_min_x} \ + field_box_max_x={field_box_max_x}" + ), + Backtrace::capture(), + )); + }; + fields.push(InsnBitField { + box_min_x: field_box_min_x, + box_max_x: field_box_max_x, + name: bit_field_name, + bit_number: bit_number, + }); + } + return Ok(Some(InsnBitFields { + prefix: None, + box_min_x, + box_min_y, + box_max_x, + box_max_y, + fields, + })); + } + fn extract_insn_header_mnemonics_and_bit_fields( + &mut self, + start_min_y: f32, + header_start_char: Option, + ) -> Result, ExtractInsnsError> { + assert!( + header_start_char + .as_ref() + .is_none_or(|v| v.font == Font::InsnHeader) + ); + let Some(header_line) = self.extract_text_line( + header_start_char, + start_min_y, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + TextLineFonts::InsnHeaderFonts, + 0, + true, + Some(6.0), + )? 
+ else { + return Ok(None); + }; + println!("found header line:\n{header_line}"); + let header_lines = self.extract_following_text_lines( + header_line, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + Some(1.5), + )?; + println!("insn header lines:"); + for header_line in &header_lines { + println!("{header_line}"); + } + let [.., last_header_line] = &*header_lines else { + unreachable!(); + }; + let Some(mnemonic_start_char) = self + .find_top_left_char_in_range( + self.text_section.min_x.get() - 5.0, + self.text_section.max_x.get() + 5.0, + last_header_line.regular_min_y - 50.0, + last_header_line.regular_min_y - 5.0, + false, + ) + .map_err(ExtractInsnsError::Other)? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn mnemonic text line".into(), + Backtrace::capture(), + )); + }; + let mnemonic_start_char_min_y = mnemonic_start_char.min_y.get(); + let Some(mnemonic_line) = self.extract_text_line( + Some(mnemonic_start_char), + mnemonic_start_char_min_y, + self.text_section.min_x.get(), + self.text_section.max_x.get(), + TextLineFonts::InsnMnemonicFonts, + 0, + true, + None, + )? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn mnemonic text line".into(), + Backtrace::capture(), + )); + }; + let mnemonic_line_first_char_min_x = mnemonic_line.chars[0].min_x.get(); + let mnemonic_lines = self.extract_following_text_lines( + mnemonic_line, + mnemonic_line_first_char_min_x, + self.text_section.max_x.get(), + None, + )?; + println!("insn mnemonic lines:"); + for mnemonic_line in &mnemonic_lines { + println!("{mnemonic_line}"); + } + let Some(insn_bit_fields) = self.extract_insn_bit_fields(&mnemonic_lines)? 
else { + return Err(ExtractInsnsError::InsnParseError( + "can't find insn bit fields".into(), + Backtrace::capture(), + )); + }; + println!("{insn_bit_fields}"); + return Ok(Some(InsnHeader { + header_lines, + mnemonic_lines, + bit_fields: insn_bit_fields, + })); + } + fn extract_insn_sp_regs_altered( + &mut self, + mut sp_regs_altered_text: ParsedTextLine, + ) -> Result { + sp_regs_altered_text.preceding_blank_lines = 0; + let fonts = TextLineFonts::InsnDescFonts; + let column_min_x = sp_regs_altered_text.chars[0].min_x.get(); + let Some(table_header_reg_char) = self + .find_top_left_char_in_range( + column_min_x - 1.0, + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 1.0, + sp_regs_altered_text.regular_min_y - 30.0, + sp_regs_altered_text.regular_min_y - 5.0, + false, + ) + .map_err(ExtractInsnsError::Other)? + else { + return Err(ExtractInsnsError::InsnParseError( + "can't find special registers altered table's register-column's header".into(), + Backtrace::capture(), + )); + }; + const KNOWN_SPECIAL_TEXTS: &[&str] = &[ + "None", + "Dependent on the system service", + "See above.", + "See Table 5.1", + ]; + match &*table_header_reg_char.text { + "R" => {} + text if KNOWN_SPECIAL_TEXTS.iter().any(|i| text == &i[..1]) => { + let start_min_y = table_header_reg_char.min_y.get(); + let special_text = self.extract_text_line( + Some(table_header_reg_char), + start_min_y, + column_min_x, + self.text_section.max_x.get(), + fonts, + 0, + true, + None, + )?; + let special_text = match special_text { + Some(special_text) + if KNOWN_SPECIAL_TEXTS.contains(&&*special_text.element.text) => + { + special_text + } + _ => return Err(ExtractInsnsError::Other( + format!( + "can't find special-registers-altered special-text:\n{special_text:?}" + ) + .into(), + )), + }; + let final_regular_min_y = special_text.regular_min_y; + return Ok(InsnSpRegsAltered { + sp_regs_altered_text, + special_text: Some(special_text), + table_header_reg: None, + table_header_fields: None, + 
entries: vec![], + final_regular_min_y, + }); + } + text => { + return Err(ExtractInsnsError::InsnParseError( + format!( + "unknown special-registers-altered special-text start character: {text:?}" + ), + Backtrace::capture(), + )); + } + } + let Some(table_header_fields_char) = self + .find_top_left_char_in_range( + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 10.0, + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, + table_header_reg_char.min_y.get() - 5.0, + table_header_reg_char.min_y.get() + 5.0, + false, + ) + .map_err(ExtractInsnsError::Other)? + else { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's fields-column's header".into(), + )); + }; + if table_header_fields_char.text != "F" { + return Err(ExtractInsnsError::Other( + format!( + "can't find special registers altered table's fields-column's header:\n\ + table_header_fields_char={table_header_fields_char:?}" + ) + .into(), + )); + } + let columns_x_bounds = [ + ( + table_header_reg_char.min_x.get(), + table_header_fields_char.min_x.get() - 1.0, + ), + ( + table_header_fields_char.min_x.get(), + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, + ), + ( + column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X, + self.text_section.max_x.get(), + ), + ]; + let start_min_y = table_header_reg_char.min_y.get(); + let Some(table_header_reg) = self.extract_text_line( + Some(table_header_reg_char), + start_min_y, + columns_x_bounds[0].0, + columns_x_bounds[0].1, + fonts, + 0, + false, + None, + )? 
+        else {
+            return Err(ExtractInsnsError::Other(
+                "can't find special registers altered table's register-column's header".into(),
+            ));
+        };
+        let table_header_reg_text = table_header_reg.element.inner_text();
+        if table_header_reg_text != "Register" {
+            return Err(ExtractInsnsError::Other(
+                format!(
+                    "can't find special registers altered table's register-column's header:\n\
+                    table_header_reg_text={table_header_reg_text:?}"
+                )
+                .into(),
+            ));
+        }
+        let start_min_y = table_header_fields_char.min_y.get();
+        let Some(table_header_fields) = self.extract_text_line(
+            Some(table_header_fields_char),
+            start_min_y,
+            columns_x_bounds[1].0,
+            columns_x_bounds[1].1,
+            fonts,
+            0,
+            false,
+            None,
+        )?
+        else {
+            return Err(ExtractInsnsError::Other(
+                "can't find special registers altered table's fields-column's header".into(),
+            ));
+        };
+        let table_header_fields_text = table_header_fields.element.inner_text();
+        // Validate the fields-column header text itself. Comparing the register header
+        // here (already checked to be "Register" above) would always take the error path.
+        if table_header_fields_text != "Field(s)" {
+            return Err(ExtractInsnsError::Other(
+                format!(
+                    "can't find special registers altered table's fields-column's header:\n\
+                    table_header_fields_text={table_header_fields_text:?}"
+                )
+                .into(),
+            ));
+        }
+        let mut regular_min_y = table_header_reg.regular_min_y;
+        let mut entries = Vec::new();
+        let mut cur_reg = None;
+        let mut cur_fields = Vec::new();
+        let mut cur_conds = Vec::new();
+        loop {
+            let mut row = [None, None, None];
+            let mut next_regular_min_y = None;
+            for (i, (min_x, max_x)) in columns_x_bounds.into_iter().enumerate() {
+                row[i] = self.extract_text_line(
+                    None,
+                    regular_min_y - fonts.regular()[0].line_height(),
+                    min_x,
+                    max_x,
+                    fonts,
+                    0,
+                    true,
+                    Some(2.0),
+                )?;
+                if let Some(cell) = &row[i]
+                    && next_regular_min_y.is_none()
+                {
+                    next_regular_min_y = Some(cell.regular_min_y);
+                }
+            }
+            // The first non-empty cell of the row sets the next row's baseline; stop when all three columns are empty.
+            match next_regular_min_y {
+                Some(v) => regular_min_y = v,
+                None => break,
+            }
+            let [cur_reg_cell, cur_fields_cell, cur_conds_cell] = row;
+            if cur_reg_cell.is_none() {
+                if cur_reg.is_none() {
+                    return
Err(ExtractInsnsError::Other( + "can't find special registers altered table's first register".into(), + )); + } + cur_fields.extend(cur_fields_cell); + cur_conds.extend(cur_conds_cell); + continue; + } + if let Some(cur_reg) = cur_reg { + entries.push(InsnSpRegsAlteredEntry { + reg: cur_reg, + fields: cur_fields, + conds: cur_conds, + }); + cur_fields = Vec::new(); + cur_conds = Vec::new(); + } + cur_reg = cur_reg_cell; + cur_fields.extend(cur_fields_cell); + cur_conds.extend(cur_conds_cell); + } + let Some(cur_reg) = cur_reg else { + return Err(ExtractInsnsError::Other( + "can't find special registers altered table's first register".into(), + )); + }; + entries.push(InsnSpRegsAlteredEntry { + reg: cur_reg, + fields: cur_fields, + conds: cur_conds, + }); + return Ok(InsnSpRegsAltered { + sp_regs_altered_text: sp_regs_altered_text, + special_text: None, + table_header_reg: Some(table_header_reg), + table_header_fields: Some(table_header_fields), + entries, + final_regular_min_y: regular_min_y, + }); + } /*fn extract_insn(&mut self, header_start_char: Char) -> Result { assert_eq!(header_start_char.font, Font::InsnHeader); println!("{header_start_char:?}"); @@ -2401,57 +3083,192 @@ impl Parser { } } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] struct MyDevice { + page_num: u32, qt: Rc>>>, unprocessed_non_text: Rc>>, } +impl MyDevice { + fn new(page_num: u32) -> Self { + Self { + page_num, + qt: Default::default(), + unprocessed_non_text: Default::default(), + } + } + fn path(&mut self, path: &mupdf::Path, cmt: mupdf::Matrix) { + enum Walker { + Empty, + Moved { x: f32, y: f32 }, + Line(Line), + Rect { x1: f32, y1: f32, x2: f32, y2: f32 }, + NotRecognized, + } + fn new_line(p0_x: f32, p0_y: f32, p1_x: f32, p1_y: f32) -> Option { + Some(Line { + p0_x: NonNaNF32::new(p0_x)?, + p0_y: NonNaNF32::new(p0_y)?, + p1_x: NonNaNF32::new(p1_x)?, + p1_y: NonNaNF32::new(p1_y)?, + }) + } + impl mupdf::PathWalker for Walker { + fn move_to(&mut self, x: f32, y: f32) { + 
*self = match *self { + Walker::Empty | Walker::Moved { .. } => Walker::Moved { x, y }, + Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => { + Walker::NotRecognized + } + }; + } + fn line_to(&mut self, x: f32, y: f32) { + *self = match *self { + Walker::Empty => Walker::NotRecognized, + Walker::Moved { x: p0_x, y: p0_y } => new_line(p0_x, p0_y, x, y) + .map(Walker::Line) + .unwrap_or(Walker::NotRecognized), + Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => { + Walker::NotRecognized + } + }; + } + fn curve_to(&mut self, _cx1: f32, _cy1: f32, _cx2: f32, _cy2: f32, _ex: f32, _ey: f32) { + *self = Walker::NotRecognized; + } + fn close(&mut self) {} + fn rect(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) { + *self = match *self { + Walker::Empty => Walker::Rect { x1, y1, x2, y2 }, + Walker::Moved { .. } + | Walker::Line(..) + | Walker::Rect { .. } + | Walker::NotRecognized => Walker::NotRecognized, + }; + } + } + let mut walker = Walker::Empty; + let Ok(()) = path.walk(&mut walker) else { + return; + }; + let component = match walker { + Walker::Empty | Walker::Moved { .. 
} | Walker::NotRecognized => return, + Walker::Line(Line { + p0_x, + p0_y, + p1_x, + p1_y, + }) => { + let mupdf::Point { x: p0_x, y: p0_y } = mupdf::Point { + x: p0_x.get(), + y: p0_y.get(), + } + .transform(&cmt); + let mupdf::Point { x: p1_x, y: p1_y } = mupdf::Point { + x: p1_x.get(), + y: p1_y.get(), + } + .transform(&cmt); + let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else { + return; + }; + LineOrRect::Line(line) + } + Walker::Rect { x1, y1, x2, y2 } => { + let p1 = mupdf::Point { x: x1, y: y1 }.transform(&cmt); + let p2 = mupdf::Point { x: x2, y: y1 }.transform(&cmt); + let p3 = mupdf::Point { x: x2, y: y2 }.transform(&cmt); + let p4 = mupdf::Point { x: x1, y: y2 }.transform(&cmt); + let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x)); + let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x)); + let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y)); + let max_y = NonNaNF32::new(p1.y.max(p2.y).max(p3.y).max(p4.y)); + let (Some(min_x), Some(max_x), Some(min_y), Some(max_y)) = + (min_x, max_x, min_y, max_y) + else { + return; + }; + LineOrRect::Rect(Rect { + min_x, + max_x, + min_y, + max_y, + }) + } + }; + if component.width() > 100.0 + && component.min_x().get() < COLUMN_SPLIT_X - 10.0 + && component.max_x().get() > COLUMN_SPLIT_X + 10.0 + { + println!("wide component: {component:?}"); + } else { + println!("component: {component:?}"); + } + let text_section = TextSection::for_position( + self.page_num, + (component.min_x().get() + component.max_x().get()) * 0.5, + (component.min_y().get() + component.max_y().get()) * 0.5, + ); + if let Some(text_section) = text_section { + self.qt + .borrow_mut() + .entry(text_section) + .or_default() + .insert( + component.min_x().get(), + component.min_y().get(), + PageItem::LineOrRect(component), + ); + } + } +} + impl mupdf::NativeDevice for MyDevice { fn fill_path( &mut self, path: &mupdf::Path, - even_odd: bool, + _even_odd: bool, cmt: mupdf::Matrix, - color_space: &mupdf::Colorspace, 
- color: &[f32], - alpha: f32, - cp: mupdf::ColorParams, + _color_space: &mupdf::Colorspace, + _color: &[f32], + _alpha: f32, + _cp: mupdf::ColorParams, ) { - // TODO + self.path(path, cmt); } fn stroke_path( &mut self, path: &mupdf::Path, - stroke_state: &mupdf::StrokeState, + _stroke_state: &mupdf::StrokeState, cmt: mupdf::Matrix, - color_space: &mupdf::Colorspace, - color: &[f32], - alpha: f32, - cp: mupdf::ColorParams, + _color_space: &mupdf::Colorspace, + _color: &[f32], + _alpha: f32, + _cp: mupdf::ColorParams, ) { - // TODO + self.path(path, cmt); } fn clip_path( &mut self, path: &mupdf::Path, - even_odd: bool, + _even_odd: bool, cmt: mupdf::Matrix, - scissor: mupdf::Rect, + _scissor: mupdf::Rect, ) { - // TODO + self.path(path, cmt); } fn clip_stroke_path( &mut self, path: &mupdf::Path, - stroke_state: &mupdf::StrokeState, + _stroke_state: &mupdf::StrokeState, cmt: mupdf::Matrix, - scissor: mupdf::Rect, + _scissor: mupdf::Rect, ) { - // TODO + self.path(path, cmt); } } @@ -2528,12 +3345,13 @@ impl Page { page_num: u32, page: mupdf::Page, ) -> Result> { - let device = MyDevice::default(); + let device = MyDevice::new(page_num); page.run( &mupdf::Device::from_native(device.clone())?, &mupdf::Matrix::IDENTITY, )?; let MyDevice { + page_num, qt, unprocessed_non_text, } = device;