wip
This commit is contained in:
parent
e9830566c0
commit
3d66c853f6
1 changed files with 844 additions and 26 deletions
870
src/main.rs
870
src/main.rs
|
|
@ -44,6 +44,12 @@ mod non_nan_float {
|
|||
pub(crate) const fn get(self) -> f32 {
|
||||
self.0
|
||||
}
|
||||
pub(crate) const fn min(self, other: Self) -> Self {
|
||||
Self(self.0.min(other.0))
|
||||
}
|
||||
pub(crate) const fn max(self, other: Self) -> Self {
|
||||
Self(self.0.max(other.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::hash::Hash for NonNaNF32 {
|
||||
|
|
@ -1484,10 +1490,95 @@ enum PageItem {
|
|||
LineOrRect(LineOrRect),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
enum LineOrRect {
|
||||
Line(()),
|
||||
Rect(()),
|
||||
Line(Line),
|
||||
Rect(Rect),
|
||||
}
|
||||
|
||||
impl LineOrRect {
|
||||
fn width(self) -> f32 {
|
||||
match self {
|
||||
Self::Line(v) => v.width(),
|
||||
Self::Rect(v) => v.width(),
|
||||
}
|
||||
}
|
||||
fn height(self) -> f32 {
|
||||
match self {
|
||||
Self::Line(v) => v.height(),
|
||||
Self::Rect(v) => v.height(),
|
||||
}
|
||||
}
|
||||
fn min_x(self) -> NonNaNF32 {
|
||||
match self {
|
||||
Self::Line(v) => v.min_x(),
|
||||
Self::Rect(v) => v.min_x,
|
||||
}
|
||||
}
|
||||
fn max_x(self) -> NonNaNF32 {
|
||||
match self {
|
||||
Self::Line(v) => v.max_x(),
|
||||
Self::Rect(v) => v.max_x,
|
||||
}
|
||||
}
|
||||
fn min_y(self) -> NonNaNF32 {
|
||||
match self {
|
||||
Self::Line(v) => v.min_y(),
|
||||
Self::Rect(v) => v.min_y,
|
||||
}
|
||||
}
|
||||
fn max_y(self) -> NonNaNF32 {
|
||||
match self {
|
||||
Self::Line(v) => v.max_y(),
|
||||
Self::Rect(v) => v.max_y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
struct Line {
|
||||
p0_x: NonNaNF32,
|
||||
p0_y: NonNaNF32,
|
||||
p1_x: NonNaNF32,
|
||||
p1_y: NonNaNF32,
|
||||
}
|
||||
|
||||
impl Line {
|
||||
fn width(self) -> f32 {
|
||||
f32::abs(self.p0_x.get() - self.p1_x.get())
|
||||
}
|
||||
fn height(self) -> f32 {
|
||||
f32::abs(self.p0_y.get() - self.p1_y.get())
|
||||
}
|
||||
fn min_x(self) -> NonNaNF32 {
|
||||
self.p0_x.min(self.p1_x)
|
||||
}
|
||||
fn max_x(self) -> NonNaNF32 {
|
||||
self.p0_x.max(self.p1_x)
|
||||
}
|
||||
fn min_y(self) -> NonNaNF32 {
|
||||
self.p0_y.min(self.p1_y)
|
||||
}
|
||||
fn max_y(self) -> NonNaNF32 {
|
||||
self.p0_y.max(self.p1_y)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
struct Rect {
|
||||
min_x: NonNaNF32,
|
||||
max_x: NonNaNF32,
|
||||
min_y: NonNaNF32,
|
||||
max_y: NonNaNF32,
|
||||
}
|
||||
|
||||
impl Rect {
|
||||
fn width(self) -> f32 {
|
||||
self.max_x.get() - self.min_x.get()
|
||||
}
|
||||
fn height(self) -> f32 {
|
||||
self.max_y.get() - self.min_y.get()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -2039,7 +2130,7 @@ impl Parser {
|
|||
Err(
|
||||
e @ ErrorWithNote {
|
||||
error:
|
||||
ExtractInsnsError::InsnParseError(_) | ExtractInsnsError::PageParseError(_),
|
||||
ExtractInsnsError::InsnParseError(..) | ExtractInsnsError::PageParseError(..),
|
||||
..
|
||||
},
|
||||
) => {
|
||||
|
|
@ -2064,7 +2155,7 @@ impl Parser {
|
|||
let page = self.page()?;
|
||||
let unprocessed_chars = self.unprocessed_chars()?;
|
||||
let ControlFlow::<Infallible>::Continue(()) =
|
||||
page.qt[&self.text_section].range(min_x, max_x, min_y, max_y, |x, y, ch| {
|
||||
page.qt[&self.text_section].range(min_x, max_x, min_y, max_y, |_x, _y, ch| {
|
||||
let PageItem::Char(ch) = ch else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
|
|
@ -2119,7 +2210,7 @@ impl Parser {
|
|||
max_x,
|
||||
start_min_y - fonts.regular()[0].size() * 0.4,
|
||||
start_min_y + fonts.regular()[0].size() * 0.6,
|
||||
|x, y, ch| {
|
||||
|_x, _y, ch| {
|
||||
let PageItem::Char(ch) = ch else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
|
|
@ -2260,6 +2351,597 @@ impl Parser {
|
|||
}
|
||||
Ok(Some(retval))
|
||||
}
|
||||
fn extract_following_text_lines(
|
||||
&mut self,
|
||||
first_text_line: ParsedTextLine,
|
||||
min_x: f32,
|
||||
max_x: f32,
|
||||
allowed_start_min_y_error: Option<f32>,
|
||||
) -> Result<Vec<ParsedTextLine>, ExtractInsnsError> {
|
||||
let mut retval = Vec::new();
|
||||
let fonts = first_text_line.fonts;
|
||||
let mut line = Some(first_text_line);
|
||||
while let Some(cur_line) = line {
|
||||
let start_min_y = cur_line.regular_min_y - fonts.regular()[0].line_height();
|
||||
retval.push(cur_line);
|
||||
line = self.extract_text_line(
|
||||
None,
|
||||
start_min_y,
|
||||
min_x,
|
||||
max_x,
|
||||
fonts,
|
||||
0,
|
||||
false,
|
||||
allowed_start_min_y_error,
|
||||
)?;
|
||||
}
|
||||
return Ok(retval);
|
||||
}
|
||||
fn extract_insn_bit_fields(
|
||||
&mut self,
|
||||
mnemonic_lines: &[ParsedTextLine],
|
||||
) -> Result<Option<InsnBitFields>, ExtractInsnsError> {
|
||||
let mut found_non_affix_line = false;
|
||||
let [.., last_mnemonic_line] = mnemonic_lines else {
|
||||
unreachable!();
|
||||
};
|
||||
let expected_non_affix_line_y = last_mnemonic_line.regular_min_y
|
||||
- if mnemonic_lines.len() > 1 {
|
||||
INSN_BIT_FIELDS_TOP_PAD_HEIGHT2
|
||||
} else {
|
||||
INSN_BIT_FIELDS_TOP_PAD_HEIGHT
|
||||
};
|
||||
let page = self.page().map_err(ExtractInsnsError::Other)?;
|
||||
let _ = page.qt[&self.text_section].range(
|
||||
self.text_section.min_x.get() - 5.0,
|
||||
self.text_section.max_x.get() + 5.0,
|
||||
expected_non_affix_line_y - 5.0,
|
||||
expected_non_affix_line_y + 5.0,
|
||||
|_x, _y, line| {
|
||||
let PageItem::LineOrRect(LineOrRect::Line(line)) = line else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
if line.width() > line.height() {
|
||||
found_non_affix_line = true;
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
ControlFlow::Continue(())
|
||||
},
|
||||
);
|
||||
if found_non_affix_line {
|
||||
return self.extract_insn_bit_fields_box(expected_non_affix_line_y);
|
||||
};
|
||||
let prefix_text = self.extract_text_line(
|
||||
None,
|
||||
last_mnemonic_line.regular_min_y - INSN_BIT_FIELDS_PREFIX_TEXT_TOP_PAD_HEIGHT,
|
||||
self.text_section.min_x.get(),
|
||||
self.text_section.max_x.get(),
|
||||
TextLineFonts::InsnBitFieldsAffixTitleFonts,
|
||||
0,
|
||||
true,
|
||||
Some(2.0),
|
||||
)?;
|
||||
let Some(prefix_text) = prefix_text else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn prefix bit fields title".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let prefix_text_str = prefix_text.element.inner_text();
|
||||
if prefix_text_str != "Prefix:" {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!("insn prefix bit fields title is not as expected: {prefix_text_str:?}"),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
}
|
||||
let prefix_bit_fields = self.extract_insn_bit_fields_box(
|
||||
prefix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT,
|
||||
)?;
|
||||
let Some(prefix_bit_fields) = prefix_bit_fields else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn prefix bit fields".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let suffix_text = self.extract_text_line(
|
||||
None,
|
||||
prefix_bit_fields.box_min_y - INSN_BIT_FIELDS_PREFIX_BOX_BOTTOM_TO_SUFFIX_TEXT_HEIGHT,
|
||||
self.text_section.min_x.get(),
|
||||
self.text_section.max_x.get(),
|
||||
TextLineFonts::InsnBitFieldsAffixTitleFonts,
|
||||
0,
|
||||
true,
|
||||
Some(2.0),
|
||||
)?;
|
||||
let Some(suffix_text) = suffix_text else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn suffix bit fields title".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let suffix_text_str = suffix_text.element.inner_text();
|
||||
if suffix_text_str != "Suffix:" {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!("insn suffix bit fields title is not as expected: {suffix_text_str:?}"),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
}
|
||||
let suffix_bit_fields = self.extract_insn_bit_fields_box(
|
||||
suffix_text.regular_min_y - INSN_BIT_FIELDS_AFFIX_TEXT_TO_BOX_TOP_HEIGHT,
|
||||
)?;
|
||||
let Some(suffix_bit_fields) = suffix_bit_fields else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn suffix bit fields".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
return Ok(Some(InsnBitFields {
|
||||
prefix: Some(InsnBitFieldsPrefix {
|
||||
box_min_x: prefix_bit_fields.box_min_x,
|
||||
box_min_y: prefix_bit_fields.box_min_y,
|
||||
box_max_x: prefix_bit_fields.box_max_x,
|
||||
box_max_y: prefix_bit_fields.box_max_y,
|
||||
prefix_text: prefix_text,
|
||||
fields: prefix_bit_fields.fields,
|
||||
suffix_text: suffix_text,
|
||||
}),
|
||||
box_min_x: suffix_bit_fields.box_min_x,
|
||||
box_min_y: suffix_bit_fields.box_min_y,
|
||||
box_max_x: suffix_bit_fields.box_max_x,
|
||||
box_max_y: suffix_bit_fields.box_max_y,
|
||||
fields: suffix_bit_fields.fields,
|
||||
}));
|
||||
}
|
||||
fn extract_insn_bit_fields_box(
|
||||
&mut self,
|
||||
expected_box_max_y: f32,
|
||||
) -> Result<Option<InsnBitFields>, ExtractInsnsError> {
|
||||
let mut h_lines = Vec::new();
|
||||
let mut v_lines = Vec::new();
|
||||
let page = self.page().map_err(ExtractInsnsError::Other)?;
|
||||
let ControlFlow::<Infallible>::Continue(()) = page.qt[&self.text_section].range(
|
||||
self.text_section.min_x.get() - 5.0,
|
||||
self.text_section.max_x.get() + 5.0,
|
||||
expected_box_max_y - INSN_BIT_FIELDS_BOX_HEIGHT - 5.0,
|
||||
expected_box_max_y + 5.0,
|
||||
|_x, _y, line| {
|
||||
let PageItem::LineOrRect(LineOrRect::Line(line)) = *line else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
if line.width() > line.height() {
|
||||
h_lines.push(line);
|
||||
} else {
|
||||
v_lines.push(line);
|
||||
}
|
||||
ControlFlow::Continue(())
|
||||
},
|
||||
);
|
||||
h_lines.sort_by_key(|line| line.min_y());
|
||||
v_lines.sort_by_key(|line| line.min_x());
|
||||
for i in (0..v_lines.len().saturating_sub(1)).rev() {
|
||||
if f32::abs(v_lines[i].min_x().get() - v_lines[i + 1].min_x().get()) < 0.5 {
|
||||
v_lines.remove(i + 1); // remove duplicates
|
||||
}
|
||||
}
|
||||
if h_lines.is_empty() && v_lines.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let [bottom_line, top_line] = &*h_lines else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!(
|
||||
"instruction bit fields box has wrong number of horizontal lines:\n{h_lines:?}"
|
||||
),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let [leftmost_line, .., rightmost_line] = &*v_lines else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!("instruction bit fields box has too few vertical lines:\n{v_lines:?}"),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let box_min_x = leftmost_line.min_x().get();
|
||||
let box_max_x = rightmost_line.min_x().get();
|
||||
let box_min_y = bottom_line.min_y().get();
|
||||
let box_max_y = top_line.max_y().get();
|
||||
let box_mid_y = (box_min_y + box_max_y) * 0.5;
|
||||
println!("bottom_line={bottom_line:?}");
|
||||
println!("top_line={top_line:?}");
|
||||
println!("{v_lines:?}");
|
||||
let mut fields = Vec::new();
|
||||
for i in 0..v_lines.len() - 1 {
|
||||
let left_line = v_lines[i];
|
||||
let right_line = v_lines[i + 1];
|
||||
let field_box_min_x = left_line.max_x().get();
|
||||
let field_box_max_x = right_line.min_x().get();
|
||||
let bit_field_name_start_min_y = box_mid_y + 3.288;
|
||||
let bit_field_name = self.extract_text_line(
|
||||
None,
|
||||
bit_field_name_start_min_y,
|
||||
field_box_min_x,
|
||||
field_box_max_x,
|
||||
TextLineFonts::InsnBitFieldNameFonts,
|
||||
0,
|
||||
true,
|
||||
Some(0.4),
|
||||
)?;
|
||||
let Some(bit_field_name) = bit_field_name else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!(
|
||||
"instruction bit field name not found:\n\
|
||||
start_min_y={bit_field_name_start_min_y} \
|
||||
field_box_min_x={field_box_min_x} \
|
||||
field_box_max_x={field_box_max_x}"
|
||||
),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let bit_field_number_start_min_y = box_min_y + 3.487;
|
||||
let bit_number = self.extract_text_line(
|
||||
None,
|
||||
bit_field_number_start_min_y,
|
||||
field_box_min_x,
|
||||
field_box_max_x,
|
||||
TextLineFonts::InsnBitFieldBitNumberFonts,
|
||||
0,
|
||||
true,
|
||||
None,
|
||||
)?;
|
||||
let Some(bit_number) = bit_number else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!(
|
||||
"instruction bit field bit number not found:\n\
|
||||
start_min_y={bit_field_number_start_min_y} \
|
||||
field_box_min_x={field_box_min_x} \
|
||||
field_box_max_x={field_box_max_x}"
|
||||
),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
fields.push(InsnBitField {
|
||||
box_min_x: field_box_min_x,
|
||||
box_max_x: field_box_max_x,
|
||||
name: bit_field_name,
|
||||
bit_number: bit_number,
|
||||
});
|
||||
}
|
||||
return Ok(Some(InsnBitFields {
|
||||
prefix: None,
|
||||
box_min_x,
|
||||
box_min_y,
|
||||
box_max_x,
|
||||
box_max_y,
|
||||
fields,
|
||||
}));
|
||||
}
|
||||
fn extract_insn_header_mnemonics_and_bit_fields(
|
||||
&mut self,
|
||||
start_min_y: f32,
|
||||
header_start_char: Option<Char>,
|
||||
) -> Result<Option<InsnHeader>, ExtractInsnsError> {
|
||||
assert!(
|
||||
header_start_char
|
||||
.as_ref()
|
||||
.is_none_or(|v| v.font == Font::InsnHeader)
|
||||
);
|
||||
let Some(header_line) = self.extract_text_line(
|
||||
header_start_char,
|
||||
start_min_y,
|
||||
self.text_section.min_x.get(),
|
||||
self.text_section.max_x.get(),
|
||||
TextLineFonts::InsnHeaderFonts,
|
||||
0,
|
||||
true,
|
||||
Some(6.0),
|
||||
)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
println!("found header line:\n{header_line}");
|
||||
let header_lines = self.extract_following_text_lines(
|
||||
header_line,
|
||||
self.text_section.min_x.get(),
|
||||
self.text_section.max_x.get(),
|
||||
Some(1.5),
|
||||
)?;
|
||||
println!("insn header lines:");
|
||||
for header_line in &header_lines {
|
||||
println!("{header_line}");
|
||||
}
|
||||
let [.., last_header_line] = &*header_lines else {
|
||||
unreachable!();
|
||||
};
|
||||
let Some(mnemonic_start_char) = self
|
||||
.find_top_left_char_in_range(
|
||||
self.text_section.min_x.get() - 5.0,
|
||||
self.text_section.max_x.get() + 5.0,
|
||||
last_header_line.regular_min_y - 50.0,
|
||||
last_header_line.regular_min_y - 5.0,
|
||||
false,
|
||||
)
|
||||
.map_err(ExtractInsnsError::Other)?
|
||||
else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn mnemonic text line".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let mnemonic_start_char_min_y = mnemonic_start_char.min_y.get();
|
||||
let Some(mnemonic_line) = self.extract_text_line(
|
||||
Some(mnemonic_start_char),
|
||||
mnemonic_start_char_min_y,
|
||||
self.text_section.min_x.get(),
|
||||
self.text_section.max_x.get(),
|
||||
TextLineFonts::InsnMnemonicFonts,
|
||||
0,
|
||||
true,
|
||||
None,
|
||||
)?
|
||||
else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn mnemonic text line".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
let mnemonic_line_first_char_min_x = mnemonic_line.chars[0].min_x.get();
|
||||
let mnemonic_lines = self.extract_following_text_lines(
|
||||
mnemonic_line,
|
||||
mnemonic_line_first_char_min_x,
|
||||
self.text_section.max_x.get(),
|
||||
None,
|
||||
)?;
|
||||
println!("insn mnemonic lines:");
|
||||
for mnemonic_line in &mnemonic_lines {
|
||||
println!("{mnemonic_line}");
|
||||
}
|
||||
let Some(insn_bit_fields) = self.extract_insn_bit_fields(&mnemonic_lines)? else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find insn bit fields".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
println!("{insn_bit_fields}");
|
||||
return Ok(Some(InsnHeader {
|
||||
header_lines,
|
||||
mnemonic_lines,
|
||||
bit_fields: insn_bit_fields,
|
||||
}));
|
||||
}
|
||||
fn extract_insn_sp_regs_altered(
|
||||
&mut self,
|
||||
mut sp_regs_altered_text: ParsedTextLine,
|
||||
) -> Result<InsnSpRegsAltered, ExtractInsnsError> {
|
||||
sp_regs_altered_text.preceding_blank_lines = 0;
|
||||
let fonts = TextLineFonts::InsnDescFonts;
|
||||
let column_min_x = sp_regs_altered_text.chars[0].min_x.get();
|
||||
let Some(table_header_reg_char) = self
|
||||
.find_top_left_char_in_range(
|
||||
column_min_x - 1.0,
|
||||
column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 1.0,
|
||||
sp_regs_altered_text.regular_min_y - 30.0,
|
||||
sp_regs_altered_text.regular_min_y - 5.0,
|
||||
false,
|
||||
)
|
||||
.map_err(ExtractInsnsError::Other)?
|
||||
else {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
"can't find special registers altered table's register-column's header".into(),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
};
|
||||
const KNOWN_SPECIAL_TEXTS: &[&str] = &[
|
||||
"None",
|
||||
"Dependent on the system service",
|
||||
"See above.",
|
||||
"See Table 5.1",
|
||||
];
|
||||
match &*table_header_reg_char.text {
|
||||
"R" => {}
|
||||
text if KNOWN_SPECIAL_TEXTS.iter().any(|i| text == &i[..1]) => {
|
||||
let start_min_y = table_header_reg_char.min_y.get();
|
||||
let special_text = self.extract_text_line(
|
||||
Some(table_header_reg_char),
|
||||
start_min_y,
|
||||
column_min_x,
|
||||
self.text_section.max_x.get(),
|
||||
fonts,
|
||||
0,
|
||||
true,
|
||||
None,
|
||||
)?;
|
||||
let special_text = match special_text {
|
||||
Some(special_text)
|
||||
if KNOWN_SPECIAL_TEXTS.contains(&&*special_text.element.text) =>
|
||||
{
|
||||
special_text
|
||||
}
|
||||
_ => return Err(ExtractInsnsError::Other(
|
||||
format!(
|
||||
"can't find special-registers-altered special-text:\n{special_text:?}"
|
||||
)
|
||||
.into(),
|
||||
)),
|
||||
};
|
||||
let final_regular_min_y = special_text.regular_min_y;
|
||||
return Ok(InsnSpRegsAltered {
|
||||
sp_regs_altered_text,
|
||||
special_text: Some(special_text),
|
||||
table_header_reg: None,
|
||||
table_header_fields: None,
|
||||
entries: vec![],
|
||||
final_regular_min_y,
|
||||
});
|
||||
}
|
||||
text => {
|
||||
return Err(ExtractInsnsError::InsnParseError(
|
||||
format!(
|
||||
"unknown special-registers-altered special-text start character: {text:?}"
|
||||
),
|
||||
Backtrace::capture(),
|
||||
));
|
||||
}
|
||||
}
|
||||
let Some(table_header_fields_char) = self
|
||||
.find_top_left_char_in_range(
|
||||
column_min_x + INSN_SP_REGS_ALTERED_FIELDS_COLUMN_X - 10.0,
|
||||
column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X,
|
||||
table_header_reg_char.min_y.get() - 5.0,
|
||||
table_header_reg_char.min_y.get() + 5.0,
|
||||
false,
|
||||
)
|
||||
.map_err(ExtractInsnsError::Other)?
|
||||
else {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
"can't find special registers altered table's fields-column's header".into(),
|
||||
));
|
||||
};
|
||||
if table_header_fields_char.text != "F" {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
format!(
|
||||
"can't find special registers altered table's fields-column's header:\n\
|
||||
table_header_fields_char={table_header_fields_char:?}"
|
||||
)
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
let columns_x_bounds = [
|
||||
(
|
||||
table_header_reg_char.min_x.get(),
|
||||
table_header_fields_char.min_x.get() - 1.0,
|
||||
),
|
||||
(
|
||||
table_header_fields_char.min_x.get(),
|
||||
column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X,
|
||||
),
|
||||
(
|
||||
column_min_x + INSN_SP_REGS_ALTERED_FIELDS_CONDS_SPLIT_X,
|
||||
self.text_section.max_x.get(),
|
||||
),
|
||||
];
|
||||
let start_min_y = table_header_reg_char.min_y.get();
|
||||
let Some(table_header_reg) = self.extract_text_line(
|
||||
Some(table_header_reg_char),
|
||||
start_min_y,
|
||||
columns_x_bounds[0].0,
|
||||
columns_x_bounds[0].1,
|
||||
fonts,
|
||||
0,
|
||||
false,
|
||||
None,
|
||||
)?
|
||||
else {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
"can't find special registers altered table's register-column's header".into(),
|
||||
));
|
||||
};
|
||||
let table_header_reg_text = table_header_reg.element.inner_text();
|
||||
if table_header_reg_text != "Register" {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
format!(
|
||||
"can't find special registers altered table's register-column's header:\n\
|
||||
table_header_reg_text={table_header_reg_text:?}"
|
||||
)
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
let start_min_y = table_header_fields_char.min_y.get();
|
||||
let Some(table_header_fields) = self.extract_text_line(
|
||||
Some(table_header_fields_char),
|
||||
start_min_y,
|
||||
columns_x_bounds[1].0,
|
||||
columns_x_bounds[1].1,
|
||||
fonts,
|
||||
0,
|
||||
false,
|
||||
None,
|
||||
)?
|
||||
else {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
"can't find special registers altered table's fields-column's header".into(),
|
||||
));
|
||||
};
|
||||
let table_header_fields_text = table_header_fields.element.inner_text();
|
||||
if table_header_reg_text != "Field(s)" {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
format!(
|
||||
"can't find special registers altered table's fields-column's header:\n\
|
||||
table_header_fields_text={table_header_fields_text:?}"
|
||||
)
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
let mut regular_min_y = table_header_reg.regular_min_y;
|
||||
let mut entries = Vec::new();
|
||||
let mut cur_reg = None;
|
||||
let mut cur_fields = Vec::new();
|
||||
let mut cur_conds = Vec::new();
|
||||
loop {
|
||||
let mut row = [None, None, None];
|
||||
let mut next_regular_min_y = None;
|
||||
for (i, (min_x, max_x)) in columns_x_bounds.into_iter().enumerate() {
|
||||
row[i] = self.extract_text_line(
|
||||
None,
|
||||
regular_min_y - fonts.regular()[0].line_height(),
|
||||
min_x,
|
||||
max_x,
|
||||
fonts,
|
||||
0,
|
||||
true,
|
||||
Some(2.0),
|
||||
)?;
|
||||
if let Some(cell) = &row[i]
|
||||
&& next_regular_min_y.is_none()
|
||||
{
|
||||
next_regular_min_y = Some(cell.regular_min_y);
|
||||
}
|
||||
}
|
||||
match next_regular_min_y {
|
||||
Some(v) => regular_min_y = v,
|
||||
None => break,
|
||||
}
|
||||
let [cur_reg_cell, cur_fields_cell, cur_conds_cell] = row;
|
||||
if cur_reg_cell.is_none() {
|
||||
if cur_reg.is_none() {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
"can't find special registers altered table's first register".into(),
|
||||
));
|
||||
}
|
||||
cur_fields.extend(cur_fields_cell);
|
||||
cur_conds.extend(cur_conds_cell);
|
||||
continue;
|
||||
}
|
||||
if let Some(cur_reg) = cur_reg {
|
||||
entries.push(InsnSpRegsAlteredEntry {
|
||||
reg: cur_reg,
|
||||
fields: cur_fields,
|
||||
conds: cur_conds,
|
||||
});
|
||||
cur_fields = Vec::new();
|
||||
cur_conds = Vec::new();
|
||||
}
|
||||
cur_reg = cur_reg_cell;
|
||||
cur_fields.extend(cur_fields_cell);
|
||||
cur_conds.extend(cur_conds_cell);
|
||||
}
|
||||
let Some(cur_reg) = cur_reg else {
|
||||
return Err(ExtractInsnsError::Other(
|
||||
"can't find special registers altered table's first register".into(),
|
||||
));
|
||||
};
|
||||
entries.push(InsnSpRegsAlteredEntry {
|
||||
reg: cur_reg,
|
||||
fields: cur_fields,
|
||||
conds: cur_conds,
|
||||
});
|
||||
return Ok(InsnSpRegsAltered {
|
||||
sp_regs_altered_text: sp_regs_altered_text,
|
||||
special_text: None,
|
||||
table_header_reg: Some(table_header_reg),
|
||||
table_header_fields: Some(table_header_fields),
|
||||
entries,
|
||||
final_regular_min_y: regular_min_y,
|
||||
});
|
||||
}
|
||||
/*fn extract_insn(&mut self, header_start_char: Char) -> Result<Insn, ExtractInsnsError> {
|
||||
assert_eq!(header_start_char.font, Font::InsnHeader);
|
||||
println!("{header_start_char:?}");
|
||||
|
|
@ -2401,57 +3083,192 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
#[derive(Clone, Debug)]
|
||||
struct MyDevice {
|
||||
page_num: u32,
|
||||
qt: Rc<RefCell<BTreeMap<TextSection, QuadTree<PageItem>>>>,
|
||||
unprocessed_non_text: Rc<RefCell<IndexSet<LineOrRect>>>,
|
||||
}
|
||||
|
||||
impl MyDevice {
|
||||
fn new(page_num: u32) -> Self {
|
||||
Self {
|
||||
page_num,
|
||||
qt: Default::default(),
|
||||
unprocessed_non_text: Default::default(),
|
||||
}
|
||||
}
|
||||
fn path(&mut self, path: &mupdf::Path, cmt: mupdf::Matrix) {
|
||||
enum Walker {
|
||||
Empty,
|
||||
Moved { x: f32, y: f32 },
|
||||
Line(Line),
|
||||
Rect { x1: f32, y1: f32, x2: f32, y2: f32 },
|
||||
NotRecognized,
|
||||
}
|
||||
fn new_line(p0_x: f32, p0_y: f32, p1_x: f32, p1_y: f32) -> Option<Line> {
|
||||
Some(Line {
|
||||
p0_x: NonNaNF32::new(p0_x)?,
|
||||
p0_y: NonNaNF32::new(p0_y)?,
|
||||
p1_x: NonNaNF32::new(p1_x)?,
|
||||
p1_y: NonNaNF32::new(p1_y)?,
|
||||
})
|
||||
}
|
||||
impl mupdf::PathWalker for Walker {
|
||||
fn move_to(&mut self, x: f32, y: f32) {
|
||||
*self = match *self {
|
||||
Walker::Empty | Walker::Moved { .. } => Walker::Moved { x, y },
|
||||
Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => {
|
||||
Walker::NotRecognized
|
||||
}
|
||||
};
|
||||
}
|
||||
fn line_to(&mut self, x: f32, y: f32) {
|
||||
*self = match *self {
|
||||
Walker::Empty => Walker::NotRecognized,
|
||||
Walker::Moved { x: p0_x, y: p0_y } => new_line(p0_x, p0_y, x, y)
|
||||
.map(Walker::Line)
|
||||
.unwrap_or(Walker::NotRecognized),
|
||||
Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => {
|
||||
Walker::NotRecognized
|
||||
}
|
||||
};
|
||||
}
|
||||
fn curve_to(&mut self, _cx1: f32, _cy1: f32, _cx2: f32, _cy2: f32, _ex: f32, _ey: f32) {
|
||||
*self = Walker::NotRecognized;
|
||||
}
|
||||
fn close(&mut self) {}
|
||||
fn rect(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) {
|
||||
*self = match *self {
|
||||
Walker::Empty => Walker::Rect { x1, y1, x2, y2 },
|
||||
Walker::Moved { .. }
|
||||
| Walker::Line(..)
|
||||
| Walker::Rect { .. }
|
||||
| Walker::NotRecognized => Walker::NotRecognized,
|
||||
};
|
||||
}
|
||||
}
|
||||
let mut walker = Walker::Empty;
|
||||
let Ok(()) = path.walk(&mut walker) else {
|
||||
return;
|
||||
};
|
||||
let component = match walker {
|
||||
Walker::Empty | Walker::Moved { .. } | Walker::NotRecognized => return,
|
||||
Walker::Line(Line {
|
||||
p0_x,
|
||||
p0_y,
|
||||
p1_x,
|
||||
p1_y,
|
||||
}) => {
|
||||
let mupdf::Point { x: p0_x, y: p0_y } = mupdf::Point {
|
||||
x: p0_x.get(),
|
||||
y: p0_y.get(),
|
||||
}
|
||||
.transform(&cmt);
|
||||
let mupdf::Point { x: p1_x, y: p1_y } = mupdf::Point {
|
||||
x: p1_x.get(),
|
||||
y: p1_y.get(),
|
||||
}
|
||||
.transform(&cmt);
|
||||
let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else {
|
||||
return;
|
||||
};
|
||||
LineOrRect::Line(line)
|
||||
}
|
||||
Walker::Rect { x1, y1, x2, y2 } => {
|
||||
let p1 = mupdf::Point { x: x1, y: y1 }.transform(&cmt);
|
||||
let p2 = mupdf::Point { x: x2, y: y1 }.transform(&cmt);
|
||||
let p3 = mupdf::Point { x: x2, y: y2 }.transform(&cmt);
|
||||
let p4 = mupdf::Point { x: x1, y: y2 }.transform(&cmt);
|
||||
let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x));
|
||||
let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x));
|
||||
let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y));
|
||||
let max_y = NonNaNF32::new(p1.y.max(p2.y).max(p3.y).max(p4.y));
|
||||
let (Some(min_x), Some(max_x), Some(min_y), Some(max_y)) =
|
||||
(min_x, max_x, min_y, max_y)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
LineOrRect::Rect(Rect {
|
||||
min_x,
|
||||
max_x,
|
||||
min_y,
|
||||
max_y,
|
||||
})
|
||||
}
|
||||
};
|
||||
if component.width() > 100.0
|
||||
&& component.min_x().get() < COLUMN_SPLIT_X - 10.0
|
||||
&& component.max_x().get() > COLUMN_SPLIT_X + 10.0
|
||||
{
|
||||
println!("wide component: {component:?}");
|
||||
} else {
|
||||
println!("component: {component:?}");
|
||||
}
|
||||
let text_section = TextSection::for_position(
|
||||
self.page_num,
|
||||
(component.min_x().get() + component.max_x().get()) * 0.5,
|
||||
(component.min_y().get() + component.max_y().get()) * 0.5,
|
||||
);
|
||||
if let Some(text_section) = text_section {
|
||||
self.qt
|
||||
.borrow_mut()
|
||||
.entry(text_section)
|
||||
.or_default()
|
||||
.insert(
|
||||
component.min_x().get(),
|
||||
component.min_y().get(),
|
||||
PageItem::LineOrRect(component),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Forwards every path callback to [`MyDevice::path`]; only the path and its transformation
/// matrix are used, all styling and clipping parameters are ignored.
impl mupdf::NativeDevice for MyDevice {
    /// Records a filled path.
    fn fill_path(
        &mut self,
        path: &mupdf::Path,
        _even_odd: bool,
        cmt: mupdf::Matrix,
        _color_space: &mupdf::Colorspace,
        _color: &[f32],
        _alpha: f32,
        _cp: mupdf::ColorParams,
    ) {
        self.path(path, cmt);
    }

    /// Records a stroked path.
    fn stroke_path(
        &mut self,
        path: &mupdf::Path,
        _stroke_state: &mupdf::StrokeState,
        cmt: mupdf::Matrix,
        _color_space: &mupdf::Colorspace,
        _color: &[f32],
        _alpha: f32,
        _cp: mupdf::ColorParams,
    ) {
        self.path(path, cmt);
    }

    /// Records a clip path.
    ///
    /// NOTE(review): clip paths are recorded exactly like drawn paths — confirm this is
    /// intended.
    fn clip_path(
        &mut self,
        path: &mupdf::Path,
        _even_odd: bool,
        cmt: mupdf::Matrix,
        _scissor: mupdf::Rect,
    ) {
        self.path(path, cmt);
    }

    /// Records a stroked clip path.
    ///
    /// NOTE(review): clip paths are recorded exactly like drawn paths — confirm this is
    /// intended.
    fn clip_stroke_path(
        &mut self,
        path: &mupdf::Path,
        _stroke_state: &mupdf::StrokeState,
        cmt: mupdf::Matrix,
        _scissor: mupdf::Rect,
    ) {
        self.path(path, cmt);
    }
}
|
||||
|
||||
|
|
@ -2528,12 +3345,13 @@ impl Page {
|
|||
page_num: u32,
|
||||
page: mupdf::Page,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let device = MyDevice::default();
|
||||
let device = MyDevice::new(page_num);
|
||||
page.run(
|
||||
&mupdf::Device::from_native(device.clone())?,
|
||||
&mupdf::Matrix::IDENTITY,
|
||||
)?;
|
||||
let MyDevice {
|
||||
page_num,
|
||||
qt,
|
||||
unprocessed_non_text,
|
||||
} = device;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue