This commit is contained in:
Jacob Lifshay 2026-01-05 17:53:57 -08:00
parent f9a24f4c48
commit 103f986bc0
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
2 changed files with 236 additions and 64 deletions

View file

@ -2,12 +2,12 @@
// See Notices.txt for copyright information
use crate::quad_tree::QuadTree;
use indexmap::{IndexMap, IndexSet};
use indexmap::IndexSet;
use mupdf_sys::{FZ_STEXT_BOLD, fz_matrix};
use non_nan_float::NonNaNF32;
use std::{
backtrace::Backtrace,
borrow::{Borrow, Cow},
borrow::Cow,
cell::RefCell,
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
convert::Infallible,
@ -681,7 +681,7 @@ impl ParsedTextLine {
}
}
fn write_xml_lines(
lines: impl IntoIterator<Item: Borrow<ParsedTextLine>>,
lines: impl IntoIterator<Item: std::borrow::Borrow<ParsedTextLine>>,
parent: &mut xml_tree::Element,
trailing_nl: bool,
preceding_nl: bool,
@ -691,7 +691,7 @@ impl ParsedTextLine {
}
let mut first = true;
for line in lines {
let line = line.borrow();
let line = std::borrow::Borrow::borrow(&line);
if first {
first = false;
} else {
@ -1182,8 +1182,8 @@ impl<'a> ElementBodyBuilder<'a> {
}
self.shrink_stack(new_len);
}
fn write_text(&mut self, text: impl Borrow<str>) {
let text = text.borrow();
fn write_text(&mut self, text: impl std::borrow::Borrow<str>) {
let text = std::borrow::Borrow::borrow(&text);
let insert_point = self.insert_point();
if let Some(child) = insert_point.children.last_mut() {
child.tail += text;
@ -1328,14 +1328,14 @@ impl fmt::Display for InsnBitFields {
impl InsnBitFields {
fn write_xml_fields(
fields: impl IntoIterator<Item: Borrow<InsnBitField>>,
fields: impl IntoIterator<Item: std::borrow::Borrow<InsnBitField>>,
parent: &mut xml_tree::Element,
) {
let fields_elm = parent.sub_element("fields".into(), []);
fields_elm.text = "\n".into();
fields_elm.tail = "\n".into();
for field in fields {
field.borrow().write_xml(fields_elm);
std::borrow::Borrow::borrow(&field).write_xml(fields_elm);
}
}
fn write_xml(&self, parent: &mut xml_tree::Element) {
@ -3170,7 +3170,9 @@ impl<'ctx> Parser<'ctx> {
#[derive(Clone, Debug)]
struct MyDevice {
page_num: u32,
qt: BTreeMap<TextSection, QuadTree<PageItem>>,
qt: Rc<RefCell<BTreeMap<TextSection, QuadTree<PageItem>>>>,
unprocessed_chars:
Rc<RefCell<BTreeMap<TextSection, Rc<RefCell<BTreeMap<Font, IndexSet<Char>>>>>>>,
unprocessed_non_text: Rc<RefCell<IndexSet<LineOrRect>>>,
}
@ -3179,10 +3181,11 @@ impl MyDevice {
Self {
page_num,
qt: Default::default(),
unprocessed_chars: Default::default(),
unprocessed_non_text: Default::default(),
}
}
fn path(&mut self, path: &mupdf_ffi::Path<'_>, cmt: fz_matrix) {
fn path(&self, path: &mupdf_ffi::Path<'_>, cmt: fz_matrix) {
enum Walker {
Empty,
Moved { x: f32, y: f32 },
@ -3198,8 +3201,8 @@ impl MyDevice {
p1_y: NonNaNF32::new(p1_y)?,
})
}
impl mupdf::PathWalker for Walker {
fn move_to(&mut self, x: f32, y: f32) {
impl<'ctx> mupdf_ffi::PathWalker<'ctx> for Walker {
fn move_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) {
*self = match *self {
Walker::Empty | Walker::Moved { .. } => Walker::Moved { x, y },
Walker::Line(_) | Walker::Rect { .. } | Walker::NotRecognized => {
@ -3207,7 +3210,7 @@ impl MyDevice {
}
};
}
fn line_to(&mut self, x: f32, y: f32) {
fn line_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) {
*self = match *self {
Walker::Empty => Walker::NotRecognized,
Walker::Moved { x: p0_x, y: p0_y } => new_line(p0_x, p0_y, x, y)
@ -3218,11 +3221,27 @@ impl MyDevice {
}
};
}
fn curve_to(&mut self, _cx1: f32, _cy1: f32, _cx2: f32, _cy2: f32, _ex: f32, _ey: f32) {
fn curve_to(
&mut self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
_cx1: f32,
_cy1: f32,
_cx2: f32,
_cy2: f32,
_ex: f32,
_ey: f32,
) {
*self = Walker::NotRecognized;
}
fn close(&mut self) {}
fn rect(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) {
fn close_path(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>) {}
fn rect_to(
&mut self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
x1: f32,
y1: f32,
x2: f32,
y2: f32,
) {
*self = match *self {
Walker::Empty => Walker::Rect { x1, y1, x2, y2 },
Walker::Moved { .. }
@ -3233,9 +3252,7 @@ impl MyDevice {
}
}
let mut walker = Walker::Empty;
let Ok(()) = path.walk(&mut walker) else {
return;
};
path.walk(&mut walker);
let component = match walker {
Walker::Empty | Walker::Moved { .. } | Walker::NotRecognized => return,
Walker::Line(Line {
@ -3244,26 +3261,20 @@ impl MyDevice {
p1_x,
p1_y,
}) => {
let mupdf::Point { x: p0_x, y: p0_y } = mupdf::Point {
x: p0_x.get(),
y: p0_y.get(),
}
.transform(&cmt);
let mupdf::Point { x: p1_x, y: p1_y } = mupdf::Point {
x: p1_x.get(),
y: p1_y.get(),
}
.transform(&cmt);
let mupdf_sys::fz_point { x: p0_x, y: p0_y } =
mupdf_ffi::transform_point_xy(p0_x.get(), p0_y.get(), cmt);
let mupdf_sys::fz_point { x: p1_x, y: p1_y } =
mupdf_ffi::transform_point_xy(p1_x.get(), p1_y.get(), cmt);
let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else {
return;
};
LineOrRect::Line(line)
}
Walker::Rect { x1, y1, x2, y2 } => {
let p1 = mupdf::Point { x: x1, y: y1 }.transform(&cmt);
let p2 = mupdf::Point { x: x2, y: y1 }.transform(&cmt);
let p3 = mupdf::Point { x: x2, y: y2 }.transform(&cmt);
let p4 = mupdf::Point { x: x1, y: y2 }.transform(&cmt);
let p1 = mupdf_ffi::transform_point_xy(x1, y1, cmt);
let p2 = mupdf_ffi::transform_point_xy(x2, y1, cmt);
let p3 = mupdf_ffi::transform_point_xy(x2, y2, cmt);
let p4 = mupdf_ffi::transform_point_xy(x1, y2, cmt);
let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x));
let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x));
let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y));
@ -3306,25 +3317,25 @@ impl MyDevice {
);
}
}
fn text(&mut self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) {
fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) {
todo!()
}
}
impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice {
fn fill_path(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
path: &mupdf_ffi::Path<'ctx>,
even_odd: bool,
_even_odd: bool,
cmt: fz_matrix,
) {
self.path(path, cmt);
}
fn stroke_path(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
path: &mupdf_ffi::Path<'ctx>,
cmt: fz_matrix,
) {
@ -3332,29 +3343,29 @@ impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice {
}
fn clip_path(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
path: &mupdf_ffi::Path<'ctx>,
even_odd: bool,
_even_odd: bool,
cmt: fz_matrix,
scissor: mupdf_sys::fz_rect,
_scissor: mupdf_sys::fz_rect,
) {
self.path(path, cmt);
}
fn clip_stroke_path(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
path: &mupdf_ffi::Path<'ctx>,
cmt: fz_matrix,
scissor: mupdf_sys::fz_rect,
_scissor: mupdf_sys::fz_rect,
) {
self.path(path, cmt);
}
fn fill_text(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
text: &mupdf_ffi::Text<'ctx>,
cmt: fz_matrix,
) {
@ -3362,8 +3373,8 @@ impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice {
}
fn stroke_text(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
text: &mupdf_ffi::Text<'ctx>,
cmt: fz_matrix,
) {
@ -3371,28 +3382,28 @@ impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice {
}
fn clip_text(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
text: &mupdf_ffi::Text<'ctx>,
cmt: fz_matrix,
scissor: mupdf_sys::fz_rect,
_scissor: mupdf_sys::fz_rect,
) {
self.text(text, cmt);
}
fn clip_stroke_text(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
text: &mupdf_ffi::Text<'ctx>,
cmt: fz_matrix,
scissor: mupdf_sys::fz_rect,
_scissor: mupdf_sys::fz_rect,
) {
self.text(text, cmt);
}
fn ignore_text(
&mut self,
ctx: mupdf_ffi::ContextRef<'ctx>,
&self,
_ctx: mupdf_ffi::ContextRef<'ctx>,
text: &mupdf_ffi::Text<'ctx>,
cmt: fz_matrix,
) {
@ -3477,20 +3488,25 @@ impl Page {
let device = MyDevice::new(page_num);
page.run(
&mupdf_ffi::Device::new(page.ctx(), Box::new(device))?,
&mupdf::Matrix::IDENTITY,
fz_matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 0.0,
f: 0.0,
},
)?;
let MyDevice {
page_num,
qt,
unprocessed_chars,
unprocessed_non_text,
} = device;
let mut qt = Rc::try_unwrap(qt)
.ok()
.expect("already dropped all other references")
.into_inner();
let unprocessed_chars: Rc<
RefCell<BTreeMap<TextSection, Rc<RefCell<BTreeMap<Font, IndexSet<Char>>>>>>,
> = Rc::default();
// we convert to xml and parse that because the mupdf rust crate doesn't include all the API surface we need.
let json = page.stext_page_as_json_from_page(1.0)?;
todo!("{json}");

View file

@ -4,13 +4,14 @@
use mupdf_sys::{
fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document,
fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text,
fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_rect, fz_stroke_state, fz_text,
fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, fz_rect,
fz_stroke_state, fz_text, fz_transform_point, fz_transform_point_xy, fz_walk_path,
mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page,
mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page,
};
use std::{
cell::Cell,
ffi::{CStr, CString, c_int},
ffi::{CStr, CString, c_int, c_void},
fmt,
marker::PhantomData,
mem::ManuallyDrop,
@ -542,11 +543,158 @@ struct DeviceStruct<T> {
value: Box<T>,
}
/// Receiver for the segments of a [`Path`], invoked once per segment by
/// [`Path::walk`].
///
/// Every method receives the MuPDF context that the walk is running under.
pub(crate) trait PathWalker<'ctx> {
    /// Starts a new subpath at `(x, y)`.
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Adds a straight segment from the current point to `(x, y)`.
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Adds a cubic curve with control points `(x1, y1)` and `(x2, y2)`
    /// ending at `(x3, y3)`.
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    );
    /// Closes the current subpath.
    fn close_path(&mut self, ctx: ContextRef<'ctx>);
    /// Adds the rectangle with opposite corners `(x1, y1)` and `(x2, y2)`.
    ///
    /// The default implementation decomposes the rectangle into one
    /// `move_to`, three `line_to`s and a `close_path`. Implementors that want
    /// to recognize rectangles as such should override it.
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        // Only the first corner starts the subpath; the remaining corners must
        // be joined with lines, otherwise the "rectangle" degenerates into
        // four empty subpaths.
        self.move_to(ctx, x1, y1);
        self.line_to(ctx, x2, y1);
        self.line_to(ctx, x2, y2);
        self.line_to(ctx, x1, y2);
        self.close_path(ctx);
    }
}
/// Forwarding impl: a mutable reference to a walker is itself a walker.
///
/// This lets callers hand `&mut walker` to [`Path::walk`] and keep ownership
/// of the walker so its state can be inspected after the walk.
impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T {
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        (**self).move_to(ctx, x, y)
    }

    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        (**self).line_to(ctx, x, y)
    }

    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        (**self).curve_to(ctx, x1, y1, x2, y2, x3, y3)
    }

    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
        (**self).close_path(ctx)
    }

    // Forwarded explicitly so that an overridden `rect_to` on `T` is used
    // instead of the trait's default decomposition.
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        (**self).rect_to(ctx, x1, y1, x2, y2)
    }
}
/// Handle to a MuPDF `fz_path` together with the context it is used with.
///
/// NOTE(review): a `Drop` impl exists for this type; presumably it releases
/// `ptr` via `fz_drop_path` — confirm against that impl.
pub(crate) struct Path<'ctx> {
/// Raw MuPDF path pointer; handed to `fz_walk_path` by `walk`.
ptr: *mut fz_path,
/// Context whose raw pointer is passed to MuPDF alongside `ptr`.
ctx: ContextRef<'ctx>,
}
impl<'ctx> Path<'ctx> {
pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) {
unsafe {
fz_walk_path(
self.ctx.0.as_ptr(),
self.ptr,
const {
&fz_path_walker {
moveto: Some(Self::move_to_fn::<W>),
lineto: Some(Self::line_to_fn::<W>),
curveto: Some(Self::curve_to_fn::<W>),
closepath: Some(Self::close_path_fn::<W>),
quadto: None,
curvetov: None,
curvetoy: None,
rectto: Some(Self::rect_to_fn::<W>),
}
},
(&raw mut walker).cast(),
);
}
}
unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>(
ctx: *mut fz_context,
arg: *mut c_void,
x: f32,
y: f32,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
};
let ctx = ContextRef(ctx, PhantomData);
let this = unsafe { &mut *arg.cast::<W>() };
this.move_to(ctx, x, y);
}
unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>(
ctx: *mut fz_context,
arg: *mut c_void,
x: f32,
y: f32,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
};
let ctx = ContextRef(ctx, PhantomData);
let this = unsafe { &mut *arg.cast::<W>() };
this.line_to(ctx, x, y);
}
unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>(
ctx: *mut fz_context,
arg: *mut c_void,
x1: f32,
y1: f32,
x2: f32,
y2: f32,
x3: f32,
y3: f32,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
};
let ctx = ContextRef(ctx, PhantomData);
let this = unsafe { &mut *arg.cast::<W>() };
this.curve_to(ctx, x1, y1, x2, y2, x3, y3);
}
unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>(
ctx: *mut fz_context,
arg: *mut c_void,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
};
let ctx = ContextRef(ctx, PhantomData);
let this = unsafe { &mut *arg.cast::<W>() };
this.close_path(ctx);
}
unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>(
ctx: *mut fz_context,
arg: *mut c_void,
x1: f32,
y1: f32,
x2: f32,
y2: f32,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
};
let ctx = ContextRef(ctx, PhantomData);
let this = unsafe { &mut *arg.cast::<W>() };
this.rect_to(ctx, x1, y1, x2, y2);
}
}
impl<'ctx> Drop for Path<'ctx> {
fn drop(&mut self) {
unsafe {
@ -567,3 +715,11 @@ impl<'ctx> Drop for Text<'ctx> {
}
}
}
/// Safe wrapper around MuPDF's `fz_transform_point`: returns `point`
/// transformed by the matrix `m`.
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
// SAFETY: both arguments are passed by value, so no pointers are handed to C.
// NOTE(review): assumes the function needs no context and has no other
// preconditions — confirm against the MuPDF headers.
unsafe { fz_transform_point(point, m) }
}
/// Safe wrapper around MuPDF's `fz_transform_point_xy`: returns the point
/// `(x, y)` transformed by the matrix `m`.
pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point {
// SAFETY: all arguments are passed by value, so no pointers are handed to C.
// NOTE(review): assumes the function needs no context and has no other
// preconditions — confirm against the MuPDF headers.
unsafe { fz_transform_point_xy(x, y, m) }
}