From 103f986bc06ed6c2ba12d475aff3d9be8a10ebef Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 5 Jan 2026 17:53:57 -0800 Subject: [PATCH] wip --- src/main.rs | 140 +++++++++++++++++++++++------------------ src/mupdf_ffi.rs | 160 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 236 insertions(+), 64 deletions(-) diff --git a/src/main.rs b/src/main.rs index d2668ee..b29bf07 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,12 +2,12 @@ // See Notices.txt for copyright information use crate::quad_tree::QuadTree; -use indexmap::{IndexMap, IndexSet}; +use indexmap::IndexSet; use mupdf_sys::{FZ_STEXT_BOLD, fz_matrix}; use non_nan_float::NonNaNF32; use std::{ backtrace::Backtrace, - borrow::{Borrow, Cow}, + borrow::Cow, cell::RefCell, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, convert::Infallible, @@ -681,7 +681,7 @@ impl ParsedTextLine { } } fn write_xml_lines( - lines: impl IntoIterator>, + lines: impl IntoIterator>, parent: &mut xml_tree::Element, trailing_nl: bool, preceding_nl: bool, @@ -691,7 +691,7 @@ impl ParsedTextLine { } let mut first = true; for line in lines { - let line = line.borrow(); + let line = std::borrow::Borrow::borrow(&line); if first { first = false; } else { @@ -1182,8 +1182,8 @@ impl<'a> ElementBodyBuilder<'a> { } self.shrink_stack(new_len); } - fn write_text(&mut self, text: impl Borrow) { - let text = text.borrow(); + fn write_text(&mut self, text: impl std::borrow::Borrow) { + let text = std::borrow::Borrow::borrow(&text); let insert_point = self.insert_point(); if let Some(child) = insert_point.children.last_mut() { child.tail += text; @@ -1328,14 +1328,14 @@ impl fmt::Display for InsnBitFields { impl InsnBitFields { fn write_xml_fields( - fields: impl IntoIterator>, + fields: impl IntoIterator>, parent: &mut xml_tree::Element, ) { let fields_elm = parent.sub_element("fields".into(), []); fields_elm.text = "\n".into(); fields_elm.tail = "\n".into(); for field in fields { - field.borrow().write_xml(fields_elm); + 
std::borrow::Borrow::borrow(&field).write_xml(fields_elm); } } fn write_xml(&self, parent: &mut xml_tree::Element) { @@ -3170,7 +3170,9 @@ impl<'ctx> Parser<'ctx> { #[derive(Clone, Debug)] struct MyDevice { page_num: u32, - qt: BTreeMap>, + qt: Rc>>>, + unprocessed_chars: + Rc>>>>>>, unprocessed_non_text: Rc>>, } @@ -3179,10 +3181,11 @@ impl MyDevice { Self { page_num, qt: Default::default(), + unprocessed_chars: Default::default(), unprocessed_non_text: Default::default(), } } - fn path(&mut self, path: &mupdf_ffi::Path<'_>, cmt: fz_matrix) { + fn path(&self, path: &mupdf_ffi::Path<'_>, cmt: fz_matrix) { enum Walker { Empty, Moved { x: f32, y: f32 }, @@ -3198,8 +3201,8 @@ impl MyDevice { p1_y: NonNaNF32::new(p1_y)?, }) } - impl mupdf::PathWalker for Walker { - fn move_to(&mut self, x: f32, y: f32) { + impl<'ctx> mupdf_ffi::PathWalker<'ctx> for Walker { + fn move_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) { *self = match *self { Walker::Empty | Walker::Moved { .. } => Walker::Moved { x, y }, Walker::Line(_) | Walker::Rect { .. 
} | Walker::NotRecognized => { @@ -3207,7 +3210,7 @@ impl MyDevice { } }; } - fn line_to(&mut self, x: f32, y: f32) { + fn line_to(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>, x: f32, y: f32) { *self = match *self { Walker::Empty => Walker::NotRecognized, Walker::Moved { x: p0_x, y: p0_y } => new_line(p0_x, p0_y, x, y) @@ -3218,11 +3221,27 @@ impl MyDevice { } }; } - fn curve_to(&mut self, _cx1: f32, _cy1: f32, _cx2: f32, _cy2: f32, _ex: f32, _ey: f32) { + fn curve_to( + &mut self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + _cx1: f32, + _cy1: f32, + _cx2: f32, + _cy2: f32, + _ex: f32, + _ey: f32, + ) { *self = Walker::NotRecognized; } - fn close(&mut self) {} - fn rect(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) { + fn close_path(&mut self, _ctx: mupdf_ffi::ContextRef<'ctx>) {} + fn rect_to( + &mut self, + _ctx: mupdf_ffi::ContextRef<'ctx>, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + ) { *self = match *self { Walker::Empty => Walker::Rect { x1, y1, x2, y2 }, Walker::Moved { .. } @@ -3233,9 +3252,7 @@ impl MyDevice { } } let mut walker = Walker::Empty; - let Ok(()) = path.walk(&mut walker) else { - return; - }; + path.walk(&mut walker); let component = match walker { Walker::Empty | Walker::Moved { .. 
} | Walker::NotRecognized => return, Walker::Line(Line { @@ -3244,26 +3261,20 @@ impl MyDevice { p1_x, p1_y, }) => { - let mupdf::Point { x: p0_x, y: p0_y } = mupdf::Point { - x: p0_x.get(), - y: p0_y.get(), - } - .transform(&cmt); - let mupdf::Point { x: p1_x, y: p1_y } = mupdf::Point { - x: p1_x.get(), - y: p1_y.get(), - } - .transform(&cmt); + let mupdf_sys::fz_point { x: p0_x, y: p0_y } = + mupdf_ffi::transform_point_xy(p0_x.get(), p0_y.get(), cmt); + let mupdf_sys::fz_point { x: p1_x, y: p1_y } = + mupdf_ffi::transform_point_xy(p1_x.get(), p1_y.get(), cmt); let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else { return; }; LineOrRect::Line(line) } Walker::Rect { x1, y1, x2, y2 } => { - let p1 = mupdf::Point { x: x1, y: y1 }.transform(&cmt); - let p2 = mupdf::Point { x: x2, y: y1 }.transform(&cmt); - let p3 = mupdf::Point { x: x2, y: y2 }.transform(&cmt); - let p4 = mupdf::Point { x: x1, y: y2 }.transform(&cmt); + let p1 = mupdf_ffi::transform_point_xy(x1, y1, cmt); + let p2 = mupdf_ffi::transform_point_xy(x2, y1, cmt); + let p3 = mupdf_ffi::transform_point_xy(x2, y2, cmt); + let p4 = mupdf_ffi::transform_point_xy(x1, y2, cmt); let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x)); let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x)); let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y)); @@ -3306,25 +3317,25 @@ impl MyDevice { ); } } - fn text(&mut self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) { + fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) { todo!() } } impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice { fn fill_path( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, - even_odd: bool, + _even_odd: bool, cmt: fz_matrix, ) { self.path(path, cmt); } fn stroke_path( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, cmt: fz_matrix, ) { @@ -3332,29 +3343,29 @@ 
impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice { } fn clip_path( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, - even_odd: bool, + _even_odd: bool, cmt: fz_matrix, - scissor: mupdf_sys::fz_rect, + _scissor: mupdf_sys::fz_rect, ) { self.path(path, cmt); } fn clip_stroke_path( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, cmt: fz_matrix, - scissor: mupdf_sys::fz_rect, + _scissor: mupdf_sys::fz_rect, ) { self.path(path, cmt); } fn fill_text( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, cmt: fz_matrix, ) { @@ -3362,8 +3373,8 @@ impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice { } fn stroke_text( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, cmt: fz_matrix, ) { @@ -3371,28 +3382,28 @@ impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice { } fn clip_text( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, cmt: fz_matrix, - scissor: mupdf_sys::fz_rect, + _scissor: mupdf_sys::fz_rect, ) { self.text(text, cmt); } fn clip_stroke_text( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, cmt: fz_matrix, - scissor: mupdf_sys::fz_rect, + _scissor: mupdf_sys::fz_rect, ) { self.text(text, cmt); } fn ignore_text( - &mut self, - ctx: mupdf_ffi::ContextRef<'ctx>, + &self, + _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, cmt: fz_matrix, ) { @@ -3477,20 +3488,25 @@ impl Page { let device = MyDevice::new(page_num); page.run( &mupdf_ffi::Device::new(page.ctx(), Box::new(device))?, - &mupdf::Matrix::IDENTITY, + fz_matrix { + a: 1.0, + b: 0.0, + c: 0.0, + d: 1.0, + e: 0.0, + f: 0.0, + }, )?; let 
MyDevice { page_num, qt, + unprocessed_chars, unprocessed_non_text, } = device; let mut qt = Rc::try_unwrap(qt) .ok() .expect("already dropped all other references") .into_inner(); - let unprocessed_chars: Rc< - RefCell>>>>>, - > = Rc::default(); // we convert to xml and parse that becuase the mupdf rust crate doesn't include all the API surface we need. let json = page.stext_page_as_json_from_page(1.0)?; todo!("{json}"); diff --git a/src/mupdf_ffi.rs b/src/mupdf_ffi.rs index 1a0e669..3eb24fb 100644 --- a/src/mupdf_ffi.rs +++ b/src/mupdf_ffi.rs @@ -4,13 +4,14 @@ use mupdf_sys::{ fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document, fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, - fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_rect, fz_stroke_state, fz_text, + fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, fz_rect, + fz_stroke_state, fz_text, fz_transform_point, fz_transform_point_xy, fz_walk_path, mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page, mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page, }; use std::{ cell::Cell, - ffi::{CStr, CString, c_int}, + ffi::{CStr, CString, c_int, c_void}, fmt, marker::PhantomData, mem::ManuallyDrop, @@ -542,11 +543,158 @@ struct DeviceStruct { value: Box, } +pub(crate) trait PathWalker<'ctx> { + fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32); + fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32); + fn curve_to( + &mut self, + ctx: ContextRef<'ctx>, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + x3: f32, + y3: f32, + ); + fn close_path(&mut self, ctx: ContextRef<'ctx>); + fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) { /* Default: trace the rectangle with one move_to, three line_to, then close_path. */ + self.move_to(ctx, x1, y1); + self.line_to(ctx, x2, y1); + self.line_to(ctx, x2, y2); + self.line_to(ctx, x1, y2); + self.close_path(ctx); + } +} + +impl<'ctx, T: 
?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T { + fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) { + T::move_to(self, ctx, x, y); + } + + fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) { + T::line_to(self, ctx, x, y); + } + + fn curve_to( + &mut self, + ctx: ContextRef<'ctx>, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + x3: f32, + y3: f32, + ) { + T::curve_to(self, ctx, x1, y1, x2, y2, x3, y3); + } + + fn close_path(&mut self, ctx: ContextRef<'ctx>) { + T::close_path(self, ctx); + } + + fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) { + T::rect_to(self, ctx, x1, y1, x2, y2); + } +} + pub(crate) struct Path<'ctx> { ptr: *mut fz_path, ctx: ContextRef<'ctx>, } +impl<'ctx> Path<'ctx> { + pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) { + unsafe { /* SAFETY: `arg` points at the local `walker`, which outlives this call; each callback is instantiated with the same `W` it casts `arg` back to. */ + fz_walk_path( + self.ctx.0.as_ptr(), + self.ptr, + const { + &fz_path_walker { + moveto: Some(Self::move_to_fn::<W>), + lineto: Some(Self::line_to_fn::<W>), + curveto: Some(Self::curve_to_fn::<W>), + closepath: Some(Self::close_path_fn::<W>), + quadto: None, + curvetov: None, + curvetoy: None, + rectto: Some(Self::rect_to_fn::<W>), + } + }, + (&raw mut walker).cast(), + ); + } + } + unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>( + ctx: *mut fz_context, + arg: *mut c_void, + x: f32, + y: f32, + ) { + let Some(ctx) = NonNull::new(ctx) else { + return; + }; + let ctx = ContextRef(ctx, PhantomData); + let this = unsafe { &mut *arg.cast::<W>() }; + this.move_to(ctx, x, y); + } + unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>( + ctx: *mut fz_context, + arg: *mut c_void, + x: f32, + y: f32, + ) { + let Some(ctx) = NonNull::new(ctx) else { + return; + }; + let ctx = ContextRef(ctx, PhantomData); + let this = unsafe { &mut *arg.cast::<W>() }; + this.line_to(ctx, x, y); + } + unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>( + ctx: *mut fz_context, + arg: *mut c_void, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + x3: f32, + y3: f32, + ) { + let Some(ctx) = NonNull::new(ctx) else { + return; + }; + let ctx = ContextRef(ctx, 
PhantomData); + let this = unsafe { &mut *arg.cast::<W>() }; + this.curve_to(ctx, x1, y1, x2, y2, x3, y3); + } + unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>( + ctx: *mut fz_context, + arg: *mut c_void, + ) { + let Some(ctx) = NonNull::new(ctx) else { + return; + }; + let ctx = ContextRef(ctx, PhantomData); + let this = unsafe { &mut *arg.cast::<W>() }; + this.close_path(ctx); + } + unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>( + ctx: *mut fz_context, + arg: *mut c_void, + x1: f32, + y1: f32, + x2: f32, + y2: f32, + ) { + let Some(ctx) = NonNull::new(ctx) else { + return; + }; + let ctx = ContextRef(ctx, PhantomData); + let this = unsafe { &mut *arg.cast::<W>() }; + this.rect_to(ctx, x1, y1, x2, y2); + } +} + impl<'ctx> Drop for Path<'ctx> { fn drop(&mut self) { unsafe { @@ -567,3 +715,11 @@ impl<'ctx> Drop for Text<'ctx> { } } } + +pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point { + unsafe { fz_transform_point(point, m) } +} + +pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point { + unsafe { fz_transform_point_xy(x, y, m) } +}