diff --git a/src/main.rs b/src/main.rs index b29bf07..b56937c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3318,7 +3318,43 @@ impl MyDevice { } } fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) { - todo!() + for span in text.spans() { + let mupdf_sys::fz_text_span { trm, .. } = span.get(); + let mupdf_sys::fz_font { + refs, + name, + buffer, + flags, + ft_face, + shaper_data, + t3matrix, + t3resources, + t3procs, + t3lists, + t3widths, + t3flags, + t3doc, + t3run, + t3freeres, + bbox, + ascender, + descender, + glyph_count, + bbox_table, + use_glyph_bbox, + width_count, + width_default, + width_table, + advance_cache, + encoding_cache, + has_digest, + digest, + subfont, + } = *span.font().get(); + for item in span.items() { + todo!() + } + } } } @@ -3487,7 +3523,7 @@ impl Page { ) -> Result> { let device = MyDevice::new(page_num); page.run( - &mupdf_ffi::Device::new(page.ctx(), Box::new(device))?, + &mupdf_ffi::Device::new(page.ctx(), Box::new(device.clone()))?, fz_matrix { a: 1.0, b: 0.0, @@ -3508,9 +3544,7 @@ impl Page { .expect("already dropped all other references") .into_inner(); // we convert to xml and parse that becuase the mupdf rust crate doesn't include all the API surface we need. - let json = page.stext_page_as_json_from_page(1.0)?; - todo!("{json}"); - let xml = page.to_xml()?; + let xml: String = todo!("page.to_xml()?"); let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?; for xml_block in xml_page.block { for xml_line in xml_block.line { @@ -3649,27 +3683,29 @@ fn main_inner() -> Result<(), Box> { } else { None }; - let mut parser = Parser::new(); - let is_subset = page_numbers.is_some(); - let file_name = &args[1]; - parser.parse_pdf(file_name, page_numbers)?; - let mut insns = xml_tree::Element::new( - "instructions".into(), - [("is-subset".into(), is_subset.to_string())], - ); - insns.text = "\n".into(); - insns.tail = "\n".into(); - let mut comment = - xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); - comment.tail = "\n".into(); - insns.children.push(comment); - for insn in parser.insns { - insn.write_xml(&mut insns); - } - let mut output = Vec::new(); - insns.write(&mut output, true)?; - std::fs::write("powerisa-instructions.xml", output)?; - Ok(()) + mupdf_ffi::Context::with(|ctx| { + let mut parser = Parser::new(); + let is_subset = page_numbers.is_some(); + let file_name = &args[1]; + parser.parse_pdf(ctx, file_name, page_numbers)?; + let mut insns = xml_tree::Element::new( + "instructions".into(), + [("is-subset".into(), is_subset.to_string())], + ); + insns.text = "\n".into(); + insns.tail = "\n".into(); + let mut comment = + xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); + comment.tail = "\n".into(); + insns.children.push(comment); + for insn in parser.insns { + insn.write_xml(&mut insns); + } + let mut output = Vec::new(); + insns.write(&mut output, true)?; + std::fs::write("powerisa-instructions.xml", output)?; + Ok(()) + }) } fn main() -> std::process::ExitCode { diff --git a/src/mupdf_ffi.rs b/src/mupdf_ffi.rs index 3eb24fb..2e8f325 100644 --- a/src/mupdf_ffi.rs +++ b/src/mupdf_ffi.rs @@ -4,14 +4,15 @@ use mupdf_sys::{ fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document, fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, - fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, fz_rect, - fz_stroke_state, fz_text, fz_transform_point, fz_transform_point_xy, fz_walk_path, - mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page, - mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page, + fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, + fz_rect, fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point, + fz_transform_point_xy, fz_walk_path, mupdf_document_page_count, mupdf_drop_error, + mupdf_error_t, mupdf_load_page, mupdf_new_base_context, mupdf_new_derived_device, + mupdf_open_document, mupdf_run_page, }; use std::{ cell::Cell, - ffi::{CStr, CString, c_int, c_void}, + ffi::{CStr, c_int, c_void}, fmt, marker::PhantomData, mem::ManuallyDrop, @@ -20,16 +21,16 @@ use std::{ }; #[derive(Debug)] -struct MuPdfError { +pub(crate) struct MuPdfError { type_: c_int, - message: CString, + message: String, } impl MuPdfError { fn new_generic(message: impl ToString) -> Self { Self { type_: fz_error_type_FZ_ERROR_GENERIC as _, - message: message.try_into().expect("nul byte in message"), + message: message.to_string(), } } } @@ -57,15 +58,17 @@ impl Drop for OwnedMuPdfError { } unsafe fn mupdf_try(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result { - let mut err = <*mut mupdf_error_t>::null_mut(); + let mut err = ptr::null_mut(); let retval = f(&mut err); let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else { return Ok(retval); }; unsafe { Err(MuPdfError { - type_: (*err.0).type_, - message: CString::from(CStr::from_ptr((*err.0).message)), + type_: (*err.0.as_ptr()).type_, + message: CStr::from_ptr((*err.0.as_ptr()).message) + .to_string_lossy() + .into_owned(), }) } } @@ -254,10 +257,6 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { }; (&raw mut (*dev_ptr).value).write(value); let fz_device { - refs, - hints, - flags, - close_device, drop_device, fill_path, stroke_path, @@ -268,29 +267,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { clip_text, clip_stroke_text, ignore_text, - fill_shade, - fill_image, - fill_image_mask, - clip_image_mask, - pop_clip, - begin_mask, - end_mask, - begin_group, - end_group, - begin_tile, - end_tile, - render_flags, - set_default_colorspaces, - begin_layer, - end_layer, - begin_structure, - end_structure, - begin_metatext, - end_metatext, - d1_rect, - container_len, - container_cap, - container, + .. } = &mut (*dev_ptr).base; *drop_device = Some(Self::drop_device_fn); *fill_path = Some(Self::fill_path_fn); @@ -306,7 +283,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { } } pub(crate) fn get(&self) -> &T { - unsafe { &(*self.ptr.cast::>()).value } + unsafe { &(*self.dev.cast::>()).value } } unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) { unsafe { @@ -319,10 +296,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { path: *const fz_path, even_odd: c_int, cmt: fz_matrix, - color_space: *mut fz_colorspace, - color: *const f32, - alpha: f32, - color_params: fz_color_params, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, ) { let Some(ctx) = NonNull::new(ctx) else { return; @@ -343,12 +320,12 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx: *mut fz_context, dev: *mut fz_device, path: *const fz_path, - stroke_state: *const fz_stroke_state, + _stroke_state: *const fz_stroke_state, cmt: fz_matrix, - color_space: *mut fz_colorspace, - color: *const f32, - alpha: f32, - color_params: fz_color_params, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, ) { let Some(ctx) = NonNull::new(ctx) else { return; @@ -392,7 +369,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx: *mut fz_context, dev: *mut fz_device, path: *const fz_path, - stroke_state: *const fz_stroke_state, + _stroke_state: *const fz_stroke_state, cmt: fz_matrix, scissor: fz_rect, ) { @@ -416,10 +393,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, text: *const fz_text, cmt: fz_matrix, - color_space: *mut fz_colorspace, - color: *const f32, - alpha: f32, - color_params: fz_color_params, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, ) { let Some(ctx) = NonNull::new(ctx) else { return; @@ -439,12 +416,12 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx: *mut fz_context, dev: *mut fz_device, text: *const fz_text, - stroke_state: *const fz_stroke_state, + _stroke_state: *const fz_stroke_state, cmt: fz_matrix, - color_space: *mut fz_colorspace, - color: *const f32, - alpha: f32, - color_params: fz_color_params, + _color_space: *mut fz_colorspace, + _color: *const f32, + _alpha: f32, + _color_params: fz_color_params, ) { let Some(ctx) = NonNull::new(ctx) else { return; @@ -486,7 +463,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx: *mut fz_context, dev: *mut fz_device, text: *const fz_text, - stroke_state: *const fz_stroke_state, + _stroke_state: *const fz_stroke_state, cmt: fz_matrix, scissor: fz_rect, ) { @@ -716,6 +693,77 @@ impl<'ctx> Drop for Text<'ctx> { } } +impl<'ctx> Text<'ctx> { + pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> { + TextSpanIter { + ptr: unsafe { NonNull::new((*self.ptr).head) }, + ctx: self.ctx, + _phantom: PhantomData, + } + } +} + +#[derive(Clone)] +pub(crate) struct TextSpanIter<'a, 'ctx> { + ptr: Option>, + ctx: ContextRef<'ctx>, + _phantom: PhantomData<&'a Text<'ctx>>, +} + +impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> { + type Item = TextSpanRef<'a, 'ctx>; + + fn next(&mut self) -> Option { + let ptr = self.ptr?; + self.ptr = NonNull::new(unsafe { ptr.as_ref().next }); + Some(TextSpanRef { + ptr: unsafe { &*ptr.as_ptr() }, + ctx: self.ctx, + _phantom: PhantomData, + }) + } +} + +#[derive(Copy, Clone)] +pub(crate) struct TextSpanRef<'a, 'ctx> { + ptr: &'a fz_text_span, + ctx: ContextRef<'ctx>, + _phantom: PhantomData<&'a Text<'ctx>>, +} + +impl<'a, 'ctx> TextSpanRef<'a, 'ctx> { + pub(crate) fn get(self) -> &'a fz_text_span { + self.ptr + } + pub(crate) fn font(self) -> FontRef<'a, 'ctx> { + FontRef { + ptr: unsafe { &*self.ptr.font }, + ctx: self.ctx, + _phantom: PhantomData, + } + } + pub(crate) fn items(self) -> &'a [fz_text_item] { + let len = self.ptr.len as usize; + if len == 0 { + return &[]; + } + unsafe { std::slice::from_raw_parts(self.ptr.items, len) } + } +} + +#[derive(Clone, Copy)] +pub(crate) struct FontRef<'a, 'ctx> { + ptr: &'a fz_font, + ctx: ContextRef<'ctx>, + _phantom: PhantomData<&'a Text<'ctx>>, +} + +impl<'a, 'ctx> FontRef<'a, 'ctx> { + pub(crate) fn get(self) -> &'a fz_font { + self.ptr + } +} + pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point { unsafe { fz_transform_point(point, m) } }