From 040afcc435e69fa234caaef2719c1bc188c2999c Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Tue, 6 Jan 2026 08:00:38 -0800 Subject: [PATCH] extracts pdf items using mupdf-sys directly --- src/main.rs | 384 ++++++++++++++++++++++++++++------------------- src/mupdf_ffi.rs | 263 ++++++++++++++++++-------------- 2 files changed, 373 insertions(+), 274 deletions(-) diff --git a/src/main.rs b/src/main.rs index b56937c..a6a36e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,14 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::quad_tree::QuadTree; +use crate::{ + mupdf_ffi::{ + WriteMode, add_points, point_max_components, point_min_components, transform_vector, + }, + quad_tree::QuadTree, +}; use indexmap::IndexSet; -use mupdf_sys::{FZ_STEXT_BOLD, fz_matrix}; +use mupdf_sys::{fz_matrix, fz_point, fz_text_item}; use non_nan_float::NonNaNF32; use std::{ backtrace::Backtrace, @@ -135,6 +140,12 @@ macro_rules! make_enum_font { panic!("invalid font name with id") } } + const fn known_from_name_with_tag(font_name_with_tag: &str, size: NonNaNF32) -> Option { + match size.get() { + $($($known_font_size if str_eq(font_name_with_tag, $known_font_name_with_tag) => Some(Self::$KnownFont),)*)* + _ => None, + } + } const fn new_known(font_name: &str, size: NonNaNF32) -> Option { match size.get() { $($($known_font_size if str_eq(font_name, const { @@ -3167,25 +3178,32 @@ impl<'ctx> Parser<'ctx> { } } -#[derive(Clone, Debug)] -struct MyDevice { +#[derive(Debug)] +struct MyDevice<'a> { page_num: u32, - qt: Rc>>>, + qt: RefCell>>, unprocessed_chars: Rc>>>>>>, unprocessed_non_text: Rc>>, + first_seen_fonts: RefCell<&'a mut BTreeMap>>, + error: RefCell>>, } -impl MyDevice { - fn new(page_num: u32) -> Self { +impl<'a> MyDevice<'a> { + fn new(page_num: u32, first_seen_fonts: &'a mut BTreeMap>) -> Self { Self { page_num, qt: Default::default(), unprocessed_chars: Default::default(), unprocessed_non_text: Default::default(), + first_seen_fonts: RefCell::new(first_seen_fonts), + error: RefCell::new(Ok(())), } } - fn path(&self, path: &mupdf_ffi::Path<'_>, cmt: fz_matrix) { + fn path(&self, path: &mupdf_ffi::Path<'_>, ctm: fz_matrix) { + if self.error.borrow().is_err() { + return; + } enum Walker { Empty, Moved { x: f32, y: f32 }, @@ -3262,19 +3280,19 @@ impl MyDevice { p1_y, }) => { let mupdf_sys::fz_point { x: p0_x, y: p0_y } = - mupdf_ffi::transform_point_xy(p0_x.get(), p0_y.get(), cmt); + mupdf_ffi::transform_point_xy(p0_x.get(), p0_y.get(), ctm); let mupdf_sys::fz_point { x: p1_x, y: p1_y } = - mupdf_ffi::transform_point_xy(p1_x.get(), p1_y.get(), cmt); + mupdf_ffi::transform_point_xy(p1_x.get(), p1_y.get(), ctm); let Some(line) = new_line(p0_x, p0_y, p1_x, p1_y) else { return; }; LineOrRect::Line(line) } Walker::Rect { x1, y1, x2, y2 } => { - let p1 = mupdf_ffi::transform_point_xy(x1, y1, cmt); - let p2 = mupdf_ffi::transform_point_xy(x2, y1, cmt); - let p3 = mupdf_ffi::transform_point_xy(x2, y2, cmt); - let p4 = mupdf_ffi::transform_point_xy(x1, y2, cmt); + let p1 = mupdf_ffi::transform_point_xy(x1, y1, ctm); + let p2 = mupdf_ffi::transform_point_xy(x2, y1, ctm); + let p3 = mupdf_ffi::transform_point_xy(x2, y2, ctm); + let p4 = mupdf_ffi::transform_point_xy(x1, y2, ctm); let min_x = NonNaNF32::new(p1.x.min(p2.x).min(p3.x).min(p4.x)); let max_x = NonNaNF32::new(p1.x.max(p2.x).max(p3.x).max(p4.x)); let min_y = NonNaNF32::new(p1.y.min(p2.y).min(p3.y).min(p4.y)); @@ -3317,65 +3335,192 @@ impl MyDevice { ); } } - fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) { + fn text(&self, text: &mupdf_ffi::Text<'_>, ctm: fz_matrix) { + if self.error.borrow().is_err() { + return; + } + let mut first_seen_fonts = self.first_seen_fonts.borrow_mut(); for span in text.spans() { - let mupdf_sys::fz_text_span { trm, .. } = span.get(); - let mupdf_sys::fz_font { - refs, - name, - buffer, - flags, - ft_face, - shaper_data, - t3matrix, - t3resources, - t3procs, - t3lists, - t3widths, - t3flags, - t3doc, - t3run, - t3freeres, - bbox, - ascender, - descender, - glyph_count, - bbox_table, - use_glyph_bbox, - width_count, - width_default, - width_table, - advance_cache, - encoding_cache, - has_digest, - digest, - subfont, - } = *span.font().get(); - for item in span.items() { - todo!() + let tm = span.trm(); + const ROUND_FACTOR: f32 = 1000.0; + let font_size = (mupdf_ffi::matrix_expansion(tm) * ROUND_FACTOR).round() / ROUND_FACTOR; + let Some(font_size) = NonNaNF32::new(font_size) else { + continue; + }; + let font_name_with_tag = span.font().name(); + let font_name_with_tag = match font_name_with_tag { + "CGMSHV+DejaVuSansCondensed-Obli" => "CGMSHV+DejaVuSansCondensed-Oblique", + "YDJYQV+DejaVuSansCondensed-Bold" => "YDJYQV+DejaVuSansCondensed-BoldOblique", + "NHUPPK+DejaVuSansCondensed-Bold" => "NHUPPK+DejaVuSansCondensed-Bold", + _ if font_name_with_tag.len() == 31 => { + let _ = self.error.replace(Err(format!( + "probably truncated font name: {font_name_with_tag:?}" + ) + .into())); + return; + } + _ => font_name_with_tag, + }; + for &fz_text_item { + x, + y, + adv, + gid, + ucs, + cid: _, + } in span.items() + { + let adv = if gid >= 0 { adv } else { 0.0 }; + let tm = fz_matrix { e: x, f: y, ..tm }; + let trm = mupdf_ffi::concat(tm, ctm); + let dir = match span.write_mode() { + WriteMode::Horizontal => fz_point { x: 1.0, y: 0.0 }, + WriteMode::Vertical => fz_point { x: 0.0, y: -1.0 }, + }; + let dir = mupdf_ffi::transform_vector(dir, trm); + let glyph_start; + let glyph_stop; + let glyph_ascender; + let glyph_descender; + match span.write_mode() { + WriteMode::Horizontal => { + glyph_start = fz_point { x: trm.e, y: trm.f }; + glyph_stop = fz_point { + x: trm.e + adv * dir.x, + y: trm.f + adv * dir.y, + }; + glyph_ascender = fz_point { + x: 0.0, + y: span.font().ascender(), + }; + glyph_descender = fz_point { + x: 0.0, + y: span.font().descender(), + }; + } + WriteMode::Vertical => { + glyph_start = fz_point { + x: trm.e - adv * dir.x, + y: trm.f - adv * dir.y, + }; + glyph_stop = fz_point { x: trm.e, y: trm.f }; + glyph_ascender = fz_point { x: 1.0, y: 0.0 }; + glyph_descender = fz_point { x: 0.0, y: 0.0 }; + } + }; + let glyph_ascender = transform_vector(glyph_ascender, trm); + let glyph_descender = transform_vector(glyph_descender, trm); + let points = [ + add_points(glyph_start, glyph_descender), + add_points(glyph_start, glyph_ascender), + add_points(glyph_stop, glyph_descender), + add_points(glyph_stop, glyph_ascender), + ]; + let min = point_min_components( + point_min_components(point_min_components(points[0], points[1]), points[2]), + points[3], + ); + let max = point_max_components( + point_max_components(point_max_components(points[0], points[1]), points[2]), + points[3], + ); + let Some(ch) = u32::try_from(ucs).ok().and_then(|v| char::try_from(v).ok()) else { + continue; + }; + let text = String::from(ch); + if text.trim().is_empty() { + continue; + } + let font = Font::known_from_name_with_tag(font_name_with_tag, font_size) + .unwrap_or_else(|| Font::Other { + font_name: font_name_with_tag.into(), + size: font_size, + }); + let Some(text_section) = TextSection::for_position( + self.page_num, + (min.x + max.x) * 0.5, + (min.y + max.y) * 0.5, + ) else { + if PAGE_BODY_MIN_Y <= min.y && min.y <= PAGE_BODY_MAX_Y { + if self.page_num != 1072 { + // page 1072 has characters in the margins + let _ = self.error.replace(Err(format!( + "char not in text section: {text:?}\npage_num={}", + self.page_num, + ) + .into())); + return; + } + } + continue; + }; + let (Some(min_x), Some(min_y), Some(max_x), Some(max_y)) = ( + NonNaNF32::new(min.x), + NonNaNF32::new(min.y), + NonNaNF32::new(max.x), + NonNaNF32::new(max.y), + ) else { + let _ = self + .error + .replace(Err("char position shouldn't be NaN".into())); + return; + }; + let char = Char { + font, + text, + min_x, + min_y, + max_x, + max_y, + }; + let set = match first_seen_fonts.get_mut(font_name_with_tag) { + Some(v) => v, + None => first_seen_fonts + .entry(String::from(font_name_with_tag)) + .or_default(), + }; + if set.insert(font_size) { + println!( + "first seen font: {font_name_with_tag:?} {font_size}: page {} {char:?}", + self.page_num, + ); + } + self.qt + .borrow_mut() + .entry(text_section) + .or_default() + .insert(min_x.get(), min_y.get(), PageItem::Char(char.clone())); + self.unprocessed_chars + .borrow_mut() + .entry(text_section) + .or_default() + .borrow_mut() + .entry(char.font.clone()) + .or_default() + .insert(char); } } } } -impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice { +impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice<'_> { fn fill_path( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, _even_odd: bool, - cmt: fz_matrix, + ctm: fz_matrix, ) { - self.path(path, cmt); + self.path(path, ctm); } fn stroke_path( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, ) { - self.path(path, cmt); + self.path(path, ctm); } fn clip_path( @@ -3383,67 +3528,67 @@ impl<'ctx> mupdf_ffi::DeviceCallbacks<'ctx> for MyDevice { _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, _even_odd: bool, - cmt: fz_matrix, + ctm: fz_matrix, _scissor: mupdf_sys::fz_rect, ) { - self.path(path, cmt); + self.path(path, ctm); } fn clip_stroke_path( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, path: &mupdf_ffi::Path<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, _scissor: mupdf_sys::fz_rect, ) { - self.path(path, cmt); + self.path(path, ctm); } fn fill_text( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, ) { - self.text(text, cmt); + self.text(text, ctm); } fn stroke_text( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, ) { - self.text(text, cmt); + self.text(text, ctm); } fn clip_text( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, _scissor: mupdf_sys::fz_rect, ) { - self.text(text, cmt); + self.text(text, ctm); } fn clip_stroke_text( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, _scissor: mupdf_sys::fz_rect, ) { - self.text(text, cmt); + self.text(text, ctm); } fn ignore_text( &self, _ctx: mupdf_ffi::ContextRef<'ctx>, text: &mupdf_ffi::Text<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, ) { - self.text(text, cmt); + self.text(text, ctm); } } @@ -3521,9 +3666,12 @@ impl Page { page: &mupdf_ffi::Page<'_>, first_seen_fonts: &mut BTreeMap>, ) -> Result> { - let device = MyDevice::new(page_num); + let device = mupdf_ffi::Device::new( + page.ctx(), + Box::new(MyDevice::new(page_num, first_seen_fonts)), + )?; page.run( - &mupdf_ffi::Device::new(page.ctx(), Box::new(device.clone()))?, + &device, fz_matrix { a: 1.0, b: 0.0, @@ -3534,94 +3682,14 @@ impl Page { }, )?; let MyDevice { - page_num, + page_num: _, qt, unprocessed_chars, unprocessed_non_text, - } = device; - let mut qt = Rc::try_unwrap(qt) - .ok() - .expect("already dropped all other references") - .into_inner(); - // we convert to xml and parse that becuase the mupdf rust crate doesn't include all the API surface we need. - let xml: String = todo!("page.to_xml()?"); - let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?; - for xml_block in xml_page.block { - for xml_line in xml_block.line { - for xml_font in xml_line.font { - const ROUND_FACTOR: f32 = 1000.0; - let font_size = (xml_font.size * ROUND_FACTOR).round() / ROUND_FACTOR; - let font_size = NonNaNF32::new(font_size).ok_or("font size must not be NaN")?; - for xml_char in xml_font.char { - if xml_char.c.trim().is_empty() { - continue; - } - let font_name = match &*xml_font.name { - "DejaVuSansCondensed-Obli" => { - if (xml_char.flags & FZ_STEXT_BOLD) != 0 { - "DejaVuSansCondensed-BoldOblique" - } else { - "DejaVuSansCondensed-Oblique" - } - } - font_name => font_name, - }; - let font = Font::new(font_name, font_size); - let [x0, y0, x1, y1, x2, y2, x3, y3] = xml_char.quad; - let min_x = x0.min(x1).min(x2).min(x3); - let max_x = x0.max(x1).max(x2).max(x3); - let min_y = y0.min(y1).min(y2).min(y3); - let max_y = y0.max(y1).max(y2).max(y3); - let Some(text_section) = TextSection::for_position( - page_num, - (min_x + max_x) * 0.5, - (min_y + max_y) * 0.5, - ) else { - if PAGE_BODY_MIN_Y <= min_y && min_y <= PAGE_BODY_MAX_Y { - if page_num != 1072 { - // page 1072 has characters in the margins - return Err( - format!("char not in text section: {xml_char:?}\npage_num={page_num}").into(), - ); - } - } - continue; - }; - let char = Char { - font: font.clone(), - text: xml_char.c.into_owned(), - min_x: NonNaNF32::new(min_x).ok_or("char position shouldn't be NaN")?, - min_y: NonNaNF32::new(min_y).ok_or("char position shouldn't be NaN")?, - max_x: NonNaNF32::new(max_x).ok_or("char position shouldn't be NaN")?, - max_y: NonNaNF32::new(max_y).ok_or("char position shouldn't be NaN")?, - }; - let set = match first_seen_fonts.get_mut(font_name) { - Some(v) => v, - None => first_seen_fonts.entry(String::from(font_name)).or_default(), - }; - if set.insert(font_size) { - println!( - "first seen font: {font_name:?} {font_size}: page {page_num} {char:?} {:x}", - xml_char.flags, - ); - } - qt.entry(text_section).or_default().insert( - min_x, - min_y, - PageItem::Char(char.clone()), - ); - unprocessed_chars - .borrow_mut() - .entry(text_section) - .or_default() - .borrow_mut() - .entry(char.font.clone()) - .or_default() - .insert(char); - } - } - } - } + first_seen_fonts: _, + error, + } = device.get(); + error.replace(Ok(()))?; for (text_section, i) in unprocessed_chars.borrow_mut().iter_mut() { for chars in i.borrow_mut().values_mut() { chars.sort_by_key(Char::top_down_left_to_right_sort_key); @@ -3656,9 +3724,9 @@ impl Page { } Ok(Self { page_num, - qt, - unprocessed_chars, - unprocessed_non_text, + qt: qt.take(), + unprocessed_chars: unprocessed_chars.clone(), + unprocessed_non_text: unprocessed_non_text.clone(), }) } } diff --git a/src/mupdf_ffi.rs b/src/mupdf_ffi.rs index 2e8f325..942bcfc 100644 --- a/src/mupdf_ffi.rs +++ b/src/mupdf_ffi.rs @@ -2,16 +2,17 @@ // See Notices.txt for copyright information use mupdf_sys::{ - fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document, - fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, - fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, - fz_rect, fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point, - fz_transform_point_xy, fz_walk_path, mupdf_document_page_count, mupdf_drop_error, - mupdf_error_t, mupdf_load_page, mupdf_new_base_context, mupdf_new_derived_device, - mupdf_open_document, mupdf_run_page, + fz_clone_context, fz_color_params, fz_colorspace, fz_concat, fz_context, fz_device, + fz_document, fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, + fz_drop_text, fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_font_ascender, fz_font_descender, + fz_font_is_bold, fz_font_is_italic, fz_font_name, fz_matrix, fz_matrix_expansion, fz_page, + fz_path, fz_path_walker, fz_point, fz_rect, fz_stroke_state, fz_text, fz_text_item, + fz_text_span, fz_transform_point, fz_transform_point_xy, fz_transform_vector, fz_walk_path, + mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page, + mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page, }; use std::{ - cell::Cell, + cell::{Cell, UnsafeCell}, ffi::{CStr, c_int, c_void}, fmt, marker::PhantomData, @@ -104,7 +105,7 @@ impl Context { CTX.with(f) } pub(crate) fn as_ref(&self) -> ContextRef<'_> { - ContextRef(self.0, PhantomData) + unsafe { ContextRef::from_ptr(self.0.as_ptr()) } } } @@ -117,7 +118,13 @@ impl Drop for Context { } #[derive(Clone, Copy)] -pub(crate) struct ContextRef<'ctx>(NonNull, PhantomData<&'ctx Context>); +pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell); + +impl<'ctx> ContextRef<'ctx> { + unsafe fn from_ptr(ptr: *mut fz_context) -> Self { + Self(unsafe { &*ptr.cast() }) + } +} impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> { fn from(value: &'ctx Context) -> Self { @@ -137,13 +144,13 @@ impl<'ctx> Document<'ctx> { ) -> Result, MuPdfError> { let ctx = ctx.into(); unsafe { - mupdf_try(|errptr| mupdf_open_document(ctx.0.as_ptr(), file_name.as_ptr(), errptr)) + mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr)) .map(|ptr| Document { ptr, ctx }) } } pub(crate) fn page_count(&self) -> Result { unsafe { - mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.as_ptr(), self.ptr, errptr))? + mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))? .try_into() .map_err(MuPdfError::new_generic) } @@ -151,7 +158,7 @@ impl<'ctx> Document<'ctx> { pub(crate) fn load_page(&self, page: usize) -> Result, MuPdfError> { let page = page.try_into().map_err(MuPdfError::new_generic)?; unsafe { - mupdf_try(|errptr| mupdf_load_page(self.ctx.0.as_ptr(), self.ptr, page, errptr)) + mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr)) .map(|ptr| Page { ptr, ctx: self.ctx }) } } @@ -160,7 +167,7 @@ impl<'ctx> Document<'ctx> { impl<'ctx> Drop for Document<'ctx> { fn drop(&mut self) { unsafe { - fz_drop_document(self.ctx.0.as_ptr(), self.ptr); + fz_drop_document(self.ctx.0.get(), self.ptr); } } } @@ -182,7 +189,7 @@ impl<'ctx> Page<'ctx> { unsafe { mupdf_try(|errptr| { mupdf_run_page( - self.ctx.0.as_ptr(), + self.ctx.0.get(), self.ptr, device.dev, ctm, @@ -197,7 +204,7 @@ impl<'ctx> Page<'ctx> { impl<'ctx> Drop for Page<'ctx> { fn drop(&mut self) { unsafe { - fz_drop_page(self.ctx.0.as_ptr(), self.ptr); + fz_drop_page(self.ctx.0.get(), self.ptr); } } } @@ -209,34 +216,34 @@ pub(crate) struct Device<'ctx, T: 'ctx> { } pub(crate) trait DeviceCallbacks<'ctx> { - fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, cmt: fz_matrix); - fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, cmt: fz_matrix); + fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix); + fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix); fn clip_path( &self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ); fn clip_stroke_path( &self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ); - fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, cmt: fz_matrix); - fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, cmt: fz_matrix); - fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, cmt: fz_matrix, scissor: fz_rect); + fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); + fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); + fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect); fn clip_stroke_text( &self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ); - fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, cmt: fz_matrix); + fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix); } impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { @@ -245,7 +252,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { unsafe { let dev_ptr = mupdf_try(|errptr| { mupdf_new_derived_device::>( - ctx.0.as_ptr(), + ctx.0.get(), c"parse_powerisa_pdf::mupdf_ffi::Device", errptr, ) @@ -295,16 +302,13 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, path: *const fz_path, even_odd: c_int, - cmt: fz_matrix, + ctm: fz_matrix, _color_space: *mut fz_colorspace, _color: *const f32, _alpha: f32, _color_params: fz_color_params, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.fill_path( ctx, @@ -313,7 +317,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx, }), even_odd != 0, - cmt, + ctm, ); } unsafe extern "C" fn stroke_path_fn( @@ -321,16 +325,13 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, path: *const fz_path, _stroke_state: *const fz_stroke_state, - cmt: fz_matrix, + ctm: fz_matrix, _color_space: *mut fz_colorspace, _color: *const f32, _alpha: f32, _color_params: fz_color_params, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.stroke_path( ctx, @@ -338,7 +339,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: path.cast_mut(), ctx, }), - cmt, + ctm, ); } unsafe extern "C" fn clip_path_fn( @@ -346,13 +347,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, path: *const fz_path, even_odd: ::std::os::raw::c_int, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.clip_path( ctx, @@ -361,7 +359,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx, }), even_odd != 0, - cmt, + ctm, scissor, ); } @@ -370,13 +368,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, path: *const fz_path, _stroke_state: *const fz_stroke_state, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.clip_stroke_path( ctx, @@ -384,7 +379,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: path.cast_mut(), ctx, }), - cmt, + ctm, scissor, ); } @@ -392,16 +387,13 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx: *mut fz_context, dev: *mut fz_device, text: *const fz_text, - cmt: fz_matrix, + ctm: fz_matrix, _color_space: *mut fz_colorspace, _color: *const f32, _alpha: f32, _color_params: fz_color_params, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.fill_text( ctx, @@ -409,7 +401,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: text.cast_mut(), ctx, }), - cmt, + ctm, ); } unsafe extern "C" fn stroke_text_fn( @@ -417,16 +409,13 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, text: *const fz_text, _stroke_state: *const fz_stroke_state, - cmt: fz_matrix, + ctm: fz_matrix, _color_space: *mut fz_colorspace, _color: *const f32, _alpha: f32, _color_params: fz_color_params, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.stroke_text( ctx, @@ -434,20 +423,17 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: text.cast_mut(), ctx, }), - cmt, + ctm, ); } unsafe extern "C" fn clip_text_fn( ctx: *mut fz_context, dev: *mut fz_device, text: *const fz_text, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.clip_text( ctx, @@ -455,7 +441,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: text.cast_mut(), ctx, }), - cmt, + ctm, scissor, ); } @@ -464,13 +450,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { dev: *mut fz_device, text: *const fz_text, _stroke_state: *const fz_stroke_state, - cmt: fz_matrix, + ctm: fz_matrix, scissor: fz_rect, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.clip_stroke_text( ctx, @@ -478,7 +461,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: text.cast_mut(), ctx, }), - cmt, + ctm, scissor, ); } @@ -486,12 +469,9 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ctx: *mut fz_context, dev: *mut fz_device, text: *const fz_text, - cmt: fz_matrix, + ctm: fz_matrix, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut (*dev.cast::>()).value }; this.ignore_text( ctx, @@ -499,7 +479,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> { ptr: text.cast_mut(), ctx, }), - cmt, + ctm, ); } } @@ -508,8 +488,8 @@ impl<'ctx, T> Drop for Device<'ctx, T> { fn drop(&mut self) { unsafe { // FIXME: fz_close_device may throw exceptions - // fz_close_device(self.ctx.0.as_ptr(), self.dev); - fz_drop_device(self.ctx.0.as_ptr(), self.dev); + // fz_close_device(self.ctx.0.get(), self.dev); + fz_drop_device(self.ctx.0.get(), self.dev); } } } @@ -583,7 +563,7 @@ impl<'ctx> Path<'ctx> { pub(crate) fn walk>(&self, mut walker: W) { unsafe { fz_walk_path( - self.ctx.0.as_ptr(), + self.ctx.0.get(), self.ptr, const { &fz_path_walker { @@ -607,10 +587,7 @@ impl<'ctx> Path<'ctx> { x: f32, y: f32, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut *arg.cast::() }; this.move_to(ctx, x, y); } @@ -620,10 +597,7 @@ impl<'ctx> Path<'ctx> { x: f32, y: f32, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut *arg.cast::() }; this.line_to(ctx, x, y); } @@ -637,10 +611,7 @@ impl<'ctx> Path<'ctx> { x3: f32, y3: f32, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut *arg.cast::() }; this.curve_to(ctx, x1, y1, x2, y2, x3, y3); } @@ -648,10 +619,7 @@ impl<'ctx> Path<'ctx> { ctx: *mut fz_context, arg: *mut c_void, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut *arg.cast::() }; this.close_path(ctx); } @@ -663,10 +631,7 @@ impl<'ctx> Path<'ctx> { x2: f32, y2: f32, ) { - let Some(ctx) = NonNull::new(ctx) else { - return; - }; - let ctx = ContextRef(ctx, PhantomData); + let ctx = unsafe { ContextRef::from_ptr(ctx) }; let this = unsafe { &mut *arg.cast::() }; this.rect_to(ctx, x1, y1, x2, y2); } @@ -675,7 +640,7 @@ impl<'ctx> Path<'ctx> { impl<'ctx> Drop for Path<'ctx> { fn drop(&mut self) { unsafe { - fz_drop_path(self.ctx.0.as_ptr(), self.ptr); + fz_drop_path(self.ctx.0.get(), self.ptr); } } } @@ -688,7 +653,7 @@ pub(crate) struct Text<'ctx> { impl<'ctx> Drop for Text<'ctx> { fn drop(&mut self) { unsafe { - fz_drop_text(self.ctx.0.as_ptr(), self.ptr); + fz_drop_text(self.ctx.0.get(), self.ptr); } } } @@ -696,7 +661,7 @@ impl<'ctx> Drop for Text<'ctx> { impl<'ctx> Text<'ctx> { pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> { TextSpanIter { - ptr: unsafe { NonNull::new((*self.ptr).head) }, + ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) }, ctx: self.ctx, _phantom: PhantomData, } @@ -705,7 +670,7 @@ impl<'ctx> Text<'ctx> { #[derive(Clone)] pub(crate) struct TextSpanIter<'a, 'ctx> { - ptr: Option>, + ptr: Option<&'a UnsafeCell>, ctx: ContextRef<'ctx>, _phantom: PhantomData<&'a Text<'ctx>>, } @@ -715,9 +680,9 @@ impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> { fn next(&mut self) -> Option { let ptr = self.ptr?; - self.ptr = NonNull::new(unsafe { ptr.as_ref().next }); + self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) }; Some(TextSpanRef { - ptr: unsafe { &*ptr.as_ptr() }, + ptr, ctx: self.ctx, _phantom: PhantomData, }) @@ -726,42 +691,75 @@ impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> { #[derive(Copy, Clone)] pub(crate) struct TextSpanRef<'a, 'ctx> { - ptr: &'a fz_text_span, + ptr: &'a UnsafeCell, ctx: ContextRef<'ctx>, _phantom: PhantomData<&'a Text<'ctx>>, } +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub(crate) enum WriteMode { + Horizontal, + Vertical, +} + impl<'a, 'ctx> TextSpanRef<'a, 'ctx> { - pub(crate) fn get(self) -> &'a fz_text_span { + pub(crate) fn get(self) -> &'a UnsafeCell { self.ptr } pub(crate) fn font(self) -> FontRef<'a, 'ctx> { FontRef { - ptr: unsafe { &*self.ptr.font }, + ptr: unsafe { &*(*self.ptr.get()).font.cast::>() }, ctx: self.ctx, _phantom: PhantomData, } } + pub(crate) fn trm(self) -> fz_matrix { + unsafe { (*self.ptr.get()).trm } + } + pub(crate) fn write_mode(self) -> WriteMode { + if unsafe { (*self.ptr.get()).wmode() != 0 } { + WriteMode::Vertical + } else { + WriteMode::Horizontal + } + } pub(crate) fn items(self) -> &'a [fz_text_item] { - let len = self.ptr.len as usize; + let len = unsafe { (*self.ptr.get()).len } as usize; if len == 0 { return &[]; } - unsafe { std::slice::from_raw_parts(self.ptr.items, len) } + unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) } } } #[derive(Clone, Copy)] pub(crate) struct FontRef<'a, 'ctx> { - ptr: &'a fz_font, + ptr: &'a UnsafeCell, ctx: ContextRef<'ctx>, _phantom: PhantomData<&'a Text<'ctx>>, } impl<'a, 'ctx> FontRef<'a, 'ctx> { - pub(crate) fn get(self) -> &'a fz_font { + pub(crate) fn get(self) -> &'a UnsafeCell { self.ptr } + pub(crate) fn name(self) -> &'a str { + unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) } + .to_str() + .expect("font name isn't valid UTF-8") + } + pub(crate) fn is_bold(self) -> bool { + unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 } + } + pub(crate) fn is_italic(self) -> bool { + unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 } + } + pub(crate) fn ascender(self) -> f32 { + unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) } + } + pub(crate) fn descender(self) -> f32 { + unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) } + } } pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point { @@ -771,3 +769,36 @@ pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point { pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point { unsafe { fz_transform_point_xy(x, y, m) } } + +pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point { + unsafe { fz_transform_vector(vector, m) } +} + +pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 { + unsafe { fz_matrix_expansion(m) } +} + +pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix { + unsafe { fz_concat(left, right) } +} + +pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point { + fz_point { + x: a.x + b.x, + y: a.y + b.y, + } +} + +pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point { + fz_point { + x: a.x.min(b.x), + y: a.y.min(b.y), + } +} + +pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point { + fz_point { + x: a.x.max(b.x), + y: a.y.max(b.y), + } +}