This commit is contained in:
Jacob Lifshay 2026-01-05 18:33:44 -08:00
parent 103f986bc0
commit a677cd8a33
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
2 changed files with 169 additions and 85 deletions

View file

@ -3318,7 +3318,43 @@ impl MyDevice {
} }
} }
fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) { fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) {
todo!() for span in text.spans() {
let mupdf_sys::fz_text_span { trm, .. } = span.get();
let mupdf_sys::fz_font {
refs,
name,
buffer,
flags,
ft_face,
shaper_data,
t3matrix,
t3resources,
t3procs,
t3lists,
t3widths,
t3flags,
t3doc,
t3run,
t3freeres,
bbox,
ascender,
descender,
glyph_count,
bbox_table,
use_glyph_bbox,
width_count,
width_default,
width_table,
advance_cache,
encoding_cache,
has_digest,
digest,
subfont,
} = *span.font().get();
for item in span.items() {
todo!()
}
}
} }
} }
@ -3487,7 +3523,7 @@ impl Page {
) -> Result<Self, Box<dyn std::error::Error>> { ) -> Result<Self, Box<dyn std::error::Error>> {
let device = MyDevice::new(page_num); let device = MyDevice::new(page_num);
page.run( page.run(
&mupdf_ffi::Device::new(page.ctx(), Box::new(device))?, &mupdf_ffi::Device::new(page.ctx(), Box::new(device.clone()))?,
fz_matrix { fz_matrix {
a: 1.0, a: 1.0,
b: 0.0, b: 0.0,
@ -3508,9 +3544,7 @@ impl Page {
.expect("already dropped all other references") .expect("already dropped all other references")
.into_inner(); .into_inner();
// we convert to xml and parse that because the mupdf rust crate doesn't include all the API surface we need. // we convert to xml and parse that because the mupdf rust crate doesn't include all the API surface we need.
let json = page.stext_page_as_json_from_page(1.0)?; let xml: String = todo!("page.to_xml()?");
todo!("{json}");
let xml = page.to_xml()?;
let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?; let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?;
for xml_block in xml_page.block { for xml_block in xml_page.block {
for xml_line in xml_block.line { for xml_line in xml_block.line {
@ -3649,27 +3683,29 @@ fn main_inner() -> Result<(), Box<dyn std::error::Error>> {
} else { } else {
None None
}; };
let mut parser = Parser::new(); mupdf_ffi::Context::with(|ctx| {
let is_subset = page_numbers.is_some(); let mut parser = Parser::new();
let file_name = &args[1]; let is_subset = page_numbers.is_some();
parser.parse_pdf(file_name, page_numbers)?; let file_name = &args[1];
let mut insns = xml_tree::Element::new( parser.parse_pdf(ctx, file_name, page_numbers)?;
"instructions".into(), let mut insns = xml_tree::Element::new(
[("is-subset".into(), is_subset.to_string())], "instructions".into(),
); [("is-subset".into(), is_subset.to_string())],
insns.text = "\n".into(); );
insns.tail = "\n".into(); insns.text = "\n".into();
let mut comment = insns.tail = "\n".into();
xml_tree::Element::comment(format!(" Automatically generated from {file_name} ")); let mut comment =
comment.tail = "\n".into(); xml_tree::Element::comment(format!(" Automatically generated from {file_name} "));
insns.children.push(comment); comment.tail = "\n".into();
for insn in parser.insns { insns.children.push(comment);
insn.write_xml(&mut insns); for insn in parser.insns {
} insn.write_xml(&mut insns);
let mut output = Vec::new(); }
insns.write(&mut output, true)?; let mut output = Vec::new();
std::fs::write("powerisa-instructions.xml", output)?; insns.write(&mut output, true)?;
Ok(()) std::fs::write("powerisa-instructions.xml", output)?;
Ok(())
})
} }
fn main() -> std::process::ExitCode { fn main() -> std::process::ExitCode {

View file

@ -4,14 +4,15 @@
use mupdf_sys::{ use mupdf_sys::{
fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document, fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document,
fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text,
fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, fz_rect, fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point,
fz_stroke_state, fz_text, fz_transform_point, fz_transform_point_xy, fz_walk_path, fz_rect, fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point,
mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page, fz_transform_point_xy, fz_walk_path, mupdf_document_page_count, mupdf_drop_error,
mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page, mupdf_error_t, mupdf_load_page, mupdf_new_base_context, mupdf_new_derived_device,
mupdf_open_document, mupdf_run_page,
}; };
use std::{ use std::{
cell::Cell, cell::Cell,
ffi::{CStr, CString, c_int, c_void}, ffi::{CStr, c_int, c_void},
fmt, fmt,
marker::PhantomData, marker::PhantomData,
mem::ManuallyDrop, mem::ManuallyDrop,
@ -20,16 +21,16 @@ use std::{
}; };
#[derive(Debug)] #[derive(Debug)]
struct MuPdfError { pub(crate) struct MuPdfError {
type_: c_int, type_: c_int,
message: CString, message: String,
} }
impl MuPdfError { impl MuPdfError {
fn new_generic(message: impl ToString) -> Self { fn new_generic(message: impl ToString) -> Self {
Self { Self {
type_: fz_error_type_FZ_ERROR_GENERIC as _, type_: fz_error_type_FZ_ERROR_GENERIC as _,
message: message.try_into().expect("nul byte in message"), message: message.to_string(),
} }
} }
} }
@ -57,15 +58,17 @@ impl Drop for OwnedMuPdfError {
} }
unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> { unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> {
let mut err = <*mut mupdf_error_t>::null_mut(); let mut err = ptr::null_mut();
let retval = f(&mut err); let retval = f(&mut err);
let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else { let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else {
return Ok(retval); return Ok(retval);
}; };
unsafe { unsafe {
Err(MuPdfError { Err(MuPdfError {
type_: (*err.0).type_, type_: (*err.0.as_ptr()).type_,
message: CString::from(CStr::from_ptr((*err.0).message)), message: CStr::from_ptr((*err.0.as_ptr()).message)
.to_string_lossy()
.into_owned(),
}) })
} }
} }
@ -254,10 +257,6 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
}; };
(&raw mut (*dev_ptr).value).write(value); (&raw mut (*dev_ptr).value).write(value);
let fz_device { let fz_device {
refs,
hints,
flags,
close_device,
drop_device, drop_device,
fill_path, fill_path,
stroke_path, stroke_path,
@ -268,29 +267,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
clip_text, clip_text,
clip_stroke_text, clip_stroke_text,
ignore_text, ignore_text,
fill_shade, ..
fill_image,
fill_image_mask,
clip_image_mask,
pop_clip,
begin_mask,
end_mask,
begin_group,
end_group,
begin_tile,
end_tile,
render_flags,
set_default_colorspaces,
begin_layer,
end_layer,
begin_structure,
end_structure,
begin_metatext,
end_metatext,
d1_rect,
container_len,
container_cap,
container,
} = &mut (*dev_ptr).base; } = &mut (*dev_ptr).base;
*drop_device = Some(Self::drop_device_fn); *drop_device = Some(Self::drop_device_fn);
*fill_path = Some(Self::fill_path_fn); *fill_path = Some(Self::fill_path_fn);
@ -306,7 +283,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
} }
} }
pub(crate) fn get(&self) -> &T { pub(crate) fn get(&self) -> &T {
unsafe { &(*self.ptr.cast::<DeviceStruct<T>>()).value } unsafe { &(*self.dev.cast::<DeviceStruct<T>>()).value }
} }
unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) { unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) {
unsafe { unsafe {
@ -319,10 +296,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
path: *const fz_path, path: *const fz_path,
even_odd: c_int, even_odd: c_int,
cmt: fz_matrix, cmt: fz_matrix,
color_space: *mut fz_colorspace, _color_space: *mut fz_colorspace,
color: *const f32, _color: *const f32,
alpha: f32, _alpha: f32,
color_params: fz_color_params, _color_params: fz_color_params,
) { ) {
let Some(ctx) = NonNull::new(ctx) else { let Some(ctx) = NonNull::new(ctx) else {
return; return;
@ -343,12 +320,12 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context, ctx: *mut fz_context,
dev: *mut fz_device, dev: *mut fz_device,
path: *const fz_path, path: *const fz_path,
stroke_state: *const fz_stroke_state, _stroke_state: *const fz_stroke_state,
cmt: fz_matrix, cmt: fz_matrix,
color_space: *mut fz_colorspace, _color_space: *mut fz_colorspace,
color: *const f32, _color: *const f32,
alpha: f32, _alpha: f32,
color_params: fz_color_params, _color_params: fz_color_params,
) { ) {
let Some(ctx) = NonNull::new(ctx) else { let Some(ctx) = NonNull::new(ctx) else {
return; return;
@ -392,7 +369,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context, ctx: *mut fz_context,
dev: *mut fz_device, dev: *mut fz_device,
path: *const fz_path, path: *const fz_path,
stroke_state: *const fz_stroke_state, _stroke_state: *const fz_stroke_state,
cmt: fz_matrix, cmt: fz_matrix,
scissor: fz_rect, scissor: fz_rect,
) { ) {
@ -416,10 +393,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
dev: *mut fz_device, dev: *mut fz_device,
text: *const fz_text, text: *const fz_text,
cmt: fz_matrix, cmt: fz_matrix,
color_space: *mut fz_colorspace, _color_space: *mut fz_colorspace,
color: *const f32, _color: *const f32,
alpha: f32, _alpha: f32,
color_params: fz_color_params, _color_params: fz_color_params,
) { ) {
let Some(ctx) = NonNull::new(ctx) else { let Some(ctx) = NonNull::new(ctx) else {
return; return;
@ -439,12 +416,12 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context, ctx: *mut fz_context,
dev: *mut fz_device, dev: *mut fz_device,
text: *const fz_text, text: *const fz_text,
stroke_state: *const fz_stroke_state, _stroke_state: *const fz_stroke_state,
cmt: fz_matrix, cmt: fz_matrix,
color_space: *mut fz_colorspace, _color_space: *mut fz_colorspace,
color: *const f32, _color: *const f32,
alpha: f32, _alpha: f32,
color_params: fz_color_params, _color_params: fz_color_params,
) { ) {
let Some(ctx) = NonNull::new(ctx) else { let Some(ctx) = NonNull::new(ctx) else {
return; return;
@ -486,7 +463,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context, ctx: *mut fz_context,
dev: *mut fz_device, dev: *mut fz_device,
text: *const fz_text, text: *const fz_text,
stroke_state: *const fz_stroke_state, _stroke_state: *const fz_stroke_state,
cmt: fz_matrix, cmt: fz_matrix,
scissor: fz_rect, scissor: fz_rect,
) { ) {
@ -716,6 +693,77 @@ impl<'ctx> Drop for Text<'ctx> {
} }
} }
impl<'ctx> Text<'ctx> {
    /// Returns an iterator over the spans of this text object.
    ///
    /// Iteration starts at the `head` pointer of the underlying `fz_text`;
    /// a null `head` yields an empty iterator.
    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
        // SAFETY: NOTE(review): relies on `self.ptr` pointing at a live
        // `fz_text` for as long as `self` exists -- confirm against the
        // constructor of `Text`.
        let head = unsafe { (*self.ptr).head };
        TextSpanIter {
            ptr: NonNull::new(head),
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
}
/// Iterator over the spans of a `Text`, yielding one `TextSpanRef` per span
/// and advancing through each span's `next` pointer.
#[derive(Clone)]
pub(crate) struct TextSpanIter<'a, 'ctx> {
/// Span to yield on the next call; `None` once the end of the list is reached.
ptr: Option<NonNull<fz_text_span>>,
/// Context handle copied from the originating `Text` and handed to every yielded span.
ctx: ContextRef<'ctx>,
/// Ties `'a` to a borrow of the `Text` being iterated.
_phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
type Item = TextSpanRef<'a, 'ctx>;
fn next(&mut self) -> Option<Self::Item> {
let ptr = self.ptr?;
self.ptr = NonNull::new(unsafe { ptr.as_ref().next });
Some(TextSpanRef {
ptr: unsafe { &*ptr.as_ptr() },
ctx: self.ctx,
_phantom: PhantomData,
})
}
}
/// Copyable borrowed view of a single `fz_text_span`, as yielded by `TextSpanIter`.
#[derive(Copy, Clone)]
pub(crate) struct TextSpanRef<'a, 'ctx> {
/// The underlying span.
ptr: &'a fz_text_span,
/// Context handle carried along from the iterator; forwarded to `FontRef` by `font()`.
ctx: ContextRef<'ctx>,
/// Ties `'a` to a borrow of the originating `Text`.
_phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
    /// Returns the raw `fz_text_span` this reference wraps.
    pub(crate) fn get(self) -> &'a fz_text_span {
        self.ptr
    }

    /// Returns a borrowed view of the font referenced by this span.
    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
        FontRef {
            // SAFETY: NOTE(review): assumes the span's `font` pointer is
            // non-null and valid for `'a` -- confirm that MuPDF never emits
            // a span without a font.
            ptr: unsafe { &*self.ptr.font },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }

    /// Returns the span's items as a slice.
    ///
    /// An empty slice is returned when the span reports no items, when the
    /// reported length is negative, or when the `items` pointer is null, so
    /// `slice::from_raw_parts` is never called with a null pointer or a
    /// length produced by wrapping a negative C `int`.
    pub(crate) fn items(self) -> &'a [fz_text_item] {
        // Checked conversion: a negative length maps to 0 instead of wrapping
        // to a huge `usize` the way an `as` cast would.
        let len = usize::try_from(self.ptr.len).unwrap_or(0);
        if len == 0 || self.ptr.items.is_null() {
            return &[];
        }
        // SAFETY: `items` is non-null and `len` is a positive count taken from
        // the span itself. NOTE(review): this still assumes `items` points to
        // at least `len` initialized elements that live for `'a`.
        unsafe { std::slice::from_raw_parts(self.ptr.items, len) }
    }
}
/// Copyable borrowed view of an `fz_font`, obtained from `TextSpanRef::font`.
#[derive(Clone, Copy)]
pub(crate) struct FontRef<'a, 'ctx> {
/// The underlying font structure.
ptr: &'a fz_font,
/// Context handle carried along from the span this font was obtained from.
ctx: ContextRef<'ctx>,
/// Ties `'a` to a borrow of the originating `Text`.
_phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> FontRef<'a, 'ctx> {
pub(crate) fn get(self) -> &'a fz_font {
self.ptr
}
}
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point { pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
unsafe { fz_transform_point(point, m) } unsafe { fz_transform_point(point, m) }
} }