This commit is contained in:
Jacob Lifshay 2026-01-05 18:33:44 -08:00
parent 103f986bc0
commit a677cd8a33
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
2 changed files with 169 additions and 85 deletions

View file

@ -3318,7 +3318,43 @@ impl MyDevice {
}
}
fn text(&self, text: &mupdf_ffi::Text<'_>, cmt: fz_matrix) {
todo!()
for span in text.spans() {
let mupdf_sys::fz_text_span { trm, .. } = span.get();
let mupdf_sys::fz_font {
refs,
name,
buffer,
flags,
ft_face,
shaper_data,
t3matrix,
t3resources,
t3procs,
t3lists,
t3widths,
t3flags,
t3doc,
t3run,
t3freeres,
bbox,
ascender,
descender,
glyph_count,
bbox_table,
use_glyph_bbox,
width_count,
width_default,
width_table,
advance_cache,
encoding_cache,
has_digest,
digest,
subfont,
} = *span.font().get();
for item in span.items() {
todo!()
}
}
}
}
@ -3487,7 +3523,7 @@ impl Page {
) -> Result<Self, Box<dyn std::error::Error>> {
let device = MyDevice::new(page_num);
page.run(
&mupdf_ffi::Device::new(page.ctx(), Box::new(device))?,
&mupdf_ffi::Device::new(page.ctx(), Box::new(device.clone()))?,
fz_matrix {
a: 1.0,
b: 0.0,
@ -3508,9 +3544,7 @@ impl Page {
.expect("already dropped all other references")
.into_inner();
// we convert to xml and parse that because the mupdf rust crate doesn't include all the API surface we need.
let json = page.stext_page_as_json_from_page(1.0)?;
todo!("{json}");
let xml = page.to_xml()?;
let xml: String = todo!("page.to_xml()?");
let MuPdfXml::Page(xml_page) = quick_xml::de::from_str(&xml)?;
for xml_block in xml_page.block {
for xml_line in xml_block.line {
@ -3649,27 +3683,29 @@ fn main_inner() -> Result<(), Box<dyn std::error::Error>> {
} else {
None
};
let mut parser = Parser::new();
let is_subset = page_numbers.is_some();
let file_name = &args[1];
parser.parse_pdf(file_name, page_numbers)?;
let mut insns = xml_tree::Element::new(
"instructions".into(),
[("is-subset".into(), is_subset.to_string())],
);
insns.text = "\n".into();
insns.tail = "\n".into();
let mut comment =
xml_tree::Element::comment(format!(" Automatically generated from {file_name} "));
comment.tail = "\n".into();
insns.children.push(comment);
for insn in parser.insns {
insn.write_xml(&mut insns);
}
let mut output = Vec::new();
insns.write(&mut output, true)?;
std::fs::write("powerisa-instructions.xml", output)?;
Ok(())
mupdf_ffi::Context::with(|ctx| {
let mut parser = Parser::new();
let is_subset = page_numbers.is_some();
let file_name = &args[1];
parser.parse_pdf(ctx, file_name, page_numbers)?;
let mut insns = xml_tree::Element::new(
"instructions".into(),
[("is-subset".into(), is_subset.to_string())],
);
insns.text = "\n".into();
insns.tail = "\n".into();
let mut comment =
xml_tree::Element::comment(format!(" Automatically generated from {file_name} "));
comment.tail = "\n".into();
insns.children.push(comment);
for insn in parser.insns {
insn.write_xml(&mut insns);
}
let mut output = Vec::new();
insns.write(&mut output, true)?;
std::fs::write("powerisa-instructions.xml", output)?;
Ok(())
})
}
fn main() -> std::process::ExitCode {

View file

@ -4,14 +4,15 @@
use mupdf_sys::{
fz_clone_context, fz_color_params, fz_colorspace, fz_context, fz_device, fz_document,
fz_drop_context, fz_drop_device, fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text,
fz_error_type_FZ_ERROR_GENERIC, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point, fz_rect,
fz_stroke_state, fz_text, fz_transform_point, fz_transform_point_xy, fz_walk_path,
mupdf_document_page_count, mupdf_drop_error, mupdf_error_t, mupdf_load_page,
mupdf_new_base_context, mupdf_new_derived_device, mupdf_open_document, mupdf_run_page,
fz_error_type_FZ_ERROR_GENERIC, fz_font, fz_matrix, fz_page, fz_path, fz_path_walker, fz_point,
fz_rect, fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point,
fz_transform_point_xy, fz_walk_path, mupdf_document_page_count, mupdf_drop_error,
mupdf_error_t, mupdf_load_page, mupdf_new_base_context, mupdf_new_derived_device,
mupdf_open_document, mupdf_run_page,
};
use std::{
cell::Cell,
ffi::{CStr, CString, c_int, c_void},
ffi::{CStr, c_int, c_void},
fmt,
marker::PhantomData,
mem::ManuallyDrop,
@ -20,16 +21,16 @@ use std::{
};
#[derive(Debug)]
struct MuPdfError {
pub(crate) struct MuPdfError {
type_: c_int,
message: CString,
message: String,
}
impl MuPdfError {
fn new_generic(message: impl ToString) -> Self {
Self {
type_: fz_error_type_FZ_ERROR_GENERIC as _,
message: message.try_into().expect("nul byte in message"),
message: message.to_string(),
}
}
}
@ -57,15 +58,17 @@ impl Drop for OwnedMuPdfError {
}
unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> {
let mut err = <*mut mupdf_error_t>::null_mut();
let mut err = ptr::null_mut();
let retval = f(&mut err);
let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else {
return Ok(retval);
};
unsafe {
Err(MuPdfError {
type_: (*err.0).type_,
message: CString::from(CStr::from_ptr((*err.0).message)),
type_: (*err.0.as_ptr()).type_,
message: CStr::from_ptr((*err.0.as_ptr()).message)
.to_string_lossy()
.into_owned(),
})
}
}
@ -254,10 +257,6 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
};
(&raw mut (*dev_ptr).value).write(value);
let fz_device {
refs,
hints,
flags,
close_device,
drop_device,
fill_path,
stroke_path,
@ -268,29 +267,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
clip_text,
clip_stroke_text,
ignore_text,
fill_shade,
fill_image,
fill_image_mask,
clip_image_mask,
pop_clip,
begin_mask,
end_mask,
begin_group,
end_group,
begin_tile,
end_tile,
render_flags,
set_default_colorspaces,
begin_layer,
end_layer,
begin_structure,
end_structure,
begin_metatext,
end_metatext,
d1_rect,
container_len,
container_cap,
container,
..
} = &mut (*dev_ptr).base;
*drop_device = Some(Self::drop_device_fn);
*fill_path = Some(Self::fill_path_fn);
@ -306,7 +283,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
}
}
pub(crate) fn get(&self) -> &T {
unsafe { &(*self.ptr.cast::<DeviceStruct<T>>()).value }
unsafe { &(*self.dev.cast::<DeviceStruct<T>>()).value }
}
unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) {
unsafe {
@ -319,10 +296,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
path: *const fz_path,
even_odd: c_int,
cmt: fz_matrix,
color_space: *mut fz_colorspace,
color: *const f32,
alpha: f32,
color_params: fz_color_params,
_color_space: *mut fz_colorspace,
_color: *const f32,
_alpha: f32,
_color_params: fz_color_params,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
@ -343,12 +320,12 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context,
dev: *mut fz_device,
path: *const fz_path,
stroke_state: *const fz_stroke_state,
_stroke_state: *const fz_stroke_state,
cmt: fz_matrix,
color_space: *mut fz_colorspace,
color: *const f32,
alpha: f32,
color_params: fz_color_params,
_color_space: *mut fz_colorspace,
_color: *const f32,
_alpha: f32,
_color_params: fz_color_params,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
@ -392,7 +369,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context,
dev: *mut fz_device,
path: *const fz_path,
stroke_state: *const fz_stroke_state,
_stroke_state: *const fz_stroke_state,
cmt: fz_matrix,
scissor: fz_rect,
) {
@ -416,10 +393,10 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
dev: *mut fz_device,
text: *const fz_text,
cmt: fz_matrix,
color_space: *mut fz_colorspace,
color: *const f32,
alpha: f32,
color_params: fz_color_params,
_color_space: *mut fz_colorspace,
_color: *const f32,
_alpha: f32,
_color_params: fz_color_params,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
@ -439,12 +416,12 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context,
dev: *mut fz_device,
text: *const fz_text,
stroke_state: *const fz_stroke_state,
_stroke_state: *const fz_stroke_state,
cmt: fz_matrix,
color_space: *mut fz_colorspace,
color: *const f32,
alpha: f32,
color_params: fz_color_params,
_color_space: *mut fz_colorspace,
_color: *const f32,
_alpha: f32,
_color_params: fz_color_params,
) {
let Some(ctx) = NonNull::new(ctx) else {
return;
@ -486,7 +463,7 @@ impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
ctx: *mut fz_context,
dev: *mut fz_device,
text: *const fz_text,
stroke_state: *const fz_stroke_state,
_stroke_state: *const fz_stroke_state,
cmt: fz_matrix,
scissor: fz_rect,
) {
@ -716,6 +693,77 @@ impl<'ctx> Drop for Text<'ctx> {
}
}
impl<'ctx> Text<'ctx> {
    /// Returns an iterator over this text object's spans.
    ///
    /// Iteration starts at the `head` pointer of the underlying `fz_text`;
    /// a null `head` produces an iterator that yields nothing.
    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
        // SAFETY: NOTE(review): assumes `self.ptr` points at a live `fz_text`
        // for as long as `self` exists -- the constructor is not visible here.
        let head = unsafe { (*self.ptr).head };
        let first_span = NonNull::new(head);
        TextSpanIter {
            ptr: first_span,
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
}
/// Iterator over the spans of a [`Text`], produced by `Text::spans`.
///
/// Holds the next span to yield (or `None` once exhausted) and follows each
/// span's `next` pointer to advance.
#[derive(Clone)]
pub(crate) struct TextSpanIter<'a, 'ctx> {
    ptr: Option<NonNull<fz_text_span>>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}

impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
    type Item = TextSpanRef<'a, 'ctx>;

    /// Yields the current span and moves on to the one its `next` field points at.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.ptr.take()?;
        // SAFETY: NOTE(review): assumes every non-null pointer in the span
        // chain refers to a span that stays valid for `'a` -- confirm against
        // how `Text` owns its `fz_text`.
        let span: &'a fz_text_span = unsafe { current.as_ref() };
        // A null `next` becomes `None`, which ends the iteration on the following call.
        self.ptr = NonNull::new(span.next);
        Some(TextSpanRef {
            ptr: span,
            ctx: self.ctx,
            _phantom: PhantomData,
        })
    }
}
/// Borrowed view of one `fz_text_span`, paired with the context handle that
/// the iterator it came from carries.
#[derive(Copy, Clone)]
pub(crate) struct TextSpanRef<'a, 'ctx> {
    ptr: &'a fz_text_span,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}

impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
    /// Returns the underlying raw span structure.
    pub(crate) fn get(self) -> &'a fz_text_span {
        self.ptr
    }

    /// Returns a borrowed view of the font this span's `font` field points at.
    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
        FontRef {
            // SAFETY: NOTE(review): relies on `font` being non-null and valid
            // for `'a`; this is not checked here -- confirm against MuPDF.
            ptr: unsafe { &*self.ptr.font },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }

    /// Returns the span's items as a slice.
    ///
    /// Yields an empty slice when the span reports no items, when its `len`
    /// is negative, or when its `items` pointer is null.
    pub(crate) fn items(self) -> &'a [fz_text_item] {
        let data = self.ptr.items;
        // `len` comes from C as a signed integer; a plain `as usize` cast
        // would turn a negative value into an enormous slice length.
        let len = usize::try_from(self.ptr.len).unwrap_or(0);
        if len == 0 || data.is_null() {
            return &[];
        }
        // SAFETY: `data` is non-null and `len` is a positive count (both
        // checked above). NOTE(review): assumes `items` points at `len`
        // initialized elements that stay valid for `'a`.
        unsafe { std::slice::from_raw_parts(data, len) }
    }
}
/// Borrowed view of an `fz_font`, as handed out by `TextSpanRef::font`.
///
/// Carries the same context handle and borrow lifetime as the span it was
/// obtained from.
#[derive(Clone, Copy)]
pub(crate) struct FontRef<'a, 'ctx> {
ptr: &'a fz_font,
ctx: ContextRef<'ctx>,
_phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> FontRef<'a, 'ctx> {
/// Returns the underlying raw font structure.
pub(crate) fn get(self) -> &'a fz_font {
self.ptr
}
}
/// Safe wrapper around `fz_transform_point`: passes `point` and `m` through
/// by value and returns the resulting point.
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
// SAFETY: both arguments are passed by value, so no pointers cross the FFI
// boundary. NOTE(review): presumably a pure computation -- confirm in MuPDF.
unsafe { fz_transform_point(point, m) }
}